annotate src/libid3tag-0.15.1b/utf16.c @ 23:619f715526df sv_v2.1

Update Vamp plugin SDK to 2.5
author Chris Cannam
date Thu, 09 May 2013 10:52:46 +0100
parents c7265573341e
children
rev   line source
Chris@0 1 /*
Chris@0 2 * libid3tag - ID3 tag manipulation library
Chris@0 3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
Chris@0 4 *
Chris@0 5 * This program is free software; you can redistribute it and/or modify
Chris@0 6 * it under the terms of the GNU General Public License as published by
Chris@0 7 * the Free Software Foundation; either version 2 of the License, or
Chris@0 8 * (at your option) any later version.
Chris@0 9 *
Chris@0 10 * This program is distributed in the hope that it will be useful,
Chris@0 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 13 * GNU General Public License for more details.
Chris@0 14 *
Chris@0 15 * You should have received a copy of the GNU General Public License
Chris@0 16 * along with this program; if not, write to the Free Software
Chris@0 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Chris@0 18 *
Chris@0 19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $
Chris@0 20 */
Chris@0 21
Chris@0 22 # ifdef HAVE_CONFIG_H
Chris@0 23 # include "config.h"
Chris@0 24 # endif
Chris@0 25
Chris@0 26 # include "global.h"
Chris@0 27
Chris@0 28 # include <stdlib.h>
Chris@0 29
Chris@0 30 # include "id3tag.h"
Chris@0 31 # include "utf16.h"
Chris@0 32 # include "ucs4.h"
Chris@0 33
Chris@0 34 /*
Chris@0 35 * NAME: utf16->length()
Chris@0 36 * DESCRIPTION: return the number of ucs4 chars represented by a utf16 string
Chris@0 37 */
Chris@0 38 id3_length_t id3_utf16_length(id3_utf16_t const *utf16)
Chris@0 39 {
Chris@0 40 id3_length_t length = 0;
Chris@0 41
Chris@0 42 while (*utf16) {
Chris@0 43 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff)
Chris@0 44 ++length;
Chris@0 45 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
Chris@0 46 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
Chris@0 47 ++length;
Chris@0 48 ++utf16;
Chris@0 49 }
Chris@0 50
Chris@0 51 ++utf16;
Chris@0 52 }
Chris@0 53
Chris@0 54 return length;
Chris@0 55 }
Chris@0 56
Chris@0 57 /*
Chris@0 58 * NAME: utf16->size()
Chris@0 59 * DESCRIPTION: return the encoding size of a utf16 string
Chris@0 60 */
Chris@0 61 id3_length_t id3_utf16_size(id3_utf16_t const *utf16)
Chris@0 62 {
Chris@0 63 id3_utf16_t const *ptr = utf16;
Chris@0 64
Chris@0 65 while (*ptr)
Chris@0 66 ++ptr;
Chris@0 67
Chris@0 68 return ptr - utf16 + 1;
Chris@0 69 }
Chris@0 70
Chris@0 71 /*
Chris@0 72 * NAME: utf16->ucs4duplicate()
Chris@0 73 * DESCRIPTION: duplicate and decode a utf16 string into ucs4
Chris@0 74 */
Chris@0 75 id3_ucs4_t *id3_utf16_ucs4duplicate(id3_utf16_t const *utf16)
Chris@0 76 {
Chris@0 77 id3_ucs4_t *ucs4;
Chris@0 78
Chris@0 79 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
Chris@0 80 if (ucs4)
Chris@0 81 id3_utf16_decode(utf16, ucs4);
Chris@0 82
Chris@0 83 return release(ucs4);
Chris@0 84 }
Chris@0 85
Chris@0 86 /*
Chris@0 87 * NAME: utf16->decodechar()
Chris@0 88 * DESCRIPTION: decode a series of utf16 chars into a single ucs4 char
Chris@0 89 */
Chris@0 90 id3_length_t id3_utf16_decodechar(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
Chris@0 91 {
Chris@0 92 id3_utf16_t const *start = utf16;
Chris@0 93
Chris@0 94 while (1) {
Chris@0 95 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) {
Chris@0 96 *ucs4 = utf16[0];
Chris@0 97 return utf16 - start + 1;
Chris@0 98 }
Chris@0 99 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
Chris@0 100 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
Chris@0 101 *ucs4 = (((utf16[0] & 0x03ffL) << 10) |
Chris@0 102 ((utf16[1] & 0x03ffL) << 0)) + 0x00010000L;
Chris@0 103 return utf16 - start + 2;
Chris@0 104 }
Chris@0 105
Chris@0 106 ++utf16;
Chris@0 107 }
Chris@0 108 }
Chris@0 109
Chris@0 110 /*
Chris@0 111 * NAME: utf16->encodechar()
Chris@0 112 * DESCRIPTION: encode a single ucs4 char into a series of up to 2 utf16 chars
Chris@0 113 */
Chris@0 114 id3_length_t id3_utf16_encodechar(id3_utf16_t *utf16, id3_ucs4_t ucs4)
Chris@0 115 {
Chris@0 116 if (ucs4 < 0x00010000L) {
Chris@0 117 utf16[0] = ucs4;
Chris@0 118
Chris@0 119 return 1;
Chris@0 120 }
Chris@0 121 else if (ucs4 < 0x00110000L) {
Chris@0 122 ucs4 -= 0x00010000L;
Chris@0 123
Chris@0 124 utf16[0] = ((ucs4 >> 10) & 0x3ff) | 0xd800;
Chris@0 125 utf16[1] = ((ucs4 >> 0) & 0x3ff) | 0xdc00;
Chris@0 126
Chris@0 127 return 2;
Chris@0 128 }
Chris@0 129
Chris@0 130 /* default */
Chris@0 131
Chris@0 132 return id3_utf16_encodechar(utf16, ID3_UCS4_REPLACEMENTCHAR);
Chris@0 133 }
Chris@0 134
Chris@0 135 /*
Chris@0 136 * NAME: utf16->decode()
Chris@0 137 * DESCRIPTION: decode a complete utf16 string into a ucs4 string
Chris@0 138 */
Chris@0 139 void id3_utf16_decode(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
Chris@0 140 {
Chris@0 141 do
Chris@0 142 utf16 += id3_utf16_decodechar(utf16, ucs4);
Chris@0 143 while (*ucs4++);
Chris@0 144 }
Chris@0 145
Chris@0 146 /*
Chris@0 147 * NAME: utf16->encode()
Chris@0 148 * DESCRIPTION: encode a complete ucs4 string into a utf16 string
Chris@0 149 */
Chris@0 150 void id3_utf16_encode(id3_utf16_t *utf16, id3_ucs4_t const *ucs4)
Chris@0 151 {
Chris@0 152 do
Chris@0 153 utf16 += id3_utf16_encodechar(utf16, *ucs4);
Chris@0 154 while (*ucs4++);
Chris@0 155 }
Chris@0 156
Chris@0 157 /*
Chris@0 158 * NAME: utf16->put()
Chris@0 159 * DESCRIPTION: serialize a single utf16 character
Chris@0 160 */
Chris@0 161 id3_length_t id3_utf16_put(id3_byte_t **ptr, id3_utf16_t utf16,
Chris@0 162 enum id3_utf16_byteorder byteorder)
Chris@0 163 {
Chris@0 164 if (ptr) {
Chris@0 165 switch (byteorder) {
Chris@0 166 default:
Chris@0 167 case ID3_UTF16_BYTEORDER_BE:
Chris@0 168 (*ptr)[0] = (utf16 >> 8) & 0xff;
Chris@0 169 (*ptr)[1] = (utf16 >> 0) & 0xff;
Chris@0 170 break;
Chris@0 171
Chris@0 172 case ID3_UTF16_BYTEORDER_LE:
Chris@0 173 (*ptr)[0] = (utf16 >> 0) & 0xff;
Chris@0 174 (*ptr)[1] = (utf16 >> 8) & 0xff;
Chris@0 175 break;
Chris@0 176 }
Chris@0 177
Chris@0 178 *ptr += 2;
Chris@0 179 }
Chris@0 180
Chris@0 181 return 2;
Chris@0 182 }
Chris@0 183
Chris@0 184 /*
Chris@0 185 * NAME: utf16->get()
Chris@0 186 * DESCRIPTION: deserialize a single utf16 character
Chris@0 187 */
Chris@0 188 id3_utf16_t id3_utf16_get(id3_byte_t const **ptr,
Chris@0 189 enum id3_utf16_byteorder byteorder)
Chris@0 190 {
Chris@0 191 id3_utf16_t utf16;
Chris@0 192
Chris@0 193 switch (byteorder) {
Chris@0 194 default:
Chris@0 195 case ID3_UTF16_BYTEORDER_BE:
Chris@0 196 utf16 =
Chris@0 197 ((*ptr)[0] << 8) |
Chris@0 198 ((*ptr)[1] << 0);
Chris@0 199 break;
Chris@0 200
Chris@0 201 case ID3_UTF16_BYTEORDER_LE:
Chris@0 202 utf16 =
Chris@0 203 ((*ptr)[0] << 0) |
Chris@0 204 ((*ptr)[1] << 8);
Chris@0 205 break;
Chris@0 206 }
Chris@0 207
Chris@0 208 *ptr += 2;
Chris@0 209
Chris@0 210 return utf16;
Chris@0 211 }
Chris@0 212
Chris@0 213 /*
Chris@0 214 * NAME: utf16->serialize()
Chris@0 215 * DESCRIPTION: serialize a ucs4 string using utf16 encoding
Chris@0 216 */
Chris@0 217 id3_length_t id3_utf16_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4,
Chris@0 218 enum id3_utf16_byteorder byteorder,
Chris@0 219 int terminate)
Chris@0 220 {
Chris@0 221 id3_length_t size = 0;
Chris@0 222 id3_utf16_t utf16[2], *out;
Chris@0 223
Chris@0 224 if (byteorder == ID3_UTF16_BYTEORDER_ANY)
Chris@0 225 size += id3_utf16_put(ptr, 0xfeff, byteorder);
Chris@0 226
Chris@0 227 while (*ucs4) {
Chris@0 228 switch (id3_utf16_encodechar(out = utf16, *ucs4++)) {
Chris@0 229 case 2: size += id3_utf16_put(ptr, *out++, byteorder);
Chris@0 230 case 1: size += id3_utf16_put(ptr, *out++, byteorder);
Chris@0 231 case 0: break;
Chris@0 232 }
Chris@0 233 }
Chris@0 234
Chris@0 235 if (terminate)
Chris@0 236 size += id3_utf16_put(ptr, 0, byteorder);
Chris@0 237
Chris@0 238 return size;
Chris@0 239 }
Chris@0 240
Chris@0 241 /*
Chris@0 242 * NAME: utf16->deserialize()
Chris@0 243 * DESCRIPTION: deserialize a ucs4 string using utf16 encoding
Chris@0 244 */
Chris@0 245 id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length,
Chris@0 246 enum id3_utf16_byteorder byteorder)
Chris@0 247 {
Chris@0 248 id3_byte_t const *end;
Chris@0 249 id3_utf16_t *utf16ptr, *utf16;
Chris@0 250 id3_ucs4_t *ucs4;
Chris@0 251
Chris@0 252 end = *ptr + (length & ~1);
Chris@0 253
Chris@0 254 utf16 = malloc((length / 2 + 1) * sizeof(*utf16));
Chris@0 255 if (utf16 == 0)
Chris@0 256 return 0;
Chris@0 257
Chris@0 258 if (byteorder == ID3_UTF16_BYTEORDER_ANY && end - *ptr > 0) {
Chris@0 259 switch (((*ptr)[0] << 8) |
Chris@0 260 ((*ptr)[1] << 0)) {
Chris@0 261 case 0xfeff:
Chris@0 262 byteorder = ID3_UTF16_BYTEORDER_BE;
Chris@0 263 *ptr += 2;
Chris@0 264 break;
Chris@0 265
Chris@0 266 case 0xfffe:
Chris@0 267 byteorder = ID3_UTF16_BYTEORDER_LE;
Chris@0 268 *ptr += 2;
Chris@0 269 break;
Chris@0 270 }
Chris@0 271 }
Chris@0 272
Chris@0 273 utf16ptr = utf16;
Chris@0 274 while (end - *ptr > 0 && (*utf16ptr = id3_utf16_get(ptr, byteorder)))
Chris@0 275 ++utf16ptr;
Chris@0 276
Chris@0 277 *utf16ptr = 0;
Chris@0 278
Chris@0 279 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
Chris@0 280 if (ucs4)
Chris@0 281 id3_utf16_decode(utf16, ucs4);
Chris@0 282
Chris@0 283 free(utf16);
Chris@0 284
Chris@0 285 return ucs4;
Chris@0 286 }