annotate src/libid3tag-0.15.1b/utf16.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents c7265573341e
children
rev   line source
Chris@0 1 /*
Chris@0 2 * libid3tag - ID3 tag manipulation library
Chris@0 3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
Chris@0 4 *
Chris@0 5 * This program is free software; you can redistribute it and/or modify
Chris@0 6 * it under the terms of the GNU General Public License as published by
Chris@0 7 * the Free Software Foundation; either version 2 of the License, or
Chris@0 8 * (at your option) any later version.
Chris@0 9 *
Chris@0 10 * This program is distributed in the hope that it will be useful,
Chris@0 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 13 * GNU General Public License for more details.
Chris@0 14 *
Chris@0 15 * You should have received a copy of the GNU General Public License
Chris@0 16 * along with this program; if not, write to the Free Software
Chris@0 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Chris@0 18 *
Chris@0 19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $
Chris@0 20 */
Chris@0 21
Chris@0 22 # ifdef HAVE_CONFIG_H
Chris@0 23 # include "config.h"
Chris@0 24 # endif
Chris@0 25
Chris@0 26 # include "global.h"
Chris@0 27
Chris@0 28 # include <stdlib.h>
Chris@0 29
Chris@0 30 # include "id3tag.h"
Chris@0 31 # include "utf16.h"
Chris@0 32 # include "ucs4.h"
Chris@0 33
Chris@0 34 /*
Chris@0 35 * NAME: utf16->length()
Chris@0 36 * DESCRIPTION: return the number of ucs4 chars represented by a utf16 string
Chris@0 37 */
Chris@0 38 id3_length_t id3_utf16_length(id3_utf16_t const *utf16)
Chris@0 39 {
Chris@0 40 id3_length_t length = 0;
Chris@0 41
Chris@0 42 while (*utf16) {
Chris@0 43 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff)
Chris@0 44 ++length;
Chris@0 45 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
Chris@0 46 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
Chris@0 47 ++length;
Chris@0 48 ++utf16;
Chris@0 49 }
Chris@0 50
Chris@0 51 ++utf16;
Chris@0 52 }
Chris@0 53
Chris@0 54 return length;
Chris@0 55 }
Chris@0 56
Chris@0 57 /*
Chris@0 58 * NAME: utf16->size()
Chris@0 59 * DESCRIPTION: return the encoding size of a utf16 string
Chris@0 60 */
Chris@0 61 id3_length_t id3_utf16_size(id3_utf16_t const *utf16)
Chris@0 62 {
Chris@0 63 id3_utf16_t const *ptr = utf16;
Chris@0 64
Chris@0 65 while (*ptr)
Chris@0 66 ++ptr;
Chris@0 67
Chris@0 68 return ptr - utf16 + 1;
Chris@0 69 }
Chris@0 70
Chris@0 71 /*
Chris@0 72 * NAME: utf16->ucs4duplicate()
Chris@0 73 * DESCRIPTION: duplicate and decode a utf16 string into ucs4
Chris@0 74 */
Chris@0 75 id3_ucs4_t *id3_utf16_ucs4duplicate(id3_utf16_t const *utf16)
Chris@0 76 {
Chris@0 77 id3_ucs4_t *ucs4;
Chris@0 78
Chris@0 79 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
Chris@0 80 if (ucs4)
Chris@0 81 id3_utf16_decode(utf16, ucs4);
Chris@0 82
Chris@0 83 return release(ucs4);
Chris@0 84 }
Chris@0 85
Chris@0 86 /*
Chris@0 87 * NAME: utf16->decodechar()
Chris@0 88 * DESCRIPTION: decode a series of utf16 chars into a single ucs4 char
Chris@0 89 */
Chris@0 90 id3_length_t id3_utf16_decodechar(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
Chris@0 91 {
Chris@0 92 id3_utf16_t const *start = utf16;
Chris@0 93
Chris@0 94 while (1) {
Chris@0 95 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) {
Chris@0 96 *ucs4 = utf16[0];
Chris@0 97 return utf16 - start + 1;
Chris@0 98 }
Chris@0 99 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
Chris@0 100 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
Chris@0 101 *ucs4 = (((utf16[0] & 0x03ffL) << 10) |
Chris@0 102 ((utf16[1] & 0x03ffL) << 0)) + 0x00010000L;
Chris@0 103 return utf16 - start + 2;
Chris@0 104 }
Chris@0 105
Chris@0 106 ++utf16;
Chris@0 107 }
Chris@0 108 }
Chris@0 109
Chris@0 110 /*
Chris@0 111 * NAME: utf16->encodechar()
Chris@0 112 * DESCRIPTION: encode a single ucs4 char into a series of up to 2 utf16 chars
Chris@0 113 */
Chris@0 114 id3_length_t id3_utf16_encodechar(id3_utf16_t *utf16, id3_ucs4_t ucs4)
Chris@0 115 {
Chris@0 116 if (ucs4 < 0x00010000L) {
Chris@0 117 utf16[0] = ucs4;
Chris@0 118
Chris@0 119 return 1;
Chris@0 120 }
Chris@0 121 else if (ucs4 < 0x00110000L) {
Chris@0 122 ucs4 -= 0x00010000L;
Chris@0 123
Chris@0 124 utf16[0] = ((ucs4 >> 10) & 0x3ff) | 0xd800;
Chris@0 125 utf16[1] = ((ucs4 >> 0) & 0x3ff) | 0xdc00;
Chris@0 126
Chris@0 127 return 2;
Chris@0 128 }
Chris@0 129
Chris@0 130 /* default */
Chris@0 131
Chris@0 132 return id3_utf16_encodechar(utf16, ID3_UCS4_REPLACEMENTCHAR);
Chris@0 133 }
Chris@0 134
Chris@0 135 /*
Chris@0 136 * NAME: utf16->decode()
Chris@0 137 * DESCRIPTION: decode a complete utf16 string into a ucs4 string
Chris@0 138 */
Chris@0 139 void id3_utf16_decode(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
Chris@0 140 {
Chris@0 141 do
Chris@0 142 utf16 += id3_utf16_decodechar(utf16, ucs4);
Chris@0 143 while (*ucs4++);
Chris@0 144 }
Chris@0 145
Chris@0 146 /*
Chris@0 147 * NAME: utf16->encode()
Chris@0 148 * DESCRIPTION: encode a complete ucs4 string into a utf16 string
Chris@0 149 */
Chris@0 150 void id3_utf16_encode(id3_utf16_t *utf16, id3_ucs4_t const *ucs4)
Chris@0 151 {
Chris@0 152 do
Chris@0 153 utf16 += id3_utf16_encodechar(utf16, *ucs4);
Chris@0 154 while (*ucs4++);
Chris@0 155 }
Chris@0 156
Chris@0 157 /*
Chris@0 158 * NAME: utf16->put()
Chris@0 159 * DESCRIPTION: serialize a single utf16 character
Chris@0 160 */
Chris@0 161 id3_length_t id3_utf16_put(id3_byte_t **ptr, id3_utf16_t utf16,
Chris@0 162 enum id3_utf16_byteorder byteorder)
Chris@0 163 {
Chris@0 164 if (ptr) {
Chris@0 165 switch (byteorder) {
Chris@0 166 default:
Chris@0 167 case ID3_UTF16_BYTEORDER_BE:
Chris@0 168 (*ptr)[0] = (utf16 >> 8) & 0xff;
Chris@0 169 (*ptr)[1] = (utf16 >> 0) & 0xff;
Chris@0 170 break;
Chris@0 171
Chris@0 172 case ID3_UTF16_BYTEORDER_LE:
Chris@0 173 (*ptr)[0] = (utf16 >> 0) & 0xff;
Chris@0 174 (*ptr)[1] = (utf16 >> 8) & 0xff;
Chris@0 175 break;
Chris@0 176 }
Chris@0 177
Chris@0 178 *ptr += 2;
Chris@0 179 }
Chris@0 180
Chris@0 181 return 2;
Chris@0 182 }
Chris@0 183
Chris@0 184 /*
Chris@0 185 * NAME: utf16->get()
Chris@0 186 * DESCRIPTION: deserialize a single utf16 character
Chris@0 187 */
Chris@0 188 id3_utf16_t id3_utf16_get(id3_byte_t const **ptr,
Chris@0 189 enum id3_utf16_byteorder byteorder)
Chris@0 190 {
Chris@0 191 id3_utf16_t utf16;
Chris@0 192
Chris@0 193 switch (byteorder) {
Chris@0 194 default:
Chris@0 195 case ID3_UTF16_BYTEORDER_BE:
Chris@0 196 utf16 =
Chris@0 197 ((*ptr)[0] << 8) |
Chris@0 198 ((*ptr)[1] << 0);
Chris@0 199 break;
Chris@0 200
Chris@0 201 case ID3_UTF16_BYTEORDER_LE:
Chris@0 202 utf16 =
Chris@0 203 ((*ptr)[0] << 0) |
Chris@0 204 ((*ptr)[1] << 8);
Chris@0 205 break;
Chris@0 206 }
Chris@0 207
Chris@0 208 *ptr += 2;
Chris@0 209
Chris@0 210 return utf16;
Chris@0 211 }
Chris@0 212
Chris@0 213 /*
Chris@0 214 * NAME: utf16->serialize()
Chris@0 215 * DESCRIPTION: serialize a ucs4 string using utf16 encoding
Chris@0 216 */
Chris@0 217 id3_length_t id3_utf16_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4,
Chris@0 218 enum id3_utf16_byteorder byteorder,
Chris@0 219 int terminate)
Chris@0 220 {
Chris@0 221 id3_length_t size = 0;
Chris@0 222 id3_utf16_t utf16[2], *out;
Chris@0 223
Chris@0 224 if (byteorder == ID3_UTF16_BYTEORDER_ANY)
Chris@0 225 size += id3_utf16_put(ptr, 0xfeff, byteorder);
Chris@0 226
Chris@0 227 while (*ucs4) {
Chris@0 228 switch (id3_utf16_encodechar(out = utf16, *ucs4++)) {
Chris@0 229 case 2: size += id3_utf16_put(ptr, *out++, byteorder);
Chris@0 230 case 1: size += id3_utf16_put(ptr, *out++, byteorder);
Chris@0 231 case 0: break;
Chris@0 232 }
Chris@0 233 }
Chris@0 234
Chris@0 235 if (terminate)
Chris@0 236 size += id3_utf16_put(ptr, 0, byteorder);
Chris@0 237
Chris@0 238 return size;
Chris@0 239 }
Chris@0 240
Chris@0 241 /*
Chris@0 242 * NAME: utf16->deserialize()
Chris@0 243 * DESCRIPTION: deserialize a ucs4 string using utf16 encoding
Chris@0 244 */
Chris@0 245 id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length,
Chris@0 246 enum id3_utf16_byteorder byteorder)
Chris@0 247 {
Chris@0 248 id3_byte_t const *end;
Chris@0 249 id3_utf16_t *utf16ptr, *utf16;
Chris@0 250 id3_ucs4_t *ucs4;
Chris@0 251
Chris@0 252 end = *ptr + (length & ~1);
Chris@0 253
Chris@0 254 utf16 = malloc((length / 2 + 1) * sizeof(*utf16));
Chris@0 255 if (utf16 == 0)
Chris@0 256 return 0;
Chris@0 257
Chris@0 258 if (byteorder == ID3_UTF16_BYTEORDER_ANY && end - *ptr > 0) {
Chris@0 259 switch (((*ptr)[0] << 8) |
Chris@0 260 ((*ptr)[1] << 0)) {
Chris@0 261 case 0xfeff:
Chris@0 262 byteorder = ID3_UTF16_BYTEORDER_BE;
Chris@0 263 *ptr += 2;
Chris@0 264 break;
Chris@0 265
Chris@0 266 case 0xfffe:
Chris@0 267 byteorder = ID3_UTF16_BYTEORDER_LE;
Chris@0 268 *ptr += 2;
Chris@0 269 break;
Chris@0 270 }
Chris@0 271 }
Chris@0 272
Chris@0 273 utf16ptr = utf16;
Chris@0 274 while (end - *ptr > 0 && (*utf16ptr = id3_utf16_get(ptr, byteorder)))
Chris@0 275 ++utf16ptr;
Chris@0 276
Chris@0 277 *utf16ptr = 0;
Chris@0 278
Chris@0 279 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
Chris@0 280 if (ucs4)
Chris@0 281 id3_utf16_decode(utf16, ucs4);
Chris@0 282
Chris@0 283 free(utf16);
Chris@0 284
Chris@0 285 return ucs4;
Chris@0 286 }