annotate src/libid3tag-0.15.1b/utf16.c @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents 545efbb81310
children
rev   line source
cannam@85 1 /*
cannam@85 2 * libid3tag - ID3 tag manipulation library
cannam@85 3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
cannam@85 4 *
cannam@85 5 * This program is free software; you can redistribute it and/or modify
cannam@85 6 * it under the terms of the GNU General Public License as published by
cannam@85 7 * the Free Software Foundation; either version 2 of the License, or
cannam@85 8 * (at your option) any later version.
cannam@85 9 *
cannam@85 10 * This program is distributed in the hope that it will be useful,
cannam@85 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cannam@85 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cannam@85 13 * GNU General Public License for more details.
cannam@85 14 *
cannam@85 15 * You should have received a copy of the GNU General Public License
cannam@85 16 * along with this program; if not, write to the Free Software
cannam@85 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
cannam@85 18 *
cannam@85 19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $
cannam@85 20 */
cannam@85 21
cannam@85 22 # ifdef HAVE_CONFIG_H
cannam@85 23 # include "config.h"
cannam@85 24 # endif
cannam@85 25
cannam@85 26 # include "global.h"
cannam@85 27
cannam@85 28 # include <stdlib.h>
cannam@85 29
cannam@85 30 # include "id3tag.h"
cannam@85 31 # include "utf16.h"
cannam@85 32 # include "ucs4.h"
cannam@85 33
cannam@85 34 /*
cannam@85 35 * NAME: utf16->length()
cannam@85 36 * DESCRIPTION: return the number of ucs4 chars represented by a utf16 string
cannam@85 37 */
cannam@85 38 id3_length_t id3_utf16_length(id3_utf16_t const *utf16)
cannam@85 39 {
cannam@85 40 id3_length_t length = 0;
cannam@85 41
cannam@85 42 while (*utf16) {
cannam@85 43 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff)
cannam@85 44 ++length;
cannam@85 45 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
cannam@85 46 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
cannam@85 47 ++length;
cannam@85 48 ++utf16;
cannam@85 49 }
cannam@85 50
cannam@85 51 ++utf16;
cannam@85 52 }
cannam@85 53
cannam@85 54 return length;
cannam@85 55 }
cannam@85 56
cannam@85 57 /*
cannam@85 58 * NAME: utf16->size()
cannam@85 59 * DESCRIPTION: return the encoding size of a utf16 string
cannam@85 60 */
cannam@85 61 id3_length_t id3_utf16_size(id3_utf16_t const *utf16)
cannam@85 62 {
cannam@85 63 id3_utf16_t const *ptr = utf16;
cannam@85 64
cannam@85 65 while (*ptr)
cannam@85 66 ++ptr;
cannam@85 67
cannam@85 68 return ptr - utf16 + 1;
cannam@85 69 }
cannam@85 70
cannam@85 71 /*
cannam@85 72 * NAME: utf16->ucs4duplicate()
cannam@85 73 * DESCRIPTION: duplicate and decode a utf16 string into ucs4
cannam@85 74 */
cannam@85 75 id3_ucs4_t *id3_utf16_ucs4duplicate(id3_utf16_t const *utf16)
cannam@85 76 {
cannam@85 77 id3_ucs4_t *ucs4;
cannam@85 78
cannam@85 79 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
cannam@85 80 if (ucs4)
cannam@85 81 id3_utf16_decode(utf16, ucs4);
cannam@85 82
cannam@85 83 return release(ucs4);
cannam@85 84 }
cannam@85 85
cannam@85 86 /*
cannam@85 87 * NAME: utf16->decodechar()
cannam@85 88 * DESCRIPTION: decode a series of utf16 chars into a single ucs4 char
cannam@85 89 */
cannam@85 90 id3_length_t id3_utf16_decodechar(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
cannam@85 91 {
cannam@85 92 id3_utf16_t const *start = utf16;
cannam@85 93
cannam@85 94 while (1) {
cannam@85 95 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) {
cannam@85 96 *ucs4 = utf16[0];
cannam@85 97 return utf16 - start + 1;
cannam@85 98 }
cannam@85 99 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
cannam@85 100 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
cannam@85 101 *ucs4 = (((utf16[0] & 0x03ffL) << 10) |
cannam@85 102 ((utf16[1] & 0x03ffL) << 0)) + 0x00010000L;
cannam@85 103 return utf16 - start + 2;
cannam@85 104 }
cannam@85 105
cannam@85 106 ++utf16;
cannam@85 107 }
cannam@85 108 }
cannam@85 109
cannam@85 110 /*
cannam@85 111 * NAME: utf16->encodechar()
cannam@85 112 * DESCRIPTION: encode a single ucs4 char into a series of up to 2 utf16 chars
cannam@85 113 */
cannam@85 114 id3_length_t id3_utf16_encodechar(id3_utf16_t *utf16, id3_ucs4_t ucs4)
cannam@85 115 {
cannam@85 116 if (ucs4 < 0x00010000L) {
cannam@85 117 utf16[0] = ucs4;
cannam@85 118
cannam@85 119 return 1;
cannam@85 120 }
cannam@85 121 else if (ucs4 < 0x00110000L) {
cannam@85 122 ucs4 -= 0x00010000L;
cannam@85 123
cannam@85 124 utf16[0] = ((ucs4 >> 10) & 0x3ff) | 0xd800;
cannam@85 125 utf16[1] = ((ucs4 >> 0) & 0x3ff) | 0xdc00;
cannam@85 126
cannam@85 127 return 2;
cannam@85 128 }
cannam@85 129
cannam@85 130 /* default */
cannam@85 131
cannam@85 132 return id3_utf16_encodechar(utf16, ID3_UCS4_REPLACEMENTCHAR);
cannam@85 133 }
cannam@85 134
cannam@85 135 /*
cannam@85 136 * NAME: utf16->decode()
cannam@85 137 * DESCRIPTION: decode a complete utf16 string into a ucs4 string
cannam@85 138 */
cannam@85 139 void id3_utf16_decode(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
cannam@85 140 {
cannam@85 141 do
cannam@85 142 utf16 += id3_utf16_decodechar(utf16, ucs4);
cannam@85 143 while (*ucs4++);
cannam@85 144 }
cannam@85 145
cannam@85 146 /*
cannam@85 147 * NAME: utf16->encode()
cannam@85 148 * DESCRIPTION: encode a complete ucs4 string into a utf16 string
cannam@85 149 */
cannam@85 150 void id3_utf16_encode(id3_utf16_t *utf16, id3_ucs4_t const *ucs4)
cannam@85 151 {
cannam@85 152 do
cannam@85 153 utf16 += id3_utf16_encodechar(utf16, *ucs4);
cannam@85 154 while (*ucs4++);
cannam@85 155 }
cannam@85 156
cannam@85 157 /*
cannam@85 158 * NAME: utf16->put()
cannam@85 159 * DESCRIPTION: serialize a single utf16 character
cannam@85 160 */
cannam@85 161 id3_length_t id3_utf16_put(id3_byte_t **ptr, id3_utf16_t utf16,
cannam@85 162 enum id3_utf16_byteorder byteorder)
cannam@85 163 {
cannam@85 164 if (ptr) {
cannam@85 165 switch (byteorder) {
cannam@85 166 default:
cannam@85 167 case ID3_UTF16_BYTEORDER_BE:
cannam@85 168 (*ptr)[0] = (utf16 >> 8) & 0xff;
cannam@85 169 (*ptr)[1] = (utf16 >> 0) & 0xff;
cannam@85 170 break;
cannam@85 171
cannam@85 172 case ID3_UTF16_BYTEORDER_LE:
cannam@85 173 (*ptr)[0] = (utf16 >> 0) & 0xff;
cannam@85 174 (*ptr)[1] = (utf16 >> 8) & 0xff;
cannam@85 175 break;
cannam@85 176 }
cannam@85 177
cannam@85 178 *ptr += 2;
cannam@85 179 }
cannam@85 180
cannam@85 181 return 2;
cannam@85 182 }
cannam@85 183
cannam@85 184 /*
cannam@85 185 * NAME: utf16->get()
cannam@85 186 * DESCRIPTION: deserialize a single utf16 character
cannam@85 187 */
cannam@85 188 id3_utf16_t id3_utf16_get(id3_byte_t const **ptr,
cannam@85 189 enum id3_utf16_byteorder byteorder)
cannam@85 190 {
cannam@85 191 id3_utf16_t utf16;
cannam@85 192
cannam@85 193 switch (byteorder) {
cannam@85 194 default:
cannam@85 195 case ID3_UTF16_BYTEORDER_BE:
cannam@85 196 utf16 =
cannam@85 197 ((*ptr)[0] << 8) |
cannam@85 198 ((*ptr)[1] << 0);
cannam@85 199 break;
cannam@85 200
cannam@85 201 case ID3_UTF16_BYTEORDER_LE:
cannam@85 202 utf16 =
cannam@85 203 ((*ptr)[0] << 0) |
cannam@85 204 ((*ptr)[1] << 8);
cannam@85 205 break;
cannam@85 206 }
cannam@85 207
cannam@85 208 *ptr += 2;
cannam@85 209
cannam@85 210 return utf16;
cannam@85 211 }
cannam@85 212
cannam@85 213 /*
cannam@85 214 * NAME: utf16->serialize()
cannam@85 215 * DESCRIPTION: serialize a ucs4 string using utf16 encoding
cannam@85 216 */
cannam@85 217 id3_length_t id3_utf16_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4,
cannam@85 218 enum id3_utf16_byteorder byteorder,
cannam@85 219 int terminate)
cannam@85 220 {
cannam@85 221 id3_length_t size = 0;
cannam@85 222 id3_utf16_t utf16[2], *out;
cannam@85 223
cannam@85 224 if (byteorder == ID3_UTF16_BYTEORDER_ANY)
cannam@85 225 size += id3_utf16_put(ptr, 0xfeff, byteorder);
cannam@85 226
cannam@85 227 while (*ucs4) {
cannam@85 228 switch (id3_utf16_encodechar(out = utf16, *ucs4++)) {
cannam@85 229 case 2: size += id3_utf16_put(ptr, *out++, byteorder);
cannam@85 230 case 1: size += id3_utf16_put(ptr, *out++, byteorder);
cannam@85 231 case 0: break;
cannam@85 232 }
cannam@85 233 }
cannam@85 234
cannam@85 235 if (terminate)
cannam@85 236 size += id3_utf16_put(ptr, 0, byteorder);
cannam@85 237
cannam@85 238 return size;
cannam@85 239 }
cannam@85 240
cannam@85 241 /*
cannam@85 242 * NAME: utf16->deserialize()
cannam@85 243 * DESCRIPTION: deserialize a ucs4 string using utf16 encoding
cannam@85 244 */
cannam@85 245 id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length,
cannam@85 246 enum id3_utf16_byteorder byteorder)
cannam@85 247 {
cannam@85 248 id3_byte_t const *end;
cannam@85 249 id3_utf16_t *utf16ptr, *utf16;
cannam@85 250 id3_ucs4_t *ucs4;
cannam@85 251
cannam@85 252 end = *ptr + (length & ~1);
cannam@85 253
cannam@85 254 utf16 = malloc((length / 2 + 1) * sizeof(*utf16));
cannam@85 255 if (utf16 == 0)
cannam@85 256 return 0;
cannam@85 257
cannam@85 258 if (byteorder == ID3_UTF16_BYTEORDER_ANY && end - *ptr > 0) {
cannam@85 259 switch (((*ptr)[0] << 8) |
cannam@85 260 ((*ptr)[1] << 0)) {
cannam@85 261 case 0xfeff:
cannam@85 262 byteorder = ID3_UTF16_BYTEORDER_BE;
cannam@85 263 *ptr += 2;
cannam@85 264 break;
cannam@85 265
cannam@85 266 case 0xfffe:
cannam@85 267 byteorder = ID3_UTF16_BYTEORDER_LE;
cannam@85 268 *ptr += 2;
cannam@85 269 break;
cannam@85 270 }
cannam@85 271 }
cannam@85 272
cannam@85 273 utf16ptr = utf16;
cannam@85 274 while (end - *ptr > 0 && (*utf16ptr = id3_utf16_get(ptr, byteorder)))
cannam@85 275 ++utf16ptr;
cannam@85 276
cannam@85 277 *utf16ptr = 0;
cannam@85 278
cannam@85 279 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
cannam@85 280 if (ucs4)
cannam@85 281 id3_utf16_decode(utf16, ucs4);
cannam@85 282
cannam@85 283 free(utf16);
cannam@85 284
cannam@85 285 return ucs4;
cannam@85 286 }