Mercurial > hg > sv-dependency-builds
comparison src/libid3tag-0.15.1b/utf16.c @ 0:c7265573341e
Import initial set of sources
author | Chris Cannam |
---|---|
date | Mon, 18 Mar 2013 14:12:14 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7265573341e |
---|---|
1 /* | |
2 * libid3tag - ID3 tag manipulation library | |
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc. | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 * | |
19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $ | |
20 */ | |
21 | |
22 # ifdef HAVE_CONFIG_H | |
23 # include "config.h" | |
24 # endif | |
25 | |
26 # include "global.h" | |
27 | |
28 # include <stdlib.h> | |
29 | |
30 # include "id3tag.h" | |
31 # include "utf16.h" | |
32 # include "ucs4.h" | |
33 | |
34 /* | |
35 * NAME: utf16->length() | |
36 * DESCRIPTION: return the number of ucs4 chars represented by a utf16 string | |
37 */ | |
38 id3_length_t id3_utf16_length(id3_utf16_t const *utf16) | |
39 { | |
40 id3_length_t length = 0; | |
41 | |
42 while (*utf16) { | |
43 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) | |
44 ++length; | |
45 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff && | |
46 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) { | |
47 ++length; | |
48 ++utf16; | |
49 } | |
50 | |
51 ++utf16; | |
52 } | |
53 | |
54 return length; | |
55 } | |
56 | |
57 /* | |
58 * NAME: utf16->size() | |
59 * DESCRIPTION: return the encoding size of a utf16 string | |
60 */ | |
61 id3_length_t id3_utf16_size(id3_utf16_t const *utf16) | |
62 { | |
63 id3_utf16_t const *ptr = utf16; | |
64 | |
65 while (*ptr) | |
66 ++ptr; | |
67 | |
68 return ptr - utf16 + 1; | |
69 } | |
70 | |
71 /* | |
72 * NAME: utf16->ucs4duplicate() | |
73 * DESCRIPTION: duplicate and decode a utf16 string into ucs4 | |
74 */ | |
75 id3_ucs4_t *id3_utf16_ucs4duplicate(id3_utf16_t const *utf16) | |
76 { | |
77 id3_ucs4_t *ucs4; | |
78 | |
79 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4)); | |
80 if (ucs4) | |
81 id3_utf16_decode(utf16, ucs4); | |
82 | |
83 return release(ucs4); | |
84 } | |
85 | |
86 /* | |
87 * NAME: utf16->decodechar() | |
88 * DESCRIPTION: decode a series of utf16 chars into a single ucs4 char | |
89 */ | |
90 id3_length_t id3_utf16_decodechar(id3_utf16_t const *utf16, id3_ucs4_t *ucs4) | |
91 { | |
92 id3_utf16_t const *start = utf16; | |
93 | |
94 while (1) { | |
95 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) { | |
96 *ucs4 = utf16[0]; | |
97 return utf16 - start + 1; | |
98 } | |
99 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff && | |
100 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) { | |
101 *ucs4 = (((utf16[0] & 0x03ffL) << 10) | | |
102 ((utf16[1] & 0x03ffL) << 0)) + 0x00010000L; | |
103 return utf16 - start + 2; | |
104 } | |
105 | |
106 ++utf16; | |
107 } | |
108 } | |
109 | |
110 /* | |
111 * NAME: utf16->encodechar() | |
112 * DESCRIPTION: encode a single ucs4 char into a series of up to 2 utf16 chars | |
113 */ | |
114 id3_length_t id3_utf16_encodechar(id3_utf16_t *utf16, id3_ucs4_t ucs4) | |
115 { | |
116 if (ucs4 < 0x00010000L) { | |
117 utf16[0] = ucs4; | |
118 | |
119 return 1; | |
120 } | |
121 else if (ucs4 < 0x00110000L) { | |
122 ucs4 -= 0x00010000L; | |
123 | |
124 utf16[0] = ((ucs4 >> 10) & 0x3ff) | 0xd800; | |
125 utf16[1] = ((ucs4 >> 0) & 0x3ff) | 0xdc00; | |
126 | |
127 return 2; | |
128 } | |
129 | |
130 /* default */ | |
131 | |
132 return id3_utf16_encodechar(utf16, ID3_UCS4_REPLACEMENTCHAR); | |
133 } | |
134 | |
135 /* | |
136 * NAME: utf16->decode() | |
137 * DESCRIPTION: decode a complete utf16 string into a ucs4 string | |
138 */ | |
139 void id3_utf16_decode(id3_utf16_t const *utf16, id3_ucs4_t *ucs4) | |
140 { | |
141 do | |
142 utf16 += id3_utf16_decodechar(utf16, ucs4); | |
143 while (*ucs4++); | |
144 } | |
145 | |
146 /* | |
147 * NAME: utf16->encode() | |
148 * DESCRIPTION: encode a complete ucs4 string into a utf16 string | |
149 */ | |
150 void id3_utf16_encode(id3_utf16_t *utf16, id3_ucs4_t const *ucs4) | |
151 { | |
152 do | |
153 utf16 += id3_utf16_encodechar(utf16, *ucs4); | |
154 while (*ucs4++); | |
155 } | |
156 | |
157 /* | |
158 * NAME: utf16->put() | |
159 * DESCRIPTION: serialize a single utf16 character | |
160 */ | |
161 id3_length_t id3_utf16_put(id3_byte_t **ptr, id3_utf16_t utf16, | |
162 enum id3_utf16_byteorder byteorder) | |
163 { | |
164 if (ptr) { | |
165 switch (byteorder) { | |
166 default: | |
167 case ID3_UTF16_BYTEORDER_BE: | |
168 (*ptr)[0] = (utf16 >> 8) & 0xff; | |
169 (*ptr)[1] = (utf16 >> 0) & 0xff; | |
170 break; | |
171 | |
172 case ID3_UTF16_BYTEORDER_LE: | |
173 (*ptr)[0] = (utf16 >> 0) & 0xff; | |
174 (*ptr)[1] = (utf16 >> 8) & 0xff; | |
175 break; | |
176 } | |
177 | |
178 *ptr += 2; | |
179 } | |
180 | |
181 return 2; | |
182 } | |
183 | |
184 /* | |
185 * NAME: utf16->get() | |
186 * DESCRIPTION: deserialize a single utf16 character | |
187 */ | |
188 id3_utf16_t id3_utf16_get(id3_byte_t const **ptr, | |
189 enum id3_utf16_byteorder byteorder) | |
190 { | |
191 id3_utf16_t utf16; | |
192 | |
193 switch (byteorder) { | |
194 default: | |
195 case ID3_UTF16_BYTEORDER_BE: | |
196 utf16 = | |
197 ((*ptr)[0] << 8) | | |
198 ((*ptr)[1] << 0); | |
199 break; | |
200 | |
201 case ID3_UTF16_BYTEORDER_LE: | |
202 utf16 = | |
203 ((*ptr)[0] << 0) | | |
204 ((*ptr)[1] << 8); | |
205 break; | |
206 } | |
207 | |
208 *ptr += 2; | |
209 | |
210 return utf16; | |
211 } | |
212 | |
213 /* | |
214 * NAME: utf16->serialize() | |
215 * DESCRIPTION: serialize a ucs4 string using utf16 encoding | |
216 */ | |
217 id3_length_t id3_utf16_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4, | |
218 enum id3_utf16_byteorder byteorder, | |
219 int terminate) | |
220 { | |
221 id3_length_t size = 0; | |
222 id3_utf16_t utf16[2], *out; | |
223 | |
224 if (byteorder == ID3_UTF16_BYTEORDER_ANY) | |
225 size += id3_utf16_put(ptr, 0xfeff, byteorder); | |
226 | |
227 while (*ucs4) { | |
228 switch (id3_utf16_encodechar(out = utf16, *ucs4++)) { | |
229 case 2: size += id3_utf16_put(ptr, *out++, byteorder); | |
230 case 1: size += id3_utf16_put(ptr, *out++, byteorder); | |
231 case 0: break; | |
232 } | |
233 } | |
234 | |
235 if (terminate) | |
236 size += id3_utf16_put(ptr, 0, byteorder); | |
237 | |
238 return size; | |
239 } | |
240 | |
241 /* | |
242 * NAME: utf16->deserialize() | |
243 * DESCRIPTION: deserialize a ucs4 string using utf16 encoding | |
244 */ | |
245 id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length, | |
246 enum id3_utf16_byteorder byteorder) | |
247 { | |
248 id3_byte_t const *end; | |
249 id3_utf16_t *utf16ptr, *utf16; | |
250 id3_ucs4_t *ucs4; | |
251 | |
252 end = *ptr + (length & ~1); | |
253 | |
254 utf16 = malloc((length / 2 + 1) * sizeof(*utf16)); | |
255 if (utf16 == 0) | |
256 return 0; | |
257 | |
258 if (byteorder == ID3_UTF16_BYTEORDER_ANY && end - *ptr > 0) { | |
259 switch (((*ptr)[0] << 8) | | |
260 ((*ptr)[1] << 0)) { | |
261 case 0xfeff: | |
262 byteorder = ID3_UTF16_BYTEORDER_BE; | |
263 *ptr += 2; | |
264 break; | |
265 | |
266 case 0xfffe: | |
267 byteorder = ID3_UTF16_BYTEORDER_LE; | |
268 *ptr += 2; | |
269 break; | |
270 } | |
271 } | |
272 | |
273 utf16ptr = utf16; | |
274 while (end - *ptr > 0 && (*utf16ptr = id3_utf16_get(ptr, byteorder))) | |
275 ++utf16ptr; | |
276 | |
277 *utf16ptr = 0; | |
278 | |
279 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4)); | |
280 if (ucs4) | |
281 id3_utf16_decode(utf16, ucs4); | |
282 | |
283 free(utf16); | |
284 | |
285 return ucs4; | |
286 } |