cannam@85
|
1 /*
|
cannam@85
|
2 * libid3tag - ID3 tag manipulation library
|
cannam@85
|
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
|
cannam@85
|
4 *
|
cannam@85
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@85
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@85
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@85
|
8 * (at your option) any later version.
|
cannam@85
|
9 *
|
cannam@85
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@85
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@85
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@85
|
13 * GNU General Public License for more details.
|
cannam@85
|
14 *
|
cannam@85
|
15 * You should have received a copy of the GNU General Public License
|
cannam@85
|
16 * along with this program; if not, write to the Free Software
|
cannam@85
|
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
cannam@85
|
18 *
|
cannam@85
|
19 * $Id: ucs4.c,v 1.13 2004/01/23 09:41:32 rob Exp $
|
cannam@85
|
20 */
|
cannam@85
|
21
|
cannam@85
|
22 # ifdef HAVE_CONFIG_H
|
cannam@85
|
23 # include "config.h"
|
cannam@85
|
24 # endif
|
cannam@85
|
25
|
cannam@85
|
26 # include "global.h"
|
cannam@85
|
27
|
cannam@85
|
28 # include <stdlib.h>
|
cannam@85
|
29
|
cannam@85
|
30 # include "id3tag.h"
|
cannam@85
|
31 # include "ucs4.h"
|
cannam@85
|
32 # include "latin1.h"
|
cannam@85
|
33 # include "utf16.h"
|
cannam@85
|
34 # include "utf8.h"
|
cannam@85
|
35
|
cannam@85
|
36 id3_ucs4_t const id3_ucs4_empty[] = { 0 };
|
cannam@85
|
37
|
cannam@85
|
38 /*
|
cannam@85
|
39 * NAME: ucs4->length()
|
cannam@85
|
40 * DESCRIPTION: return the number of ucs4 chars represented by a ucs4 string
|
cannam@85
|
41 */
|
cannam@85
|
42 id3_length_t id3_ucs4_length(id3_ucs4_t const *ucs4)
|
cannam@85
|
43 {
|
cannam@85
|
44 id3_ucs4_t const *ptr = ucs4;
|
cannam@85
|
45
|
cannam@85
|
46 while (*ptr)
|
cannam@85
|
47 ++ptr;
|
cannam@85
|
48
|
cannam@85
|
49 return ptr - ucs4;
|
cannam@85
|
50 }
|
cannam@85
|
51
|
cannam@85
|
52 /*
|
cannam@85
|
53 * NAME: ucs4->size()
|
cannam@85
|
54 * DESCRIPTION: return the encoding size of a ucs4 string
|
cannam@85
|
55 */
|
cannam@85
|
56 id3_length_t id3_ucs4_size(id3_ucs4_t const *ucs4)
|
cannam@85
|
57 {
|
cannam@85
|
58 return id3_ucs4_length(ucs4) + 1;
|
cannam@85
|
59 }
|
cannam@85
|
60
|
cannam@85
|
61 /*
|
cannam@85
|
62 * NAME: ucs4->latin1size()
|
cannam@85
|
63 * DESCRIPTION: return the encoding size of a latin1-encoded ucs4 string
|
cannam@85
|
64 */
|
cannam@85
|
65 id3_length_t id3_ucs4_latin1size(id3_ucs4_t const *ucs4)
|
cannam@85
|
66 {
|
cannam@85
|
67 return id3_ucs4_size(ucs4);
|
cannam@85
|
68 }
|
cannam@85
|
69
|
cannam@85
|
70 /*
|
cannam@85
|
71 * NAME: ucs4->utf16size()
|
cannam@85
|
72 * DESCRIPTION: return the encoding size of a utf16-encoded ucs4 string
|
cannam@85
|
73 */
|
cannam@85
|
74 id3_length_t id3_ucs4_utf16size(id3_ucs4_t const *ucs4)
|
cannam@85
|
75 {
|
cannam@85
|
76 id3_length_t size = 0;
|
cannam@85
|
77
|
cannam@85
|
78 while (*ucs4) {
|
cannam@85
|
79 ++size;
|
cannam@85
|
80 if (*ucs4 >= 0x00010000L &&
|
cannam@85
|
81 *ucs4 <= 0x0010ffffL)
|
cannam@85
|
82 ++size;
|
cannam@85
|
83
|
cannam@85
|
84 ++ucs4;
|
cannam@85
|
85 }
|
cannam@85
|
86
|
cannam@85
|
87 return size + 1;
|
cannam@85
|
88 }
|
cannam@85
|
89
|
cannam@85
|
90 /*
|
cannam@85
|
91 * NAME: ucs4->utf8size()
|
cannam@85
|
92 * DESCRIPTION: return the encoding size of a utf8-encoded ucs4 string
|
cannam@85
|
93 */
|
cannam@85
|
94 id3_length_t id3_ucs4_utf8size(id3_ucs4_t const *ucs4)
|
cannam@85
|
95 {
|
cannam@85
|
96 id3_length_t size = 0;
|
cannam@85
|
97
|
cannam@85
|
98 while (*ucs4) {
|
cannam@85
|
99 if (*ucs4 <= 0x0000007fL)
|
cannam@85
|
100 size += 1;
|
cannam@85
|
101 else if (*ucs4 <= 0x000007ffL)
|
cannam@85
|
102 size += 2;
|
cannam@85
|
103 else if (*ucs4 <= 0x0000ffffL)
|
cannam@85
|
104 size += 3;
|
cannam@85
|
105 else if (*ucs4 <= 0x001fffffL)
|
cannam@85
|
106 size += 4;
|
cannam@85
|
107 else if (*ucs4 <= 0x03ffffffL)
|
cannam@85
|
108 size += 5;
|
cannam@85
|
109 else if (*ucs4 <= 0x7fffffffL)
|
cannam@85
|
110 size += 6;
|
cannam@85
|
111 else
|
cannam@85
|
112 size += 2; /* based on U+00B7 replacement char */
|
cannam@85
|
113
|
cannam@85
|
114 ++ucs4;
|
cannam@85
|
115 }
|
cannam@85
|
116
|
cannam@85
|
117 return size + 1;
|
cannam@85
|
118 }
|
cannam@85
|
119
|
cannam@85
|
120 /*
|
cannam@85
|
121 * NAME: ucs4->latin1duplicate()
|
cannam@85
|
122 * DESCRIPTION: duplicate and encode a ucs4 string into latin1
|
cannam@85
|
123 */
|
cannam@85
|
124 id3_latin1_t *id3_ucs4_latin1duplicate(id3_ucs4_t const *ucs4)
|
cannam@85
|
125 {
|
cannam@85
|
126 id3_latin1_t *latin1;
|
cannam@85
|
127
|
cannam@85
|
128 latin1 = malloc(id3_ucs4_latin1size(ucs4) * sizeof(*latin1));
|
cannam@85
|
129 if (latin1)
|
cannam@85
|
130 id3_latin1_encode(latin1, ucs4);
|
cannam@85
|
131
|
cannam@85
|
132 return release(latin1);
|
cannam@85
|
133 }
|
cannam@85
|
134
|
cannam@85
|
135 /*
|
cannam@85
|
136 * NAME: ucs4->utf16duplicate()
|
cannam@85
|
137 * DESCRIPTION: duplicate and encode a ucs4 string into utf16
|
cannam@85
|
138 */
|
cannam@85
|
139 id3_utf16_t *id3_ucs4_utf16duplicate(id3_ucs4_t const *ucs4)
|
cannam@85
|
140 {
|
cannam@85
|
141 id3_utf16_t *utf16;
|
cannam@85
|
142
|
cannam@85
|
143 utf16 = malloc(id3_ucs4_utf16size(ucs4) * sizeof(*utf16));
|
cannam@85
|
144 if (utf16)
|
cannam@85
|
145 id3_utf16_encode(utf16, ucs4);
|
cannam@85
|
146
|
cannam@85
|
147 return release(utf16);
|
cannam@85
|
148 }
|
cannam@85
|
149
|
cannam@85
|
150 /*
|
cannam@85
|
151 * NAME: ucs4->utf8duplicate()
|
cannam@85
|
152 * DESCRIPTION: duplicate and encode a ucs4 string into utf8
|
cannam@85
|
153 */
|
cannam@85
|
154 id3_utf8_t *id3_ucs4_utf8duplicate(id3_ucs4_t const *ucs4)
|
cannam@85
|
155 {
|
cannam@85
|
156 id3_utf8_t *utf8;
|
cannam@85
|
157
|
cannam@85
|
158 utf8 = malloc(id3_ucs4_utf8size(ucs4) * sizeof(*utf8));
|
cannam@85
|
159 if (utf8)
|
cannam@85
|
160 id3_utf8_encode(utf8, ucs4);
|
cannam@85
|
161
|
cannam@85
|
162 return release(utf8);
|
cannam@85
|
163 }
|
cannam@85
|
164
|
cannam@85
|
165 /*
|
cannam@85
|
166 * NAME: ucs4->copy()
|
cannam@85
|
167 * DESCRIPTION: copy a ucs4 string
|
cannam@85
|
168 */
|
cannam@85
|
169 void id3_ucs4_copy(id3_ucs4_t *dest, id3_ucs4_t const *src)
|
cannam@85
|
170 {
|
cannam@85
|
171 while ((*dest++ = *src++))
|
cannam@85
|
172 ;
|
cannam@85
|
173 }
|
cannam@85
|
174
|
cannam@85
|
175 /*
|
cannam@85
|
176 * NAME: ucs4->duplicate()
|
cannam@85
|
177 * DESCRIPTION: duplicate a ucs4 string
|
cannam@85
|
178 */
|
cannam@85
|
179 id3_ucs4_t *id3_ucs4_duplicate(id3_ucs4_t const *src)
|
cannam@85
|
180 {
|
cannam@85
|
181 id3_ucs4_t *ucs4;
|
cannam@85
|
182
|
cannam@85
|
183 ucs4 = malloc(id3_ucs4_size(src) * sizeof(*ucs4));
|
cannam@85
|
184 if (ucs4)
|
cannam@85
|
185 id3_ucs4_copy(ucs4, src);
|
cannam@85
|
186
|
cannam@85
|
187 return ucs4;
|
cannam@85
|
188 }
|
cannam@85
|
189
|
cannam@85
|
190 /*
|
cannam@85
|
191 * NAME: ucs4->putnumber()
|
cannam@85
|
192 * DESCRIPTION: write a ucs4 string containing a (positive) decimal number
|
cannam@85
|
193 */
|
cannam@85
|
194 void id3_ucs4_putnumber(id3_ucs4_t *ucs4, unsigned long number)
|
cannam@85
|
195 {
|
cannam@85
|
196 int digits[10], *digit;
|
cannam@85
|
197
|
cannam@85
|
198 digit = digits;
|
cannam@85
|
199
|
cannam@85
|
200 do {
|
cannam@85
|
201 *digit++ = number % 10;
|
cannam@85
|
202 number /= 10;
|
cannam@85
|
203 }
|
cannam@85
|
204 while (number);
|
cannam@85
|
205
|
cannam@85
|
206 while (digit != digits)
|
cannam@85
|
207 *ucs4++ = '0' + *--digit;
|
cannam@85
|
208
|
cannam@85
|
209 *ucs4 = 0;
|
cannam@85
|
210 }
|
cannam@85
|
211
|
cannam@85
|
212 /*
|
cannam@85
|
213 * NAME: ucs4->getnumber()
|
cannam@85
|
214 * DESCRIPTION: read a ucs4 string containing a (positive) decimal number
|
cannam@85
|
215 */
|
cannam@85
|
216 unsigned long id3_ucs4_getnumber(id3_ucs4_t const *ucs4)
|
cannam@85
|
217 {
|
cannam@85
|
218 unsigned long number = 0;
|
cannam@85
|
219
|
cannam@85
|
220 while (*ucs4 >= '0' && *ucs4 <= '9')
|
cannam@85
|
221 number = 10 * number + (*ucs4++ - '0');
|
cannam@85
|
222
|
cannam@85
|
223 return number;
|
cannam@85
|
224 }
|