cannam@85
|
1 /*
|
cannam@85
|
2 * libid3tag - ID3 tag manipulation library
|
cannam@85
|
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
|
cannam@85
|
4 *
|
cannam@85
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@85
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@85
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@85
|
8 * (at your option) any later version.
|
cannam@85
|
9 *
|
cannam@85
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@85
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@85
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@85
|
13 * GNU General Public License for more details.
|
cannam@85
|
14 *
|
cannam@85
|
15 * You should have received a copy of the GNU General Public License
|
cannam@85
|
16 * along with this program; if not, write to the Free Software
|
cannam@85
|
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
cannam@85
|
18 *
|
cannam@85
|
19 * $Id: latin1.c,v 1.10 2004/01/23 09:41:32 rob Exp $
|
cannam@85
|
20 */
|
cannam@85
|
21
|
cannam@85
|
22 # ifdef HAVE_CONFIG_H
|
cannam@85
|
23 # include "config.h"
|
cannam@85
|
24 # endif
|
cannam@85
|
25
|
cannam@85
|
26 # include "global.h"
|
cannam@85
|
27
|
cannam@85
|
28 # include <stdlib.h>
|
cannam@85
|
29
|
cannam@85
|
30 # include "id3tag.h"
|
cannam@85
|
31 # include "latin1.h"
|
cannam@85
|
32 # include "ucs4.h"
|
cannam@85
|
33
|
cannam@85
|
34 /*
|
cannam@85
|
35 * NAME: latin1->length()
|
cannam@85
|
36 * DESCRIPTION: return the number of ucs4 chars represented by a latin1 string
|
cannam@85
|
37 */
|
cannam@85
|
38 id3_length_t id3_latin1_length(id3_latin1_t const *latin1)
|
cannam@85
|
39 {
|
cannam@85
|
40 id3_latin1_t const *ptr = latin1;
|
cannam@85
|
41
|
cannam@85
|
42 while (*ptr)
|
cannam@85
|
43 ++ptr;
|
cannam@85
|
44
|
cannam@85
|
45 return ptr - latin1;
|
cannam@85
|
46 }
|
cannam@85
|
47
|
cannam@85
|
48 /*
|
cannam@85
|
49 * NAME: latin1->size()
|
cannam@85
|
50 * DESCRIPTION: return the encoding size of a latin1 string
|
cannam@85
|
51 */
|
cannam@85
|
52 id3_length_t id3_latin1_size(id3_latin1_t const *latin1)
|
cannam@85
|
53 {
|
cannam@85
|
54 return id3_latin1_length(latin1) + 1;
|
cannam@85
|
55 }
|
cannam@85
|
56
|
cannam@85
|
57 /*
|
cannam@85
|
58 * NAME: latin1->copy()
|
cannam@85
|
59 * DESCRIPTION: copy a latin1 string
|
cannam@85
|
60 */
|
cannam@85
|
61 void id3_latin1_copy(id3_latin1_t *dest, id3_latin1_t const *src)
|
cannam@85
|
62 {
|
cannam@85
|
63 while ((*dest++ = *src++))
|
cannam@85
|
64 ;
|
cannam@85
|
65 }
|
cannam@85
|
66
|
cannam@85
|
67 /*
|
cannam@85
|
68 * NAME: latin1->duplicate()
|
cannam@85
|
69 * DESCRIPTION: duplicate a latin1 string
|
cannam@85
|
70 */
|
cannam@85
|
71 id3_latin1_t *id3_latin1_duplicate(id3_latin1_t const *src)
|
cannam@85
|
72 {
|
cannam@85
|
73 id3_latin1_t *latin1;
|
cannam@85
|
74
|
cannam@85
|
75 latin1 = malloc(id3_latin1_size(src) * sizeof(*latin1));
|
cannam@85
|
76 if (latin1)
|
cannam@85
|
77 id3_latin1_copy(latin1, src);
|
cannam@85
|
78
|
cannam@85
|
79 return latin1;
|
cannam@85
|
80 }
|
cannam@85
|
81
|
cannam@85
|
82 /*
|
cannam@85
|
83 * NAME: latin1->ucs4duplicate()
|
cannam@85
|
84 * DESCRIPTION: duplicate and decode a latin1 string into ucs4
|
cannam@85
|
85 */
|
cannam@85
|
86 id3_ucs4_t *id3_latin1_ucs4duplicate(id3_latin1_t const *latin1)
|
cannam@85
|
87 {
|
cannam@85
|
88 id3_ucs4_t *ucs4;
|
cannam@85
|
89
|
cannam@85
|
90 ucs4 = malloc((id3_latin1_length(latin1) + 1) * sizeof(*ucs4));
|
cannam@85
|
91 if (ucs4)
|
cannam@85
|
92 id3_latin1_decode(latin1, ucs4);
|
cannam@85
|
93
|
cannam@85
|
94 return release(ucs4);
|
cannam@85
|
95 }
|
cannam@85
|
96
|
cannam@85
|
97 /*
|
cannam@85
|
98 * NAME: latin1->decodechar()
|
cannam@85
|
99 * DESCRIPTION: decode a (single) latin1 char into a single ucs4 char
|
cannam@85
|
100 */
|
cannam@85
|
101 id3_length_t id3_latin1_decodechar(id3_latin1_t const *latin1,
|
cannam@85
|
102 id3_ucs4_t *ucs4)
|
cannam@85
|
103 {
|
cannam@85
|
104 *ucs4 = *latin1;
|
cannam@85
|
105
|
cannam@85
|
106 return 1;
|
cannam@85
|
107 }
|
cannam@85
|
108
|
cannam@85
|
109 /*
|
cannam@85
|
110 * NAME: latin1->encodechar()
|
cannam@85
|
111 * DESCRIPTION: encode a single ucs4 char into a (single) latin1 char
|
cannam@85
|
112 */
|
cannam@85
|
113 id3_length_t id3_latin1_encodechar(id3_latin1_t *latin1, id3_ucs4_t ucs4)
|
cannam@85
|
114 {
|
cannam@85
|
115 *latin1 = ucs4;
|
cannam@85
|
116 if (ucs4 > 0x000000ffL)
|
cannam@85
|
117 *latin1 = ID3_UCS4_REPLACEMENTCHAR;
|
cannam@85
|
118
|
cannam@85
|
119 return 1;
|
cannam@85
|
120 }
|
cannam@85
|
121
|
cannam@85
|
122 /*
|
cannam@85
|
123 * NAME: latin1->decode()
|
cannam@85
|
124 * DESCRIPTION: decode a complete latin1 string into a ucs4 string
|
cannam@85
|
125 */
|
cannam@85
|
126 void id3_latin1_decode(id3_latin1_t const *latin1, id3_ucs4_t *ucs4)
|
cannam@85
|
127 {
|
cannam@85
|
128 do
|
cannam@85
|
129 latin1 += id3_latin1_decodechar(latin1, ucs4);
|
cannam@85
|
130 while (*ucs4++);
|
cannam@85
|
131 }
|
cannam@85
|
132
|
cannam@85
|
133 /*
|
cannam@85
|
134 * NAME: latin1->encode()
|
cannam@85
|
135 * DESCRIPTION: encode a complete ucs4 string into a latin1 string
|
cannam@85
|
136 */
|
cannam@85
|
137 void id3_latin1_encode(id3_latin1_t *latin1, id3_ucs4_t const *ucs4)
|
cannam@85
|
138 {
|
cannam@85
|
139 do
|
cannam@85
|
140 latin1 += id3_latin1_encodechar(latin1, *ucs4);
|
cannam@85
|
141 while (*ucs4++);
|
cannam@85
|
142 }
|
cannam@85
|
143
|
cannam@85
|
144 /*
|
cannam@85
|
145 * NAME: latin1->put()
|
cannam@85
|
146 * DESCRIPTION: serialize a single latin1 character
|
cannam@85
|
147 */
|
cannam@85
|
148 id3_length_t id3_latin1_put(id3_byte_t **ptr, id3_latin1_t latin1)
|
cannam@85
|
149 {
|
cannam@85
|
150 if (ptr)
|
cannam@85
|
151 *(*ptr)++ = latin1;
|
cannam@85
|
152
|
cannam@85
|
153 return 1;
|
cannam@85
|
154 }
|
cannam@85
|
155
|
cannam@85
|
156 /*
|
cannam@85
|
157 * NAME: latin1->get()
|
cannam@85
|
158 * DESCRIPTION: deserialize a single latin1 character
|
cannam@85
|
159 */
|
cannam@85
|
160 id3_latin1_t id3_latin1_get(id3_byte_t const **ptr)
|
cannam@85
|
161 {
|
cannam@85
|
162 return *(*ptr)++;
|
cannam@85
|
163 }
|
cannam@85
|
164
|
cannam@85
|
165 /*
|
cannam@85
|
166 * NAME: latin1->serialize()
|
cannam@85
|
167 * DESCRIPTION: serialize a ucs4 string using latin1 encoding
|
cannam@85
|
168 */
|
cannam@85
|
169 id3_length_t id3_latin1_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4,
|
cannam@85
|
170 int terminate)
|
cannam@85
|
171 {
|
cannam@85
|
172 id3_length_t size = 0;
|
cannam@85
|
173 id3_latin1_t latin1[1], *out;
|
cannam@85
|
174
|
cannam@85
|
175 while (*ucs4) {
|
cannam@85
|
176 switch (id3_latin1_encodechar(out = latin1, *ucs4++)) {
|
cannam@85
|
177 case 1: size += id3_latin1_put(ptr, *out++);
|
cannam@85
|
178 case 0: break;
|
cannam@85
|
179 }
|
cannam@85
|
180 }
|
cannam@85
|
181
|
cannam@85
|
182 if (terminate)
|
cannam@85
|
183 size += id3_latin1_put(ptr, 0);
|
cannam@85
|
184
|
cannam@85
|
185 return size;
|
cannam@85
|
186 }
|
cannam@85
|
187
|
cannam@85
|
188 /*
|
cannam@85
|
189 * NAME: latin1->deserialize()
|
cannam@85
|
190 * DESCRIPTION: deserialize a ucs4 string using latin1 encoding
|
cannam@85
|
191 */
|
cannam@85
|
192 id3_ucs4_t *id3_latin1_deserialize(id3_byte_t const **ptr, id3_length_t length)
|
cannam@85
|
193 {
|
cannam@85
|
194 id3_byte_t const *end;
|
cannam@85
|
195 id3_latin1_t *latin1ptr, *latin1;
|
cannam@85
|
196 id3_ucs4_t *ucs4;
|
cannam@85
|
197
|
cannam@85
|
198 end = *ptr + length;
|
cannam@85
|
199
|
cannam@85
|
200 latin1 = malloc((length + 1) * sizeof(*latin1));
|
cannam@85
|
201 if (latin1 == 0)
|
cannam@85
|
202 return 0;
|
cannam@85
|
203
|
cannam@85
|
204 latin1ptr = latin1;
|
cannam@85
|
205 while (end - *ptr > 0 && (*latin1ptr = id3_latin1_get(ptr)))
|
cannam@85
|
206 ++latin1ptr;
|
cannam@85
|
207
|
cannam@85
|
208 *latin1ptr = 0;
|
cannam@85
|
209
|
cannam@85
|
210 ucs4 = malloc((id3_latin1_length(latin1) + 1) * sizeof(*ucs4));
|
cannam@85
|
211 if (ucs4)
|
cannam@85
|
212 id3_latin1_decode(latin1, ucs4);
|
cannam@85
|
213
|
cannam@85
|
214 free(latin1);
|
cannam@85
|
215
|
cannam@85
|
216 return ucs4;
|
cannam@85
|
217 }
|