sv-dependency-builds: src/flac-1.2.1/src/share/utf8/utf8.c annotate

annotate src/flac-1.2.1/src/share/utf8/utf8.c @ 20:ab7c38c4c577

Ranlib

author	Chris Cannam
date	Mon, 25 Mar 2013 16:28:19 +0000
parents	05aa0afa9217
children

rev	line source
Chris@1	1 /*
Chris@1	2 * Copyright (C) 2001 Peter Harris <peter.harris@hummingbird.com>
Chris@1	3 * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
Chris@1	4 *
Chris@1	5 * Buffer overflow checking added: Josh Coalson, 9/9/2007
Chris@1	6 *
Chris@1	7 * This program is free software; you can redistribute it and/or modify
Chris@1	8 * it under the terms of the GNU General Public License as published by
Chris@1	9 * the Free Software Foundation; either version 2 of the License, or
Chris@1	10 * (at your option) any later version.
Chris@1	11 *
Chris@1	12 * This program is distributed in the hope that it will be useful,
Chris@1	13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@1	14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@1	15 * GNU General Public License for more details.
Chris@1	16 *
Chris@1	17 * You should have received a copy of the GNU General Public License
Chris@1	18 * along with this program; if not, write to the Free Software
Chris@1	19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Chris@1	20 */
Chris@1	21
Chris@1	22 /*
Chris@1	23 * Convert a string between UTF-8 and the locale's charset.
Chris@1	24 */
Chris@1	25
Chris@1	26 #if HAVE_CONFIG_H
Chris@1	27 # include <config.h>
Chris@1	28 #endif
Chris@1	29
Chris@1	30 #include <stdlib.h>
Chris@1	31 #include <string.h>
Chris@1	32
Chris@1	33 #include "share/alloc.h"
Chris@1	34 #include "utf8.h"
Chris@1	35 #include "charset.h"
Chris@1	36
Chris@1	37
Chris@1	38 #ifdef _WIN32
Chris@1	39
Chris@1	40 /* Thanks to Peter Harris <peter.harris@hummingbird.com> for this win32
Chris@1	41 * code.
Chris@1	42 */
Chris@1	43
Chris@1	44 #include <stdio.h>
Chris@1	45 #include <windows.h>
Chris@1	46
/*
 * Encode a NUL-terminated wide-character string as UTF-8.
 *
 * Each input unit is truncated to 16 bits and encoded on its own as a
 * 1-, 2- or 3-byte sequence.
 * NOTE(review): surrogate pairs are therefore not merged into 4-byte
 * sequences; make_unicode_string() only understands sequences of up to
 * 3 bytes, so the two must be changed together if that is ever wanted.
 *
 * Returns a newly allocated, NUL-terminated buffer that the caller must
 * free(), or NULL if the size computation overflows or
 * safe_malloc_add_2op_() returns NULL.
 */
static unsigned char *make_utf8_string(const wchar_t *unicode)
{
	size_t size = 0, n;
	size_t index = 0, out_index = 0; /* size_t: cannot overflow before size does */
	unsigned char *out;
	unsigned short c;

	/* first calculate the size of the target string */
	while((c = unicode[index++]) != 0) {
		if(c < 0x0080) {
			n = 1;
		} else if(c < 0x0800) {
			n = 2;
		} else {
			n = 3;
		}
		if(size+n < size) /* overflow check */
			return NULL;
		size += n;
	}

	/* one extra byte for the terminating NUL */
	out = safe_malloc_add_2op_(size, /*+*/1);
	if(out == NULL)
		return NULL;

	/* second pass: emit the bytes */
	index = 0;
	while((c = unicode[index++]) != 0) {
		if(c < 0x0080) {
			out[out_index++] = (unsigned char)c;
		} else if(c < 0x0800) {
			out[out_index++] = (unsigned char)(0xc0 | (c >> 6));
			out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
		} else {
			out[out_index++] = (unsigned char)(0xe0 | (c >> 12));
			out[out_index++] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
			out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
		}
	}
	out[out_index] = 0x00;

	return out;
}
Chris@1	94
/*
 * Decode a NUL-terminated UTF-8 byte string into wide characters.
 *
 * Only 1-, 2- and 3-byte sequences are understood; a lead byte with the
 * top three bits set is treated as the start of a 3-byte sequence and
 * any other byte with the top bit set as the start of a 2-byte sequence.
 * Trailing bytes are not validated.
 *
 * A multi-byte sequence that is cut short by the terminating NUL is
 * decoded from the bytes that are present; the scan never reads past the
 * NUL.  Both passes apply the same rule so the output buffer size always
 * matches what the second pass writes.
 *
 * Returns a newly allocated, zero-terminated buffer that the caller must
 * free(), or NULL on size overflow or if safe_malloc_mul_2op_() returns
 * NULL.
 */
static wchar_t *make_unicode_string(const unsigned char *utf8)
{
	size_t size = 0;
	size_t index = 0, out_index = 0;
	wchar_t *out;
	unsigned char c;

	/* first calculate the size of the target string */
	while((c = utf8[index++]) != 0) {
		size_t trail;

		if((c & 0x80) == 0) {
			trail = 0;
		} else if((c & 0xe0) == 0xe0) {
			trail = 2;
		} else {
			trail = 1;
		}

		/* skip the trailing bytes, but never step over the NUL */
		while(trail > 0 && utf8[index] != 0) {
			index++;
			trail--;
		}

		if(size + 1 == 0) /* overflow check */
			return NULL;
		size++;
	}

	if(size + 1 == 0) /* overflow check */
		return NULL;
	out = safe_malloc_mul_2op_(size+1, /*times*/sizeof(wchar_t));
	if(out == NULL)
		return NULL;

	/* second pass: decode, consuming exactly what the first pass skipped */
	index = 0;
	while((c = utf8[index++]) != 0) {
		if((c & 0x80) == 0) {
			out[out_index++] = c;
		} else if((c & 0xe0) == 0xe0) {
			out[out_index] = (c & 0x1F) << 12;
			if(utf8[index] != 0) {
				c = utf8[index++];
				out[out_index] |= (c & 0x3F) << 6;
				if(utf8[index] != 0) {
					c = utf8[index++];
					out[out_index] |= (c & 0x3F);
				}
			}
			out_index++;
		} else {
			out[out_index] = (c & 0x3F) << 6;
			if(utf8[index] != 0) {
				c = utf8[index++];
				out[out_index] |= (c & 0x3F);
			}
			out_index++;
		}
	}
	out[out_index] = 0;

	return out;
}
Chris@1	147
Chris@1	148 int utf8_encode(const char from, char *to)
Chris@1	149 {
Chris@1	150 wchar_t *unicode;
Chris@1	151 int wchars, err;
Chris@1	152
Chris@1	153 wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
Chris@1	154 strlen(from), NULL, 0);
Chris@1	155
Chris@1	156 if(wchars == 0)
Chris@1	157 {
Chris@1	158 fprintf(stderr, "Unicode translation error %d\n", GetLastError());
Chris@1	159 return -1;
Chris@1	160 }
Chris@1	161
Chris@1	162 if(wchars < 0) /* underflow check */
Chris@1	163 return -1;
Chris@1	164
Chris@1	165 unicode = safe_calloc_((size_t)wchars + 1, sizeof(unsigned short));
Chris@1	166 if(unicode == NULL)
Chris@1	167 {
Chris@1	168 fprintf(stderr, "Out of memory processing string to UTF8\n");
Chris@1	169 return -1;
Chris@1	170 }
Chris@1	171
Chris@1	172 err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
Chris@1	173 strlen(from), unicode, wchars);
Chris@1	174 if(err != wchars)
Chris@1	175 {
Chris@1	176 free(unicode);
Chris@1	177 fprintf(stderr, "Unicode translation error %d\n", GetLastError());
Chris@1	178 return -1;
Chris@1	179 }
Chris@1	180
Chris@1	181 /* On NT-based windows systems, we could use WideCharToMultiByte(), but
Chris@1	182 * MS doesn't actually have a consistent API across win32.
Chris@1	183 */
Chris@1	184 *to = make_utf8_string(unicode);
Chris@1	185
Chris@1	186 free(unicode);
Chris@1	187 return 0;
Chris@1	188 }
Chris@1	189
Chris@1	190 int utf8_decode(const char from, char *to)
Chris@1	191 {
Chris@1	192 wchar_t *unicode;
Chris@1	193 int chars, err;
Chris@1	194
Chris@1	195 /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
Chris@1	196 * MS doesn't actually have a consistent API across win32.
Chris@1	197 */
Chris@1	198 unicode = make_unicode_string(from);
Chris@1	199 if(unicode == NULL)
Chris@1	200 {
Chris@1	201 fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
Chris@1	202 return -1;
Chris@1	203 }
Chris@1	204
Chris@1	205 chars = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
Chris@1	206 -1, NULL, 0, NULL, NULL);
Chris@1	207
Chris@1	208 if(chars < 0) /* underflow check */
Chris@1	209 return -1;
Chris@1	210
Chris@1	211 if(chars == 0)
Chris@1	212 {
Chris@1	213 fprintf(stderr, "Unicode translation error %d\n", GetLastError());
Chris@1	214 free(unicode);
Chris@1	215 return -1;
Chris@1	216 }
Chris@1	217
Chris@1	218 *to = safe_calloc_((size_t)chars + 1, sizeof(unsigned char));
Chris@1	219 if(*to == NULL)
Chris@1	220 {
Chris@1	221 fprintf(stderr, "Out of memory processing string to local charset\n");
Chris@1	222 free(unicode);
Chris@1	223 return -1;
Chris@1	224 }
Chris@1	225
Chris@1	226 err = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
Chris@1	227 -1, *to, chars, NULL, NULL);
Chris@1	228 if(err != chars)
Chris@1	229 {
Chris@1	230 fprintf(stderr, "Unicode translation error %d\n", GetLastError());
Chris@1	231 free(unicode);
Chris@1	232 free(*to);
Chris@1	233 *to = NULL;
Chris@1	234 return -1;
Chris@1	235 }
Chris@1	236
Chris@1	237 free(unicode);
Chris@1	238 return 0;
Chris@1	239 }
Chris@1	240
Chris@1	241 #else /* End win32. Rest is for real operating systems */
Chris@1	242
Chris@1	243
Chris@1	244 #ifdef HAVE_LANGINFO_CODESET
Chris@1	245 #include <langinfo.h>
Chris@1	246 #endif
Chris@1	247
Chris@1	248 #include "iconvert.h"
Chris@1	249
/*
 * Return the name of the character set to convert to/from.
 *
 * Sources are tried in order: nl_langinfo(CODESET) when
 * HAVE_LANGINFO_CODESET is defined, then the CHARSET environment
 * variable, then the literal "US-ASCII".  A NULL or empty string from
 * either of the first two counts as "not set", so the later fallbacks
 * are reachable even though nl_langinfo() reports an unknown codeset as
 * an empty string rather than NULL.
 *
 * The returned pointer is owned by the C library or is a string literal;
 * the caller must not free it.
 */
static const char *current_charset(void)
{
	const char *c = NULL;

#ifdef HAVE_LANGINFO_CODESET
	c = nl_langinfo(CODESET);
#endif

	if (c == NULL || *c == '\0')
		c = getenv("CHARSET");

	if (c == NULL || *c == '\0')
		return "US-ASCII";

	return c;
}
Chris@1	262
/*
 * Convert a buffer from one character set to another using whichever
 * backend was selected at build time: iconvert() when HAVE_ICONV is
 * defined, charset_convert() otherwise.
 *
 * All arguments are handed to the backend unchanged and its return value
 * is returned unchanged.
 */
static int convert_buffer(const char *fromcode, const char *tocode,
			  const char *from, size_t fromlen,
			  char **to, size_t *tolen)
{
#ifdef HAVE_ICONV
	return iconvert(fromcode, tocode, from, fromlen, to, tolen);
#else /* should be ifdef USE_CHARSET_CONVERT */
	return charset_convert(fromcode, tocode, from, fromlen, to, tolen);
#endif
}
Chris@1	283
/*
 * Convert a NUL-terminated string between character sets.
 *
 * The conversion is first attempted with convert_buffer().  If that
 * returns -2 the function fails with -1; any result other than -1 is
 * returned as is.  When convert_buffer() returns -1 a fallback is used
 * instead: the input is copied and every byte with a bit set above the
 * 7-bit range is overwritten with `replace`.
 *
 * from:    NUL-terminated input string.
 * to:      receives the newly allocated result.  The fallback copy is
 *          allocated with safe_malloc_add_2op_().
 * replace: substitute character used by the fallback.
 *
 * Returns the convert_buffer() result on the normal path, 3 when the
 * fallback copy was produced, and -1 on failure.
 */
static int convert_string(const char *fromcode, const char *tocode,
			  const char *from, char **to, char replace)
{
	int ret;
	size_t fromlen;
	char *s;

	fromlen = strlen(from);
	ret = convert_buffer(fromcode, tocode, from, fromlen, to, NULL);
	if (ret == -2)
		return -1;
	if (ret != -1)
		return ret;

	/* fallback: plain copy with non-ASCII bytes replaced */
	s = safe_malloc_add_2op_(fromlen, /*+*/1);
	if (!s)
		return -1;
	memcpy(s, from, fromlen + 1); /* length already known; includes the NUL */
	*to = s;
	for (; *s; s++) {
		if (*s & ~0x7f)
			*s = replace;
	}
	return 3;
}
Chris@1	308
/*
 * Convert `from` from the current character set (see current_charset())
 * to UTF-8.  The result is stored in *to and the return value is that of
 * convert_string(); bytes that cannot be represented are replaced with
 * '#' when convert_string() falls back to its plain copy.
 */
int utf8_encode(const char *from, char **to)
{
	return convert_string(current_charset(), "UTF-8", from, to, '#');
}
Chris@1	313
/*
 * Convert `from` from UTF-8 to the current character set (see
 * current_charset()).  The result is stored in *to and the return value
 * is that of convert_string(); bytes that cannot be represented are
 * replaced with '?' when convert_string() falls back to its plain copy.
 */
int utf8_decode(const char *from, char **to)
{
	return convert_string("UTF-8", current_charset(), from, to, '?');
}
Chris@1	318
Chris@1	319 #endif

Mercurial > hg > sv-dependency-builds

annotate src/flac-1.2.1/src/share/utf8/utf8.c @ 20:ab7c38c4c577