cannam@85: /* cannam@85: * libid3tag - ID3 tag manipulation library cannam@85: * Copyright (C) 2000-2004 Underbit Technologies, Inc. cannam@85: * cannam@85: * This program is free software; you can redistribute it and/or modify cannam@85: * it under the terms of the GNU General Public License as published by cannam@85: * the Free Software Foundation; either version 2 of the License, or cannam@85: * (at your option) any later version. cannam@85: * cannam@85: * This program is distributed in the hope that it will be useful, cannam@85: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@85: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@85: * GNU General Public License for more details. cannam@85: * cannam@85: * You should have received a copy of the GNU General Public License cannam@85: * along with this program; if not, write to the Free Software cannam@85: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA cannam@85: * cannam@85: * $Id: ucs4.c,v 1.13 2004/01/23 09:41:32 rob Exp $ cannam@85: */ cannam@85: cannam@85: # ifdef HAVE_CONFIG_H cannam@85: # include "config.h" cannam@85: # endif cannam@85: cannam@85: # include "global.h" cannam@85: cannam@85: # include cannam@85: cannam@85: # include "id3tag.h" cannam@85: # include "ucs4.h" cannam@85: # include "latin1.h" cannam@85: # include "utf16.h" cannam@85: # include "utf8.h" cannam@85: cannam@85: id3_ucs4_t const id3_ucs4_empty[] = { 0 }; cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->length() cannam@85: * DESCRIPTION: return the number of ucs4 chars represented by a ucs4 string cannam@85: */ cannam@85: id3_length_t id3_ucs4_length(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: id3_ucs4_t const *ptr = ucs4; cannam@85: cannam@85: while (*ptr) cannam@85: ++ptr; cannam@85: cannam@85: return ptr - ucs4; cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->size() cannam@85: * DESCRIPTION: return the encoding size of a ucs4 string cannam@85: */ cannam@85: id3_length_t id3_ucs4_size(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: return id3_ucs4_length(ucs4) + 1; cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->latin1size() cannam@85: * DESCRIPTION: return the encoding size of a latin1-encoded ucs4 string cannam@85: */ cannam@85: id3_length_t id3_ucs4_latin1size(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: return id3_ucs4_size(ucs4); cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->utf16size() cannam@85: * DESCRIPTION: return the encoding size of a utf16-encoded ucs4 string cannam@85: */ cannam@85: id3_length_t id3_ucs4_utf16size(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: id3_length_t size = 0; cannam@85: cannam@85: while (*ucs4) { cannam@85: ++size; cannam@85: if (*ucs4 >= 0x00010000L && cannam@85: *ucs4 <= 0x0010ffffL) cannam@85: ++size; cannam@85: cannam@85: ++ucs4; cannam@85: } cannam@85: cannam@85: return size + 1; cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->utf8size() cannam@85: * DESCRIPTION: return the encoding size of a utf8-encoded ucs4 string cannam@85: */ cannam@85: id3_length_t id3_ucs4_utf8size(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: id3_length_t size = 0; cannam@85: cannam@85: while (*ucs4) { cannam@85: if (*ucs4 <= 0x0000007fL) cannam@85: size += 1; cannam@85: else if (*ucs4 <= 0x000007ffL) cannam@85: size += 2; cannam@85: else if (*ucs4 <= 0x0000ffffL) cannam@85: size += 3; cannam@85: else if (*ucs4 <= 0x001fffffL) cannam@85: size += 4; cannam@85: else if (*ucs4 <= 0x03ffffffL) cannam@85: size += 5; cannam@85: else if (*ucs4 <= 0x7fffffffL) cannam@85: size += 6; cannam@85: else cannam@85: size += 2; /* based on U+00B7 replacement char */ cannam@85: cannam@85: ++ucs4; cannam@85: } cannam@85: cannam@85: return size + 1; cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->latin1duplicate() cannam@85: * DESCRIPTION: duplicate and encode a ucs4 string into latin1 cannam@85: */ cannam@85: id3_latin1_t *id3_ucs4_latin1duplicate(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: id3_latin1_t *latin1; cannam@85: cannam@85: latin1 = malloc(id3_ucs4_latin1size(ucs4) * sizeof(*latin1)); cannam@85: if (latin1) cannam@85: id3_latin1_encode(latin1, ucs4); cannam@85: cannam@85: return release(latin1); cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->utf16duplicate() cannam@85: * DESCRIPTION: duplicate and encode a ucs4 string into utf16 cannam@85: */ cannam@85: id3_utf16_t *id3_ucs4_utf16duplicate(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: id3_utf16_t *utf16; cannam@85: cannam@85: utf16 = malloc(id3_ucs4_utf16size(ucs4) * sizeof(*utf16)); cannam@85: if (utf16) cannam@85: id3_utf16_encode(utf16, ucs4); cannam@85: cannam@85: return release(utf16); cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->utf8duplicate() cannam@85: * DESCRIPTION: duplicate and encode a ucs4 string into utf8 cannam@85: */ cannam@85: id3_utf8_t *id3_ucs4_utf8duplicate(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: id3_utf8_t *utf8; cannam@85: cannam@85: utf8 = malloc(id3_ucs4_utf8size(ucs4) * sizeof(*utf8)); cannam@85: if (utf8) cannam@85: id3_utf8_encode(utf8, ucs4); cannam@85: cannam@85: return release(utf8); cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->copy() cannam@85: * DESCRIPTION: copy a ucs4 string cannam@85: */ cannam@85: void id3_ucs4_copy(id3_ucs4_t *dest, id3_ucs4_t const *src) cannam@85: { cannam@85: while ((*dest++ = *src++)) cannam@85: ; cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->duplicate() cannam@85: * DESCRIPTION: duplicate a ucs4 string cannam@85: */ cannam@85: id3_ucs4_t *id3_ucs4_duplicate(id3_ucs4_t const *src) cannam@85: { cannam@85: id3_ucs4_t *ucs4; cannam@85: cannam@85: ucs4 = malloc(id3_ucs4_size(src) * sizeof(*ucs4)); cannam@85: if (ucs4) cannam@85: id3_ucs4_copy(ucs4, src); cannam@85: cannam@85: return ucs4; cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->putnumber() cannam@85: * DESCRIPTION: write a ucs4 string containing a (positive) decimal number cannam@85: */ cannam@85: void id3_ucs4_putnumber(id3_ucs4_t *ucs4, unsigned long number) cannam@85: { cannam@85: int digits[10], *digit; cannam@85: cannam@85: digit = digits; cannam@85: cannam@85: do { cannam@85: *digit++ = number % 10; cannam@85: number /= 10; cannam@85: } cannam@85: while (number); cannam@85: cannam@85: while (digit != digits) cannam@85: *ucs4++ = '0' + *--digit; cannam@85: cannam@85: *ucs4 = 0; cannam@85: } cannam@85: cannam@85: /* cannam@85: * NAME: ucs4->getnumber() cannam@85: * DESCRIPTION: read a ucs4 string containing a (positive) decimal number cannam@85: */ cannam@85: unsigned long id3_ucs4_getnumber(id3_ucs4_t const *ucs4) cannam@85: { cannam@85: unsigned long number = 0; cannam@85: cannam@85: while (*ucs4 >= '0' && *ucs4 <= '9') cannam@85: number = 10 * number + (*ucs4++ - '0'); cannam@85: cannam@85: return number; cannam@85: }