Chris@87
|
1 #ifndef Py_UNICODEOBJECT_H
|
Chris@87
|
2 #define Py_UNICODEOBJECT_H
|
Chris@87
|
3
|
Chris@87
|
4 #include <stdarg.h>
|
Chris@87
|
5
|
Chris@87
|
6 /*
|
Chris@87
|
7
|
Chris@87
|
8 Unicode implementation based on original code by Fredrik Lundh,
|
Chris@87
|
9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
|
Chris@87
|
10 Unicode Integration Proposal (see file Misc/unicode.txt).
|
Chris@87
|
11
|
Chris@87
|
12 Copyright (c) Corporation for National Research Initiatives.
|
Chris@87
|
13
|
Chris@87
|
14
|
Chris@87
|
15 Original header:
|
Chris@87
|
16 --------------------------------------------------------------------
|
Chris@87
|
17
|
Chris@87
|
18 * Yet another Unicode string type for Python. This type supports the
|
Chris@87
|
19 * 16-bit Basic Multilingual Plane (BMP) only.
|
Chris@87
|
20 *
|
Chris@87
|
21 * Written by Fredrik Lundh, January 1999.
|
Chris@87
|
22 *
|
Chris@87
|
23 * Copyright (c) 1999 by Secret Labs AB.
|
Chris@87
|
24 * Copyright (c) 1999 by Fredrik Lundh.
|
Chris@87
|
25 *
|
Chris@87
|
26 * fredrik@pythonware.com
|
Chris@87
|
27 * http://www.pythonware.com
|
Chris@87
|
28 *
|
Chris@87
|
29 * --------------------------------------------------------------------
|
Chris@87
|
30 * This Unicode String Type is
|
Chris@87
|
31 *
|
Chris@87
|
32 * Copyright (c) 1999 by Secret Labs AB
|
Chris@87
|
33 * Copyright (c) 1999 by Fredrik Lundh
|
Chris@87
|
34 *
|
Chris@87
|
35 * By obtaining, using, and/or copying this software and/or its
|
Chris@87
|
36 * associated documentation, you agree that you have read, understood,
|
Chris@87
|
37 * and will comply with the following terms and conditions:
|
Chris@87
|
38 *
|
Chris@87
|
39 * Permission to use, copy, modify, and distribute this software and its
|
Chris@87
|
40 * associated documentation for any purpose and without fee is hereby
|
Chris@87
|
41 * granted, provided that the above copyright notice appears in all
|
Chris@87
|
42 * copies, and that both that copyright notice and this permission notice
|
Chris@87
|
43 * appear in supporting documentation, and that the name of Secret Labs
|
Chris@87
|
44 * AB or the author not be used in advertising or publicity pertaining to
|
Chris@87
|
45 * distribution of the software without specific, written prior
|
Chris@87
|
46 * permission.
|
Chris@87
|
47 *
|
Chris@87
|
48 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
Chris@87
|
49 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
Chris@87
|
50 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
|
Chris@87
|
51 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
Chris@87
|
52 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
Chris@87
|
53 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
Chris@87
|
54 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
Chris@87
|
55 * -------------------------------------------------------------------- */
|
Chris@87
|
56
|
Chris@87
|
57 #include <ctype.h>
|
Chris@87
|
58
|
Chris@87
|
59 /* === Internal API ======================================================= */
|
Chris@87
|
60
|
Chris@87
|
61 /* --- Internal Unicode Format -------------------------------------------- */
|
Chris@87
|
62
|
Chris@87
|
63 #ifndef Py_USING_UNICODE
|
Chris@87
|
64
|
Chris@87
|
65 #define PyUnicode_Check(op) 0
|
Chris@87
|
66 #define PyUnicode_CheckExact(op) 0
|
Chris@87
|
67
|
Chris@87
|
68 #else
|
Chris@87
|
69
|
Chris@87
|
70 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
|
Chris@87
|
71 properly set, but the default rules below don't set it. I'll
|
Chris@87
|
72 sort this out some other day -- fredrik@pythonware.com */
|
Chris@87
|
73
|
Chris@87
|
74 #ifndef Py_UNICODE_SIZE
|
Chris@87
|
75 #error Must define Py_UNICODE_SIZE
|
Chris@87
|
76 #endif
|
Chris@87
|
77
|
Chris@87
|
78 /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
|
Chris@87
|
79 strings are stored as UCS-2 (with limited support for UTF-16) */
|
Chris@87
|
80
|
Chris@87
|
81 #if Py_UNICODE_SIZE >= 4
|
Chris@87
|
82 #define Py_UNICODE_WIDE
|
Chris@87
|
83 #endif
|
Chris@87
|
84
|
Chris@87
|
85 /* Set these flags if the platform has "wchar.h", "wctype.h" and the
|
Chris@87
|
86 wchar_t type is a 16-bit unsigned type */
|
Chris@87
|
87 /* #define HAVE_WCHAR_H */
|
Chris@87
|
88 /* #define HAVE_USABLE_WCHAR_T */
|
Chris@87
|
89
|
Chris@87
|
90 /* Defaults for various platforms */
|
Chris@87
|
91 #ifndef PY_UNICODE_TYPE
|
Chris@87
|
92
|
Chris@87
|
93 /* Windows has a usable wchar_t type (unless we're using UCS-4) */
|
Chris@87
|
94 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
|
Chris@87
|
95 # define HAVE_USABLE_WCHAR_T
|
Chris@87
|
96 # define PY_UNICODE_TYPE wchar_t
|
Chris@87
|
97 # endif
|
Chris@87
|
98
|
Chris@87
|
99 # if defined(Py_UNICODE_WIDE)
|
Chris@87
|
100 # define PY_UNICODE_TYPE Py_UCS4
|
Chris@87
|
101 # endif
|
Chris@87
|
102
|
Chris@87
|
103 #endif
|
Chris@87
|
104
|
Chris@87
|
105 /* If the compiler provides a wchar_t type we try to support it
|
Chris@87
|
106 through the interface functions PyUnicode_FromWideChar() and
|
Chris@87
|
107 PyUnicode_AsWideChar(). */
|
Chris@87
|
108
|
Chris@87
|
109 #ifdef HAVE_USABLE_WCHAR_T
|
Chris@87
|
110 # ifndef HAVE_WCHAR_H
|
Chris@87
|
111 # define HAVE_WCHAR_H
|
Chris@87
|
112 # endif
|
Chris@87
|
113 #endif
|
Chris@87
|
114
|
Chris@87
|
115 #ifdef HAVE_WCHAR_H
|
Chris@87
|
116 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
|
Chris@87
|
117 # ifdef _HAVE_BSDI
|
Chris@87
|
118 # include <time.h>
|
Chris@87
|
119 # endif
|
Chris@87
|
120 # include <wchar.h>
|
Chris@87
|
121 #endif
|
Chris@87
|
122
|
Chris@87
|
123 /*
|
Chris@87
|
124 * Use this typedef when you need to represent a UTF-16 surrogate pair
|
Chris@87
|
125 * as a single unsigned integer.
|
Chris@87
|
126 */
|
Chris@87
|
127 #if SIZEOF_INT >= 4
|
Chris@87
|
128 typedef unsigned int Py_UCS4;
|
Chris@87
|
129 #elif SIZEOF_LONG >= 4
|
Chris@87
|
130 typedef unsigned long Py_UCS4;
|
Chris@87
|
131 #endif
|
Chris@87
|
132
|
Chris@87
|
133 /* Py_UNICODE is the native Unicode storage format (code unit) used by
|
Chris@87
|
134 Python and represents a single Unicode element in the Unicode
|
Chris@87
|
135 type. */
|
Chris@87
|
136
|
Chris@87
|
137 typedef PY_UNICODE_TYPE Py_UNICODE;
|
Chris@87
|
138
|
Chris@87
|
139 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
|
Chris@87
|
140
|
Chris@87
|
141 /* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
|
Chris@87
|
142 produce different external names and thus cause import errors in
|
Chris@87
|
143 case Python interpreters and extensions with mixed compiled in
|
Chris@87
|
144 Unicode width assumptions are combined. */
|
Chris@87
|
145
|
Chris@87
|
146 #ifndef Py_UNICODE_WIDE
|
Chris@87
|
147
|
Chris@87
|
148 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
|
Chris@87
|
149 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
|
Chris@87
|
150 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
|
Chris@87
|
151 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
|
Chris@87
|
152 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
|
Chris@87
|
153 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
|
Chris@87
|
154 # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
|
Chris@87
|
155 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
|
Chris@87
|
156 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
|
Chris@87
|
157 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
|
Chris@87
|
158 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
|
Chris@87
|
159 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
|
Chris@87
|
160 # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
|
Chris@87
|
161 # define PyUnicode_Compare PyUnicodeUCS2_Compare
|
Chris@87
|
162 # define PyUnicode_Concat PyUnicodeUCS2_Concat
|
Chris@87
|
163 # define PyUnicode_Contains PyUnicodeUCS2_Contains
|
Chris@87
|
164 # define PyUnicode_Count PyUnicodeUCS2_Count
|
Chris@87
|
165 # define PyUnicode_Decode PyUnicodeUCS2_Decode
|
Chris@87
|
166 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
|
Chris@87
|
167 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
|
Chris@87
|
168 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
Chris@87
|
169 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
Chris@87
|
170 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
|
Chris@87
|
171 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
|
Chris@87
|
172 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
Chris@87
|
173 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
|
Chris@87
|
174 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
Chris@87
|
175 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
|
Chris@87
|
176 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
|
Chris@87
|
177 # define PyUnicode_Encode PyUnicodeUCS2_Encode
|
Chris@87
|
178 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
|
Chris@87
|
179 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
|
Chris@87
|
180 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
|
Chris@87
|
181 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
|
Chris@87
|
182 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
|
Chris@87
|
183 # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
|
Chris@87
|
184 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
|
Chris@87
|
185 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
|
Chris@87
|
186 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
|
Chris@87
|
187 # define PyUnicode_Find PyUnicodeUCS2_Find
|
Chris@87
|
188 # define PyUnicode_Format PyUnicodeUCS2_Format
|
Chris@87
|
189 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
|
Chris@87
|
190 # define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
|
Chris@87
|
191 # define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
|
Chris@87
|
192 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
|
Chris@87
|
193 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
|
Chris@87
|
194 # define PyUnicode_FromString PyUnicodeUCS2_FromString
|
Chris@87
|
195 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
|
Chris@87
|
196 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
|
Chris@87
|
197 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
|
Chris@87
|
198 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
|
Chris@87
|
199 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
|
Chris@87
|
200 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
|
Chris@87
|
201 # define PyUnicode_Join PyUnicodeUCS2_Join
|
Chris@87
|
202 # define PyUnicode_Partition PyUnicodeUCS2_Partition
|
Chris@87
|
203 # define PyUnicode_RPartition PyUnicodeUCS2_RPartition
|
Chris@87
|
204 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
|
Chris@87
|
205 # define PyUnicode_Replace PyUnicodeUCS2_Replace
|
Chris@87
|
206 # define PyUnicode_Resize PyUnicodeUCS2_Resize
|
Chris@87
|
207 # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
|
Chris@87
|
208 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
|
Chris@87
|
209 # define PyUnicode_Split PyUnicodeUCS2_Split
|
Chris@87
|
210 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
|
Chris@87
|
211 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
|
Chris@87
|
212 # define PyUnicode_Translate PyUnicodeUCS2_Translate
|
Chris@87
|
213 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
|
Chris@87
|
214 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
|
Chris@87
|
215 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
|
Chris@87
|
216 # define _PyUnicode_Init _PyUnicodeUCS2_Init
|
Chris@87
|
217 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
|
Chris@87
|
218 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
|
Chris@87
|
219 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
|
Chris@87
|
220 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
|
Chris@87
|
221 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
|
Chris@87
|
222 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
|
Chris@87
|
223 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
|
Chris@87
|
224 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
|
Chris@87
|
225 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
|
Chris@87
|
226 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
|
Chris@87
|
227 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
|
Chris@87
|
228 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
|
Chris@87
|
229 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
|
Chris@87
|
230 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
|
Chris@87
|
231 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
|
Chris@87
|
232
|
Chris@87
|
233 #else
|
Chris@87
|
234
|
Chris@87
|
235 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
|
Chris@87
|
236 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
|
Chris@87
|
237 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
|
Chris@87
|
238 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
|
Chris@87
|
239 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
|
Chris@87
|
240 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
|
Chris@87
|
241 # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
|
Chris@87
|
242 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
|
Chris@87
|
243 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
|
Chris@87
|
244 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
|
Chris@87
|
245 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
|
Chris@87
|
246 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
|
Chris@87
|
247 # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
|
Chris@87
|
248 # define PyUnicode_Compare PyUnicodeUCS4_Compare
|
Chris@87
|
249 # define PyUnicode_Concat PyUnicodeUCS4_Concat
|
Chris@87
|
250 # define PyUnicode_Contains PyUnicodeUCS4_Contains
|
Chris@87
|
251 # define PyUnicode_Count PyUnicodeUCS4_Count
|
Chris@87
|
252 # define PyUnicode_Decode PyUnicodeUCS4_Decode
|
Chris@87
|
253 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
|
Chris@87
|
254 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
|
Chris@87
|
255 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
Chris@87
|
256 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
Chris@87
|
257 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
|
Chris@87
|
258 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
|
Chris@87
|
259 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
Chris@87
|
260 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
|
Chris@87
|
261 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
Chris@87
|
262 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
|
Chris@87
|
263 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
|
Chris@87
|
264 # define PyUnicode_Encode PyUnicodeUCS4_Encode
|
Chris@87
|
265 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
|
Chris@87
|
266 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
|
Chris@87
|
267 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
|
Chris@87
|
268 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
|
Chris@87
|
269 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
|
Chris@87
|
270 # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
|
Chris@87
|
271 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
|
Chris@87
|
272 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
|
Chris@87
|
273 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
|
Chris@87
|
274 # define PyUnicode_Find PyUnicodeUCS4_Find
|
Chris@87
|
275 # define PyUnicode_Format PyUnicodeUCS4_Format
|
Chris@87
|
276 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
|
Chris@87
|
277 # define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
|
Chris@87
|
278 # define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
|
Chris@87
|
279 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
|
Chris@87
|
280 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
|
Chris@87
|
281 # define PyUnicode_FromString PyUnicodeUCS4_FromString
|
Chris@87
|
282 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
|
Chris@87
|
283 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
|
Chris@87
|
284 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
|
Chris@87
|
285 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
|
Chris@87
|
286 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
|
Chris@87
|
287 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
|
Chris@87
|
288 # define PyUnicode_Join PyUnicodeUCS4_Join
|
Chris@87
|
289 # define PyUnicode_Partition PyUnicodeUCS4_Partition
|
Chris@87
|
290 # define PyUnicode_RPartition PyUnicodeUCS4_RPartition
|
Chris@87
|
291 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit
|
Chris@87
|
292 # define PyUnicode_Replace PyUnicodeUCS4_Replace
|
Chris@87
|
293 # define PyUnicode_Resize PyUnicodeUCS4_Resize
|
Chris@87
|
294 # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
|
Chris@87
|
295 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
|
Chris@87
|
296 # define PyUnicode_Split PyUnicodeUCS4_Split
|
Chris@87
|
297 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
|
Chris@87
|
298 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
|
Chris@87
|
299 # define PyUnicode_Translate PyUnicodeUCS4_Translate
|
Chris@87
|
300 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
|
Chris@87
|
301 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
|
Chris@87
|
302 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
|
Chris@87
|
303 # define _PyUnicode_Init _PyUnicodeUCS4_Init
|
Chris@87
|
304 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
|
Chris@87
|
305 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
|
Chris@87
|
306 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
|
Chris@87
|
307 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
|
Chris@87
|
308 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
|
Chris@87
|
309 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
|
Chris@87
|
310 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
|
Chris@87
|
311 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
|
Chris@87
|
312 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
|
Chris@87
|
313 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
|
Chris@87
|
314 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
|
Chris@87
|
315 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
|
Chris@87
|
316 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
|
Chris@87
|
317 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
|
Chris@87
|
318 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
|
Chris@87
|
319
|
Chris@87
|
320
|
Chris@87
|
321 #endif
|
Chris@87
|
322
|
Chris@87
|
323 /* --- Internal Unicode Operations ---------------------------------------- */
|
Chris@87
|
324
|
Chris@87
|
325 /* If you want Python to use the compiler's wctype.h functions instead
|
Chris@87
|
326 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
|
Chris@87
|
327 configure Python using --with-wctype-functions. This reduces the
|
Chris@87
|
328 interpreter's code size. */
|
Chris@87
|
329
|
Chris@87
|
330 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
|
Chris@87
|
331
|
Chris@87
|
332 #include <wctype.h>
|
Chris@87
|
333
|
Chris@87
|
334 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
|
Chris@87
|
335
|
Chris@87
|
336 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
|
Chris@87
|
337 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
|
Chris@87
|
338 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
|
Chris@87
|
339 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
|
Chris@87
|
340
|
Chris@87
|
341 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
|
Chris@87
|
342 #define Py_UNICODE_TOUPPER(ch) towupper(ch)
|
Chris@87
|
343 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
|
Chris@87
|
344
|
Chris@87
|
345 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
|
Chris@87
|
346 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
|
Chris@87
|
347 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
|
Chris@87
|
348
|
Chris@87
|
349 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
|
Chris@87
|
350 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
|
Chris@87
|
351 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
|
Chris@87
|
352
|
Chris@87
|
353 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
|
Chris@87
|
354
|
Chris@87
|
355 #else
|
Chris@87
|
356
|
Chris@87
|
357 /* Since splitting on whitespace is an important use case, and
|
Chris@87
|
358 whitespace in most situations is solely ASCII whitespace, we
|
Chris@87
|
359 optimize for the common case by using a quick look-up table
|
Chris@87
|
360 _Py_ascii_whitespace (see below) with an inlined check.
|
Chris@87
|
361
|
Chris@87
|
362 */
|
Chris@87
|
363 #define Py_UNICODE_ISSPACE(ch) \
|
Chris@87
|
364 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
|
Chris@87
|
365
|
Chris@87
|
366 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
|
Chris@87
|
367 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
|
Chris@87
|
368 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
|
Chris@87
|
369 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
|
Chris@87
|
370
|
Chris@87
|
371 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
|
Chris@87
|
372 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
|
Chris@87
|
373 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
|
Chris@87
|
374
|
Chris@87
|
375 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
|
Chris@87
|
376 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
|
Chris@87
|
377 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
|
Chris@87
|
378
|
Chris@87
|
379 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
|
Chris@87
|
380 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
|
Chris@87
|
381 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
|
Chris@87
|
382
|
Chris@87
|
383 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
|
Chris@87
|
384
|
Chris@87
|
385 #endif
|
Chris@87
|
386
|
Chris@87
|
/* True when ch is alphanumeric, i.e. when any one of the ISALPHA,
   ISDECIMAL, ISDIGIT or ISNUMERIC tests accepts it.  The tests are tried
   in that order and stop at the first hit.  ch is expanded once per test,
   so it must not carry side effects. */
#define Py_UNICODE_ISALNUM(ch)                                  \
    (Py_UNICODE_ISALPHA(ch) || Py_UNICODE_ISDECIMAL(ch) ||      \
     Py_UNICODE_ISDIGIT(ch) || Py_UNICODE_ISNUMERIC(ch))
|
Chris@87
|
392
|
Chris@87
|
/* Copy `length` Py_UNICODE code units (a count of elements, not bytes)
   from source to target.  The byte count is computed here and the copy
   itself is delegated to Py_MEMCPY. */
#define Py_UNICODE_COPY(target, source, length)                 \
    Py_MEMCPY((target), (source), (length) * sizeof(Py_UNICODE))
|
Chris@87
|
395
|
Chris@87
|
/* Store `value` into the first `length` code units of `target`.

   Every argument is evaluated exactly once, in the order target, value,
   length, and captured in a local before the loop runs.  Passing an
   expression with side effects or a non-trivial cost (for example a
   function call as the length) is therefore safe.  A length of zero or
   less writes nothing.  Usable as a single statement thanks to the
   do { ... } while (0) wrapper. */
#define Py_UNICODE_FILL(target, value, length) \
    do { \
        Py_UNICODE *t_ = (target); \
        Py_UNICODE v_ = (value); \
        Py_ssize_t n_ = (length); \
        Py_ssize_t i_; \
        for (i_ = 0; i_ < n_; i_++) \
            t_[i_] = v_; \
    } while (0)
|
Chris@87
|
400
|
Chris@87
|
/* Test whether `substring` occurs in `string` starting at `offset`.
   The offset must be valid and the substring must not be empty.  The
   first and the last code unit are compared before the whole substring
   is handed to memcmp().  Both object arguments are accessed through
   their `str` and `length` members and are expanded several times. */
#define Py_UNICODE_MATCH(string, offset, substring) \
    (((string)->str[(offset)] == (substring)->str[0]) && \
     ((string)->str[(offset) + (substring)->length - 1] == \
      (substring)->str[(substring)->length - 1]) && \
     !memcmp(&(string)->str[(offset)], (substring)->str, \
             (substring)->length * sizeof(Py_UNICODE)))
|
Chris@87
|
408
|
Chris@87
|
409 #ifdef __cplusplus
|
Chris@87
|
410 extern "C" {
|
Chris@87
|
411 #endif
|
Chris@87
|
412
|
Chris@87
|
413 /* --- Unicode Type ------------------------------------------------------- */
|
Chris@87
|
414
|
Chris@87
|
415 typedef struct {
|
Chris@87
|
416 PyObject_HEAD
|
Chris@87
|
417 Py_ssize_t length; /* Length of raw Unicode data in buffer */
|
Chris@87
|
418 Py_UNICODE *str; /* Raw Unicode buffer */
|
Chris@87
|
419 long hash; /* Hash value; -1 if not set */
|
Chris@87
|
420 PyObject *defenc; /* (Default) Encoded version as Python
|
Chris@87
|
421 string, or NULL; this is used for
|
Chris@87
|
422 implementing the buffer protocol */
|
Chris@87
|
423 } PyUnicodeObject;
|
Chris@87
|
424
|
Chris@87
|
425 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
|
Chris@87
|
426
|
Chris@87
|
427 #define PyUnicode_Check(op) \
|
Chris@87
|
428 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
|
Chris@87
|
429 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
|
Chris@87
|
430
|
Chris@87
|
/* Fast access macros.  None of them checks its argument: op is cast
   straight to PyUnicodeObject * and the member is read directly. */

/* The `length` member of op. */
#define PyUnicode_GET_SIZE(op)      (((PyUnicodeObject *)(op))->length)

/* The `length` member of op multiplied by sizeof(Py_UNICODE). */
#define PyUnicode_GET_DATA_SIZE(op) \
    (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))

/* The `str` member of op, i.e. its Py_UNICODE buffer. */
#define PyUnicode_AS_UNICODE(op)    (((PyUnicodeObject *)(op))->str)

/* The same buffer pointer, viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)((PyUnicodeObject *)(op))->str)
|
Chris@87
|
440
|
Chris@87
|
441 /* --- Constants ---------------------------------------------------------- */
|
Chris@87
|
442
|
Chris@87
|
443 /* This Unicode character will be used as replacement character during
|
Chris@87
|
444 decoding if the errors argument is set to "replace". Note: the
|
Chris@87
|
445 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
|
Chris@87
|
446 Unicode 3.0. */
|
Chris@87
|
447
|
Chris@87
|
448 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
|
Chris@87
|
449
|
Chris@87
|
450 /* === Public API ========================================================= */
|
Chris@87
|
451
|
Chris@87
|
452 /* --- Plain Py_UNICODE --------------------------------------------------- */
|
Chris@87
|
453
|
Chris@87
|
454 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
|
Chris@87
|
455 size.
|
Chris@87
|
456
|
Chris@87
|
457 u may be NULL which causes the contents to be undefined. It is the
|
Chris@87
|
458 user's responsibility to fill in the needed data afterwards. Note
|
Chris@87
|
459 that modifying the Unicode object contents after construction is
|
Chris@87
|
460 only allowed if u was set to NULL.
|
Chris@87
|
461
|
Chris@87
|
462 The buffer is copied into the new object. */
|
Chris@87
|
463
|
Chris@87
|
464 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
|
Chris@87
|
465 const Py_UNICODE *u, /* Unicode buffer */
|
Chris@87
|
466 Py_ssize_t size /* size of buffer */
|
Chris@87
|
467 );
|
Chris@87
|
468
|
Chris@87
|
469 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
|
Chris@87
|
470 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
|
Chris@87
|
471 const char *u, /* char buffer */
|
Chris@87
|
472 Py_ssize_t size /* size of buffer */
|
Chris@87
|
473 );
|
Chris@87
|
474
|
Chris@87
|
475 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
|
Chris@87
|
476 UTF-8 encoded bytes */
|
Chris@87
|
477 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
|
Chris@87
|
478 const char *u /* string */
|
Chris@87
|
479 );
|
Chris@87
|
480
|
Chris@87
|
481 /* Return a read-only pointer to the Unicode object's internal
|
Chris@87
|
482 Py_UNICODE buffer. */
|
Chris@87
|
483
|
Chris@87
|
484 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
|
Chris@87
|
485 PyObject *unicode /* Unicode object */
|
Chris@87
|
486 );
|
Chris@87
|
487
|
Chris@87
|
488 /* Get the length of the Unicode object. */
|
Chris@87
|
489
|
Chris@87
|
490 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
|
Chris@87
|
491 PyObject *unicode /* Unicode object */
|
Chris@87
|
492 );
|
Chris@87
|
493
|
Chris@87
|
494 /* Get the maximum ordinal for a Unicode character. */
|
Chris@87
|
495 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
|
Chris@87
|
496
|
Chris@87
|
497 /* Resize an already allocated Unicode object to the new size length.
|
Chris@87
|
498
|
Chris@87
|
499 *unicode is modified to point to the new (resized) object and 0
|
Chris@87
|
500 returned on success.
|
Chris@87
|
501
|
Chris@87
|
502 This API may only be called by the function which also called the
|
Chris@87
|
503 Unicode constructor. The refcount on the object must be 1. Otherwise,
|
Chris@87
|
504 an error is returned.
|
Chris@87
|
505
|
Chris@87
|
506 Error handling is implemented as follows: an exception is set, -1
|
Chris@87
|
507 is returned and *unicode left untouched.
|
Chris@87
|
508
|
Chris@87
|
509 */
|
Chris@87
|
510
|
Chris@87
|
511 PyAPI_FUNC(int) PyUnicode_Resize(
|
Chris@87
|
512 PyObject **unicode, /* Pointer to the Unicode object */
|
Chris@87
|
513 Py_ssize_t length /* New length */
|
Chris@87
|
514 );
|
Chris@87
|
515
|
Chris@87
|
516 /* Coerce obj to a Unicode object and return a reference with
|
Chris@87
|
517 *incremented* refcount.
|
Chris@87
|
518
|
Chris@87
|
519 Coercion is done in the following way:
|
Chris@87
|
520
|
Chris@87
|
521 1. String and other char buffer compatible objects are decoded
|
Chris@87
|
522 under the assumptions that they contain data using the current
|
Chris@87
|
523 default encoding. Decoding is done in "strict" mode.
|
Chris@87
|
524
|
Chris@87
|
525 2. All other objects (including Unicode objects) raise an
|
Chris@87
|
526 exception.
|
Chris@87
|
527
|
Chris@87
|
528 The API returns NULL in case of an error. The caller is responsible
|
Chris@87
|
529 for decref'ing the returned objects.
|
Chris@87
|
530
|
Chris@87
|
531 */
|
Chris@87
|
532
|
Chris@87
|
533 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
|
Chris@87
|
534 register PyObject *obj, /* Object */
|
Chris@87
|
535 const char *encoding, /* encoding */
|
Chris@87
|
536 const char *errors /* error handling */
|
Chris@87
|
537 );
|
Chris@87
|
538
|
Chris@87
|
539 /* Coerce obj to a Unicode object and return a reference with
|
Chris@87
|
540 *incremented* refcount.
|
Chris@87
|
541
|
Chris@87
|
542 Unicode objects are passed back as-is (subclasses are converted to
|
Chris@87
|
543 true Unicode objects), all other objects are delegated to
|
Chris@87
|
544 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
|
Chris@87
|
545 using the default encoding as basis for decoding the object.
|
Chris@87
|
546
|
Chris@87
|
547 The API returns NULL in case of an error. The caller is responsible
|
Chris@87
|
548 for decref'ing the returned objects.
|
Chris@87
|
549
|
Chris@87
|
550 */
|
Chris@87
|
551
|
Chris@87
|
552 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
|
Chris@87
|
553 register PyObject *obj /* Object */
|
Chris@87
|
554 );
|
Chris@87
|
555
|
Chris@87
|
556 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
|
Chris@87
|
557 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
|
Chris@87
|
558
|
Chris@87
|
559 /* Format the object based on the format_spec, as defined in PEP 3101
|
Chris@87
|
560 (Advanced String Formatting). */
|
Chris@87
|
561 PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
|
Chris@87
|
562 Py_UNICODE *format_spec,
|
Chris@87
|
563 Py_ssize_t format_spec_len);
|
Chris@87
|
564
|
Chris@87
|
565 /* --- wchar_t support for platforms which support it --------------------- */
|
Chris@87
|
566
|
Chris@87
|
567 #ifdef HAVE_WCHAR_H
|
Chris@87
|
568
|
Chris@87
|
569 /* Create a Unicode Object from the wchar_t buffer w of the given
|
Chris@87
|
570 size.
|
Chris@87
|
571
|
Chris@87
|
572 The buffer is copied into the new object. */
|
Chris@87
|
573
|
Chris@87
|
574 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
|
Chris@87
|
575 register const wchar_t *w, /* wchar_t buffer */
|
Chris@87
|
576 Py_ssize_t size /* size of buffer */
|
Chris@87
|
577 );
|
Chris@87
|
578
|
Chris@87
|
579 /* Copies the Unicode Object contents into the wchar_t buffer w. At
|
Chris@87
|
580 most size wchar_t characters are copied.
|
Chris@87
|
581
|
Chris@87
|
582 Note that the resulting wchar_t string may or may not be
|
Chris@87
|
583 0-terminated. It is the responsibility of the caller to make sure
|
Chris@87
|
584 that the wchar_t string is 0-terminated in case this is required by
|
Chris@87
|
585 the application.
|
Chris@87
|
586
|
Chris@87
|
587 Returns the number of wchar_t characters copied (excluding a
|
Chris@87
|
588 possibly trailing 0-termination character) or -1 in case of an
|
Chris@87
|
589 error. */
|
Chris@87
|
590
|
Chris@87
|
591 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
|
Chris@87
|
592 PyUnicodeObject *unicode, /* Unicode object */
|
Chris@87
|
593 register wchar_t *w, /* wchar_t buffer */
|
Chris@87
|
594 Py_ssize_t size /* size of buffer */
|
Chris@87
|
595 );
|
Chris@87
|
596
|
Chris@87
|
597 #endif
|
Chris@87
|
598
|
Chris@87
|
599 /* --- Unicode ordinals --------------------------------------------------- */
|
Chris@87
|
600
|
Chris@87
|
601 /* Create a Unicode Object from the given Unicode code point ordinal.
|
Chris@87
|
602
|
Chris@87
|
603 The ordinal must be in range(0x10000) on narrow Python builds
|
Chris@87
|
604 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
|
Chris@87
|
605 raised in case it is not.
|
Chris@87
|
606
|
Chris@87
|
607 */
|
Chris@87
|
608
|
Chris@87
|
609 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
|
Chris@87
|
610
|
Chris@87
|
611 /* --- Free-list management ----------------------------------------------- */
|
Chris@87
|
612
|
Chris@87
|
613 /* Clear the free list used by the Unicode implementation.
|
Chris@87
|
614
|
Chris@87
|
615 This can be used to release memory used for objects on the free
|
Chris@87
|
616 list back to the Python memory allocator.
|
Chris@87
|
617
|
Chris@87
|
618 */
|
Chris@87
|
619
|
Chris@87
|
620 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
|
Chris@87
|
621
|
Chris@87
|
622 /* === Builtin Codecs =====================================================
|
Chris@87
|
623
|
Chris@87
|
624 Many of these APIs take two arguments encoding and errors. These
|
Chris@87
|
625 parameters encoding and errors have the same semantics as the ones
|
Chris@87
|
626 of the builtin unicode() API.
|
Chris@87
|
627
|
Chris@87
|
628 Setting encoding to NULL causes the default encoding to be used.
|
Chris@87
|
629
|
Chris@87
|
630 Error handling is set by errors which may also be set to NULL
|
Chris@87
|
631 meaning to use the default handling defined for the codec. Default
|
Chris@87
|
632 error handling for all builtin codecs is "strict" (ValueErrors are
|
Chris@87
|
633 raised).
|
Chris@87
|
634
|
Chris@87
|
635 The codecs all use a similar interface. Only deviations from the
|
Chris@87
|
636 generic ones are documented.
|
Chris@87
|
637
|
Chris@87
|
638 */
|
Chris@87
|
639
|
Chris@87
|
640 /* --- Manage the default encoding ---------------------------------------- */
|
Chris@87
|
641
|
Chris@87
|
642 /* Return a Python string holding the default encoded value of the
|
Chris@87
|
643 Unicode object.
|
Chris@87
|
644
|
Chris@87
|
645 The resulting string is cached in the Unicode object for subsequent
|
Chris@87
|
646 usage by this function. The cached version is needed to implement
|
Chris@87
|
647 the character buffer interface and will live (at least) as long as
|
Chris@87
|
648 the Unicode object itself.
|
Chris@87
|
649
|
Chris@87
|
650 The refcount of the string is *not* incremented.
|
Chris@87
|
651
|
Chris@87
|
652 *** Exported for internal use by the interpreter only !!! ***
|
Chris@87
|
653
|
Chris@87
|
654 */
|
Chris@87
|
655
|
Chris@87
|
656 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
|
Chris@87
|
657 PyObject *, const char *);
|
Chris@87
|
658
|
Chris@87
|
659 /* Returns the currently active default encoding.
|
Chris@87
|
660
|
Chris@87
|
661 The default encoding is currently implemented as run-time settable
|
Chris@87
|
662 process global. This may change in future versions of the
|
Chris@87
|
663 interpreter to become a parameter which is managed on a per-thread
|
Chris@87
|
664 basis.
|
Chris@87
|
665
|
Chris@87
|
666 */
|
Chris@87
|
667
|
Chris@87
|
668 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
|
Chris@87
|
669
|
Chris@87
|
670 /* Sets the currently active default encoding.
|
Chris@87
|
671
|
Chris@87
|
672 Returns 0 on success, -1 in case of an error.
|
Chris@87
|
673
|
Chris@87
|
674 */
|
Chris@87
|
675
|
Chris@87
|
676 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
|
Chris@87
|
677 const char *encoding /* Encoding name in standard form */
|
Chris@87
|
678 );
|
Chris@87
|
679
|
Chris@87
|
680 /* --- Generic Codecs ----------------------------------------------------- */
|
Chris@87
|
681
|
Chris@87
|
682 /* Create a Unicode object by decoding the encoded string s of the
|
Chris@87
|
683 given size. */
|
Chris@87
|
684
|
Chris@87
|
685 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
|
Chris@87
|
686 const char *s, /* encoded string */
|
Chris@87
|
687 Py_ssize_t size, /* size of buffer */
|
Chris@87
|
688 const char *encoding, /* encoding */
|
Chris@87
|
689 const char *errors /* error handling */
|
Chris@87
|
690 );
|
Chris@87
|
691
|
Chris@87
|
692 /* Encodes a Py_UNICODE buffer of the given size and returns a
|
Chris@87
|
693 Python string object. */
|
Chris@87
|
694
|
Chris@87
|
695 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
|
Chris@87
|
696 const Py_UNICODE *s, /* Unicode char buffer */
|
Chris@87
|
697 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
|
Chris@87
|
698 const char *encoding, /* encoding */
|
Chris@87
|
699 const char *errors /* error handling */
|
Chris@87
|
700 );
|
Chris@87
|
701
|
Chris@87
|
702 /* Encodes a Unicode object and returns the result as Python
|
Chris@87
|
703 object. */
|
Chris@87
|
704
|
Chris@87
|
705 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
|
Chris@87
|
706 PyObject *unicode, /* Unicode object */
|
Chris@87
|
707 const char *encoding, /* encoding */
|
Chris@87
|
708 const char *errors /* error handling */
|
Chris@87
|
709 );
|
Chris@87
|
710
|
Chris@87
|
711 /* Encodes a Unicode object and returns the result as Python string
|
Chris@87
|
712 object. */
|
Chris@87
|
713
|
Chris@87
|
714 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
|
Chris@87
|
715 PyObject *unicode, /* Unicode object */
|
Chris@87
|
716 const char *encoding, /* encoding */
|
Chris@87
|
717 const char *errors /* error handling */
|
Chris@87
|
718 );
|
Chris@87
|
719
|
Chris@87
|
720 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
|
Chris@87
|
721 PyObject* string /* 256 character map */
|
Chris@87
|
722 );
|
Chris@87
|
723
|
Chris@87
|
724
|
Chris@87
|
725 /* --- UTF-7 Codecs ------------------------------------------------------- */
|
Chris@87
|
726
|
Chris@87
|
727 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
|
Chris@87
|
728 const char *string, /* UTF-7 encoded string */
|
Chris@87
|
729 Py_ssize_t length, /* size of string */
|
Chris@87
|
730 const char *errors /* error handling */
|
Chris@87
|
731 );
|
Chris@87
|
732
|
Chris@87
|
733 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
|
Chris@87
|
734 const char *string, /* UTF-7 encoded string */
|
Chris@87
|
735 Py_ssize_t length, /* size of string */
|
Chris@87
|
736 const char *errors, /* error handling */
|
Chris@87
|
737 Py_ssize_t *consumed /* bytes consumed */
|
Chris@87
|
738 );
|
Chris@87
|
739
|
Chris@87
|
740 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
|
Chris@87
|
741 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
742 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
Chris@87
|
743 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
|
Chris@87
|
744 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
|
Chris@87
|
745 const char *errors /* error handling */
|
Chris@87
|
746 );
|
Chris@87
|
747
|
Chris@87
|
748 /* --- UTF-8 Codecs ------------------------------------------------------- */
|
Chris@87
|
749
|
Chris@87
|
750 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
|
Chris@87
|
751 const char *string, /* UTF-8 encoded string */
|
Chris@87
|
752 Py_ssize_t length, /* size of string */
|
Chris@87
|
753 const char *errors /* error handling */
|
Chris@87
|
754 );
|
Chris@87
|
755
|
Chris@87
|
756 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
|
Chris@87
|
757 const char *string, /* UTF-8 encoded string */
|
Chris@87
|
758 Py_ssize_t length, /* size of string */
|
Chris@87
|
759 const char *errors, /* error handling */
|
Chris@87
|
760 Py_ssize_t *consumed /* bytes consumed */
|
Chris@87
|
761 );
|
Chris@87
|
762
|
Chris@87
|
763 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
|
Chris@87
|
764 PyObject *unicode /* Unicode object */
|
Chris@87
|
765 );
|
Chris@87
|
766
|
Chris@87
|
767 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
|
Chris@87
|
768 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
769 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
Chris@87
|
770 const char *errors /* error handling */
|
Chris@87
|
771 );
|
Chris@87
|
772
|
Chris@87
|
773 /* --- UTF-32 Codecs ------------------------------------------------------ */
|
Chris@87
|
774
|
Chris@87
|
775 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
|
Chris@87
|
776 the corresponding Unicode object.
|
Chris@87
|
777
|
Chris@87
|
778 errors (if non-NULL) defines the error handling. It defaults
|
Chris@87
|
779 to "strict".
|
Chris@87
|
780
|
Chris@87
|
781 If byteorder is non-NULL, the decoder starts decoding using the
|
Chris@87
|
782 given byte order:
|
Chris@87
|
783
|
Chris@87
|
784 *byteorder == -1: little endian
|
Chris@87
|
785 *byteorder == 0: native order
|
Chris@87
|
786 *byteorder == 1: big endian
|
Chris@87
|
787
|
Chris@87
|
788 In native mode, the first four bytes of the stream are checked for a
|
Chris@87
|
789 BOM mark. If found, the BOM mark is analysed, the byte order
|
Chris@87
|
790 adjusted and the BOM skipped. In the other modes, no BOM mark
|
Chris@87
|
791 interpretation is done. After completion, *byteorder is set to the
|
Chris@87
|
792 current byte order at the end of input data.
|
Chris@87
|
793
|
Chris@87
|
794 If byteorder is NULL, the codec starts in native order mode.
|
Chris@87
|
795
|
Chris@87
|
796 */
|
Chris@87
|
797
|
Chris@87
|
798 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
|
Chris@87
|
799 const char *string, /* UTF-32 encoded string */
|
Chris@87
|
800 Py_ssize_t length, /* size of string */
|
Chris@87
|
801 const char *errors, /* error handling */
|
Chris@87
|
802 int *byteorder /* pointer to byteorder to use
|
Chris@87
|
803 0=native;-1=LE,1=BE; updated on
|
Chris@87
|
804 exit */
|
Chris@87
|
805 );
|
Chris@87
|
806
|
Chris@87
|
807 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
|
Chris@87
|
808 const char *string, /* UTF-32 encoded string */
|
Chris@87
|
809 Py_ssize_t length, /* size of string */
|
Chris@87
|
810 const char *errors, /* error handling */
|
Chris@87
|
811 int *byteorder, /* pointer to byteorder to use
|
Chris@87
|
812 0=native;-1=LE,1=BE; updated on
|
Chris@87
|
813 exit */
|
Chris@87
|
814 Py_ssize_t *consumed /* bytes consumed */
|
Chris@87
|
815 );
|
Chris@87
|
816
|
Chris@87
|
817 /* Returns a Python string using the UTF-32 encoding in native byte
|
Chris@87
|
818 order. The string always starts with a BOM mark. */
|
Chris@87
|
819
|
Chris@87
|
820 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
|
Chris@87
|
821 PyObject *unicode /* Unicode object */
|
Chris@87
|
822 );
|
Chris@87
|
823
|
Chris@87
|
824 /* Returns a Python string object holding the UTF-32 encoded value of
|
Chris@87
|
825 the Unicode data.
|
Chris@87
|
826
|
Chris@87
|
827 If byteorder is not 0, output is written according to the following
|
Chris@87
|
828 byte order:
|
Chris@87
|
829
|
Chris@87
|
830 byteorder == -1: little endian
|
Chris@87
|
831 byteorder == 0: native byte order (writes a BOM mark)
|
Chris@87
|
832 byteorder == 1: big endian
|
Chris@87
|
833
|
Chris@87
|
834 If byteorder is 0, the output string will always start with the
|
Chris@87
|
835 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
|
Chris@87
|
836 prepended.
|
Chris@87
|
837
|
Chris@87
|
838 */
|
Chris@87
|
839
|
Chris@87
|
840 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
|
Chris@87
|
841 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
842 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
Chris@87
|
843 const char *errors, /* error handling */
|
Chris@87
|
844 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
|
Chris@87
|
845 );
|
Chris@87
|
846
|
Chris@87
|
847 /* --- UTF-16 Codecs ------------------------------------------------------ */
|
Chris@87
|
848
|
Chris@87
|
849 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
|
Chris@87
|
850 the corresponding Unicode object.
|
Chris@87
|
851
|
Chris@87
|
852 errors (if non-NULL) defines the error handling. It defaults
|
Chris@87
|
853 to "strict".
|
Chris@87
|
854
|
Chris@87
|
855 If byteorder is non-NULL, the decoder starts decoding using the
|
Chris@87
|
856 given byte order:
|
Chris@87
|
857
|
Chris@87
|
858 *byteorder == -1: little endian
|
Chris@87
|
859 *byteorder == 0: native order
|
Chris@87
|
860 *byteorder == 1: big endian
|
Chris@87
|
861
|
Chris@87
|
862 In native mode, the first two bytes of the stream are checked for a
|
Chris@87
|
863 BOM mark. If found, the BOM mark is analysed, the byte order
|
Chris@87
|
864 adjusted and the BOM skipped. In the other modes, no BOM mark
|
Chris@87
|
865 interpretation is done. After completion, *byteorder is set to the
|
Chris@87
|
866 current byte order at the end of input data.
|
Chris@87
|
867
|
Chris@87
|
868 If byteorder is NULL, the codec starts in native order mode.
|
Chris@87
|
869
|
Chris@87
|
870 */
|
Chris@87
|
871
|
Chris@87
|
872 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
|
Chris@87
|
873 const char *string, /* UTF-16 encoded string */
|
Chris@87
|
874 Py_ssize_t length, /* size of string */
|
Chris@87
|
875 const char *errors, /* error handling */
|
Chris@87
|
876 int *byteorder /* pointer to byteorder to use
|
Chris@87
|
877 0=native;-1=LE,1=BE; updated on
|
Chris@87
|
878 exit */
|
Chris@87
|
879 );
|
Chris@87
|
880
|
Chris@87
|
881 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
|
Chris@87
|
882 const char *string, /* UTF-16 encoded string */
|
Chris@87
|
883 Py_ssize_t length, /* size of string */
|
Chris@87
|
884 const char *errors, /* error handling */
|
Chris@87
|
885 int *byteorder, /* pointer to byteorder to use
|
Chris@87
|
886 0=native;-1=LE,1=BE; updated on
|
Chris@87
|
887 exit */
|
Chris@87
|
888 Py_ssize_t *consumed /* bytes consumed */
|
Chris@87
|
889 );
|
Chris@87
|
890
|
Chris@87
|
891 /* Returns a Python string using the UTF-16 encoding in native byte
|
Chris@87
|
892 order. The string always starts with a BOM mark. */
|
Chris@87
|
893
|
Chris@87
|
894 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
|
Chris@87
|
895 PyObject *unicode /* Unicode object */
|
Chris@87
|
896 );
|
Chris@87
|
897
|
Chris@87
|
898 /* Returns a Python string object holding the UTF-16 encoded value of
|
Chris@87
|
899 the Unicode data.
|
Chris@87
|
900
|
Chris@87
|
901 If byteorder is not 0, output is written according to the following
|
Chris@87
|
902 byte order:
|
Chris@87
|
903
|
Chris@87
|
904 byteorder == -1: little endian
|
Chris@87
|
905 byteorder == 0: native byte order (writes a BOM mark)
|
Chris@87
|
906 byteorder == 1: big endian
|
Chris@87
|
907
|
Chris@87
|
908 If byteorder is 0, the output string will always start with the
|
Chris@87
|
909 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
|
Chris@87
|
910 prepended.
|
Chris@87
|
911
|
Chris@87
|
912 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
|
Chris@87
|
913 UCS-2. This trick makes it possible to add full UTF-16 capabilities
|
Chris@87
|
914 at a later point without compromising the APIs.
|
Chris@87
|
915
|
Chris@87
|
916 */
|
Chris@87
|
917
|
Chris@87
|
918 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
|
Chris@87
|
919 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
920 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
Chris@87
|
921 const char *errors, /* error handling */
|
Chris@87
|
922 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
|
Chris@87
|
923 );
|
Chris@87
|
924
|
Chris@87
|
925 /* --- Unicode-Escape Codecs ---------------------------------------------- */
|
Chris@87
|
926
|
Chris@87
|
927 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
|
Chris@87
|
928 const char *string, /* Unicode-Escape encoded string */
|
Chris@87
|
929 Py_ssize_t length, /* size of string */
|
Chris@87
|
930 const char *errors /* error handling */
|
Chris@87
|
931 );
|
Chris@87
|
932
|
Chris@87
|
933 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
|
Chris@87
|
934 PyObject *unicode /* Unicode object */
|
Chris@87
|
935 );
|
Chris@87
|
936
|
Chris@87
|
937 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
|
Chris@87
|
938 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
939 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
940 );
|
Chris@87
|
941
|
Chris@87
|
942 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
|
Chris@87
|
943
|
Chris@87
|
944 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
|
Chris@87
|
945 const char *string, /* Raw-Unicode-Escape encoded string */
|
Chris@87
|
946 Py_ssize_t length, /* size of string */
|
Chris@87
|
947 const char *errors /* error handling */
|
Chris@87
|
948 );
|
Chris@87
|
949
|
Chris@87
|
950 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
|
Chris@87
|
951 PyObject *unicode /* Unicode object */
|
Chris@87
|
952 );
|
Chris@87
|
953
|
Chris@87
|
954 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
|
Chris@87
|
955 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
956 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
957 );
|
Chris@87
|
958
|
Chris@87
|
959 /* --- Unicode Internal Codec ---------------------------------------------
|
Chris@87
|
960
|
Chris@87
|
961 Only for internal use in _codecsmodule.c */
|
Chris@87
|
962
|
Chris@87
|
963 PyObject *_PyUnicode_DecodeUnicodeInternal(
|
Chris@87
|
964 const char *string,
|
Chris@87
|
965 Py_ssize_t length,
|
Chris@87
|
966 const char *errors
|
Chris@87
|
967 );
|
Chris@87
|
968
|
Chris@87
|
969 /* --- Latin-1 Codecs -----------------------------------------------------
|
Chris@87
|
970
|
Chris@87
|
971 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
|
Chris@87
|
972
|
Chris@87
|
973 */
|
Chris@87
|
974
|
Chris@87
|
975 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
|
Chris@87
|
976 const char *string, /* Latin-1 encoded string */
|
Chris@87
|
977 Py_ssize_t length, /* size of string */
|
Chris@87
|
978 const char *errors /* error handling */
|
Chris@87
|
979 );
|
Chris@87
|
980
|
Chris@87
|
981 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
|
Chris@87
|
982 PyObject *unicode /* Unicode object */
|
Chris@87
|
983 );
|
Chris@87
|
984
|
Chris@87
|
985 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
|
Chris@87
|
986 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
987 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
988 const char *errors /* error handling */
|
Chris@87
|
989 );
|
Chris@87
|
990
|
Chris@87
|
991 /* --- ASCII Codecs -------------------------------------------------------
|
Chris@87
|
992
|
Chris@87
|
993 Only 7-bit ASCII data is accepted. All other codes generate errors.
|
Chris@87
|
994
|
Chris@87
|
995 */
|
Chris@87
|
996
|
Chris@87
|
997 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
|
Chris@87
|
998 const char *string, /* ASCII encoded string */
|
Chris@87
|
999 Py_ssize_t length, /* size of string */
|
Chris@87
|
1000 const char *errors /* error handling */
|
Chris@87
|
1001 );
|
Chris@87
|
1002
|
Chris@87
|
1003 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
|
Chris@87
|
1004 PyObject *unicode /* Unicode object */
|
Chris@87
|
1005 );
|
Chris@87
|
1006
|
Chris@87
|
1007 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
|
Chris@87
|
1008 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
1009 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
1010 const char *errors /* error handling */
|
Chris@87
|
1011 );
|
Chris@87
|
1012
|
Chris@87
|
1013 /* --- Character Map Codecs -----------------------------------------------
|
Chris@87
|
1014
|
Chris@87
|
1015 This codec uses mappings to encode and decode characters.
|
Chris@87
|
1016
|
Chris@87
|
1017 Decoding mappings must map single string characters to single
|
Chris@87
|
1018 Unicode characters, integers (which are then interpreted as Unicode
|
Chris@87
|
1019 ordinals) or None (meaning "undefined mapping" and causing an
|
Chris@87
|
1020 error).
|
Chris@87
|
1021
|
Chris@87
|
1022 Encoding mappings must map single Unicode characters to single
|
Chris@87
|
1023 string characters, integers (which are then interpreted as Latin-1
|
Chris@87
|
1024 ordinals) or None (meaning "undefined mapping" and causing an
|
Chris@87
|
1025 error).
|
Chris@87
|
1026
|
Chris@87
|
1027 If a character lookup fails with a LookupError, the character is
|
Chris@87
|
1028 copied as-is meaning that its ordinal value will be interpreted as
|
Chris@87
|
1029 Unicode or Latin-1 ordinal resp. Because of this mappings only need
|
Chris@87
|
1030 to contain those mappings which map characters to different code
|
Chris@87
|
1031 points.
|
Chris@87
|
1032
|
Chris@87
|
1033 */
|
Chris@87
|
1034
|
Chris@87
|
1035 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
|
Chris@87
|
1036 const char *string, /* Encoded string */
|
Chris@87
|
1037 Py_ssize_t length, /* size of string */
|
Chris@87
|
1038 PyObject *mapping, /* character mapping
|
Chris@87
|
1039 (char ordinal -> unicode ordinal) */
|
Chris@87
|
1040 const char *errors /* error handling */
|
Chris@87
|
1041 );
|
Chris@87
|
1042
|
Chris@87
|
1043 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
|
Chris@87
|
1044 PyObject *unicode, /* Unicode object */
|
Chris@87
|
1045 PyObject *mapping /* character mapping
|
Chris@87
|
1046 (unicode ordinal -> char ordinal) */
|
Chris@87
|
1047 );
|
Chris@87
|
1048
|
Chris@87
|
1049 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
|
Chris@87
|
1050 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
1051 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
1052 PyObject *mapping, /* character mapping
|
Chris@87
|
1053 (unicode ordinal -> char ordinal) */
|
Chris@87
|
1054 const char *errors /* error handling */
|
Chris@87
|
1055 );
|
Chris@87
|
1056
|
Chris@87
|
1057 /* Translate a Py_UNICODE buffer of the given length by applying a
|
Chris@87
|
1058 character mapping table to it and return the resulting Unicode
|
Chris@87
|
1059 object.
|
Chris@87
|
1060
|
Chris@87
|
1061 The mapping table must map Unicode ordinal integers to Unicode
|
Chris@87
|
1062 ordinal integers or None (causing deletion of the character).
|
Chris@87
|
1063
|
Chris@87
|
1064 Mapping tables may be dictionaries or sequences. Unmapped character
|
Chris@87
|
1065 ordinals (ones which cause a LookupError) are left untouched and
|
Chris@87
|
1066 are copied as-is.
|
Chris@87
|
1067
|
Chris@87
|
1068 */
|
Chris@87
|
1069
|
Chris@87
|
1070 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
|
Chris@87
|
1071 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
1072 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
1073 PyObject *table, /* Translate table */
|
Chris@87
|
1074 const char *errors /* error handling */
|
Chris@87
|
1075 );
|
Chris@87
|
1076
|
Chris@87
|
1077 #ifdef MS_WIN32
|
Chris@87
|
1078
|
Chris@87
|
1079 /* --- MBCS codecs for Windows -------------------------------------------- */
|
Chris@87
|
1080
|
Chris@87
|
1081 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
|
Chris@87
|
1082 const char *string, /* MBCS encoded string */
|
Chris@87
|
1083 Py_ssize_t length, /* size of string */
|
Chris@87
|
1084 const char *errors /* error handling */
|
Chris@87
|
1085 );
|
Chris@87
|
1086
|
Chris@87
|
1087 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
|
Chris@87
|
1088 const char *string, /* MBCS encoded string */
|
Chris@87
|
1089 Py_ssize_t length, /* size of string */
|
Chris@87
|
1090 const char *errors, /* error handling */
|
Chris@87
|
1091 Py_ssize_t *consumed /* bytes consumed */
|
Chris@87
|
1092 );
|
Chris@87
|
1093
|
Chris@87
|
1094 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
|
Chris@87
|
1095 PyObject *unicode /* Unicode object */
|
Chris@87
|
1096 );
|
Chris@87
|
1097
|
Chris@87
|
1098 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
|
Chris@87
|
1099 const Py_UNICODE *data, /* Unicode char buffer */
|
Chris@87
|
1100 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
1101 const char *errors /* error handling */
|
Chris@87
|
1102 );
|
Chris@87
|
1103
|
Chris@87
|
1104 #endif /* MS_WIN32 */
|
Chris@87
|
1105
|
Chris@87
|
1106 /* --- Decimal Encoder ---------------------------------------------------- */
|
Chris@87
|
1107
|
Chris@87
|
1108 /* Takes a Unicode string holding a decimal value and writes it into
|
Chris@87
|
1109 an output buffer using standard ASCII digit codes.
|
Chris@87
|
1110
|
Chris@87
|
1111 The output buffer has to provide at least length+1 bytes of storage
|
Chris@87
|
1112 area. The output string is 0-terminated.
|
Chris@87
|
1113
|
Chris@87
|
1114 The encoder converts whitespace to ' ', decimal characters to their
|
Chris@87
|
1115 corresponding ASCII digit and all other Latin-1 characters except
|
Chris@87
|
1116 \0 as-is. Characters outside this range (Unicode ordinals 1-255)
|
Chris@87
|
1117 are treated as errors. This includes embedded NULL bytes.
|
Chris@87
|
1118
|
Chris@87
|
1119 Error handling is defined by the errors argument:
|
Chris@87
|
1120
|
Chris@87
|
1121 NULL or "strict": raise a ValueError
|
Chris@87
|
1122 "ignore": ignore the wrong characters (these are not copied to the
|
Chris@87
|
1123 output buffer)
|
Chris@87
|
1124 "replace": replaces illegal characters with '?'
|
Chris@87
|
1125
|
Chris@87
|
1126 Returns 0 on success, -1 on failure.
|
Chris@87
|
1127
|
Chris@87
|
1128 */
|
Chris@87
|
1129
|
Chris@87
|
1130 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
|
Chris@87
|
1131 Py_UNICODE *s, /* Unicode buffer */
|
Chris@87
|
1132 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
Chris@87
|
1133 char *output, /* Output buffer; must have size >= length+1 */
|
Chris@87
|
1134 const char *errors /* error handling */
|
Chris@87
|
1135 );
|
Chris@87
|
1136
|
Chris@87
|
1137 /* --- Methods & Slots ----------------------------------------------------
|
Chris@87
|
1138
|
Chris@87
|
1139 These are capable of handling Unicode objects and strings on input
|
Chris@87
|
1140 (we refer to them as strings in the descriptions) and return
|
Chris@87
|
1141 Unicode objects or integers as appropriate. */
|
Chris@87
|
1142
|
Chris@87
|
1143 /* Concat two strings giving a new Unicode string. */
|
Chris@87
|
1144
|
Chris@87
|
1145 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
|
Chris@87
|
1146 PyObject *left, /* Left string */
|
Chris@87
|
1147 PyObject *right /* Right string */
|
Chris@87
|
1148 );
|
Chris@87
|
1149
|
Chris@87
|
1150 /* Split a string giving a list of Unicode strings.
|
Chris@87
|
1151
|
Chris@87
|
1152 If sep is NULL, splitting will be done at all whitespace
|
Chris@87
|
1153 substrings. Otherwise, splits occur at the given separator.
|
Chris@87
|
1154
|
Chris@87
|
1155 At most maxsplit splits will be done. If negative, no limit is set.
|
Chris@87
|
1156
|
Chris@87
|
1157 Separators are not included in the resulting list.
|
Chris@87
|
1158
|
Chris@87
|
1159 */
|
Chris@87
|
1160
|
Chris@87
|
1161 PyAPI_FUNC(PyObject*) PyUnicode_Split(
|
Chris@87
|
1162 PyObject *s, /* String to split */
|
Chris@87
|
1163 PyObject *sep, /* String separator */
|
Chris@87
|
1164 Py_ssize_t maxsplit /* Maxsplit count */
|
Chris@87
|
1165 );
|
Chris@87
|
1166
|
Chris@87
|
1167 /* Ditto, but split at line breaks.
|
Chris@87
|
1168
|
Chris@87
|
1169 CRLF is considered to be one line break. Line breaks are not
|
Chris@87
|
1170 included in the resulting list unless keepends is true. */
|
Chris@87
|
1171
|
Chris@87
|
1172 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
|
Chris@87
|
1173 PyObject *s, /* String to split */
|
Chris@87
|
1174 int keepends /* If true, line end markers are included */
|
Chris@87
|
1175 );
|
Chris@87
|
1176
|
Chris@87
|
1177 /* Partition a string using a given separator. */
|
Chris@87
|
1178
|
Chris@87
|
1179 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
|
Chris@87
|
1180 PyObject *s, /* String to partition */
|
Chris@87
|
1181 PyObject *sep /* String separator */
|
Chris@87
|
1182 );
|
Chris@87
|
1183
|
Chris@87
|
1184 /* Partition a string using a given separator, searching from the end of the
|
Chris@87
|
1185 string. */
|
Chris@87
|
1186
|
Chris@87
|
1187 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
|
Chris@87
|
1188 PyObject *s, /* String to partition */
|
Chris@87
|
1189 PyObject *sep /* String separator */
|
Chris@87
|
1190 );
|
Chris@87
|
1191
|
Chris@87
|
1192 /* Split a string giving a list of Unicode strings.
|
Chris@87
|
1193
|
Chris@87
|
1194 If sep is NULL, splitting will be done at all whitespace
|
Chris@87
|
1195 substrings. Otherwise, splits occur at the given separator.
|
Chris@87
|
1196
|
Chris@87
|
1197 At most maxsplit splits will be done. But unlike PyUnicode_Split,
|
Chris@87
|
1198 PyUnicode_RSplit splits from the end of the string. If negative,
|
Chris@87
|
1199 no limit is set.
|
Chris@87
|
1200
|
Chris@87
|
1201 Separators are not included in the resulting list.
|
Chris@87
|
1202
|
Chris@87
|
1203 */
|
Chris@87
|
1204
|
Chris@87
|
1205 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
|
Chris@87
|
1206 PyObject *s, /* String to split */
|
Chris@87
|
1207 PyObject *sep, /* String separator */
|
Chris@87
|
1208 Py_ssize_t maxsplit /* Maxsplit count */
|
Chris@87
|
1209 );
|
Chris@87
|
1210
|
Chris@87
|
1211 /* Translate a string by applying a character mapping table to it and
|
Chris@87
|
1212 return the resulting Unicode object.
|
Chris@87
|
1213
|
Chris@87
|
1214 The mapping table must map Unicode ordinal integers to Unicode
|
Chris@87
|
1215 ordinal integers or None (causing deletion of the character).
|
Chris@87
|
1216
|
Chris@87
|
1217 Mapping tables may be dictionaries or sequences. Unmapped character
|
Chris@87
|
1218 ordinals (ones which cause a LookupError) are left untouched and
|
Chris@87
|
1219 are copied as-is.
|
Chris@87
|
1220
|
Chris@87
|
1221 */
|
Chris@87
|
1222
|
Chris@87
|
1223 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
|
Chris@87
|
1224 PyObject *str, /* String */
|
Chris@87
|
1225 PyObject *table, /* Translate table */
|
Chris@87
|
1226 const char *errors /* error handling */
|
Chris@87
|
1227 );
|
Chris@87
|
1228
|
Chris@87
|
1229 /* Join a sequence of strings using the given separator and return
|
Chris@87
|
1230 the resulting Unicode string. */
|
Chris@87
|
1231
|
Chris@87
|
1232 PyAPI_FUNC(PyObject*) PyUnicode_Join(
|
Chris@87
|
1233 PyObject *separator, /* Separator string */
|
Chris@87
|
1234 PyObject *seq /* Sequence object */
|
Chris@87
|
1235 );
|
Chris@87
|
1236
|
Chris@87
|
1237 /* Return 1 if substr matches str[start:end] at the given tail end, 0
|
Chris@87
|
1238 otherwise. */
|
Chris@87
|
1239
|
Chris@87
|
1240 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
|
Chris@87
|
1241 PyObject *str, /* String */
|
Chris@87
|
1242 PyObject *substr, /* Prefix or Suffix string */
|
Chris@87
|
1243 Py_ssize_t start, /* Start index */
|
Chris@87
|
1244 Py_ssize_t end, /* Stop index */
|
Chris@87
|
1245 int direction /* Tail end: -1 prefix, +1 suffix */
|
Chris@87
|
1246 );
|
Chris@87
|
1247
|
Chris@87
|
1248 /* Return the first position of substr in str[start:end] using the
|
Chris@87
|
1249 given search direction or -1 if not found. -2 is returned in case
|
Chris@87
|
1250 an error occurred and an exception is set. */
|
Chris@87
|
1251
|
Chris@87
|
1252 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
|
Chris@87
|
1253 PyObject *str, /* String */
|
Chris@87
|
1254 PyObject *substr, /* Substring to find */
|
Chris@87
|
1255 Py_ssize_t start, /* Start index */
|
Chris@87
|
1256 Py_ssize_t end, /* Stop index */
|
Chris@87
|
1257 int direction /* Find direction: +1 forward, -1 backward */
|
Chris@87
|
1258 );
|
Chris@87
|
1259
|
Chris@87
|
1260 /* Count the number of occurrences of substr in str[start:end]. */
|
Chris@87
|
1261
|
Chris@87
|
1262 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
|
Chris@87
|
1263 PyObject *str, /* String */
|
Chris@87
|
1264 PyObject *substr, /* Substring to count */
|
Chris@87
|
1265 Py_ssize_t start, /* Start index */
|
Chris@87
|
1266 Py_ssize_t end /* Stop index */
|
Chris@87
|
1267 );
|
Chris@87
|
1268
|
Chris@87
|
1269 /* Replace at most maxcount occurrences of substr in str with replstr
|
Chris@87
|
1270 and return the resulting Unicode object. */
|
Chris@87
|
1271
|
Chris@87
|
1272 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
|
Chris@87
|
1273 PyObject *str, /* String */
|
Chris@87
|
1274 PyObject *substr, /* Substring to find */
|
Chris@87
|
1275 PyObject *replstr, /* Substring to replace */
|
Chris@87
|
1276 Py_ssize_t maxcount /* Max. number of replacements to apply;
|
Chris@87
|
1277 -1 = all */
|
Chris@87
|
1278 );
|
Chris@87
|
1279
|
Chris@87
|
1280 /* Compare two strings and return -1, 0, 1 for less than, equal,
|
Chris@87
|
1281 greater than, respectively. */
|
Chris@87
|
1282
|
Chris@87
|
1283 PyAPI_FUNC(int) PyUnicode_Compare(
|
Chris@87
|
1284 PyObject *left, /* Left string */
|
Chris@87
|
1285 PyObject *right /* Right string */
|
Chris@87
|
1286 );
|
Chris@87
|
1287
|
Chris@87
|
1288 /* Rich compare two strings and return one of the following:
|
Chris@87
|
1289
|
Chris@87
|
1290 - NULL in case an exception was raised
|
Chris@87
|
1291 - Py_True or Py_False for successful comparisons
|
Chris@87
|
1292 - Py_NotImplemented in case the type combination is unknown
|
Chris@87
|
1293
|
Chris@87
|
1294 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
|
Chris@87
|
1295 case the conversion of the arguments to Unicode fails with a
|
Chris@87
|
1296 UnicodeDecodeError.
|
Chris@87
|
1297
|
Chris@87
|
1298 Possible values for op:
|
Chris@87
|
1299
|
Chris@87
|
1300 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
|
Chris@87
|
1301
|
Chris@87
|
1302 */
|
Chris@87
|
1303
|
Chris@87
|
1304 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
|
Chris@87
|
1305 PyObject *left, /* Left string */
|
Chris@87
|
1306 PyObject *right, /* Right string */
|
Chris@87
|
1307 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
|
Chris@87
|
1308 );
|
Chris@87
|
1309
|
Chris@87
|
1310 /* Apply an argument tuple or dictionary to a format string and return
|
Chris@87
|
1311 the resulting Unicode string. */
|
Chris@87
|
1312
|
Chris@87
|
1313 PyAPI_FUNC(PyObject *) PyUnicode_Format(
|
Chris@87
|
1314 PyObject *format, /* Format string */
|
Chris@87
|
1315 PyObject *args /* Argument tuple or dictionary */
|
Chris@87
|
1316 );
|
Chris@87
|
1317
|
Chris@87
|
1318 /* Check whether element is contained in container and return 1/0
|
Chris@87
|
1319 accordingly.
|
Chris@87
|
1320
|
Chris@87
|
1321 element has to coerce to a one-element Unicode string. -1 is
|
Chris@87
|
1322 returned in case of an error. */
|
Chris@87
|
1323
|
Chris@87
|
1324 PyAPI_FUNC(int) PyUnicode_Contains(
|
Chris@87
|
1325 PyObject *container, /* Container string */
|
Chris@87
|
1326 PyObject *element /* Element string */
|
Chris@87
|
1327 );
|
Chris@87
|
1328
|
Chris@87
|
1329 /* Externally visible for str.strip(unicode) */
|
Chris@87
|
1330 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
|
Chris@87
|
1331 PyUnicodeObject *self,
|
Chris@87
|
1332 int striptype,
|
Chris@87
|
1333 PyObject *sepobj
|
Chris@87
|
1334 );
|
Chris@87
|
1335
|
Chris@87
|
1336 /* === Characters Type APIs =============================================== */
|
Chris@87
|
1337
|
Chris@87
|
1338 /* Helper array used by Py_UNICODE_ISSPACE(). */
|
Chris@87
|
1339
|
Chris@87
|
1340 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
|
Chris@87
|
1341
|
Chris@87
|
1342 /* These should not be used directly. Use the Py_UNICODE_IS* and
|
Chris@87
|
1343 Py_UNICODE_TO* macros instead.
|
Chris@87
|
1344
|
Chris@87
|
1345 These APIs are implemented in Objects/unicodectype.c.
|
Chris@87
|
1346
|
Chris@87
|
1347 */
|
Chris@87
|
1348
|
Chris@87
|
1349 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
|
Chris@87
|
1350 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1351 );
|
Chris@87
|
1352
|
Chris@87
|
1353 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
|
Chris@87
|
1354 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1355 );
|
Chris@87
|
1356
|
Chris@87
|
1357 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
|
Chris@87
|
1358 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1359 );
|
Chris@87
|
1360
|
Chris@87
|
1361 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
|
Chris@87
|
1362 const Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1363 );
|
Chris@87
|
1364
|
Chris@87
|
1365 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
|
Chris@87
|
1366 const Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1367 );
|
Chris@87
|
1368
|
Chris@87
|
1369 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
|
Chris@87
|
1370 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1371 );
|
Chris@87
|
1372
|
Chris@87
|
1373 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
|
Chris@87
|
1374 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1375 );
|
Chris@87
|
1376
|
Chris@87
|
1377 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
|
Chris@87
|
1378 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1379 );
|
Chris@87
|
1380
|
Chris@87
|
1381 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
|
Chris@87
|
1382 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1383 );
|
Chris@87
|
1384
|
Chris@87
|
1385 PyAPI_FUNC(int) _PyUnicode_ToDigit(
|
Chris@87
|
1386 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1387 );
|
Chris@87
|
1388
|
Chris@87
|
1389 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
|
Chris@87
|
1390 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1391 );
|
Chris@87
|
1392
|
Chris@87
|
1393 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
|
Chris@87
|
1394 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1395 );
|
Chris@87
|
1396
|
Chris@87
|
1397 PyAPI_FUNC(int) _PyUnicode_IsDigit(
|
Chris@87
|
1398 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1399 );
|
Chris@87
|
1400
|
Chris@87
|
1401 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
|
Chris@87
|
1402 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1403 );
|
Chris@87
|
1404
|
Chris@87
|
1405 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
|
Chris@87
|
1406 Py_UNICODE ch /* Unicode character */
|
Chris@87
|
1407 );
|
Chris@87
|
1408
|
Chris@87
|
1409 #ifdef __cplusplus
|
Chris@87
|
1410 }
|
Chris@87
|
1411 #endif
|
Chris@87
|
1412 #endif /* Py_USING_UNICODE */
|
Chris@87
|
1413 #endif /* !Py_UNICODEOBJECT_H */
|