Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/mingw32/Python27/include/unicodeobject.h @ 87:2a2c65a20a8b
Add Python libs and headers
author | Chris Cannam |
---|---|
date | Wed, 25 Feb 2015 14:05:22 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
86:413a9d26189e | 87:2a2c65a20a8b |
---|---|
1 #ifndef Py_UNICODEOBJECT_H | |
2 #define Py_UNICODEOBJECT_H | |
3 | |
4 #include <stdarg.h> | |
5 | |
6 /* | |
7 | |
8 Unicode implementation based on original code by Fredrik Lundh, | |
9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the | |
10 Unicode Integration Proposal (see file Misc/unicode.txt). | |
11 | |
12 Copyright (c) Corporation for National Research Initiatives. | |
13 | |
14 | |
15 Original header: | |
16 -------------------------------------------------------------------- | |
17 | |
18 * Yet another Unicode string type for Python. This type supports the | |
19 * 16-bit Basic Multilingual Plane (BMP) only. | |
20 * | |
21 * Written by Fredrik Lundh, January 1999. | |
22 * | |
23 * Copyright (c) 1999 by Secret Labs AB. | |
24 * Copyright (c) 1999 by Fredrik Lundh. | |
25 * | |
26 * fredrik@pythonware.com | |
27 * http://www.pythonware.com | |
28 * | |
29 * -------------------------------------------------------------------- | |
30 * This Unicode String Type is | |
31 * | |
32 * Copyright (c) 1999 by Secret Labs AB | |
33 * Copyright (c) 1999 by Fredrik Lundh | |
34 * | |
35 * By obtaining, using, and/or copying this software and/or its | |
36 * associated documentation, you agree that you have read, understood, | |
37 * and will comply with the following terms and conditions: | |
38 * | |
39 * Permission to use, copy, modify, and distribute this software and its | |
40 * associated documentation for any purpose and without fee is hereby | |
41 * granted, provided that the above copyright notice appears in all | |
42 * copies, and that both that copyright notice and this permission notice | |
43 * appear in supporting documentation, and that the name of Secret Labs | |
44 * AB or the author not be used in advertising or publicity pertaining to | |
45 * distribution of the software without specific, written prior | |
46 * permission. | |
47 * | |
48 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO | |
49 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
50 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR | |
51 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
52 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
53 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT | |
54 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
55 * -------------------------------------------------------------------- */ | |
56 | |
57 #include <ctype.h> | |
58 | |
59 /* === Internal API ======================================================= */ | |
60 | |
61 /* --- Internal Unicode Format -------------------------------------------- */ | |
62 | |
63 #ifndef Py_USING_UNICODE | |
64 | |
65 #define PyUnicode_Check(op) 0 | |
66 #define PyUnicode_CheckExact(op) 0 | |
67 | |
68 #else | |
69 | |
70 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is | |
71 properly set, but the default rules below don't set it. I'll | |
72 sort this out some other day -- fredrik@pythonware.com */ | |
73 | |
74 #ifndef Py_UNICODE_SIZE | |
75 #error Must define Py_UNICODE_SIZE | |
76 #endif | |
77 | |
78 /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode | |
79 strings are stored as UCS-2 (with limited support for UTF-16) */ | |
80 | |
81 #if Py_UNICODE_SIZE >= 4 | |
82 #define Py_UNICODE_WIDE | |
83 #endif | |
84 | |
85 /* Set these flags if the platform has "wchar.h", "wctype.h" and the | |
86 wchar_t type is a 16-bit unsigned type */ | |
87 /* #define HAVE_WCHAR_H */ | |
88 /* #define HAVE_USABLE_WCHAR_T */ | |
89 | |
90 /* Defaults for various platforms */ | |
91 #ifndef PY_UNICODE_TYPE | |
92 | |
93 /* Windows has a usable wchar_t type (unless we're using UCS-4) */ | |
94 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2 | |
95 # define HAVE_USABLE_WCHAR_T | |
96 # define PY_UNICODE_TYPE wchar_t | |
97 # endif | |
98 | |
99 # if defined(Py_UNICODE_WIDE) | |
100 # define PY_UNICODE_TYPE Py_UCS4 | |
101 # endif | |
102 | |
103 #endif | |
104 | |
105 /* If the compiler provides a wchar_t type we try to support it | |
106 through the interface functions PyUnicode_FromWideChar() and | |
107 PyUnicode_AsWideChar(). */ | |
108 | |
109 #ifdef HAVE_USABLE_WCHAR_T | |
110 # ifndef HAVE_WCHAR_H | |
111 # define HAVE_WCHAR_H | |
112 # endif | |
113 #endif | |
114 | |
115 #ifdef HAVE_WCHAR_H | |
116 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ | |
117 # ifdef _HAVE_BSDI | |
118 # include <time.h> | |
119 # endif | |
120 # include <wchar.h> | |
121 #endif | |
122 | |
123 /* | |
124 * Use this typedef when you need to represent a UTF-16 surrogate pair | |
125 * as a single unsigned integer. | |
126 */ | |
127 #if SIZEOF_INT >= 4 | |
128 typedef unsigned int Py_UCS4; | |
129 #elif SIZEOF_LONG >= 4 | |
130 typedef unsigned long Py_UCS4; | |
131 #endif | |
132 | |
133 /* Py_UNICODE is the native Unicode storage format (code unit) used by | |
134 Python and represents a single Unicode element in the Unicode | |
135 type. */ | |
136 | |
137 typedef PY_UNICODE_TYPE Py_UNICODE; | |
138 | |
139 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */ | |
140 | |
141 /* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds | |
142 produce different external names and thus cause import errors in | |
143 case Python interpreters and extensions with mixed compiled-in | |
144 Unicode width assumptions are combined. */ | |
145 | |
146 #ifndef Py_UNICODE_WIDE | |
147 | |
148 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString | |
149 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString | |
150 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject | |
151 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString | |
152 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String | |
153 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString | |
154 # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String | |
155 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String | |
156 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String | |
157 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode | |
158 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString | |
159 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar | |
160 # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist | |
161 # define PyUnicode_Compare PyUnicodeUCS2_Compare | |
162 # define PyUnicode_Concat PyUnicodeUCS2_Concat | |
163 # define PyUnicode_Contains PyUnicodeUCS2_Contains | |
164 # define PyUnicode_Count PyUnicodeUCS2_Count | |
165 # define PyUnicode_Decode PyUnicodeUCS2_Decode | |
166 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII | |
167 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap | |
168 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 | |
169 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape | |
170 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 | |
171 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful | |
172 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16 | |
173 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful | |
174 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8 | |
175 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful | |
176 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape | |
177 # define PyUnicode_Encode PyUnicodeUCS2_Encode | |
178 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII | |
179 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap | |
180 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal | |
181 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1 | |
182 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape | |
183 # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32 | |
184 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16 | |
185 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8 | |
186 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape | |
187 # define PyUnicode_Find PyUnicodeUCS2_Find | |
188 # define PyUnicode_Format PyUnicodeUCS2_Format | |
189 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject | |
190 # define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat | |
191 # define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV | |
192 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject | |
193 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal | |
194 # define PyUnicode_FromString PyUnicodeUCS2_FromString | |
195 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize | |
196 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode | |
197 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar | |
198 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding | |
199 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax | |
200 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize | |
201 # define PyUnicode_Join PyUnicodeUCS2_Join | |
202 # define PyUnicode_Partition PyUnicodeUCS2_Partition | |
203 # define PyUnicode_RPartition PyUnicodeUCS2_RPartition | |
204 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit | |
205 # define PyUnicode_Replace PyUnicodeUCS2_Replace | |
206 # define PyUnicode_Resize PyUnicodeUCS2_Resize | |
207 # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare | |
208 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding | |
209 # define PyUnicode_Split PyUnicodeUCS2_Split | |
210 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines | |
211 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch | |
212 # define PyUnicode_Translate PyUnicodeUCS2_Translate | |
213 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap | |
214 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString | |
215 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini | |
216 # define _PyUnicode_Init _PyUnicodeUCS2_Init | |
217 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha | |
218 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit | |
219 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit | |
220 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak | |
221 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase | |
222 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric | |
223 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase | |
224 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase | |
225 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace | |
226 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit | |
227 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit | |
228 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase | |
229 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric | |
230 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase | |
231 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase | |
232 | |
233 #else | |
234 | |
235 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString | |
236 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString | |
237 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject | |
238 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString | |
239 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String | |
240 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString | |
241 # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String | |
242 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String | |
243 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String | |
244 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode | |
245 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString | |
246 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar | |
247 # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist | |
248 # define PyUnicode_Compare PyUnicodeUCS4_Compare | |
249 # define PyUnicode_Concat PyUnicodeUCS4_Concat | |
250 # define PyUnicode_Contains PyUnicodeUCS4_Contains | |
251 # define PyUnicode_Count PyUnicodeUCS4_Count | |
252 # define PyUnicode_Decode PyUnicodeUCS4_Decode | |
253 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII | |
254 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap | |
255 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 | |
256 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape | |
257 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 | |
258 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful | |
259 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16 | |
260 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful | |
261 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8 | |
262 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful | |
263 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape | |
264 # define PyUnicode_Encode PyUnicodeUCS4_Encode | |
265 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII | |
266 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap | |
267 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal | |
268 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1 | |
269 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape | |
270 # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32 | |
271 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16 | |
272 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8 | |
273 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape | |
274 # define PyUnicode_Find PyUnicodeUCS4_Find | |
275 # define PyUnicode_Format PyUnicodeUCS4_Format | |
276 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject | |
277 # define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat | |
278 # define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV | |
279 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject | |
280 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal | |
281 # define PyUnicode_FromString PyUnicodeUCS4_FromString | |
282 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize | |
283 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode | |
284 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar | |
285 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding | |
286 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax | |
287 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize | |
288 # define PyUnicode_Join PyUnicodeUCS4_Join | |
289 # define PyUnicode_Partition PyUnicodeUCS4_Partition | |
290 # define PyUnicode_RPartition PyUnicodeUCS4_RPartition | |
291 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit | |
292 # define PyUnicode_Replace PyUnicodeUCS4_Replace | |
293 # define PyUnicode_Resize PyUnicodeUCS4_Resize | |
294 # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare | |
295 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding | |
296 # define PyUnicode_Split PyUnicodeUCS4_Split | |
297 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines | |
298 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch | |
299 # define PyUnicode_Translate PyUnicodeUCS4_Translate | |
300 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap | |
301 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString | |
302 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini | |
303 # define _PyUnicode_Init _PyUnicodeUCS4_Init | |
304 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha | |
305 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit | |
306 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit | |
307 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak | |
308 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase | |
309 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric | |
310 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase | |
311 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase | |
312 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace | |
313 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit | |
314 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit | |
315 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase | |
316 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric | |
317 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase | |
318 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase | |
319 | |
320 | |
321 #endif | |
322 | |
323 /* --- Internal Unicode Operations ---------------------------------------- */ | |
324 | |
325 /* If you want Python to use the compiler's wctype.h functions instead | |
326 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or | |
327 configure Python using --with-wctype-functions. This reduces the | |
328 interpreter's code size. */ | |
329 | |
330 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) | |
331 | |
332 #include <wctype.h> | |
333 | |
334 #define Py_UNICODE_ISSPACE(ch) iswspace(ch) | |
335 | |
336 #define Py_UNICODE_ISLOWER(ch) iswlower(ch) | |
337 #define Py_UNICODE_ISUPPER(ch) iswupper(ch) | |
338 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) | |
339 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) | |
340 | |
341 #define Py_UNICODE_TOLOWER(ch) towlower(ch) | |
342 #define Py_UNICODE_TOUPPER(ch) towupper(ch) | |
343 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) | |
344 | |
345 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) | |
346 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) | |
347 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) | |
348 | |
349 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) | |
350 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) | |
351 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) | |
352 | |
353 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch) | |
354 | |
355 #else | |
356 | |
357 /* Since splitting on whitespace is an important use case, and | |
358 whitespace in most situations is solely ASCII whitespace, we | |
359 optimize for the common case by using a quick look-up table | |
360 _Py_ascii_whitespace (see below) with an inlined check. | |
361 | |
362 */ | |
363 #define Py_UNICODE_ISSPACE(ch) \ | |
364 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) | |
365 | |
366 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) | |
367 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) | |
368 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) | |
369 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) | |
370 | |
371 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) | |
372 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) | |
373 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) | |
374 | |
375 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) | |
376 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) | |
377 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) | |
378 | |
379 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) | |
380 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) | |
381 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) | |
382 | |
383 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) | |
384 | |
385 #endif | |
386 | |
387 #define Py_UNICODE_ISALNUM(ch) \ | |
388 (Py_UNICODE_ISALPHA(ch) || \ | |
389 Py_UNICODE_ISDECIMAL(ch) || \ | |
390 Py_UNICODE_ISDIGIT(ch) || \ | |
391 Py_UNICODE_ISNUMERIC(ch)) | |
392 | |
393 #define Py_UNICODE_COPY(target, source, length) \ | |
394 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) | |
395 /* Set the first `length` items of target to value; every argument is evaluated exactly once. */ | |
396 #define Py_UNICODE_FILL(target, value, length) \ | |
397 do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ | |
398 const Py_ssize_t n_ = (length); for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\ | |
399 } while (0) | |
400 | |
401 /* Check if substring matches at given offset. The offset must be | |
402 valid, and the substring must not be empty. */ | |
403 | |
404 #define Py_UNICODE_MATCH(string, offset, substring) \ | |
405 ((*((string)->str + (offset)) == *((substring)->str)) && \ | |
406 ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \ | |
407 !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE))) | |
408 | |
409 #ifdef __cplusplus | |
410 extern "C" { | |
411 #endif | |
412 | |
413 /* --- Unicode Type ------------------------------------------------------- */ | |
414 | |
415 typedef struct { | |
416 PyObject_HEAD | |
417 Py_ssize_t length; /* Length of the buffer, counted in Py_UNICODE code units (not bytes) */ | |
418 Py_UNICODE *str; /* Raw Unicode buffer */ | |
419 long hash; /* Hash value; -1 if not set */ | |
420 PyObject *defenc; /* (Default) Encoded version as Python | |
421 string, or NULL; this is used for | |
422 implementing the buffer protocol */ | |
423 } PyUnicodeObject; | |
424 | |
425 PyAPI_DATA(PyTypeObject) PyUnicode_Type; | |
426 | |
427 #define PyUnicode_Check(op) \ | |
428 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) | |
429 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) | |
430 | |
431 /* Fast access macros */ | |
432 #define PyUnicode_GET_SIZE(op) \ | |
433 (((PyUnicodeObject *)(op))->length) | |
434 #define PyUnicode_GET_DATA_SIZE(op) \ | |
435 (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)) | |
436 #define PyUnicode_AS_UNICODE(op) \ | |
437 (((PyUnicodeObject *)(op))->str) | |
438 #define PyUnicode_AS_DATA(op) \ | |
439 ((const char *)((PyUnicodeObject *)(op))->str) | |
440 | |
441 /* --- Constants ---------------------------------------------------------- */ | |
442 | |
443 /* This Unicode character will be used as replacement character during | |
444 decoding if the errors argument is set to "replace". Note: the | |
445 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in | |
446 Unicode 3.0. */ | |
447 | |
448 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD) | |
449 | |
450 /* === Public API ========================================================= */ | |
451 | |
452 /* --- Plain Py_UNICODE --------------------------------------------------- */ | |
453 | |
454 /* Create a Unicode Object from the Py_UNICODE buffer u of the given | |
455 size. | |
456 | |
457 u may be NULL which causes the contents to be undefined. It is the | |
458 user's responsibility to fill in the needed data afterwards. Note | |
459 that modifying the Unicode object contents after construction is | |
460 only allowed if u was set to NULL. | |
461 | |
462 The buffer is copied into the new object. */ | |
463 | |
464 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( | |
465 const Py_UNICODE *u, /* Unicode buffer */ | |
466 Py_ssize_t size /* size of buffer */ | |
467 ); | |
468 | |
469 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ | |
470 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( | |
471 const char *u, /* char buffer */ | |
472 Py_ssize_t size /* size of buffer */ | |
473 ); | |
474 | |
475 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated | |
476 UTF-8 encoded bytes */ | |
477 PyAPI_FUNC(PyObject*) PyUnicode_FromString( | |
478 const char *u /* string */ | |
479 ); | |
480 | |
481 /* Return a read-only pointer to the Unicode object's internal | |
482 Py_UNICODE buffer. */ | |
483 | |
484 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( | |
485 PyObject *unicode /* Unicode object */ | |
486 ); | |
487 | |
488 /* Get the length of the Unicode object. */ | |
489 | |
490 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( | |
491 PyObject *unicode /* Unicode object */ | |
492 ); | |
493 | |
494 /* Get the maximum ordinal for a Unicode character. */ | |
495 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); | |
496 | |
497 /* Resize an already allocated Unicode object to the new size length. | |
498 | |
499 *unicode is modified to point to the new (resized) object and 0 | |
500 returned on success. | |
501 | |
502 This API may only be called by the function which also called the | |
503 Unicode constructor. The refcount on the object must be 1. Otherwise, | |
504 an error is returned. | |
505 | |
506 Error handling is implemented as follows: an exception is set, -1 | |
507 is returned and *unicode left untouched. | |
508 | |
509 */ | |
510 | |
511 PyAPI_FUNC(int) PyUnicode_Resize( | |
512 PyObject **unicode, /* Pointer to the Unicode object */ | |
513 Py_ssize_t length /* New length */ | |
514 ); | |
515 | |
516 /* Coerce obj to a Unicode object and return a reference with | |
517 *incremented* refcount. | |
518 | |
519 Coercion is done in the following way: | |
520 | |
521 1. String and other char buffer compatible objects are decoded | |
522 under the assumptions that they contain data using the current | |
523 default encoding. Decoding is done in "strict" mode. | |
524 | |
525 2. All other objects (including Unicode objects) raise an | |
526 exception. | |
527 | |
528 The API returns NULL in case of an error. The caller is responsible | |
529 for decref'ing the returned objects. | |
530 | |
531 */ | |
532 | |
533 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( | |
534 register PyObject *obj, /* Object */ | |
535 const char *encoding, /* encoding */ | |
536 const char *errors /* error handling */ | |
537 ); | |
538 | |
539 /* Coerce obj to a Unicode object and return a reference with | |
540 *incremented* refcount. | |
541 | |
542 Unicode objects are passed back as-is (subclasses are converted to | |
543 true Unicode objects), all other objects are delegated to | |
544 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in | |
545 using the default encoding as basis for decoding the object. | |
546 | |
547 The API returns NULL in case of an error. The caller is responsible | |
548 for decref'ing the returned objects. | |
549 | |
550 */ | |
551 | |
552 PyAPI_FUNC(PyObject*) PyUnicode_FromObject( | |
553 register PyObject *obj /* Object */ | |
554 ); | |
555 | |
556 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); | |
557 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); | |
558 | |
559 /* Format the object based on the format_spec, as defined in PEP 3101 | |
560 (Advanced String Formatting). */ | |
561 PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, | |
562 Py_UNICODE *format_spec, | |
563 Py_ssize_t format_spec_len); | |
564 | |
565 /* --- wchar_t support for platforms which support it --------------------- */ | |
566 | |
567 #ifdef HAVE_WCHAR_H | |
568 | |
569 /* Create a Unicode Object from the wchar_t buffer w of the given | |
570 size. | |
571 | |
572 The buffer is copied into the new object. */ | |
573 | |
574 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( | |
575 register const wchar_t *w, /* wchar_t buffer */ | |
576 Py_ssize_t size /* size of buffer */ | |
577 ); | |
578 | |
579 /* Copies the Unicode Object contents into the wchar_t buffer w. At | |
580 most size wchar_t characters are copied. | |
581 | |
582 Note that the resulting wchar_t string may or may not be | |
583 0-terminated. It is the responsibility of the caller to make sure | |
584 that the wchar_t string is 0-terminated in case this is required by | |
585 the application. | |
586 | |
587 Returns the number of wchar_t characters copied (excluding a | |
588 possibly trailing 0-termination character) or -1 in case of an | |
589 error. */ | |
590 | |
591 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( | |
592 PyUnicodeObject *unicode, /* Unicode object */ | |
593 register wchar_t *w, /* wchar_t buffer */ | |
594 Py_ssize_t size /* size of buffer */ | |
595 ); | |
596 | |
597 #endif | |
598 | |
599 /* --- Unicode ordinals --------------------------------------------------- */ | |
600 | |
601 /* Create a Unicode Object from the given Unicode code point ordinal. | |
602 | |
603 The ordinal must be in range(0x10000) on narrow Python builds | |
604 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is | |
605 raised in case it is not. | |
606 | |
607 */ | |
608 | |
609 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); | |
610 | |
611 /* --- Free-list management ----------------------------------------------- */ | |
612 | |
613 /* Clear the free list used by the Unicode implementation. | |
614 | |
615 This can be used to release memory used for objects on the free | |
616 list back to the Python memory allocator. | |
617 | |
618 */ | |
619 | |
620 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); | |
621 | |
622 /* === Builtin Codecs ===================================================== | |
623 | |
624 Many of these APIs take two arguments encoding and errors. These | |
625 parameters encoding and errors have the same semantics as the ones | |
626 of the builtin unicode() API. | |
627 | |
628 Setting encoding to NULL causes the default encoding to be used. | |
629 | |
630 Error handling is set by errors which may also be set to NULL | |
631 meaning to use the default handling defined for the codec. Default | |
632 error handling for all builtin codecs is "strict" (ValueErrors are | |
633 raised). | |
634 | |
635 The codecs all use a similar interface. Only deviations from the | |
636 generic ones are documented. | |
637 | |
638 */ | |
639 | |
640 /* --- Manage the default encoding ---------------------------------------- */ | |
641 | |
642 /* Return a Python string holding the default encoded value of the | |
643 Unicode object. | |
644 | |
645 The resulting string is cached in the Unicode object for subsequent | |
646 usage by this function. The cached version is needed to implement | |
647 the character buffer interface and will live (at least) as long as | |
648 the Unicode object itself. | |
649 | |
650 The refcount of the string is *not* incremented. | |
651 | |
652 *** Exported for internal use by the interpreter only !!! *** | |
653 | |
654 */ | |
655 | |
656 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( | |
657 PyObject *, const char *); | |
658 | |
659 /* Returns the currently active default encoding. | |
660 | |
661 The default encoding is currently implemented as a run-time settable | |
662 process global. This may change in future versions of the | |
663 interpreter to become a parameter which is managed on a per-thread | |
664 basis. | |
665 | |
666 */ | |
667 | |
668 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); | |
669 | |
670 /* Sets the currently active default encoding. | |
671 | |
672 Returns 0 on success, -1 in case of an error. | |
673 | |
674 */ | |
675 | |
676 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding( | |
677 const char *encoding /* Encoding name in standard form */ | |
678 ); | |
679 | |
680 /* --- Generic Codecs ----------------------------------------------------- */ | |
681 | |
682 /* Create a Unicode object by decoding the encoded string s of the | |
683 given size. */ | |
684 | |
685 PyAPI_FUNC(PyObject*) PyUnicode_Decode( | |
686 const char *s, /* encoded string */ | |
687 Py_ssize_t size, /* size of buffer */ | |
688 const char *encoding, /* encoding */ | |
689 const char *errors /* error handling */ | |
690 ); | |
691 | |
692 /* Encodes a Py_UNICODE buffer of the given size and returns a | |
693 Python string object. */ | |
694 | |
695 PyAPI_FUNC(PyObject*) PyUnicode_Encode( | |
696 const Py_UNICODE *s, /* Unicode char buffer */ | |
697 Py_ssize_t size, /* number of Py_UNICODE chars to encode */ | |
698 const char *encoding, /* encoding */ | |
699 const char *errors /* error handling */ | |
700 ); | |
701 | |
702 /* Encodes a Unicode object and returns the result as Python | |
703 object. */ | |
704 | |
705 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( | |
706 PyObject *unicode, /* Unicode object */ | |
707 const char *encoding, /* encoding */ | |
708 const char *errors /* error handling */ | |
709 ); | |
710 | |
711 /* Encodes a Unicode object and returns the result as Python string | |
712 object. */ | |
713 | |
714 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( | |
715 PyObject *unicode, /* Unicode object */ | |
716 const char *encoding, /* encoding */ | |
717 const char *errors /* error handling */ | |
718 ); | |
719 | |
720 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( | |
721 PyObject* string /* 256 character map */ | |
722 ); | |
723 | |
724 | |
725 /* --- UTF-7 Codecs ------------------------------------------------------- */ | |
726 | |
727 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( | |
728 const char *string, /* UTF-7 encoded string */ | |
729 Py_ssize_t length, /* size of string */ | |
730 const char *errors /* error handling */ | |
731 ); | |
732 | |
733 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( | |
734 const char *string, /* UTF-7 encoded string */ | |
735 Py_ssize_t length, /* size of string */ | |
736 const char *errors, /* error handling */ | |
737 Py_ssize_t *consumed /* bytes consumed */ | |
738 ); | |
739 | |
740 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( | |
741 const Py_UNICODE *data, /* Unicode char buffer */ | |
742 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ | |
743 int base64SetO, /* Encode RFC2152 Set O characters in base64 */ | |
744 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ | |
745 const char *errors /* error handling */ | |
746 ); | |
747 | |
748 /* --- UTF-8 Codecs ------------------------------------------------------- */ | |
749 | |
750 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( | |
751 const char *string, /* UTF-8 encoded string */ | |
752 Py_ssize_t length, /* size of string */ | |
753 const char *errors /* error handling */ | |
754 ); | |
755 | |
756 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( | |
757 const char *string, /* UTF-8 encoded string */ | |
758 Py_ssize_t length, /* size of string */ | |
759 const char *errors, /* error handling */ | |
760 Py_ssize_t *consumed /* bytes consumed */ | |
761 ); | |
762 | |
763 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( | |
764 PyObject *unicode /* Unicode object */ | |
765 ); | |
766 | |
767 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( | |
768 const Py_UNICODE *data, /* Unicode char buffer */ | |
769 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ | |
770 const char *errors /* error handling */ | |
771 ); | |
772 | |
773 /* --- UTF-32 Codecs ------------------------------------------------------ */ | |
774 | |
775 /* Decodes length bytes from a UTF-32 encoded buffer string and returns | |
776 the corresponding Unicode object. | |
777 | |
778 errors (if non-NULL) defines the error handling. It defaults | |
779 to "strict". | |
780 | |
781 If byteorder is non-NULL, the decoder starts decoding using the | |
782 given byte order: | |
783 | |
784 *byteorder == -1: little endian | |
785 *byteorder == 0: native order | |
786 *byteorder == 1: big endian | |
787 | |
788 In native mode, the first four bytes of the stream are checked for a | |
789 BOM mark. If found, the BOM mark is analysed, the byte order | |
790 adjusted and the BOM skipped. In the other modes, no BOM mark | |
791 interpretation is done. After completion, *byteorder is set to the | |
792 current byte order at the end of input data. | |
793 | |
794 If byteorder is NULL, the codec starts in native order mode. | |
795 | |
796 */ | |
797 | |
798 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( | |
799 const char *string, /* UTF-32 encoded string */ | |
800 Py_ssize_t length, /* size of string */ | |
801 const char *errors, /* error handling */ | |
802 int *byteorder /* pointer to byteorder to use | |
803 0=native;-1=LE,1=BE; updated on | |
804 exit */ | |
805 ); | |
806 | |
807 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( | |
808 const char *string, /* UTF-32 encoded string */ | |
809 Py_ssize_t length, /* size of string */ | |
810 const char *errors, /* error handling */ | |
811 int *byteorder, /* pointer to byteorder to use | |
812 0=native;-1=LE,1=BE; updated on | |
813 exit */ | |
814 Py_ssize_t *consumed /* bytes consumed */ | |
815 ); | |
816 | |
817 /* Returns a Python string using the UTF-32 encoding in native byte | |
818 order. The string always starts with a BOM mark. */ | |
819 | |
820 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( | |
821 PyObject *unicode /* Unicode object */ | |
822 ); | |
823 | |
824 /* Returns a Python string object holding the UTF-32 encoded value of | |
825 the Unicode data. | |
826 | |
827 If byteorder is not 0, output is written according to the following | |
828 byte order: | |
829 | |
830 byteorder == -1: little endian | |
831 byteorder == 0: native byte order (writes a BOM mark) | |
832 byteorder == 1: big endian | |
833 | |
834 If byteorder is 0, the output string will always start with the | |
835 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is | |
836 prepended. | |
837 | |
838 */ | |
839 | |
840 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( | |
841 const Py_UNICODE *data, /* Unicode char buffer */ | |
842 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ | |
843 const char *errors, /* error handling */ | |
844 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ | |
845 ); | |
846 | |
847 /* --- UTF-16 Codecs ------------------------------------------------------ */ | |
848 | |
849 /* Decodes length bytes from a UTF-16 encoded buffer string and returns | |
850 the corresponding Unicode object. | |
851 | |
852 errors (if non-NULL) defines the error handling. It defaults | |
853 to "strict". | |
854 | |
855 If byteorder is non-NULL, the decoder starts decoding using the | |
856 given byte order: | |
857 | |
858 *byteorder == -1: little endian | |
859 *byteorder == 0: native order | |
860 *byteorder == 1: big endian | |
861 | |
862 In native mode, the first two bytes of the stream are checked for a | |
863 BOM mark. If found, the BOM mark is analysed, the byte order | |
864 adjusted and the BOM skipped. In the other modes, no BOM mark | |
865 interpretation is done. After completion, *byteorder is set to the | |
866 current byte order at the end of input data. | |
867 | |
868 If byteorder is NULL, the codec starts in native order mode. | |
869 | |
870 */ | |
871 | |
872 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( | |
873 const char *string, /* UTF-16 encoded string */ | |
874 Py_ssize_t length, /* size of string */ | |
875 const char *errors, /* error handling */ | |
876 int *byteorder /* pointer to byteorder to use | |
877 0=native;-1=LE,1=BE; updated on | |
878 exit */ | |
879 ); | |
880 | |
881 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( | |
882 const char *string, /* UTF-16 encoded string */ | |
883 Py_ssize_t length, /* size of string */ | |
884 const char *errors, /* error handling */ | |
885 int *byteorder, /* pointer to byteorder to use | |
886 0=native;-1=LE,1=BE; updated on | |
887 exit */ | |
888 Py_ssize_t *consumed /* bytes consumed */ | |
889 ); | |
890 | |
891 /* Returns a Python string using the UTF-16 encoding in native byte | |
892 order. The string always starts with a BOM mark. */ | |
893 | |
894 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( | |
895 PyObject *unicode /* Unicode object */ | |
896 ); | |
897 | |
898 /* Returns a Python string object holding the UTF-16 encoded value of | |
899 the Unicode data. | |
900 | |
901 If byteorder is not 0, output is written according to the following | |
902 byte order: | |
903 | |
904 byteorder == -1: little endian | |
905 byteorder == 0: native byte order (writes a BOM mark) | |
906 byteorder == 1: big endian | |
907 | |
908 If byteorder is 0, the output string will always start with the | |
909 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is | |
910 prepended. | |
911 | |
912 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to | |
913 UCS-2. This trick makes it possible to add full UTF-16 capabilities | |
914 at a later point without compromising the APIs. | |
915 | |
916 */ | |
917 | |
918 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( | |
919 const Py_UNICODE *data, /* Unicode char buffer */ | |
920 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ | |
921 const char *errors, /* error handling */ | |
922 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ | |
923 ); | |
924 | |
925 /* --- Unicode-Escape Codecs ---------------------------------------------- */ | |
926 | |
927 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( | |
928 const char *string, /* Unicode-Escape encoded string */ | |
929 Py_ssize_t length, /* size of string */ | |
930 const char *errors /* error handling */ | |
931 ); | |
932 | |
933 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( | |
934 PyObject *unicode /* Unicode object */ | |
935 ); | |
936 | |
937 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( | |
938 const Py_UNICODE *data, /* Unicode char buffer */ | |
939 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ | |
940 ); | |
941 | |
942 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ | |
943 | |
944 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( | |
945 const char *string, /* Raw-Unicode-Escape encoded string */ | |
946 Py_ssize_t length, /* size of string */ | |
947 const char *errors /* error handling */ | |
948 ); | |
949 | |
950 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( | |
951 PyObject *unicode /* Unicode object */ | |
952 ); | |
953 | |
954 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( | |
955 const Py_UNICODE *data, /* Unicode char buffer */ | |
956 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ | |
957 ); | |
958 | |
959 /* --- Unicode Internal Codec --------------------------------------------- | |
960 | |
961 Only for internal use in _codecsmodule.c */ | |
962 | |
963 PyObject *_PyUnicode_DecodeUnicodeInternal( | |
964 const char *string, | |
965 Py_ssize_t length, | |
966 const char *errors | |
967 ); | |
968 | |
969 /* --- Latin-1 Codecs ----------------------------------------------------- | |
970 | |
971 Note: Latin-1 corresponds to the first 256 Unicode ordinals. | |
972 | |
973 */ | |
974 | |
975 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( | |
976 const char *string, /* Latin-1 encoded string */ | |
977 Py_ssize_t length, /* size of string */ | |
978 const char *errors /* error handling */ | |
979 ); | |
980 | |
981 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( | |
982 PyObject *unicode /* Unicode object */ | |
983 ); | |
984 | |
985 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( | |
986 const Py_UNICODE *data, /* Unicode char buffer */ | |
987 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ | |
988 const char *errors /* error handling */ | |
989 ); | |
990 | |
991 /* --- ASCII Codecs ------------------------------------------------------- | |
992 | |
993 Only 7-bit ASCII data is accepted. All other codes generate errors. | |
994 | |
995 */ | |
996 | |
997 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( | |
998 const char *string, /* ASCII encoded string */ | |
999 Py_ssize_t length, /* size of string */ | |
1000 const char *errors /* error handling */ | |
1001 ); | |
1002 | |
1003 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( | |
1004 PyObject *unicode /* Unicode object */ | |
1005 ); | |
1006 | |
1007 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( | |
1008 const Py_UNICODE *data, /* Unicode char buffer */ | |
1009 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ | |
1010 const char *errors /* error handling */ | |
1011 ); | |
1012 | |
1013 /* --- Character Map Codecs ----------------------------------------------- | |
1014 | |
1015 This codec uses mappings to encode and decode characters. | |
1016 | |
1017 Decoding mappings must map single string characters to single | |
1018 Unicode characters, integers (which are then interpreted as Unicode | |
1019 ordinals) or None (meaning "undefined mapping" and causing an | |
1020 error). | |
1021 | |
1022 Encoding mappings must map single Unicode characters to single | |
1023 string characters, integers (which are then interpreted as Latin-1 | |
1024 ordinals) or None (meaning "undefined mapping" and causing an | |
1025 error). | |
1026 | |
1027 If a character lookup fails with a LookupError, the character is | |
1028 copied as-is meaning that its ordinal value will be interpreted as | |
1029 Unicode or Latin-1 ordinal resp. Because of this, mappings only need | |
1030 to contain those mappings which map characters to different code | |
1031 points. | |
1032 | |
1033 */ | |
1034 | |
1035 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( | |
1036 const char *string, /* Encoded string */ | |
1037 Py_ssize_t length, /* size of string */ | |
1038 PyObject *mapping, /* character mapping | |
1039 (char ordinal -> unicode ordinal) */ | |
1040 const char *errors /* error handling */ | |
1041 ); | |
1042 | |
1043 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( | |
1044 PyObject *unicode, /* Unicode object */ | |
1045 PyObject *mapping /* character mapping | |
1046 (unicode ordinal -> char ordinal) */ | |
1047 ); | |
1048 | |
1049 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( | |
1050 const Py_UNICODE *data, /* Unicode char buffer */ | |
1051 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ | |
1052 PyObject *mapping, /* character mapping | |
1053 (unicode ordinal -> char ordinal) */ | |
1054 const char *errors /* error handling */ | |
1055 ); | |
1056 | |
1057 /* Translate a Py_UNICODE buffer of the given length by applying a | |
1058 character mapping table to it and return the resulting Unicode | |
1059 object. | |
1060 | |
1061 The mapping table must map Unicode ordinal integers to Unicode | |
1062 ordinal integers or None (causing deletion of the character). | |
1063 | |
1064 Mapping tables may be dictionaries or sequences. Unmapped character | |
1065 ordinals (ones which cause a LookupError) are left untouched and | |
1066 are copied as-is. | |
1067 | |
1068 */ | |
1069 | |
1070 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( | |
1071 const Py_UNICODE *data, /* Unicode char buffer */ | |
1072 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ | |
1073 PyObject *table, /* Translate table */ | |
1074 const char *errors /* error handling */ | |
1075 ); | |
1076 | |
1077 #ifdef MS_WIN32 | |
1078 | |
1079 /* --- MBCS codecs for Windows -------------------------------------------- */ | |
1080 | |
1081 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( | |
1082 const char *string, /* MBCS encoded string */ | |
1083 Py_ssize_t length, /* size of string */ | |
1084 const char *errors /* error handling */ | |
1085 ); | |
1086 | |
1087 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( | |
1088 const char *string, /* MBCS encoded string */ | |
1089 Py_ssize_t length, /* size of string */ | |
1090 const char *errors, /* error handling */ | |
1091 Py_ssize_t *consumed /* bytes consumed */ | |
1092 ); | |
1093 | |
1094 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( | |
1095 PyObject *unicode /* Unicode object */ | |
1096 ); | |
1097 | |
1098 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( | |
1099 const Py_UNICODE *data, /* Unicode char buffer */ | |
1100 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ | |
1101 const char *errors /* error handling */ | |
1102 ); | |
1103 | |
1104 #endif /* MS_WIN32 */ | |
1105 | |
1106 /* --- Decimal Encoder ---------------------------------------------------- */ | |
1107 | |
1108 /* Takes a Unicode string holding a decimal value and writes it into | |
1109 an output buffer using standard ASCII digit codes. | |
1110 | |
1111 The output buffer has to provide at least length+1 bytes of storage | |
1112 area. The output string is 0-terminated. | |
1113 | |
1114 The encoder converts whitespace to ' ', decimal characters to their | |
1115 corresponding ASCII digit and all other Latin-1 characters except | |
1116 \0 as-is. Characters outside this range (Unicode ordinals 1-255) | |
1117 are treated as errors. This includes embedded NULL bytes. | |
1118 | |
1119 Error handling is defined by the errors argument: | |
1120 | |
1121 NULL or "strict": raise a ValueError | |
1122 "ignore": ignore the wrong characters (these are not copied to the | |
1123 output buffer) | |
1124 "replace": replaces illegal characters with '?' | |
1125 | |
1126 Returns 0 on success, -1 on failure. | |
1127 | |
1128 */ | |
1129 | |
1130 PyAPI_FUNC(int) PyUnicode_EncodeDecimal( | |
1131 Py_UNICODE *s, /* Unicode buffer */ | |
1132 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ | |
1133 char *output, /* Output buffer; must have size >= length+1 (room for the 0 terminator) */ | |
1134 const char *errors /* error handling */ | |
1135 ); | |
1136 | |
1137 /* --- Methods & Slots ---------------------------------------------------- | |
1138 | |
1139 These are capable of handling Unicode objects and strings on input | |
1140 (we refer to them as strings in the descriptions) and return | |
1141 Unicode objects or integers as appropriate. */ | |
1142 | |
1143 /* Concat two strings giving a new Unicode string. */ | |
1144 | |
1145 PyAPI_FUNC(PyObject*) PyUnicode_Concat( | |
1146 PyObject *left, /* Left string */ | |
1147 PyObject *right /* Right string */ | |
1148 ); | |
1149 | |
1150 /* Split a string giving a list of Unicode strings. | |
1151 | |
1152 If sep is NULL, splitting will be done at all whitespace | |
1153 substrings. Otherwise, splits occur at the given separator. | |
1154 | |
1155 At most maxsplit splits will be done. If negative, no limit is set. | |
1156 | |
1157 Separators are not included in the resulting list. | |
1158 | |
1159 */ | |
1160 | |
1161 PyAPI_FUNC(PyObject*) PyUnicode_Split( | |
1162 PyObject *s, /* String to split */ | |
1163 PyObject *sep, /* String separator */ | |
1164 Py_ssize_t maxsplit /* Maxsplit count */ | |
1165 ); | |
1166 | |
1167 /* Ditto, but split at line breaks. | |
1168 | |
1169 CRLF is considered to be one line break. Line breaks are not | |
1170 included in the resulting list unless keepends is true. */ | |
1171 | |
1172 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( | |
1173 PyObject *s, /* String to split */ | |
1174 int keepends /* If true, line end markers are included */ | |
1175 ); | |
1176 | |
1177 /* Partition a string using a given separator. */ | |
1178 | |
1179 PyAPI_FUNC(PyObject*) PyUnicode_Partition( | |
1180 PyObject *s, /* String to partition */ | |
1181 PyObject *sep /* String separator */ | |
1182 ); | |
1183 | |
1184 /* Partition a string using a given separator, searching from the end of the | |
1185 string. */ | |
1186 | |
1187 PyAPI_FUNC(PyObject*) PyUnicode_RPartition( | |
1188 PyObject *s, /* String to partition */ | |
1189 PyObject *sep /* String separator */ | |
1190 ); | |
1191 | |
1192 /* Split a string giving a list of Unicode strings. | |
1193 | |
1194 If sep is NULL, splitting will be done at all whitespace | |
1195 substrings. Otherwise, splits occur at the given separator. | |
1196 | |
1197 At most maxsplit splits will be done; if negative, no limit is | |
1198 set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from the end | |
1199 of the string. | |
1200 | |
1201 Separators are not included in the resulting list. | |
1202 | |
1203 */ | |
1204 | |
1205 PyAPI_FUNC(PyObject*) PyUnicode_RSplit( | |
1206 PyObject *s, /* String to split */ | |
1207 PyObject *sep, /* String separator */ | |
1208 Py_ssize_t maxsplit /* Maxsplit count */ | |
1209 ); | |
1210 | |
1211 /* Translate a string by applying a character mapping table to it and | |
1212 return the resulting Unicode object. | |
1213 | |
1214 The mapping table must map Unicode ordinal integers to Unicode | |
1215 ordinal integers or None (causing deletion of the character). | |
1216 | |
1217 Mapping tables may be dictionaries or sequences. Unmapped character | |
1218 ordinals (ones which cause a LookupError) are left untouched and | |
1219 are copied as-is. | |
1220 | |
1221 */ | |
1222 | |
1223 PyAPI_FUNC(PyObject *) PyUnicode_Translate( | |
1224 PyObject *str, /* String */ | |
1225 PyObject *table, /* Translate table */ | |
1226 const char *errors /* error handling */ | |
1227 ); | |
1228 | |
1229 /* Join a sequence of strings using the given separator and return | |
1230 the resulting Unicode string. */ | |
1231 | |
1232 PyAPI_FUNC(PyObject*) PyUnicode_Join( | |
1233 PyObject *separator, /* Separator string */ | |
1234 PyObject *seq /* Sequence object */ | |
1235 ); | |
1236 | |
1237 /* Return 1 if substr matches str[start:end] at the given tail end, 0 | |
1238 otherwise. */ | |
1239 | |
1240 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( | |
1241 PyObject *str, /* String */ | |
1242 PyObject *substr, /* Prefix or Suffix string */ | |
1243 Py_ssize_t start, /* Start index */ | |
1244 Py_ssize_t end, /* Stop index */ | |
1245 int direction /* Tail end: -1 prefix, +1 suffix */ | |
1246 ); | |
1247 | |
1248 /* Return the first position of substr in str[start:end] using the | |
1249 given search direction or -1 if not found. -2 is returned in case | |
1250 an error occurred and an exception is set. */ | |
1251 | |
1252 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( | |
1253 PyObject *str, /* String */ | |
1254 PyObject *substr, /* Substring to find */ | |
1255 Py_ssize_t start, /* Start index */ | |
1256 Py_ssize_t end, /* Stop index */ | |
1257 int direction /* Find direction: +1 forward, -1 backward */ | |
1258 ); | |
1259 | |
1260 /* Count the number of occurrences of substr in str[start:end]. */ | |
1261 | |
1262 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( | |
1263 PyObject *str, /* String */ | |
1264 PyObject *substr, /* Substring to count */ | |
1265 Py_ssize_t start, /* Start index */ | |
1266 Py_ssize_t end /* Stop index */ | |
1267 ); | |
1268 | |
1269 /* Replace at most maxcount occurrences of substr in str with replstr | |
1270 and return the resulting Unicode object. */ | |
1271 | |
1272 PyAPI_FUNC(PyObject *) PyUnicode_Replace( | |
1273 PyObject *str, /* String */ | |
1274 PyObject *substr, /* Substring to find */ | |
1275 PyObject *replstr, /* Substring to replace */ | |
1276 Py_ssize_t maxcount /* Max. number of replacements to apply; | |
1277 -1 = all */ | |
1278 ); | |
1279 | |
1280 /* Compare two strings and return -1, 0, 1 for less than, equal, | |
1281 greater than resp. */ | |
1282 | |
1283 PyAPI_FUNC(int) PyUnicode_Compare( | |
1284 PyObject *left, /* Left string */ | |
1285 PyObject *right /* Right string */ | |
1286 ); | |
1287 | |
1288 /* Rich compare two strings and return one of the following: | |
1289 | |
1290 - NULL in case an exception was raised | |
1291 - Py_True or Py_False for successful comparisons | |
1292 - Py_NotImplemented in case the type combination is unknown | |
1293 | |
1294 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in | |
1295 case the conversion of the arguments to Unicode fails with a | |
1296 UnicodeDecodeError. | |
1297 | |
1298 Possible values for op: | |
1299 | |
1300 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE | |
1301 | |
1302 */ | |
1303 | |
1304 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( | |
1305 PyObject *left, /* Left string */ | |
1306 PyObject *right, /* Right string */ | |
1307 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ | |
1308 ); | |
1309 | |
1310 /* Apply an argument tuple or dictionary to a format string and return | |
1311 the resulting Unicode string. */ | |
1312 | |
1313 PyAPI_FUNC(PyObject *) PyUnicode_Format( | |
1314 PyObject *format, /* Format string */ | |
1315 PyObject *args /* Argument tuple or dictionary */ | |
1316 ); | |
1317 | |
1318 /* Checks whether element is contained in container and return 1/0 | |
1319 accordingly. | |
1320 | |
1321 element has to coerce to a one-element Unicode string. -1 is | |
1322 returned in case of an error. */ | |
1323 | |
1324 PyAPI_FUNC(int) PyUnicode_Contains( | |
1325 PyObject *container, /* Container string */ | |
1326 PyObject *element /* Element string */ | |
1327 ); | |
1328 | |
1329 /* Externally visible for str.strip(unicode) */ | |
1330 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( | |
1331 PyUnicodeObject *self, | |
1332 int striptype, | |
1333 PyObject *sepobj | |
1334 ); | |
1335 | |
1336 /* === Characters Type APIs =============================================== */ | |
1337 | |
1338 /* Helper array used by Py_UNICODE_ISSPACE(). */ | |
1339 | |
1340 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; | |
1341 | |
1342 /* These should not be used directly. Use the Py_UNICODE_IS* and | |
1343 Py_UNICODE_TO* macros instead. | |
1344 | |
1345 These APIs are implemented in Objects/unicodectype.c. | |
1346 | |
1347 */ | |
1348 | |
1349 PyAPI_FUNC(int) _PyUnicode_IsLowercase( | |
1350 Py_UNICODE ch /* Unicode character */ | |
1351 ); | |
1352 | |
1353 PyAPI_FUNC(int) _PyUnicode_IsUppercase( | |
1354 Py_UNICODE ch /* Unicode character */ | |
1355 ); | |
1356 | |
1357 PyAPI_FUNC(int) _PyUnicode_IsTitlecase( | |
1358 Py_UNICODE ch /* Unicode character */ | |
1359 ); | |
1360 | |
1361 PyAPI_FUNC(int) _PyUnicode_IsWhitespace( | |
1362 const Py_UNICODE ch /* Unicode character */ | |
1363 ); | |
1364 | |
1365 PyAPI_FUNC(int) _PyUnicode_IsLinebreak( | |
1366 const Py_UNICODE ch /* Unicode character */ | |
1367 ); | |
1368 | |
1369 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( | |
1370 Py_UNICODE ch /* Unicode character */ | |
1371 ); | |
1372 | |
1373 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( | |
1374 Py_UNICODE ch /* Unicode character */ | |
1375 ); | |
1376 | |
1377 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( | |
1378 Py_UNICODE ch /* Unicode character */ | |
1379 ); | |
1380 | |
1381 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( | |
1382 Py_UNICODE ch /* Unicode character */ | |
1383 ); | |
1384 | |
1385 PyAPI_FUNC(int) _PyUnicode_ToDigit( | |
1386 Py_UNICODE ch /* Unicode character */ | |
1387 ); | |
1388 | |
1389 PyAPI_FUNC(double) _PyUnicode_ToNumeric( | |
1390 Py_UNICODE ch /* Unicode character */ | |
1391 ); | |
1392 | |
1393 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( | |
1394 Py_UNICODE ch /* Unicode character */ | |
1395 ); | |
1396 | |
1397 PyAPI_FUNC(int) _PyUnicode_IsDigit( | |
1398 Py_UNICODE ch /* Unicode character */ | |
1399 ); | |
1400 | |
1401 PyAPI_FUNC(int) _PyUnicode_IsNumeric( | |
1402 Py_UNICODE ch /* Unicode character */ | |
1403 ); | |
1404 | |
1405 PyAPI_FUNC(int) _PyUnicode_IsAlpha( | |
1406 Py_UNICODE ch /* Unicode character */ | |
1407 ); | |
1408 | |
1409 #ifdef __cplusplus | |
1410 } | |
1411 #endif | |
1412 #endif /* Py_USING_UNICODE */ | |
1413 #endif /* !Py_UNICODEOBJECT_H */ |