Mercurial > hg > sv-dependency-builds
comparison src/opusfile-0.9/examples/win32utf8.c @ 69:7aeed7906520
Add Opus sources and macOS builds
author | Chris Cannam |
---|---|
date | Wed, 23 Jan 2019 13:48:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
68:85d5306e114e | 69:7aeed7906520 |
---|---|
1 #if defined(_WIN32) | |
2 # include <stdio.h> | |
3 # include <stdlib.h> | |
4 # include <wchar.h> | |
5 /*We need the following two to set stdin/stdout to binary.*/ | |
6 # include <io.h> | |
7 # include <fcntl.h> | |
8 # define WIN32_LEAN_AND_MEAN | |
9 # define WIN32_EXTRA_LEAN | |
10 # include <windows.h> | |
11 # include "win32utf8.h" | |
12 | |
/*Converts a NUL-terminated UTF-16 string into a freshly allocated,
   NUL-terminated UTF-8 string.
  Well-formed surrogate pairs are combined and written as 4-byte sequences.
  Unpaired surrogates and other 'not a character' values are not rejected:
   they are written as 3-byte sequences (technically invalid UTF-8) so that
   later error handling can detect the problem and report it usefully.
  _src: The NUL-terminated UTF-16 input.
  Return: A buffer obtained from malloc(), or NULL if the allocation failed.*/
static char *utf16_to_utf8(const wchar_t *_src){
  char   *utf8;
  size_t  nunits;
  size_t  in;
  size_t  out;
  nunits=wcslen(_src);
  /*Every code unit expands to at most 3 bytes (a surrogate pair consumes two
     units and produces 4), plus one byte for the terminator.*/
  utf8=(char *)malloc(sizeof(*utf8)*(3*nunits+1));
  if(utf8==NULL)return NULL;
  in=out=0;
  while(in<nunits){
    unsigned lead;
    lead=_src[in++];
    if(lead<0x80){
      /*Plain ASCII: a single byte.*/
      utf8[out++]=(char)lead;
    }
    else if(lead<0x800){
      /*Two-byte sequence.*/
      utf8[out++]=(char)(0xC0|(lead>>6));
      utf8[out++]=(char)(0x80|(lead&0x3F));
    }
    else{
      unsigned trail;
      /*Only peek at the next unit after a high surrogate.
        The peek is in bounds: lead was not NUL, so at worst we read the
         terminator of _src.*/
      trail=0;
      if(lead>=0xD800&&lead<0xDC00)trail=_src[in];
      if(trail>=0xDC00&&trail<0xE000){
        unsigned cp;
        /*A proper surrogate pair: rebuild the code point and emit 4 bytes.*/
        cp=0x10000+(((lead&0x3FF)<<10)|(trail&0x3FF));
        utf8[out++]=(char)(0xF0|(cp>>18));
        utf8[out++]=(char)(0x80|((cp>>12)&0x3F));
        utf8[out++]=(char)(0x80|((cp>>6)&0x3F));
        utf8[out++]=(char)(0x80|(cp&0x3F));
        /*The low surrogate has been consumed as well.*/
        in++;
      }
      else{
        /*Either a genuine 3-byte character, an unpaired surrogate, or a
           non-character; all of them are encoded as 3 bytes.*/
        utf8[out++]=(char)(0xE0|(lead>>12));
        utf8[out++]=(char)(0x80|((lead>>6)&0x3F));
        utf8[out++]=(char)(0x80|(lead&0x3F));
      }
    }
  }
  utf8[out]='\0';
  return utf8;
}
65 | |
66 typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line, | |
67 int *num_args); | |
68 | |
69 /*Make a best-effort attempt to support UTF-8 on Windows.*/ | |
70 void win32_utf8_setup(int *_argc,const char ***_argv){ | |
71 HMODULE hlib; | |
72 /*We need to set stdin/stdout to binary mode. | |
73 This is unrelated to UTF-8 support, but it's platform specific and we need | |
74 to do it in the same places.*/ | |
75 _setmode(_fileno(stdin),_O_BINARY); | |
76 _setmode(_fileno(stdout),_O_BINARY); | |
77 hlib=LoadLibraryA("shell32.dll"); | |
78 if(hlib!=NULL){ | |
79 command_line_to_argv_w_func command_line_to_argv_w; | |
80 /*This function is only available on Windows 2000 or later.*/ | |
81 command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib, | |
82 "CommandLineToArgvW"); | |
83 if(command_line_to_argv_w!=NULL){ | |
84 wchar_t **argvw; | |
85 int argc; | |
86 argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc); | |
87 if(argvw!=NULL){ | |
88 int ai; | |
89 /*Really, I don't see why argc would ever differ from *_argc, but let's | |
90 be paranoid.*/ | |
91 if(argc>*_argc)argc=*_argc; | |
92 for(ai=0;ai<argc;ai++){ | |
93 char *argv; | |
94 argv=utf16_to_utf8(argvw[ai]); | |
95 if(argv!=NULL)(*_argv)[ai]=argv; | |
96 } | |
97 *_argc=argc; | |
98 LocalFree(argvw); | |
99 } | |
100 } | |
101 FreeLibrary(hlib); | |
102 } | |
103 # if defined(CP_UTF8) | |
104 /*This does not work correctly in all environments (it breaks output in | |
105 mingw32 for me), and requires a Unicode font (e.g., when using the default | |
106 Raster font, even characters that are available in the font's codepage | |
107 won't display properly).*/ | |
108 /*SetConsoleOutputCP(CP_UTF8);*/ | |
109 # endif | |
110 } | |
111 #endif |