cannam@154
|
1 #if defined(_WIN32)
|
cannam@154
|
2 # include <stdio.h>
|
cannam@154
|
3 # include <stdlib.h>
|
cannam@154
|
4 # include <wchar.h>
|
cannam@154
|
5 /*We need the following two to set stdin/stdout to binary.*/
|
cannam@154
|
6 # include <io.h>
|
cannam@154
|
7 # include <fcntl.h>
|
cannam@154
|
8 # define WIN32_LEAN_AND_MEAN
|
cannam@154
|
9 # define WIN32_EXTRA_LEAN
|
cannam@154
|
10 # include <windows.h>
|
cannam@154
|
11 # include "win32utf8.h"
|
cannam@154
|
12
|
cannam@154
|
/*Converts a NUL-terminated UTF-16 string into a newly allocated,
   NUL-terminated UTF-8 string.
  Each input unit below 0x80 becomes 1 byte, each unit below 0x800 becomes 2
   bytes, a high surrogate followed by a low surrogate becomes one 4-byte
   sequence, and every other unit becomes 3 bytes.
  Unpaired surrogates and non-characters are deliberately encoded as 3-byte
   sequences (producing technically invalid UTF-8) instead of being rejected,
   so that later error handling can detect the problem, with a better chance
   of giving a useful error message.
  NOTE(review): the arithmetic assumes every element of _src is a 16-bit code
   unit, as wchar_t is on the _WIN32 builds this file is guarded for.
  _src: The NUL-terminated UTF-16 string to convert.
  Return: A buffer obtained from malloc(), which the caller owns and must
           free(), or NULL if the allocation failed or if the string is so long
           that its worst-case output size cannot be represented in a size_t.*/
static char *utf16_to_utf8(const wchar_t *_src){
  char   *dst;
  size_t  len;
  size_t  si;
  size_t  di;
  len=wcslen(_src);
  /*No code unit produces more than 3 bytes of output (a surrogate pair
     consumes 2 units to produce 4 bytes), so 3*len+1 bytes always suffice.
    Refuse any length for which that size computation would wrap around:
     a wrapped size would under-allocate and the loop below would overflow the
     buffer.*/
  if(len>((size_t)-1-1)/3)return NULL;
  dst=(char *)malloc(sizeof(*dst)*(3*len+1));
  if(dst==NULL)return NULL;
  for(di=si=0;si<len;si++){
    unsigned c0;
    c0=_src[si];
    if(c0<0x80){
      /*Can be represented by a 1-byte sequence.*/
      dst[di++]=(char)c0;
      continue;
    }
    if(c0<0x800){
      /*Can be represented by a 2-byte sequence.*/
      dst[di++]=(char)(0xC0|(c0>>6));
      dst[di++]=(char)(0x80|(c0&0x3F));
      continue;
    }
    if(c0>=0xD800&&c0<0xDC00){
      unsigned c1;
      /*Reading one unit ahead is safe: c0 was not 0 and _src is
         NUL-terminated, so in the worst case this reads the terminator.*/
      c1=_src[si+1];
      if(c1>=0xDC00&&c1<0xE000){
        unsigned w;
        /*Surrogate pair: combine the two 10-bit halves into one code point.*/
        w=(((c0&0x3FF)<<10)|(c1&0x3FF))+0x10000;
        /*Can be represented by a 4-byte sequence.*/
        dst[di++]=(char)(0xF0|(w>>18));
        dst[di++]=(char)(0x80|((w>>12)&0x3F));
        dst[di++]=(char)(0x80|((w>>6)&0x3F));
        dst[di++]=(char)(0x80|(w&0x3F));
        /*Skip the low surrogate we just consumed.*/
        si++;
        continue;
      }
    }
    /*Anything else is either a valid 3-byte sequence, an invalid surrogate
       pair, or 'not a character'.
      In the latter two cases, we just encode the value as a 3-byte sequence
       anyway (see the function comment for why).*/
    dst[di++]=(char)(0xE0|(c0>>12));
    dst[di++]=(char)(0x80|((c0>>6)&0x3F));
    dst[di++]=(char)(0x80|(c0&0x3F));
  }
  dst[di]='\0';
  return dst;
}
|
cannam@154
|
65
|
cannam@154
|
66 typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
|
cannam@154
|
67 int *num_args);
|
cannam@154
|
68
|
cannam@154
|
69 /*Make a best-effort attempt to support UTF-8 on Windows.*/
|
cannam@154
|
70 void win32_utf8_setup(int *_argc,const char ***_argv){
|
cannam@154
|
71 HMODULE hlib;
|
cannam@154
|
72 /*We need to set stdin/stdout to binary mode.
|
cannam@154
|
73 This is unrelated to UTF-8 support, but it's platform specific and we need
|
cannam@154
|
74 to do it in the same places.*/
|
cannam@154
|
75 _setmode(_fileno(stdin),_O_BINARY);
|
cannam@154
|
76 _setmode(_fileno(stdout),_O_BINARY);
|
cannam@154
|
77 hlib=LoadLibraryA("shell32.dll");
|
cannam@154
|
78 if(hlib!=NULL){
|
cannam@154
|
79 command_line_to_argv_w_func command_line_to_argv_w;
|
cannam@154
|
80 /*This function is only available on Windows 2000 or later.*/
|
cannam@154
|
81 command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib,
|
cannam@154
|
82 "CommandLineToArgvW");
|
cannam@154
|
83 if(command_line_to_argv_w!=NULL){
|
cannam@154
|
84 wchar_t **argvw;
|
cannam@154
|
85 int argc;
|
cannam@154
|
86 argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc);
|
cannam@154
|
87 if(argvw!=NULL){
|
cannam@154
|
88 int ai;
|
cannam@154
|
89 /*Really, I don't see why argc would ever differ from *_argc, but let's
|
cannam@154
|
90 be paranoid.*/
|
cannam@154
|
91 if(argc>*_argc)argc=*_argc;
|
cannam@154
|
92 for(ai=0;ai<argc;ai++){
|
cannam@154
|
93 char *argv;
|
cannam@154
|
94 argv=utf16_to_utf8(argvw[ai]);
|
cannam@154
|
95 if(argv!=NULL)(*_argv)[ai]=argv;
|
cannam@154
|
96 }
|
cannam@154
|
97 *_argc=argc;
|
cannam@154
|
98 LocalFree(argvw);
|
cannam@154
|
99 }
|
cannam@154
|
100 }
|
cannam@154
|
101 FreeLibrary(hlib);
|
cannam@154
|
102 }
|
cannam@154
|
103 # if defined(CP_UTF8)
|
cannam@154
|
104 /*This does not work correctly in all environments (it breaks output in
|
cannam@154
|
105 mingw32 for me), and requires a Unicode font (e.g., when using the default
|
cannam@154
|
106 Raster font, even characters that are available in the font's codepage
|
cannam@154
|
107 won't display properly).*/
|
cannam@154
|
108 /*SetConsoleOutputCP(CP_UTF8);*/
|
cannam@154
|
109 # endif
|
cannam@154
|
110 }
|
cannam@154
|
111 #endif
|