/*NOTE(review): this file was recovered from a copy in which per-line
   annotation prefixes were mixed into the code and every span between a '<'
   and the following '>' had been stripped.
  The header names below and the spans marked NOTE(review) further down are
   reconstructions; confirm them against the upstream copy.*/
#if defined(_WIN32)
# include <stdio.h>
# include <stdlib.h>
# include <wchar.h>
/*We need the following two to set stdin/stdout to binary.*/
# include <io.h>
# include <fcntl.h>
# define WIN32_LEAN_AND_MEAN
# define WIN32_EXTRA_LEAN
# include <windows.h>
# include "win32utf8.h"

/*Converts a NUL-terminated UTF-16 string to a newly allocated UTF-8 string.
  _src: The NUL-terminated UTF-16 string to convert.
  Return: A NUL-terminated buffer obtained from malloc() (the caller owns it),
           or NULL if the allocation failed.
  Unpaired surrogates are not rejected; they are emitted as 3-byte sequences
   (see the comment in the loop body).*/
static char *utf16_to_utf8(const wchar_t *_src){
  char   *dst;
  size_t  len;
  size_t  si;
  size_t  di;
  len=wcslen(_src);
  /*A single UTF-16 code unit expands to at most 3 bytes, and a surrogate pair
     (2 code units) to 4 bytes, so 3 bytes per code unit plus the terminator
     is always enough.*/
  dst=(char *)malloc(sizeof(*dst)*(3*len+1));
  if(dst==NULL)return dst;
  /*NOTE(review): the loop condition, the load of c0, the 1-byte branch and
     the start of the 2-byte branch were missing from the recovered text and
     have been restored to match the surviving remainder of the 2-byte
     branch.*/
  for(di=si=0;si<len;si++){
    unsigned c0;
    c0=_src[si];
    if(c0<0x80){
      /*Can be represented by a 1-byte sequence.*/
      dst[di++]=(char)c0;
      continue;
    }
    else if(c0<0x800){
      /*Can be represented by a 2-byte sequence.*/
      dst[di++]=(char)(0xC0|(c0>>6));
      dst[di++]=(char)(0x80|(c0&0x3F));
      continue;
    }
    else if(c0>=0xD800&&c0<0xDC00){
      unsigned c1;
      /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
      c1=_src[si+1];
      if(c1>=0xDC00&&c1<0xE000){
        unsigned w;
        /*Surrogate pair.*/
        w=(((c0&0x3FF)<<10)|(c1&0x3FF))+0x10000;
        /*Can be represented by a 4-byte sequence.*/
        dst[di++]=(char)(0xF0|(w>>18));
        dst[di++]=(char)(0x80|((w>>12)&0x3F));
        dst[di++]=(char)(0x80|((w>>6)&0x3F));
        dst[di++]=(char)(0x80|(w&0x3F));
        /*Skip the low surrogate we just consumed.*/
        si++;
        continue;
      }
    }
    /*Anything else is either a valid 3-byte sequence, an invalid surrogate
       pair, or 'not a character'.
      In the latter two cases, we just encode the value as a 3-byte
       sequence anyway (producing technically invalid UTF-8).
      Later error handling will detect the problem, with a better
       chance of giving a useful error message.*/
    dst[di++]=(char)(0xE0|(c0>>12));
    dst[di++]=(char)(0x80|((c0>>6)&0x3F));
    dst[di++]=(char)(0x80|(c0&0x3F));
  }
  dst[di++]='\0';
  return dst;
}

/*Signature of CommandLineToArgvW(), which is looked up at run time instead of
   being linked against directly.*/
typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
 int *num_args);

/*Make a best-effort attempt to support UTF-8 on Windows.
  Switches stdin and stdout to binary mode, then, if CommandLineToArgvW() can
   be found in shell32.dll, re-parses the process command line as UTF-16 and
   converts the arguments to UTF-8.
  _argc: In/out argument count; the count obtained from the wide command line
          is clamped so that it never exceeds the incoming value.
  _argv: In/out pointer to the argument vector.*/
void win32_utf8_setup(int *_argc,const char ***_argv){
  HMODULE hlib;
  /*We need to set stdin/stdout to binary mode.
    This is unrelated to UTF-8 support, but it's platform specific and we need
     to do it in the same places.*/
  _setmode(_fileno(stdin),_O_BINARY);
  _setmode(_fileno(stdout),_O_BINARY);
  hlib=LoadLibraryA("shell32.dll");
  if(hlib!=NULL){
    command_line_to_argv_w_func command_line_to_argv_w;
    /*This function is only available on Windows 2000 or later.*/
    command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib,
     "CommandLineToArgvW");
    if(command_line_to_argv_w!=NULL){
      wchar_t **argvw;
      int       argc;
      argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc);
      if(argvw!=NULL){
        int ai;
        /*Really, I don't see why argc would ever differ from *_argc, but let's
           be paranoid.*/
        if(argc>*_argc)argc=*_argc;
        /*NOTE(review): the recovered text is cut off right after
           "for(ai=0;ai".
          Everything from the loop condition to the end of the file is a
           reconstruction and must be confirmed against the upstream copy.*/
        for(ai=0;ai<argc;ai++){
          char *argv;
          argv=utf16_to_utf8(argvw[ai]);
          /*On allocation failure, keep the original (non-UTF-8) argument.*/
          if(argv!=NULL)(*_argv)[ai]=argv;
        }
        *_argc=argc;
        LocalFree(argvw);
      }
    }
    FreeLibrary(hlib);
  }
}
#endif