cannam@154: #if defined(_WIN32) cannam@154: # include cannam@154: # include cannam@154: # include cannam@154: /*We need the following two to set stdin/stdout to binary.*/ cannam@154: # include cannam@154: # include cannam@154: # define WIN32_LEAN_AND_MEAN cannam@154: # define WIN32_EXTRA_LEAN cannam@154: # include cannam@154: # include "win32utf8.h" cannam@154: cannam@154: static char *utf16_to_utf8(const wchar_t *_src){ cannam@154: char *dst; cannam@154: size_t len; cannam@154: size_t si; cannam@154: size_t di; cannam@154: len=wcslen(_src); cannam@154: dst=(char *)malloc(sizeof(*dst)*(3*len+1)); cannam@154: if(dst==NULL)return dst; cannam@154: for(di=si=0;si>6); cannam@154: dst[di++]=(char)(0x80|c0&0x3F); cannam@154: continue; cannam@154: } cannam@154: else if(c0>=0xD800&&c0<0xDC00){ cannam@154: unsigned c1; cannam@154: /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/ cannam@154: c1=_src[si+1]; cannam@154: if(c1>=0xDC00&&c1<0xE000){ cannam@154: unsigned w; cannam@154: /*Surrogate pair.*/ cannam@154: w=((c0&0x3FF)<<10|c1&0x3FF)+0x10000; cannam@154: /*Can be represented by a 4-byte sequence.*/ cannam@154: dst[di++]=(char)(0xF0|w>>18); cannam@154: dst[di++]=(char)(0x80|w>>12&0x3F); cannam@154: dst[di++]=(char)(0x80|w>>6&0x3F); cannam@154: dst[di++]=(char)(0x80|w&0x3F); cannam@154: si++; cannam@154: continue; cannam@154: } cannam@154: } cannam@154: /*Anything else is either a valid 3-byte sequence, an invalid surrogate cannam@154: pair, or 'not a character'. cannam@154: In the latter two cases, we just encode the value as a 3-byte cannam@154: sequence anyway (producing technically invalid UTF-8). cannam@154: Later error handling will detect the problem, with a better cannam@154: chance of giving a useful error message.*/ cannam@154: dst[di++]=(char)(0xE0|c0>>12); cannam@154: dst[di++]=(char)(0x80|c0>>6&0x3F); cannam@154: dst[di++]=(char)(0x80|c0&0x3F); cannam@154: } cannam@154: dst[di++]='\0'; cannam@154: return dst; cannam@154: } cannam@154: cannam@154: typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line, cannam@154: int *num_args); cannam@154: cannam@154: /*Make a best-effort attempt to support UTF-8 on Windows.*/ cannam@154: void win32_utf8_setup(int *_argc,const char ***_argv){ cannam@154: HMODULE hlib; cannam@154: /*We need to set stdin/stdout to binary mode. cannam@154: This is unrelated to UTF-8 support, but it's platform specific and we need cannam@154: to do it in the same places.*/ cannam@154: _setmode(_fileno(stdin),_O_BINARY); cannam@154: _setmode(_fileno(stdout),_O_BINARY); cannam@154: hlib=LoadLibraryA("shell32.dll"); cannam@154: if(hlib!=NULL){ cannam@154: command_line_to_argv_w_func command_line_to_argv_w; cannam@154: /*This function is only available on Windows 2000 or later.*/ cannam@154: command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib, cannam@154: "CommandLineToArgvW"); cannam@154: if(command_line_to_argv_w!=NULL){ cannam@154: wchar_t **argvw; cannam@154: int argc; cannam@154: argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc); cannam@154: if(argvw!=NULL){ cannam@154: int ai; cannam@154: /*Really, I don't see why argc would ever differ from *_argc, but let's cannam@154: be paranoid.*/ cannam@154: if(argc>*_argc)argc=*_argc; cannam@154: for(ai=0;ai