Mercurial > hg > sv-dependency-builds
diff src/opusfile-0.9/examples/win32utf8.c @ 69:7aeed7906520
Add Opus sources and macOS builds
author | Chris Cannam |
---|---|
date | Wed, 23 Jan 2019 13:48:08 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/opusfile-0.9/examples/win32utf8.c Wed Jan 23 13:48:08 2019 +0000 @@ -0,0 +1,111 @@ +#if defined(_WIN32) +# include <stdio.h> +# include <stdlib.h> +# include <wchar.h> +/*We need the following two to set stdin/stdout to binary.*/ +# include <io.h> +# include <fcntl.h> +# define WIN32_LEAN_AND_MEAN +# define WIN32_EXTRA_LEAN +# include <windows.h> +# include "win32utf8.h" + +static char *utf16_to_utf8(const wchar_t *_src){ + char *dst; + size_t len; + size_t si; + size_t di; + len=wcslen(_src); + dst=(char *)malloc(sizeof(*dst)*(3*len+1)); + if(dst==NULL)return dst; + for(di=si=0;si<len;si++){ + unsigned c0; + c0=_src[si]; + if(c0<0x80){ + /*Can be represented by a 1-byte sequence.*/ + dst[di++]=(char)c0; + continue; + } + else if(c0<0x800){ + /*Can be represented by a 2-byte sequence.*/ + dst[di++]=(char)(0xC0|c0>>6); + dst[di++]=(char)(0x80|c0&0x3F); + continue; + } + else if(c0>=0xD800&&c0<0xDC00){ + unsigned c1; + /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/ + c1=_src[si+1]; + if(c1>=0xDC00&&c1<0xE000){ + unsigned w; + /*Surrogate pair.*/ + w=((c0&0x3FF)<<10|c1&0x3FF)+0x10000; + /*Can be represented by a 4-byte sequence.*/ + dst[di++]=(char)(0xF0|w>>18); + dst[di++]=(char)(0x80|w>>12&0x3F); + dst[di++]=(char)(0x80|w>>6&0x3F); + dst[di++]=(char)(0x80|w&0x3F); + si++; + continue; + } + } + /*Anything else is either a valid 3-byte sequence, an invalid surrogate + pair, or 'not a character'. + In the latter two cases, we just encode the value as a 3-byte + sequence anyway (producing technically invalid UTF-8). + Later error handling will detect the problem, with a better + chance of giving a useful error message.*/ + dst[di++]=(char)(0xE0|c0>>12); + dst[di++]=(char)(0x80|c0>>6&0x3F); + dst[di++]=(char)(0x80|c0&0x3F); + } + dst[di++]='\0'; + return dst; +} + +typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line, + int *num_args); + +/*Make a best-effort attempt to support UTF-8 on Windows.*/ +void win32_utf8_setup(int *_argc,const char ***_argv){ + HMODULE hlib; + /*We need to set stdin/stdout to binary mode. + This is unrelated to UTF-8 support, but it's platform specific and we need + to do it in the same places.*/ + _setmode(_fileno(stdin),_O_BINARY); + _setmode(_fileno(stdout),_O_BINARY); + hlib=LoadLibraryA("shell32.dll"); + if(hlib!=NULL){ + command_line_to_argv_w_func command_line_to_argv_w; + /*This function is only available on Windows 2000 or later.*/ + command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib, + "CommandLineToArgvW"); + if(command_line_to_argv_w!=NULL){ + wchar_t **argvw; + int argc; + argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc); + if(argvw!=NULL){ + int ai; + /*Really, I don't see why argc would ever differ from *_argc, but let's + be paranoid.*/ + if(argc>*_argc)argc=*_argc; + for(ai=0;ai<argc;ai++){ + char *argv; + argv=utf16_to_utf8(argvw[ai]); + if(argv!=NULL)(*_argv)[ai]=argv; + } + *_argc=argc; + LocalFree(argvw); + } + } + FreeLibrary(hlib); + } +# if defined(CP_UTF8) + /*This does not work correctly in all environments (it breaks output in + mingw32 for me), and requires a Unicode font (e.g., when using the default + Raster font, even characters that are available in the font's codepage + won't display properly).*/ + /*SetConsoleOutputCP(CP_UTF8);*/ +# endif +} +#endif