annotate src/opusfile-0.9/examples/win32utf8.c @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents 4664ac0c1032
children
rev   line source
cannam@154 1 #if defined(_WIN32)
cannam@154 2 # include <stdio.h>
cannam@154 3 # include <stdlib.h>
cannam@154 4 # include <wchar.h>
cannam@154 5 /*We need the following two to set stdin/stdout to binary.*/
cannam@154 6 # include <io.h>
cannam@154 7 # include <fcntl.h>
cannam@154 8 # define WIN32_LEAN_AND_MEAN
cannam@154 9 # define WIN32_EXTRA_LEAN
cannam@154 10 # include <windows.h>
cannam@154 11 # include "win32utf8.h"
cannam@154 12
/*Converts a NUL-terminated UTF-16 string into a freshly allocated,
   NUL-terminated UTF-8 string.
  Returns NULL if the allocation fails; otherwise the result comes from
   malloc() and the caller is responsible for releasing it.
  A surrogate that is not part of a valid high/low pair is written out as a
   3-byte sequence, which is not valid UTF-8.*/
static char *utf16_to_utf8(const wchar_t *_src){
  size_t  nunits;
  size_t  in;
  size_t  out;
  char   *utf8;
  nunits=wcslen(_src);
  /*A single code unit never needs more than 3 bytes, and a surrogate pair
     turns 2 units into 4 bytes, so 3 bytes per unit plus the terminator is
     always enough.*/
  utf8=(char *)malloc((3*nunits+1)*sizeof(*utf8));
  if(utf8==NULL)return NULL;
  in=0;
  out=0;
  while(in<nunits){
    unsigned unit;
    unit=_src[in++];
    if(unit<0x80){
      /*1-byte sequence.*/
      utf8[out++]=(char)unit;
    }
    else if(unit<0x800){
      /*2-byte sequence.*/
      utf8[out++]=(char)(0xC0|(unit>>6));
      utf8[out++]=(char)(0x80|(unit&0x3F));
    }
    else{
      unsigned low;
      low=0;
      /*Only look ahead after a high surrogate.
        The read cannot run past the string: the unit just consumed was
         non-zero, so at worst this fetches the terminating NUL.*/
      if(unit>=0xD800&&unit<0xDC00)low=_src[in];
      if(low>=0xDC00&&low<0xE000){
        unsigned cp;
        /*Valid surrogate pair: combine into one code point and emit a 4-byte
           sequence, consuming the low surrogate as well.*/
        cp=0x10000+(((unit&0x3FF)<<10)|(low&0x3FF));
        utf8[out++]=(char)(0xF0|(cp>>18));
        utf8[out++]=(char)(0x80|((cp>>12)&0x3F));
        utf8[out++]=(char)(0x80|((cp>>6)&0x3F));
        utf8[out++]=(char)(0x80|(cp&0x3F));
        in++;
      }
      else{
        /*Either a genuine 3-byte character, an unpaired surrogate, or a
           'not a character' value.
          The last two are still written as 3 bytes (technically invalid
           UTF-8) so that later error handling can detect the problem and has
           a better chance of reporting something useful.*/
        utf8[out++]=(char)(0xE0|(unit>>12));
        utf8[out++]=(char)(0x80|((unit>>6)&0x3F));
        utf8[out++]=(char)(0x80|(unit&0x3F));
      }
    }
  }
  utf8[out]='\0';
  return utf8;
}
cannam@154 65
cannam@154 66 typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
cannam@154 67 int *num_args);
cannam@154 68
cannam@154 69 /*Make a best-effort attempt to support UTF-8 on Windows.*/
cannam@154 70 void win32_utf8_setup(int *_argc,const char ***_argv){
cannam@154 71 HMODULE hlib;
cannam@154 72 /*We need to set stdin/stdout to binary mode.
cannam@154 73 This is unrelated to UTF-8 support, but it's platform specific and we need
cannam@154 74 to do it in the same places.*/
cannam@154 75 _setmode(_fileno(stdin),_O_BINARY);
cannam@154 76 _setmode(_fileno(stdout),_O_BINARY);
cannam@154 77 hlib=LoadLibraryA("shell32.dll");
cannam@154 78 if(hlib!=NULL){
cannam@154 79 command_line_to_argv_w_func command_line_to_argv_w;
cannam@154 80 /*This function is only available on Windows 2000 or later.*/
cannam@154 81 command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib,
cannam@154 82 "CommandLineToArgvW");
cannam@154 83 if(command_line_to_argv_w!=NULL){
cannam@154 84 wchar_t **argvw;
cannam@154 85 int argc;
cannam@154 86 argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc);
cannam@154 87 if(argvw!=NULL){
cannam@154 88 int ai;
cannam@154 89 /*Really, I don't see why argc would ever differ from *_argc, but let's
cannam@154 90 be paranoid.*/
cannam@154 91 if(argc>*_argc)argc=*_argc;
cannam@154 92 for(ai=0;ai<argc;ai++){
cannam@154 93 char *argv;
cannam@154 94 argv=utf16_to_utf8(argvw[ai]);
cannam@154 95 if(argv!=NULL)(*_argv)[ai]=argv;
cannam@154 96 }
cannam@154 97 *_argc=argc;
cannam@154 98 LocalFree(argvw);
cannam@154 99 }
cannam@154 100 }
cannam@154 101 FreeLibrary(hlib);
cannam@154 102 }
cannam@154 103 # if defined(CP_UTF8)
cannam@154 104 /*This does not work correctly in all environments (it breaks output in
cannam@154 105 mingw32 for me), and requires a Unicode font (e.g., when using the default
cannam@154 106 Raster font, even characters that are available in the font's codepage
cannam@154 107 won't display properly).*/
cannam@154 108 /*SetConsoleOutputCP(CP_UTF8);*/
cannam@154 109 # endif
cannam@154 110 }
cannam@154 111 #endif