annotate src/opusfile-0.9/examples/win32utf8.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 #if defined(_WIN32)
Chris@69 2 # include <stdio.h>
Chris@69 3 # include <stdlib.h>
Chris@69 4 # include <wchar.h>
Chris@69 5 /*We need the following two to set stdin/stdout to binary.*/
Chris@69 6 # include <io.h>
Chris@69 7 # include <fcntl.h>
Chris@69 8 # define WIN32_LEAN_AND_MEAN
Chris@69 9 # define WIN32_EXTRA_LEAN
Chris@69 10 # include <windows.h>
Chris@69 11 # include "win32utf8.h"
Chris@69 12
/*Converts a NUL-terminated UTF-16 string into a newly allocated UTF-8 string.
  _src: The string to convert, one UTF-16 code unit per wchar_t.
  Return: A NUL-terminated buffer obtained from malloc(), which the caller must
           release with free(), or NULL if the buffer could not be allocated
           (including when its size would not fit in a size_t).
  Unpaired surrogates and 'not a character' values are not rejected; they are
   written out as 3-byte sequences (see the comment in the loop body).*/
static char *utf16_to_utf8(const wchar_t *_src){
  char *dst;
  size_t len;
  size_t si;
  size_t di;
  len=wcslen(_src);
  /*The worst case is 3 output bytes per code unit (a surrogate pair turns 2
     code units into 4 bytes), plus one byte for the terminator.
    Refuse lengths for which 3*len+1 would wrap around: malloc() would then
     hand back a buffer that is too small and the loop below would overrun
     it.*/
  if(len>(((size_t)-1)-1)/3)return NULL;
  dst=(char *)malloc(3*len+1);
  if(dst==NULL)return NULL;
  for(di=si=0;si<len;si++){
    unsigned c0;
    c0=_src[si];
    if(c0<0x80){
      /*Can be represented by a 1-byte sequence.*/
      dst[di++]=(char)c0;
      continue;
    }
    if(c0<0x800){
      /*Can be represented by a 2-byte sequence.*/
      dst[di++]=(char)(0xC0|(c0>>6));
      dst[di++]=(char)(0x80|(c0&0x3F));
      continue;
    }
    if(c0>=0xD800&&c0<0xDC00){
      unsigned c1;
      /*Reading one element ahead is safe: si<len, so at worst this reads the
         terminating NUL.*/
      c1=_src[si+1];
      if(c1>=0xDC00&&c1<0xE000){
        unsigned w;
        /*Surrogate pair: combine both halves into one code point.*/
        w=(((c0&0x3FF)<<10)|(c1&0x3FF))+0x10000;
        /*Can be represented by a 4-byte sequence.*/
        dst[di++]=(char)(0xF0|(w>>18));
        dst[di++]=(char)(0x80|((w>>12)&0x3F));
        dst[di++]=(char)(0x80|((w>>6)&0x3F));
        dst[di++]=(char)(0x80|(w&0x3F));
        /*Skip the low surrogate we just consumed.*/
        si++;
        continue;
      }
    }
    /*Anything else is either a valid 3-byte sequence, an invalid surrogate
       pair, or 'not a character'.
      In the latter two cases, we just encode the value as a 3-byte sequence
       anyway (producing technically invalid UTF-8).
      Later error handling will detect the problem, with a better chance of
       giving a useful error message.*/
    dst[di++]=(char)(0xE0|(c0>>12));
    dst[di++]=(char)(0x80|((c0>>6)&0x3F));
    dst[di++]=(char)(0x80|(c0&0x3F));
  }
  dst[di]='\0';
  return dst;
}
Chris@69 65
Chris@69 66 typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
Chris@69 67 int *num_args);
Chris@69 68
Chris@69 69 /*Make a best-effort attempt to support UTF-8 on Windows.*/
Chris@69 70 void win32_utf8_setup(int *_argc,const char ***_argv){
Chris@69 71 HMODULE hlib;
Chris@69 72 /*We need to set stdin/stdout to binary mode.
Chris@69 73 This is unrelated to UTF-8 support, but it's platform specific and we need
Chris@69 74 to do it in the same places.*/
Chris@69 75 _setmode(_fileno(stdin),_O_BINARY);
Chris@69 76 _setmode(_fileno(stdout),_O_BINARY);
Chris@69 77 hlib=LoadLibraryA("shell32.dll");
Chris@69 78 if(hlib!=NULL){
Chris@69 79 command_line_to_argv_w_func command_line_to_argv_w;
Chris@69 80 /*This function is only available on Windows 2000 or later.*/
Chris@69 81 command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib,
Chris@69 82 "CommandLineToArgvW");
Chris@69 83 if(command_line_to_argv_w!=NULL){
Chris@69 84 wchar_t **argvw;
Chris@69 85 int argc;
Chris@69 86 argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc);
Chris@69 87 if(argvw!=NULL){
Chris@69 88 int ai;
Chris@69 89 /*Really, I don't see why argc would ever differ from *_argc, but let's
Chris@69 90 be paranoid.*/
Chris@69 91 if(argc>*_argc)argc=*_argc;
Chris@69 92 for(ai=0;ai<argc;ai++){
Chris@69 93 char *argv;
Chris@69 94 argv=utf16_to_utf8(argvw[ai]);
Chris@69 95 if(argv!=NULL)(*_argv)[ai]=argv;
Chris@69 96 }
Chris@69 97 *_argc=argc;
Chris@69 98 LocalFree(argvw);
Chris@69 99 }
Chris@69 100 }
Chris@69 101 FreeLibrary(hlib);
Chris@69 102 }
Chris@69 103 # if defined(CP_UTF8)
Chris@69 104 /*This does not work correctly in all environments (it breaks output in
Chris@69 105 mingw32 for me), and requires a Unicode font (e.g., when using the default
Chris@69 106 Raster font, even characters that are available in the font's codepage
Chris@69 107 won't display properly).*/
Chris@69 108 /*SetConsoleOutputCP(CP_UTF8);*/
Chris@69 109 # endif
Chris@69 110 }
Chris@69 111 #endif