diff src/opusfile-0.9/examples/win32utf8.c @ 69:7aeed7906520

Add Opus sources and macOS builds
author Chris Cannam
date Wed, 23 Jan 2019 13:48:08 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/opusfile-0.9/examples/win32utf8.c	Wed Jan 23 13:48:08 2019 +0000
@@ -0,0 +1,111 @@
+#if defined(_WIN32)
+# include <stdio.h>
+# include <stdlib.h>
+# include <wchar.h>
+/*We need the following two to set stdin/stdout to binary.*/
+# include <io.h>
+# include <fcntl.h>
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+# include "win32utf8.h"
+
+/*Converts a NUL-terminated UTF-16 string into a freshly allocated,
+   NUL-terminated UTF-8 string.
+  _src: The UTF-16 code units to convert.
+  Return: A buffer obtained from malloc(), which the caller owns, or NULL if
+   the allocation failed.
+  Unpaired surrogates and non-characters are not rejected: they are written
+   out as 3-byte sequences (technically invalid UTF-8), so that later error
+   handling can detect them with a better chance of giving a useful
+   message.*/
+static char *utf16_to_utf8(const wchar_t *_src){
+  const wchar_t *in;
+  const wchar_t *in_end;
+  char          *utf8;
+  char          *out;
+  size_t         nunits;
+  nunits=wcslen(_src);
+  /*A lone code unit needs at most 3 bytes and a surrogate pair (2 code units)
+     needs 4, so 3 bytes per code unit plus the terminator always suffices.*/
+  utf8=(char *)malloc(sizeof(*utf8)*(3*nunits+1));
+  if(utf8==NULL)return NULL;
+  out=utf8;
+  in_end=_src+nunits;
+  for(in=_src;in<in_end;in++){
+    unsigned lead;
+    lead=*in;
+    if(lead<0x80){
+      /*1-byte sequence.*/
+      *out++=(char)lead;
+    }
+    else if(lead<0x800){
+      /*2-byte sequence.*/
+      *out++=(char)(0xC0|(lead>>6));
+      *out++=(char)(0x80|(lead&0x3F));
+    }
+    else{
+      unsigned trail;
+      /*Only a high surrogate looks ahead.
+        Reading in[1] is safe: lead is non-zero, so the worst case is reading
+         the terminating NUL.*/
+      trail=0;
+      if(lead>=0xD800&&lead<0xDC00)trail=in[1];
+      if(trail>=0xDC00&&trail<0xE000){
+        unsigned cp;
+        /*Valid surrogate pair: 4-byte sequence.*/
+        cp=(((lead&0x3FF)<<10)|(trail&0x3FF))+0x10000;
+        *out++=(char)(0xF0|(cp>>18));
+        *out++=(char)(0x80|((cp>>12)&0x3F));
+        *out++=(char)(0x80|((cp>>6)&0x3F));
+        *out++=(char)(0x80|(cp&0x3F));
+        /*Consume the low surrogate as well.*/
+        in++;
+      }
+      else{
+        /*Everything else, valid or not, becomes a 3-byte sequence.*/
+        *out++=(char)(0xE0|(lead>>12));
+        *out++=(char)(0x80|((lead>>6)&0x3F));
+        *out++=(char)(0x80|(lead&0x3F));
+      }
+    }
+  }
+  *out='\0';
+  return utf8;
+}
+
+/*Pointer type matching shell32's CommandLineToArgvW(), which is looked up at
+   run time with GetProcAddress() rather than being called directly.*/
+typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR _cmd_line,
+ int *_num_args);
+
+/*Make a best-effort attempt to support UTF-8 on Windows.
+  stdin and stdout are switched to binary mode, and then the entries of
+   *_argv are replaced with UTF-8 conversions of the wide-character command
+   line, when that can be obtained.
+  _argc: In/out argument count; lowered if the wide command line yields fewer
+          arguments than the caller passed in.
+  _argv: In/out argument vector; an entry whose conversion fails keeps its
+          original value.
+  The converted strings are allocated with malloc() and are not freed here.
+  Every failure (library load, symbol lookup, parsing, allocation) is silently
+   tolerated and leaves the affected arguments untouched.*/
+void win32_utf8_setup(int *_argc,const char ***_argv){
+  command_line_to_argv_w_func   split_command_line;
+  HMODULE                       shell32;
+  wchar_t                     **wide_args;
+  int                           nargs;
+  int                           i;
+  /*Binary mode has nothing to do with UTF-8, but it is also platform
+     specific and has to happen in the same places.*/
+  _setmode(_fileno(stdin),_O_BINARY);
+  _setmode(_fileno(stdout),_O_BINARY);
+  shell32=LoadLibraryA("shell32.dll");
+  if(shell32==NULL)return;
+  /*This function is only available on Windows 2000 or later, hence the
+     run-time lookup.*/
+  split_command_line=(command_line_to_argv_w_func)GetProcAddress(shell32,
+   "CommandLineToArgvW");
+  wide_args=NULL;
+  if(split_command_line!=NULL){
+    wide_args=(*split_command_line)(GetCommandLineW(),&nargs);
+  }
+  if(wide_args!=NULL){
+    /*The two counts are not expected to differ, but never index past the
+       array the caller gave us.*/
+    if(nargs>*_argc)nargs=*_argc;
+    for(i=0;i<nargs;i++){
+      char *utf8_arg;
+      utf8_arg=utf16_to_utf8(wide_args[i]);
+      if(utf8_arg!=NULL)(*_argv)[i]=utf8_arg;
+    }
+    *_argc=nargs;
+    LocalFree(wide_args);
+  }
+  FreeLibrary(shell32);
+  /*SetConsoleOutputCP(CP_UTF8) is deliberately not called here.
+    It does not work correctly in all environments (it was seen to break
+     output under mingw32), and it requires a Unicode font (e.g., with the
+     default Raster font, even characters that are available in the font's
+     codepage won't display properly).*/
+}
+#endif