From 5a1a1856a4dfa1335d937437fade5c0bbab06560 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 6 Oct 2013 20:25:49 +0000 Subject: [PATCH] Windows: Add support for unicode command lines Summary: The MSVCRT deliberately sends main() code-page specific characters. This isn't too useful to LLVM as we end up converting the arguments to UTF-16 and subsequently attempt to use the result as, for example, a file name. Instead, we need to have the ability to access the Unicode command line and transform it to UTF-8. This has the distinct advantage over using the MSVC-specific wmain() function as our entry point because: - wmain() doesn't work on cygwin. - wmain() only works on MinGW with caveats and only then on certain versions. - We get to keep our entry point as main(). :) N.B. This patch includes fixes to other parts of lib/Support/Windows s.t. we would be able to take advantage of getting the Unicode paths. E.g. clang spawning clang -cc1 would want to give it Unicode arguments. Reviewers: aaron.ballman, Bigcheese, rnk, ruiu Reviewed By: rnk CC: llvm-commits, ygao Differential Revision: http://llvm-reviews.chandlerc.com/D1834 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192069 91177308-0d34-0410-b5e6-96231b3b80d8 --- autoconf/configure.ac | 1 + cmake/config-ix.cmake | 1 + cmake/modules/LLVM-Config.cmake | 2 +- cmake/modules/TableGen.cmake | 2 +- include/llvm/Config/config.h.cmake | 3 + include/llvm/Config/config.h.in | 3 + include/llvm/Support/Process.h | 11 +++ lib/Support/Unix/Process.inc | 8 ++ lib/Support/Windows/DynamicLibrary.inc | 9 +- lib/Support/Windows/Path.inc | 29 +++--- lib/Support/Windows/Process.inc | 64 ++++++++++--- lib/Support/Windows/Program.inc | 126 ++++++++++++++----------- lib/Support/Windows/Signals.inc | 2 +- lib/Support/Windows/Windows.h | 1 - projects/sample/autoconf/configure.ac | 1 + utils/FileCheck/CMakeLists.txt | 2 +- utils/FileUpdate/CMakeLists.txt | 2 +- utils/not/CMakeLists.txt | 2 +- 18 files changed, 181 insertions(+), 88 
deletions(-) diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 45f2fe485ec1..85eee54bf390 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -1399,6 +1399,7 @@ AC_CHECK_LIB(m,sin) if test "$llvm_cv_os_type" = "MingW" ; then AC_CHECK_LIB(imagehlp, main) AC_CHECK_LIB(psapi, main) + AC_CHECK_LIB(shell32, main) fi dnl dlopen() is required for plugin support. diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 57725f83af94..dc991a23be07 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -415,6 +415,7 @@ endif () if( MINGW ) set(HAVE_LIBIMAGEHLP 1) set(HAVE_LIBPSAPI 1) + set(HAVE_LIBSHELL32 1) # TODO: Check existence of libraries. # include(CheckLibraryExists) # CHECK_LIBRARY_EXISTS(imagehlp ??? . HAVE_LIBIMAGEHLP) diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake index 9fa45ce966a7..e26fabd1c6f7 100644 --- a/cmake/modules/LLVM-Config.cmake +++ b/cmake/modules/LLVM-Config.cmake @@ -2,7 +2,7 @@ function(get_system_libs return_var) # Returns in `return_var' a list of system libraries used by LLVM. 
if( NOT MSVC ) if( MINGW ) - set(system_libs ${system_libs} imagehlp psapi) + set(system_libs ${system_libs} imagehlp psapi shell32) elseif( CMAKE_HOST_UNIX ) if( HAVE_LIBRT ) set(system_libs ${system_libs} rt) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index bbd6339ac8cd..c17e67e478ef 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -128,7 +128,7 @@ macro(add_tablegen target project) endif() if( MINGW ) - target_link_libraries(${target} imagehlp psapi) + target_link_libraries(${target} imagehlp psapi shell32) if(CMAKE_SIZEOF_VOID_P MATCHES "8") set_target_properties(${target} PROPERTIES LINK_FLAGS -Wl,--stack,16777216) endif(CMAKE_SIZEOF_VOID_P MATCHES "8") diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index 77de967f9055..4c49cbb8c375 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -203,6 +203,9 @@ /* Define to 1 if you have the `pthread' library (-lpthread). */ #cmakedefine HAVE_LIBPTHREAD ${HAVE_LIBPTHREAD} +/* Define to 1 if you have the `shell32' library (-lshell32). */ +#cmakedefine HAVE_LIBSHELL32 ${HAVE_LIBSHELL32} + /* Define to 1 if you have the `udis86' library (-ludis86). */ #undef HAVE_LIBUDIS86 diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index 860bccb83313..0d43ae50a14e 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -217,6 +217,9 @@ /* Define to 1 if you have the `pthread' library (-lpthread). */ #undef HAVE_LIBPTHREAD +/* Define to 1 if you have the `shell32' library (-lshell32). */ +#undef HAVE_LIBSHELL32 + /* Define to 1 if you have the `udis86' library (-ludis86). 
*/ #undef HAVE_LIBUDIS86 diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h index ce39d048bb96..21720367199d 100644 --- a/include/llvm/Support/Process.h +++ b/include/llvm/Support/Process.h @@ -25,8 +25,11 @@ #ifndef LLVM_SUPPORT_PROCESS_H #define LLVM_SUPPORT_PROCESS_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/system_error.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/TimeValue.h" @@ -168,6 +171,14 @@ class Process { // string. \arg Name is assumed to be in UTF-8 encoding too. static Optional GetEnv(StringRef name); + /// This function returns a SmallVector containing the arguments passed from + /// the operating system to the program. This function expects to be handed + /// the vector passed in from main. + static error_code + GetArgumentVector(SmallVectorImpl &Args, + ArrayRef ArgsFromMain, + SpecificBumpPtrAllocator &ArgAllocator); + /// This function determines if the standard input is connected directly /// to a user's input (keyboard probably), rather than coming from a file /// or pipe. 
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index f18fa221036a..c5778e746b7c 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -190,6 +190,14 @@ Optional Process::GetEnv(StringRef Name) { return std::string(Val); } +error_code Process::GetArgumentVector(SmallVectorImpl &ArgsOut, + ArrayRef ArgsIn, + SpecificBumpPtrAllocator &) { + ArgsOut.append(ArgsIn.begin(), ArgsIn.end()); + + return error_code::success(); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(STDIN_FILENO); } diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 2edaf74fa565..5a7b21920a9e 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -83,8 +83,15 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, // This is mostly to ensure that the return value still shows up as "valid". return DynamicLibrary(&OpenedHandles); } + + SmallVector filenameUnicode; + if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { + SetLastError(ec.value()); + MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: "); + return DynamicLibrary(); + } - HMODULE a_handle = LoadLibrary(filename); + HMODULE a_handle = LoadLibraryW(filenameUnicode.data()); if (a_handle == 0) { MakeErrMsg(errMsg, std::string(filename) + ": Can't open : "); diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 94a501b39ac7..998ec422ec62 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -128,7 +128,7 @@ retry_random_path: BYTE val = 0; if (!::CryptGenRandom(CryptoProvider, 1, &val)) return windows_error(::GetLastError()); - random_path_utf16.push_back("0123456789abcdef"[val & 15]); + random_path_utf16.push_back(L"0123456789abcdef"[val & 15]); } else random_path_utf16.push_back(*i); @@ -241,22 +241,23 @@ TimeValue file_status::getLastModificationTime() const { } 
error_code current_path(SmallVectorImpl &result) { - SmallVector cur_path; - cur_path.reserve(128); -retry_cur_dir: - DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); + SmallVector cur_path; + DWORD len = MAX_PATH; - // A zero return value indicates a failure other than insufficient space. - if (len == 0) - return windows_error(::GetLastError()); - - // If there's insufficient space, the len returned is larger than the len - // given. - if (len > cur_path.capacity()) { + do { cur_path.reserve(len); - goto retry_cur_dir; - } + len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); + + // A zero return value indicates a failure other than insufficient space. + if (len == 0) + return windows_error(::GetLastError()); + + // If there's insufficient space, the len returned is larger than the len + // given. + } while (len > cur_path.capacity()); + // On success, GetCurrentDirectoryW returns the number of characters not + // including the null-terminator. cur_path.set_size(len); return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); } diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index 5d776504fbb5..7f7e06c85501 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -11,18 +11,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Allocator.h" + #include "Windows.h" #include #include #include #include +#include #ifdef __MINGW32__ #if (HAVE_LIBPSAPI != 1) #error "libpsapi.a should be present" #endif + #if (HAVE_LIBSHELL32 != 1) + #error "libshell32.a should be present" + #endif #else - #pragma comment(lib, "psapi.lib") +#pragma comment(lib, "psapi.lib") +#pragma comment(lib, "Shell32.lib") #endif //===----------------------------------------------------------------------===// @@ -151,25 +158,58 @@ Optional Process::GetEnv(StringRef Name) { // Environment variable can be encoded in non-UTF8 encoding, and 
there's no // way to know what the encoding is. The only reliable way to look up // multibyte environment variable is to use GetEnvironmentVariableW(). - std::vector Buf(16); - size_t Size = 0; - for (;;) { - Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.size()); - if (Size < Buf.size()) - break; + SmallVector Buf; + size_t Size = MAX_PATH; + do { + Buf.reserve(Size); + Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.capacity()); + if (Size == 0) + return None; + // Try again with larger buffer. - Buf.resize(Size + 1); - } - if (Size == 0) - return None; + } while (Size > Buf.capacity()); + Buf.set_size(Size); // Convert the result from UTF-16 to UTF-8. - SmallVector Res; + SmallVector Res; if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res)) return None; return std::string(&Res[0]); } +error_code +Process::GetArgumentVector(SmallVectorImpl &Args, + ArrayRef, + SpecificBumpPtrAllocator &ArgAllocator) { + int NewArgCount; + error_code ec; + + wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(), + &NewArgCount); + if (!UnicodeCommandLine) + return windows_error(::GetLastError()); + + Args.reserve(NewArgCount); + + for (int i = 0; i < NewArgCount; ++i) { + SmallVector NewArgString; + ec = windows::UTF16ToUTF8(UnicodeCommandLine[i], + wcslen(UnicodeCommandLine[i]), + NewArgString); + if (ec) + break; + + char *Buffer = ArgAllocator.Allocate(NewArgString.size() + 1); + ::memcpy(Buffer, NewArgString.data(), NewArgString.size() + 1); + Args.push_back(Buffer); + } + LocalFree(UnicodeCommandLine); + if (ec) + return ec; + + return error_code::success(); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(0); } diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index 28690852b0d5..e464e2f68745 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -42,42 +42,39 @@ std::string sys::FindProgramByName(const std::string &progName) { // At this 
point, the file name is valid and does not contain slashes. // Let Windows search for it. - std::string buffer; - buffer.resize(MAX_PATH); - char *dummy = NULL; - DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH, - &buffer[0], &dummy); - - // See if it wasn't found. - if (len == 0) + SmallVector progNameUnicode; + if (windows::UTF8ToUTF16(progName, progNameUnicode)) return ""; - // See if we got the entire path. - if (len < MAX_PATH) - return buffer; + SmallVector buffer; + DWORD len = MAX_PATH; + do { + buffer.reserve(len); + len = ::SearchPathW(NULL, progNameUnicode.data(), L".exe", + buffer.capacity(), buffer.data(), NULL); - // Buffer was too small; grow and retry. - while (true) { - buffer.resize(len+1); - DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, &buffer[0], &dummy); - - // It is unlikely the search failed, but it's always possible some file - // was added or removed since the last search, so be paranoid... - if (len2 == 0) + // See if it wasn't found. + if (len == 0) return ""; - else if (len2 <= len) - return buffer; - len = len2; - } + // Buffer was too small; grow and retry. + } while (len > buffer.capacity()); + + buffer.set_size(len); + SmallVector result; + if (windows::UTF16ToUTF8(buffer.begin(), buffer.size(), result)) + return ""; + + return std::string(result.data(), result.size()); } static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) { HANDLE h; if (path == 0) { - DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd), - GetCurrentProcess(), &h, - 0, TRUE, DUPLICATE_SAME_ACCESS); + if (!DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd), + GetCurrentProcess(), &h, + 0, TRUE, DUPLICATE_SAME_ACCESS)) + return INVALID_HANDLE_VALUE; return h; } @@ -92,9 +89,13 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) { sa.lpSecurityDescriptor = 0; sa.bInheritHandle = TRUE; - h = CreateFile(fname.c_str(), fd ? 
GENERIC_WRITE : GENERIC_READ, - FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS, - FILE_ATTRIBUTE_NORMAL, NULL); + SmallVector fnameUnicode; + if (windows::UTF8ToUTF16(fname, fnameUnicode)) + return INVALID_HANDLE_VALUE; + + h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ, + FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); if (h == INVALID_HANDLE_VALUE) { MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " + (fd ? "input: " : "output: ")); @@ -218,34 +219,28 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, *p = 0; // The pointer to the environment block for the new process. - OwningArrayPtr envblock; + std::vector EnvBlock; if (envp) { // An environment block consists of a null-terminated block of // null-terminated strings. Convert the array of environment variables to // an environment block by concatenating them. + for (unsigned i = 0; envp[i]; ++i) { + SmallVector EnvString; + if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16"); + return false; + } - // First, determine the length of the environment block. - len = 0; - for (unsigned i = 0; envp[i]; i++) - len += strlen(envp[i]) + 1; - - // Now build the environment block. - envblock.reset(new char[len+1]); - p = envblock.get(); - - for (unsigned i = 0; envp[i]; i++) { - const char *ev = envp[i]; - size_t len = strlen(ev) + 1; - memcpy(p, ev, len); - p += len; + EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end()); + EnvBlock.push_back(0); } - - *p = 0; + EnvBlock.push_back(0); } // Create a child process. 
- STARTUPINFO si; + STARTUPINFOW si; memset(&si, 0, sizeof(si)); si.cb = sizeof(si); si.hStdInput = INVALID_HANDLE_VALUE; @@ -269,9 +264,14 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) { // If stdout and stderr should go to the same place, redirect stderr // to the handle already open for stdout. - DuplicateHandle(GetCurrentProcess(), si.hStdOutput, - GetCurrentProcess(), &si.hStdError, - 0, TRUE, DUPLICATE_SAME_ACCESS); + if (!DuplicateHandle(GetCurrentProcess(), si.hStdOutput, + GetCurrentProcess(), &si.hStdError, + 0, TRUE, DUPLICATE_SAME_ACCESS)) { + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + MakeErrMsg(ErrMsg, "can't dup stderr to stdout"); + return false; + } } else { // Just redirect stderr si.hStdError = RedirectIO(redirects[2], 2, ErrMsg); @@ -289,9 +289,27 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, fflush(stdout); fflush(stderr); - std::string ProgramStr = Program; - BOOL rc = CreateProcess(ProgramStr.c_str(), command.get(), NULL, NULL, TRUE, - 0, envblock.get(), NULL, &si, &pi); + + SmallVector ProgramUtf16; + if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert application name to UTF-16")); + return false; + } + + SmallVector CommandUtf16; + if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert command-line to UTF-16")); + return false; + } + + BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, + TRUE, CREATE_UNICODE_ENVIRONMENT, + EnvBlock.empty() ? 
0 : EnvBlock.data(), 0, &si, + &pi); DWORD err = GetLastError(); // Regardless of whether the process got created or not, we are done with @@ -304,7 +322,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, if (!rc) { SetLastError(err); MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") + - ProgramStr + "'"); + Program.str() + "'"); return false; } diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index bce83b968684..2b4a66d00896 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -135,7 +135,7 @@ typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64); static fpSymFunctionTableAccess64 SymFunctionTableAccess64; static bool load64BitDebugHelp(void) { - HMODULE hLib = ::LoadLibrary("Dbghelp.dll"); + HMODULE hLib = ::LoadLibrary(TEXT("Dbghelp.dll")); if (hLib) { StackWalk64 = (fpStackWalk64) ::GetProcAddress(hLib, "StackWalk64"); diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h index 1236fe565217..657ae4fc8382 100644 --- a/lib/Support/Windows/Windows.h +++ b/lib/Support/Windows/Windows.h @@ -31,7 +31,6 @@ #include "llvm/Support/system_error.h" #include #include -#include #include #include #include diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac index 46fd54909040..03cd214f6668 100644 --- a/projects/sample/autoconf/configure.ac +++ b/projects/sample/autoconf/configure.ac @@ -1087,6 +1087,7 @@ AC_CHECK_LIB(m,sin) if test "$llvm_cv_os_type" = "MingW" ; then AC_CHECK_LIB(imagehlp, main) AC_CHECK_LIB(psapi, main) + AC_CHECK_LIB(shell32, main) fi dnl dlopen() is required for plugin support. 
diff --git a/utils/FileCheck/CMakeLists.txt b/utils/FileCheck/CMakeLists.txt index fa56f92a8f28..d691ceb429cc 100644 --- a/utils/FileCheck/CMakeLists.txt +++ b/utils/FileCheck/CMakeLists.txt @@ -4,7 +4,7 @@ add_llvm_utility(FileCheck target_link_libraries(FileCheck LLVMSupport) if( MINGW ) - target_link_libraries(FileCheck imagehlp psapi) + target_link_libraries(FileCheck imagehlp psapi shell32) endif( MINGW ) if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD ) target_link_libraries(FileCheck pthread) diff --git a/utils/FileUpdate/CMakeLists.txt b/utils/FileUpdate/CMakeLists.txt index 655aaec3bc2a..0114e50c6274 100644 --- a/utils/FileUpdate/CMakeLists.txt +++ b/utils/FileUpdate/CMakeLists.txt @@ -4,7 +4,7 @@ add_llvm_utility(FileUpdate target_link_libraries(FileUpdate LLVMSupport) if( MINGW ) - target_link_libraries(FileUpdate imagehlp psapi) + target_link_libraries(FileUpdate imagehlp psapi shell32) endif( MINGW ) if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD ) target_link_libraries(FileUpdate pthread) diff --git a/utils/not/CMakeLists.txt b/utils/not/CMakeLists.txt index f4c02290d7d1..5ff14d6692d4 100644 --- a/utils/not/CMakeLists.txt +++ b/utils/not/CMakeLists.txt @@ -4,7 +4,7 @@ add_llvm_utility(not target_link_libraries(not LLVMSupport) if( MINGW ) - target_link_libraries(not imagehlp psapi) + target_link_libraries(not imagehlp psapi shell32) endif( MINGW ) if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD ) target_link_libraries(not pthread)