Skip to content

Commit

Permalink
Windows: Add support for unicode command lines
Browse files Browse the repository at this point in the history
Summary:
The MSVCRT deliberately sends main() code-page specific characters.
This isn't too useful to LLVM as we end up converting the arguments to
UTF-16 and subsequently attempt to use the result as, for example, a
file name.  Instead, we need to have the ability to access the Unicode
command line and transform it to UTF-8.

This has the distinct advantage over using the MSVC-specific wmain()
function as our entry point because:
 - wmain() doesn't work on cygwin.
 - wmain() only works on MinGW with caveats, and only then on certain versions.
 - We get to keep our entry point as main(). :)

N.B.  This patch includes fixes to other parts of lib/Support/Windows
s.t. we would be able to take advantage of getting the Unicode paths.
E.G.  clang spawning clang -cc1 would want to give it Unicode arguments.

Reviewers: aaron.ballman, Bigcheese, rnk, ruiu

Reviewed By: rnk

CC: llvm-commits, ygao

Differential Revision: http://llvm-reviews.chandlerc.com/D1834

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192069 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
majnemer committed Oct 6, 2013
1 parent 2def179 commit 5a1a185
Show file tree
Hide file tree
Showing 18 changed files with 181 additions and 88 deletions.
1 change: 1 addition & 0 deletions autoconf/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1399,6 +1399,7 @@ AC_CHECK_LIB(m,sin)
if test "$llvm_cv_os_type" = "MingW" ; then
AC_CHECK_LIB(imagehlp, main)
AC_CHECK_LIB(psapi, main)
AC_CHECK_LIB(shell32, main)
fi

dnl dlopen() is required for plugin support.
Expand Down
1 change: 1 addition & 0 deletions cmake/config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ endif ()
if( MINGW )
set(HAVE_LIBIMAGEHLP 1)
set(HAVE_LIBPSAPI 1)
set(HAVE_LIBSHELL32 1)
# TODO: Check existence of libraries.
# include(CheckLibraryExists)
# CHECK_LIBRARY_EXISTS(imagehlp ??? . HAVE_LIBIMAGEHLP)
Expand Down
2 changes: 1 addition & 1 deletion cmake/modules/LLVM-Config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ function(get_system_libs return_var)
# Returns in `return_var' a list of system libraries used by LLVM.
if( NOT MSVC )
if( MINGW )
set(system_libs ${system_libs} imagehlp psapi)
set(system_libs ${system_libs} imagehlp psapi shell32)
elseif( CMAKE_HOST_UNIX )
if( HAVE_LIBRT )
set(system_libs ${system_libs} rt)
Expand Down
2 changes: 1 addition & 1 deletion cmake/modules/TableGen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ macro(add_tablegen target project)
endif()

if( MINGW )
target_link_libraries(${target} imagehlp psapi)
target_link_libraries(${target} imagehlp psapi shell32)
if(CMAKE_SIZEOF_VOID_P MATCHES "8")
set_target_properties(${target} PROPERTIES LINK_FLAGS -Wl,--stack,16777216)
endif(CMAKE_SIZEOF_VOID_P MATCHES "8")
Expand Down
3 changes: 3 additions & 0 deletions include/llvm/Config/config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@
/* Define to 1 if you have the `pthread' library (-lpthread). */
#cmakedefine HAVE_LIBPTHREAD ${HAVE_LIBPTHREAD}

/* Define to 1 if you have the `shell32' library (-lshell32). */
#cmakedefine HAVE_LIBSHELL32 ${HAVE_LIBSHELL32}

/* Define to 1 if you have the `udis86' library (-ludis86). */
#undef HAVE_LIBUDIS86

Expand Down
3 changes: 3 additions & 0 deletions include/llvm/Config/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@
/* Define to 1 if you have the `pthread' library (-lpthread). */
#undef HAVE_LIBPTHREAD

/* Define to 1 if you have the `shell32' library (-lshell32). */
#undef HAVE_LIBSHELL32

/* Define to 1 if you have the `udis86' library (-ludis86). */
#undef HAVE_LIBUDIS86

Expand Down
11 changes: 11 additions & 0 deletions include/llvm/Support/Process.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@
#ifndef LLVM_SUPPORT_PROCESS_H
#define LLVM_SUPPORT_PROCESS_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/TimeValue.h"

Expand Down Expand Up @@ -168,6 +171,14 @@ class Process {
// string. \arg Name is assumed to be in UTF-8 encoding too.
static Optional<std::string> GetEnv(StringRef name);

/// Fills \p Args with the arguments passed from the operating system to the
/// program and reports the outcome through the returned error_code.
///
/// \param Args receives the argument strings.
/// \param ArgsFromMain the argument vector handed to main(). The Unix
/// implementation appends it to \p Args unchanged; the Windows implementation
/// ignores it and instead re-parses the wide-character command line,
/// converting each argument to UTF-8.
/// \param ArgAllocator supplies the storage for the converted strings on
/// Windows; the Unix implementation does not use it.
static error_code
GetArgumentVector(SmallVectorImpl<const char *> &Args,
ArrayRef<const char *> ArgsFromMain,
SpecificBumpPtrAllocator<char> &ArgAllocator);

/// This function determines if the standard input is connected directly
/// to a user's input (keyboard probably), rather than coming from a file
/// or pipe.
Expand Down
8 changes: 8 additions & 0 deletions lib/Support/Unix/Process.inc
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
return std::string(Val);
}

/// Forwards the arguments received from main() into \p ArgsOut unchanged.
/// The allocator parameter is unnamed and never touched here.
error_code Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut,
                                      ArrayRef<const char *> ArgsIn,
                                      SpecificBumpPtrAllocator<char> &) {
  // Make room for everything up front, then copy the pointers one by one.
  ArgsOut.reserve(ArgsOut.size() + ArgsIn.size());
  for (ArrayRef<const char *>::iterator Cur = ArgsIn.begin(),
                                        End = ArgsIn.end();
       Cur != End; ++Cur)
    ArgsOut.push_back(*Cur);

  return error_code::success();
}

bool Process::StandardInIsUserInput() {
return FileDescriptorIsDisplayed(STDIN_FILENO);
}
Expand Down
9 changes: 8 additions & 1 deletion lib/Support/Windows/DynamicLibrary.inc
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,15 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
// This is mostly to ensure that the return value still shows up as "valid".
return DynamicLibrary(&OpenedHandles);
}

SmallVector<wchar_t, MAX_PATH> filenameUnicode;
if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
SetLastError(ec.value());
MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: ");
return DynamicLibrary();
}

HMODULE a_handle = LoadLibrary(filename);
HMODULE a_handle = LoadLibraryW(filenameUnicode.data());

if (a_handle == 0) {
MakeErrMsg(errMsg, std::string(filename) + ": Can't open : ");
Expand Down
29 changes: 15 additions & 14 deletions lib/Support/Windows/Path.inc
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ retry_random_path:
BYTE val = 0;
if (!::CryptGenRandom(CryptoProvider, 1, &val))
return windows_error(::GetLastError());
random_path_utf16.push_back("0123456789abcdef"[val & 15]);
random_path_utf16.push_back(L"0123456789abcdef"[val & 15]);
}
else
random_path_utf16.push_back(*i);
Expand Down Expand Up @@ -241,22 +241,23 @@ TimeValue file_status::getLastModificationTime() const {
}

error_code current_path(SmallVectorImpl<char> &result) {
SmallVector<wchar_t, 128> cur_path;
cur_path.reserve(128);
retry_cur_dir:
DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
SmallVector<wchar_t, MAX_PATH> cur_path;
DWORD len = MAX_PATH;

// A zero return value indicates a failure other than insufficient space.
if (len == 0)
return windows_error(::GetLastError());

// If there's insufficient space, the len returned is larger than the len
// given.
if (len > cur_path.capacity()) {
do {
cur_path.reserve(len);
goto retry_cur_dir;
}
len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());

// A zero return value indicates a failure other than insufficient space.
if (len == 0)
return windows_error(::GetLastError());

// If there's insufficient space, the len returned is larger than the len
// given.
} while (len > cur_path.capacity());

// On success, GetCurrentDirectoryW returns the number of characters not
// including the null-terminator.
cur_path.set_size(len);
return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
}
Expand Down
64 changes: 52 additions & 12 deletions lib/Support/Windows/Process.inc
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,25 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/Allocator.h"

#include "Windows.h"
#include <direct.h>
#include <io.h>
#include <malloc.h>
#include <psapi.h>
#include <Shellapi.h>

#ifdef __MINGW32__
#if (HAVE_LIBPSAPI != 1)
#error "libpsapi.a should be present"
#endif
#if (HAVE_LIBSHELL32 != 1)
#error "libshell32.a should be present"
#endif
#else
#pragma comment(lib, "psapi.lib")
#pragma comment(lib, "psapi.lib")
#pragma comment(lib, "Shell32.lib")
#endif

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -151,25 +158,58 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
// Environment variable can be encoded in non-UTF8 encoding, and there's no
// way to know what the encoding is. The only reliable way to look up
// multibyte environment variable is to use GetEnvironmentVariableW().
std::vector<wchar_t> Buf(16);
size_t Size = 0;
for (;;) {
Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.size());
if (Size < Buf.size())
break;
SmallVector<wchar_t, MAX_PATH> Buf;
size_t Size = MAX_PATH;
do {
Buf.reserve(Size);
Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.capacity());
if (Size == 0)
return None;

// Try again with larger buffer.
Buf.resize(Size + 1);
}
if (Size == 0)
return None;
} while (Size > Buf.capacity());
Buf.set_size(Size);

// Convert the result from UTF-16 to UTF-8.
SmallVector<char, 128> Res;
SmallVector<char, MAX_PATH> Res;
if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res))
return None;
return std::string(&Res[0]);
}

/// Rebuilds the argument vector from the process's wide-character command
/// line and appends each argument, converted to UTF-8, to \p Args. The
/// ArrayRef handed in from main() is ignored. Storage for every converted
/// string is obtained from \p ArgAllocator.
///
/// Returns the Windows error if the command line cannot be split, the
/// conversion error if any argument fails to convert (arguments converted
/// before the failure have already been appended), and success otherwise.
error_code
Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
                           ArrayRef<const char *>,
                           SpecificBumpPtrAllocator<char> &ArgAllocator) {
  int WideArgCount;
  error_code Status;

  wchar_t **WideArgs = CommandLineToArgvW(GetCommandLineW(), &WideArgCount);
  if (!WideArgs)
    return windows_error(::GetLastError());

  Args.reserve(WideArgCount);

  for (int Idx = 0; Idx < WideArgCount; ++Idx) {
    wchar_t *WideArg = WideArgs[Idx];

    SmallVector<char, MAX_PATH> Utf8Arg;
    Status = windows::UTF16ToUTF8(WideArg, wcslen(WideArg), Utf8Arg);
    if (Status)
      break;

    // size() + 1 bytes are copied so a terminator travels with the string.
    // NOTE(review): this presumes UTF16ToUTF8 leaves a NUL just past the end
    // of the vector -- confirm against its implementation.
    const size_t ByteCount = Utf8Arg.size() + 1;
    char *Storage = ArgAllocator.Allocate(ByteCount);
    ::memcpy(Storage, Utf8Arg.data(), ByteCount);
    Args.push_back(Storage);
  }

  // The array from CommandLineToArgvW is released on every path past the
  // null check above, including a failed conversion.
  LocalFree(WideArgs);

  return Status ? Status : error_code::success();
}

bool Process::StandardInIsUserInput() {
return FileDescriptorIsDisplayed(0);
}
Expand Down
Loading

0 comments on commit 5a1a185

Please sign in to comment.