Skip to content

Commit

Permalink
bpo-34485: Add _PyCoreConfig.stdio_encoding (pythonGH-8881)
Browse files Browse the repository at this point in the history
* Add stdio_encoding and stdio_errors fields to _PyCoreConfig.
* Add unit tests on stdio_encoding and stdio_errors.
  • Loading branch information
vstinner authored Aug 29, 2018
1 parent 177d921 commit dfe0dc7
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 135 deletions.
12 changes: 12 additions & 0 deletions Include/coreconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,18 @@ typedef struct {
If set to -1 (default), it is set to !Py_UnbufferedStdioFlag. */
int buffered_stdio;

/* Encoding of sys.stdin, sys.stdout and sys.stderr.
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_errors' attribute. */
char *stdio_encoding;

/* Error handler of sys.stdin and sys.stdout.
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_encoding' attribute. */
char *stdio_errors;

#ifdef MS_WINDOWS
/* If greater than 1, use the "mbcs" encoding instead of the UTF-8
encoding for the filesystem encoding.
Expand Down
3 changes: 3 additions & 0 deletions Include/pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif
#ifdef Py_BUILD_CORE
PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
#endif

#ifdef __cplusplus
}
Expand Down
36 changes: 33 additions & 3 deletions Lib/test/test_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'quiet': 0,
'user_site_directory': 1,
'buffered_stdio': 1,
# None means that check_config() gets the expected encoding at runtime
'stdio_encoding': None,
'stdio_errors': None,

'_install_importlib': 1,
'_check_hash_pycs_mode': 'default',
'_frozen': 0,
}

def get_stdio_encoding(self, env):
    """Return [encoding, errors] of sys.stdout in a child interpreter.

    A fresh Python process (sys.executable) is started with the given
    environment 'env'; it prints sys.stdout.encoding and sys.stdout.errors
    to a pipe. stderr is merged into stdout so that a failure message is
    included in the raised exception.

    Raises Exception if the child exits with a non-zero status.
    """
    script = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
    proc = subprocess.run([sys.executable, '-c', script],
                          env=env,
                          text=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT)
    if proc.returncode != 0:
        raise Exception(f"failed to get the stdio encoding: stdout={proc.stdout!r}")
    # The child prints "<encoding> <errors>" on one line.
    return proc.stdout.rstrip().split()

def check_config(self, testname, expected):
expected = dict(self.DEFAULT_CONFIG, **expected)

env = dict(os.environ)
for key in list(env):
if key.startswith('PYTHON'):
Expand All @@ -303,13 +319,19 @@ def check_config(self, testname, expected):
# on the current locale
env['PYTHONCOERCECLOCALE'] = '0'
env['PYTHONUTF8'] = '0'
out, err = self.run_embedded_interpreter(testname, env=env)
# Ignore err

expected = dict(self.DEFAULT_CONFIG, **expected)
if expected['stdio_encoding'] is None or expected['stdio_errors'] is None:
res = self.get_stdio_encoding(env)
if expected['stdio_encoding'] is None:
expected['stdio_encoding'] = res[0]
if expected['stdio_errors'] is None:
expected['stdio_errors'] = res[1]
for key, value in expected.items():
expected[key] = str(value)

out, err = self.run_embedded_interpreter(testname, env=env)
# Ignore err

config = {}
for line in out.splitlines():
key, value = line.split(' = ', 1)
Expand All @@ -331,7 +353,11 @@ def test_init_global_config(self):
'verbose': 1,
'quiet': 1,
'buffered_stdio': 0,

'utf8_mode': 1,
'stdio_encoding': 'utf-8',
'stdio_errors': 'surrogateescape',

'user_site_directory': 0,
'_frozen': 1,
}
Expand All @@ -350,6 +376,8 @@ def test_init_from_config(self):
'malloc_stats': 1,

'utf8_mode': 1,
'stdio_encoding': 'iso8859-1',
'stdio_errors': 'replace',

'pycache_prefix': 'conf_pycache_prefix',
'program_name': './conf_program_name',
Expand Down Expand Up @@ -387,6 +415,8 @@ def test_init_env(self):
'write_bytecode': 0,
'verbose': 1,
'buffered_stdio': 0,
'stdio_encoding': 'iso8859-1',
'stdio_errors': 'replace',
'user_site_directory': 0,
'faulthandler': 1,
'dev_mode': 1,
Expand Down
8 changes: 8 additions & 0 deletions Programs/_testembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ dump_config(void)
printf("user_site_directory = %i\n", config->user_site_directory);
printf("buffered_stdio = %i\n", config->buffered_stdio);
ASSERT_EQUAL(config->buffered_stdio, !Py_UnbufferedStdioFlag);
printf("stdio_encoding = %s\n", config->stdio_encoding);
printf("stdio_errors = %s\n", config->stdio_errors);

/* FIXME: test legacy_windows_fs_encoding */
/* FIXME: test legacy_windows_stdio */
Expand Down Expand Up @@ -532,6 +534,11 @@ static int test_init_from_config(void)
Py_UnbufferedStdioFlag = 0;
config.buffered_stdio = 0;

putenv("PYTHONIOENCODING=cp424");
Py_SetStandardStreamEncoding("ascii", "ignore");
config.stdio_encoding = "iso8859-1";
config.stdio_errors = "replace";

putenv("PYTHONNOUSERSITE=");
Py_NoUserSiteDirectory = 0;
config.user_site_directory = 0;
Expand Down Expand Up @@ -569,6 +576,7 @@ static void test_init_env_putenvs(void)
putenv("PYTHONNOUSERSITE=1");
putenv("PYTHONFAULTHANDLER=1");
putenv("PYTHONDEVMODE=1");
putenv("PYTHONIOENCODING=iso8859-1:replace");
/* FIXME: test PYTHONWARNINGS */
/* FIXME: test PYTHONEXECUTABLE */
/* FIXME: test PYTHONHOME */
Expand Down
181 changes: 179 additions & 2 deletions Python/coreconfig.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include "Python.h"
#include "internal/pystate.h"
#include <locale.h>
#ifdef HAVE_LANGINFO_H
# include <langinfo.h>
#endif


#define DECODE_LOCALE_ERR(NAME, LEN) \
Expand Down Expand Up @@ -89,8 +92,8 @@ _Py_wstrlist_copy(int len, wchar_t **list)
* mechanism that attempts to figure out an appropriate IO encoding
*/

char *_Py_StandardStreamEncoding = NULL;
char *_Py_StandardStreamErrors = NULL;
static char *_Py_StandardStreamEncoding = NULL;
static char *_Py_StandardStreamErrors = NULL;

int
Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
Expand Down Expand Up @@ -205,6 +208,9 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
CLEAR(config->dll_path);
#endif
CLEAR(config->base_exec_prefix);

CLEAR(config->stdio_encoding);
CLEAR(config->stdio_errors);
#undef CLEAR
#undef CLEAR_WSTRLIST
}
Expand All @@ -216,6 +222,15 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
_PyCoreConfig_Clear(config);

#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
/* Copy the byte string attribute ATTR from config2 to config.
   A non-NULL source string is duplicated with _PyMem_RawStrdup(), so config
   gets its own copy. A NULL source leaves config->ATTR unchanged.
   WARNING: on allocation failure, this macro executes "return -1" in the
   function that expands it. */
#define COPY_STR_ATTR(ATTR) \
do { \
if (config2->ATTR != NULL) { \
config->ATTR = _PyMem_RawStrdup(config2->ATTR); \
if (config->ATTR == NULL) { \
return -1; \
} \
} \
} while (0)
#define COPY_WSTR_ATTR(ATTR) \
do { \
if (config2->ATTR != NULL) { \
Expand Down Expand Up @@ -287,6 +302,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(quiet);
COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio);
COPY_STR_ATTR(stdio_encoding);
COPY_STR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
COPY_ATTR(legacy_windows_fs_encoding);
COPY_ATTR(legacy_windows_stdio);
Expand Down Expand Up @@ -932,6 +949,161 @@ config_init_locale(_PyCoreConfig *config)
}


/* Return the default error handler name for the standard streams.

   - Windows: always "surrogateescape".
   - Other platforms: "surrogateescape" if the current LC_CTYPE locale is
     "C" or "POSIX", or (when PY_COERCE_C_LOCALE is defined) if it is a
     locale coercion target; "strict" otherwise, including when the
     LC_CTYPE locale cannot be queried.

   The result is a string literal: the caller must not modify or free it.
   The 'config' parameter is currently not used. */
static const char *
get_stdio_errors(const _PyCoreConfig *config)
{
#ifdef MS_WINDOWS
    /* On Windows, always use surrogateescape by default */
    return "surrogateescape";
#else
    /* Passing NULL only queries the current locale, it doesn't change it */
    const char *ctype_loc = setlocale(LC_CTYPE, NULL);
    if (ctype_loc == NULL) {
        return "strict";
    }

    /* surrogateescape is the default in the legacy C and POSIX locales */
    if (!strcmp(ctype_loc, "C") || !strcmp(ctype_loc, "POSIX")) {
        return "surrogateescape";
    }

#ifdef PY_COERCE_C_LOCALE
    /* surrogateescape is the default in locale coercion target locales */
    if (_Py_IsLocaleCoercionTarget(ctype_loc)) {
        return "surrogateescape";
    }
#endif

    return "strict";
#endif
}


/* Get the name of the locale encoding.

   - Windows: "cpXXX" where XXX is the ANSI code page returned by GetACP().
   - Android: always "UTF-8".
   - Other platforms: result of nl_langinfo(CODESET); an error is returned
     if nl_langinfo() returns NULL or an empty string.

   On success, *locale_encoding is set to a copy of the name allocated with
   _PyMem_RawStrdup(); the caller owns this string. The previous value of
   *locale_encoding is overwritten without being released.

   Return _Py_INIT_OK() on success, _Py_INIT_NO_MEMORY() on memory allocation
   failure, or a user error if the encoding cannot be determined. */
_PyInitError
_Py_get_locale_encoding(char **locale_encoding)
{
#ifdef MS_WINDOWS
    char encoding[20];
    /* GetACP() returns an unsigned int (UINT): format it with %u */
    PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
#elif defined(__ANDROID__)
    const char *encoding = "UTF-8";
#else
    const char *encoding = nl_langinfo(CODESET);
    if (!encoding || encoding[0] == '\0') {
        return _Py_INIT_USER_ERR("failed to get the locale encoding: "
                                 "nl_langinfo(CODESET) failed");
    }
#endif
    *locale_encoding = _PyMem_RawStrdup(encoding);
    if (*locale_encoding == NULL) {
        return _Py_INIT_NO_MEMORY();
    }
    return _Py_INIT_OK();
}


/* Set '*attr' to a copy of 'value' if '*attr' is still NULL and 'value' is
   not NULL; otherwise do nothing.
   Return 0 on success (or if nothing had to be done), -1 if the memory
   allocation failed. */
static int
config_stdio_dup_if_unset(char **attr, const char *value)
{
    if (*attr != NULL || value == NULL) {
        return 0;
    }
    *attr = _PyMem_RawStrdup(value);
    return (*attr != NULL) ? 0 : -1;
}


/* Compute config->stdio_encoding and config->stdio_errors.

   A field which is already set is never overridden. Each missing field is
   filled from the first source which provides a value, in this order:

   1. parameters of Py_SetStandardStreamEncoding();
   2. PYTHONIOENCODING environment variable ("encoding", "encoding:errors"
      or ":errors"); if it contains an encoding but no error handler,
      "strict" is used as the error handler;
   3. UTF-8 Mode: "utf-8" and "surrogateescape";
   4. the locale encoding and the default error handler of the current
      locale.

   Return _Py_INIT_OK() on success, or an error (memory allocation failure,
   or failure to get the locale encoding). */
static _PyInitError
config_init_stdio_encoding(_PyCoreConfig *config)
{
    /* Py_SetStandardStreamEncoding() parameters, if the function was
       called */
    if (config_stdio_dup_if_unset(&config->stdio_encoding,
                                  _Py_StandardStreamEncoding) < 0
        || config_stdio_dup_if_unset(&config->stdio_errors,
                                     _Py_StandardStreamErrors) < 0)
    {
        return _Py_INIT_NO_MEMORY();
    }

    /* Nothing left to compute: don't even read PYTHONIOENCODING */
    if (config->stdio_encoding != NULL && config->stdio_errors != NULL) {
        return _Py_INIT_OK();
    }

    /* PYTHONIOENCODING environment variable */
    const char *env_value = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
    if (env_value != NULL) {
        /* Work on a private copy: it is split in place at the first ':' */
        char *env_encoding = _PyMem_RawStrdup(env_value);
        if (env_encoding == NULL) {
            return _Py_INIT_NO_MEMORY();
        }

        const char *env_errors = NULL;
        char *colon = strchr(env_encoding, ':');
        if (colon != NULL) {
            *colon = '\0';
            /* An empty error handler ("encoding:") means "not set" */
            if (colon[1] != '\0') {
                env_errors = colon + 1;
            }
        }

        int no_memory = 0;

        /* Does PYTHONIOENCODING contain an encoding? */
        if (env_encoding[0] != '\0') {
            /* If the encoding is set but not the error handler,
               use "strict" error handler by default.
               PYTHONIOENCODING=latin1 behaves as
               PYTHONIOENCODING=latin1:strict. */
            if (env_errors == NULL) {
                env_errors = "strict";
            }
            no_memory = (config_stdio_dup_if_unset(&config->stdio_encoding,
                                                   env_encoding) < 0);
        }
        if (!no_memory) {
            no_memory = (config_stdio_dup_if_unset(&config->stdio_errors,
                                                   env_errors) < 0);
        }

        PyMem_RawFree(env_encoding);
        if (no_memory) {
            return _Py_INIT_NO_MEMORY();
        }
    }

    /* UTF-8 Mode uses UTF-8/surrogateescape */
    if (config->utf8_mode) {
        if (config_stdio_dup_if_unset(&config->stdio_encoding, "utf-8") < 0
            || config_stdio_dup_if_unset(&config->stdio_errors,
                                         "surrogateescape") < 0)
        {
            return _Py_INIT_NO_MEMORY();
        }
    }

    /* Last resort: use the locale encoding, and choose the default error
       handler based on the current locale. */
    if (config->stdio_encoding == NULL) {
        _PyInitError init_err = _Py_get_locale_encoding(&config->stdio_encoding);
        if (_Py_INIT_FAILED(init_err)) {
            return init_err;
        }
    }
    if (config->stdio_errors == NULL) {
        config->stdio_errors = _PyMem_RawStrdup(get_stdio_errors(config));
        if (config->stdio_errors == NULL) {
            return _Py_INIT_NO_MEMORY();
        }
    }

    return _Py_INIT_OK();
}


/* Read configuration settings from standard locations
*
* This function doesn't make any changes to the interpreter state - it
Expand Down Expand Up @@ -1044,6 +1216,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
config->argc = 0;
}

err = config_init_stdio_encoding(config);
if (_Py_INIT_FAILED(err)) {
return err;
}

assert(config->coerce_c_locale >= 0);
assert(config->use_environment >= 0);

Expand Down
Loading

0 comments on commit dfe0dc7

Please sign in to comment.