Skip to content

Commit

Permalink
Bug 1296503 - Switch config.status to unicode literals. r=ted
Browse files Browse the repository at this point in the history
Ironically, the first thing we do with those unicode literals is convert
them to byte strings because the build backends don't like them yet.
  • Loading branch information
glandium committed Aug 23, 2016
1 parent 31a17a0 commit f97add6
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 42 deletions.
73 changes: 31 additions & 42 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
from __future__ import print_function, unicode_literals

import codecs
import json
import os
import subprocess
import sys

from collections import Iterable
import textwrap


base_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(base_dir, 'python', 'mozbuild'))
from mozbuild.configure import ConfigureSandbox
from mozbuild.util import (
indented_repr,
encode,
)


def main(argv):
Expand Down Expand Up @@ -60,57 +62,44 @@ def sanitized_bools(v):
print("Creating config.status", file=sys.stderr)
encoding = 'mbcs' if sys.platform == 'win32' else 'utf-8'
with codecs.open('config.status', 'w', encoding) as fh:
fh.write('#!%s\n' % config['PYTHON'])
fh.write('# coding=%s\n' % encoding)
# Because we're serializing as JSON but reading as python, the values
# for True, False and None are true, false and null, which don't exist.
# Define them.
fh.write('true, false, null = True, False, None\n')
fh.write(textwrap.dedent('''\
#!%(python)s
# coding=%(encoding)s
from __future__ import unicode_literals
from mozbuild.util import encode
encoding = '%(encoding)s'
''') % {'python': config['PYTHON'], 'encoding': encoding})
# A lot of the build backend code is currently expecting byte
# strings and breaks in subtle ways with unicode strings. (bug 1296508)
for k, v in sanitized_config.iteritems():
fh.write('%s = ' % k)
json.dump(v, fh, sort_keys=True, indent=4, ensure_ascii=False)
fh.write('\n')
fh.write('%s = encode(%s, encoding)\n' % (k, indented_repr(v)))
fh.write("__all__ = ['topobjdir', 'topsrcdir', 'defines', "
"'non_global_defines', 'substs', 'mozconfig']")

if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
fh.write('''
if __name__ == '__main__':
args = dict([(name, globals()[name]) for name in __all__])
from mozbuild.config_status import config_status
config_status(**args)
''')

# Running config.status standalone uses byte literals for all the config,
# instead of the unicode literals we have in sanitized_config right now.
# Some values in sanitized_config also have more complex types, such as
# EnumString, which using when calling config_status would currently break
# the build, as well as making it inconsistent with re-running
# config.status. Fortunately, EnumString derives from unicode, so it's
# covered by converting unicode strings.
# Moreover, a lot of the build backend code is currently expecting byte
# strings and breaks in subtle ways with unicode strings.
def encode(v):
if isinstance(v, dict):
return {
encode(k): encode(val)
for k, val in v.iteritems()
}
if isinstance(v, str):
return v
if isinstance(v, unicode):
return v.encode(encoding)
if isinstance(v, Iterable):
return [encode(i) for i in v]
return v
fh.write(textwrap.dedent('''
if __name__ == '__main__':
from mozbuild.config_status import config_status
args = dict([(name, globals()[name]) for name in __all__])
config_status(**args)
'''))

# Other things than us are going to run this file, so we need to give it
# executable permissions.
os.chmod('config.status', 0o755)
if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
os.environ[b'WRITE_MOZINFO'] = b'1'
from mozbuild.config_status import config_status
return config_status(args=[], **encode(sanitized_config))

# Some values in sanitized_config also have more complex types, such as
# EnumString, which using when calling config_status would currently
# break the build, as well as making it inconsistent with re-running
# config.status. Fortunately, EnumString derives from unicode, so it's
# covered by converting unicode strings.

# A lot of the build backend code is currently expecting byte strings
# and breaks in subtle ways with unicode strings.
return config_status(args=[], **encode(sanitized_config, encoding))
return 0


Expand Down
17 changes: 17 additions & 0 deletions python/mozbuild/mozbuild/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from collections import (
defaultdict,
Iterable,
OrderedDict,
)
from io import (
Expand Down Expand Up @@ -1244,3 +1245,19 @@ def recurse_indented_repr(o, level):
else:
yield repr(o)
return ''.join(recurse_indented_repr(o, 0))


def encode(obj, encoding='utf-8'):
'''Recursively encode unicode strings with the given encoding.'''
if isinstance(obj, dict):
return {
encode(k, encoding): encode(v, encoding)
for k, v in obj.iteritems()
}
if isinstance(obj, bytes):
return obj
if isinstance(obj, unicode):
return obj.encode(encoding)
if isinstance(obj, Iterable):
return [encode(i, encoding) for i in obj]
return obj

0 comments on commit f97add6

Please sign in to comment.