Skip to content

Commit

Permalink
lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
Browse files Browse the repository at this point in the history
between runs given the same Grammar.txt input regardless of the hash
randomization setting.
  • Loading branch information
gpshead committed Sep 8, 2016
1 parent d61910c commit dd1c638
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 18 deletions.
15 changes: 9 additions & 6 deletions Lib/lib2to3/pgen2/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,19 @@ def parse_string(self, text, debug=False):
return self.parse_tokens(tokens, debug)


def _generate_pickle_name(gt):
head, tail = os.path.splitext(gt)
if tail == ".txt":
tail = ""
return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"


def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger()
if gp is None:
head, tail = os.path.splitext(gt)
if tail == ".txt":
tail = ""
gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt)
Expand All @@ -124,7 +127,7 @@ def load_grammar(gt="Grammar.txt", gp=None,
try:
g.dump(gp)
except OSError as e:
logger.info("Writing failed:"+str(e))
logger.info("Writing failed: %s", e)
else:
g = grammar.Grammar()
g.load(gp)
Expand Down
28 changes: 26 additions & 2 deletions Lib/lib2to3/pgen2/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"""

# Python imports
import collections
import pickle

# Local imports
Expand Down Expand Up @@ -85,9 +86,21 @@ def __init__(self):
self.start = 256

def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
"""Dump the grammar tables to a pickle file.
dump() recursively changes all dict to OrderedDict, so the pickled file
is not exactly the same as what was passed in to dump(). load() uses the
pickled file to create the tables, but only changes OrderedDict to dict
at the top level; it does not recursively change OrderedDict to dict.
So, the loaded tables are different from the original tables that were
passed to load() in that some of the OrderedDict (from the pickled file)
are not changed back to dict. For parsing, this has no effect on
performance because OrderedDict uses dict's __getitem__ with nothing in
between.
"""
with open(filename, "wb") as f:
pickle.dump(self.__dict__, f, 2)
d = _make_deterministic(self.__dict__)
pickle.dump(d, f, 2)

def load(self, filename):
"""Load the grammar tables from a pickle file."""
Expand Down Expand Up @@ -124,6 +137,17 @@ def report(self):
print("start", self.start)


def _make_deterministic(top):
if isinstance(top, dict):
return collections.OrderedDict(
sorted(((k, _make_deterministic(v)) for k, v in top.items())))
if isinstance(top, list):
return [_make_deterministic(e) for e in top]
if isinstance(top, tuple):
return tuple(_make_deterministic(e) for e in top)
return top


# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
Expand Down
8 changes: 4 additions & 4 deletions Lib/lib2to3/pgen2/pgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def make_grammar(self):
states = []
for state in dfa:
arcs = []
for label, next in state.arcs.items():
for label, next in sorted(state.arcs.items()):
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
Expand All @@ -52,7 +52,7 @@ def make_grammar(self):
def make_first(self, c, name):
rawfirst = self.first[name]
first = {}
for label in rawfirst:
for label in sorted(rawfirst):
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
Expand Down Expand Up @@ -192,7 +192,7 @@ def addclosure(state, base):
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
for label, nfaset in arcs.items():
for label, nfaset in sorted(arcs.items()):
for st in states:
if st.nfaset == nfaset:
break
Expand Down Expand Up @@ -222,7 +222,7 @@ def dump_dfa(self, name, dfa):
print("Dump of DFA for", name)
for i, state in enumerate(dfa):
print(" State", i, state.isfinal and "(final)" or "")
for label, next in state.arcs.items():
for label, next in sorted(state.arcs.items()):
print(" %s -> %d" % (label, dfa.index(next)))

def simplify_dfa(self, dfa):
Expand Down
6 changes: 3 additions & 3 deletions Lib/lib2to3/tests/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@

# Local imports
from lib2to3 import pytree, refactor
from lib2to3.pgen2 import driver
from lib2to3.pgen2 import driver as pgen2_driver

# Directory containing this test package; used to locate Grammar.txt.
test_dir = os.path.dirname(__file__)
# Root of the lib2to3 package (one level up from the tests).
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
# Build the shared grammar and Driver once at import time; test helpers
# below reuse them.  The module is imported as ``pgen2_driver`` so that the
# module-level name ``driver`` (a Driver *instance*) does not shadow the
# lib2to3.pgen2.driver module itself — the diff's stale pre-change lines
# (``grammar = driver.load_grammar(...)``) would raise NameError and are
# dropped here.
grammar = pgen2_driver.load_grammar(grammar_path)
driver = pgen2_driver.Driver(grammar, convert=pytree.convert)

def parse_string(string):
    """Parse *string* with the shared lib2to3 driver, after normalizing it
    through reformat(); returns the driver's parse result (debug enabled)."""
    return driver.parse_string(reformat(string), debug=True)
Expand Down
72 changes: 69 additions & 3 deletions Lib/lib2to3/tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,22 @@
test_grammar.py files from both Python 2 and Python 3.
"""

from __future__ import with_statement

# Testing imports
from . import support
from .support import driver, test_dir
from test.support import verbose

# Python imports
import os
import shutil
import subprocess
import sys
import tempfile
import unittest
import warnings
import subprocess

# Local imports
from lib2to3.pgen2 import driver as pgen2_driver
from lib2to3.pgen2 import tokenize
from ..pgen2.parse import ParseError
from lib2to3.pygram import python_symbols as syms
Expand All @@ -35,6 +36,71 @@ def test_formfeed(self):
self.assertEqual(t.children[1].children[0].type, syms.print_stmt)


class TestPgen2Caching(support.TestCase):
    """Tests for the grammar pickle cache created by pgen2 driver.load_grammar()."""

    def test_load_grammar_from_txt_file(self):
        # Parsing the .txt grammar directly must work without touching a cache.
        pgen2_driver.load_grammar(support.grammar_path, save=False, force=True)

    def test_load_grammar_from_pickle(self):
        # Make a copy of the grammar file in a temp directory we are
        # guaranteed to be able to write to.
        tmpdir = tempfile.mkdtemp()
        try:
            grammar_copy = os.path.join(
                tmpdir, os.path.basename(support.grammar_path))
            shutil.copy(support.grammar_path, grammar_copy)
            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)

            # First load generates and saves the pickle cache.
            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
            self.assertTrue(os.path.exists(pickle_name))

            os.unlink(grammar_copy)  # Only the pickle remains...
            # Second load must succeed purely from the pickle cache.
            pgen2_driver.load_grammar(grammar_copy, save=False, force=False)
        finally:
            shutil.rmtree(tmpdir)

    @unittest.skipIf(sys.executable is None, 'sys.executable required')
    def test_load_grammar_from_subprocess(self):
        # Verify the cache is byte-for-byte stable across interpreter runs
        # with different hash randomization seeds.
        tmpdir = tempfile.mkdtemp()
        tmpsubdir = os.path.join(tmpdir, 'subdir')
        try:
            os.mkdir(tmpsubdir)
            grammar_base = os.path.basename(support.grammar_path)
            grammar_copy = os.path.join(tmpdir, grammar_base)
            grammar_sub_copy = os.path.join(tmpsubdir, grammar_base)
            shutil.copy(support.grammar_path, grammar_copy)
            shutil.copy(support.grammar_path, grammar_sub_copy)
            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
            pickle_sub_name = pgen2_driver._generate_pickle_name(
                grammar_sub_copy)
            # Distinct source paths must map to distinct cache paths.
            self.assertNotEqual(pickle_name, pickle_sub_name)

            # Generate a pickle file from this process.
            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
            self.assertTrue(os.path.exists(pickle_name))

            # Generate a new pickle file in a subprocess with a most likely
            # different hash randomization seed.
            sub_env = dict(os.environ)
            sub_env['PYTHONHASHSEED'] = 'random'
            subprocess.check_call(
                [sys.executable, '-c', """
from lib2to3.pgen2 import driver as pgen2_driver
pgen2_driver.load_grammar(%r, save=True, force=True)
""" % (grammar_sub_copy,)],
                env=sub_env)
            self.assertTrue(os.path.exists(pickle_sub_name))

            # Both caches were built from identical grammar input, so their
            # contents must match exactly despite differing hash seeds.
            with open(pickle_name, 'rb') as pickle_f_1, \
                    open(pickle_sub_name, 'rb') as pickle_f_2:
                self.assertEqual(
                    pickle_f_1.read(), pickle_f_2.read(),
                    msg='Grammar caches generated using different hash seeds'
                        ' were not identical.')
        finally:
            shutil.rmtree(tmpdir)



class GrammarTest(support.TestCase):
def validate(self, code):
support.parse_string(code)
Expand Down
4 changes: 4 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ Core and Builtins
Library
-------

- lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
between runs given the same Grammar.txt input regardless of the hash
randomization setting.

- Issue #27570: Avoid zero-length memcpy() etc calls with null source
pointers in the "ctypes" and "array" modules.

Expand Down

0 comments on commit dd1c638

Please sign in to comment.