From af68c53d4616b5bc9f34b49f8d38fb2a61b1861b Mon Sep 17 00:00:00 2001 From: Steven Bird Date: Sun, 1 Jun 2014 09:38:49 +1000 Subject: [PATCH] changed ContextFreeGrammar to CFG, WeightedGrammar to PCFG, and StatisticalGrammar to ProbabilisticGrammar --- nltk/app/chartparser_app.py | 6 +- nltk/app/rdparser_app.py | 4 +- nltk/app/srparser_app.py | 4 +- nltk/data.py | 4 +- nltk/draw/cfg.py | 14 ++-- nltk/grammar.py | 96 ++++++++++++------------ nltk/parse/chart.py | 14 ++-- nltk/parse/earleychart.py | 2 +- nltk/parse/featurechart.py | 2 +- nltk/parse/generate.py | 4 +- nltk/parse/pchart.py | 6 +- nltk/parse/projectivedependencyparser.py | 6 +- nltk/parse/recursivedescent.py | 6 +- nltk/parse/shiftreduce.py | 4 +- nltk/parse/util.py | 16 ++-- nltk/parse/viterbi.py | 4 +- nltk/test/data.doctest | 4 +- nltk/test/generate.doctest | 4 +- nltk/test/grammar.doctest | 10 +-- nltk/test/parse.doctest | 10 +-- nltk/test/simple.doctest | 4 +- 21 files changed, 113 insertions(+), 111 deletions(-) diff --git a/nltk/app/chartparser_app.py b/nltk/app/chartparser_app.py index f819e954c2..17d2f990bc 100644 --- a/nltk/app/chartparser_app.py +++ b/nltk/app/chartparser_app.py @@ -51,7 +51,7 @@ SteppingChartParser, TopDownInitRule, TopDownPredictRule, TreeEdge) from nltk.tree import Tree -from nltk.grammar import Nonterminal, ContextFreeGrammar +from nltk.grammar import Nonterminal, CFG from nltk.util import in_idle from nltk.draw.util import (CanvasFrame, ColorizedList, EntryDialog, MutableOptionMenu, @@ -2038,7 +2038,7 @@ def load_grammar(self, *args): grammar = pickle.load(infile) else: with open(filename, 'r') as infile: - grammar = ContextFreeGrammar.fromstring(infile.read()) + grammar = CFG.fromstring(infile.read()) self.set_grammar(grammar) except Exception as e: tkinter.messagebox.showerror('Error Loading Grammar', @@ -2230,7 +2230,7 @@ def top_down_strategy(self, *e): self.apply_strategy(self._TD_STRATEGY, TopDownPredictEdgeRule) def app(): - grammar = 
ContextFreeGrammar.fromstring(""" + grammar = CFG.fromstring(""" # Grammatical productions. S -> NP VP VP -> VP PP | V NP | V diff --git a/nltk/app/rdparser_app.py b/nltk/app/rdparser_app.py index 5ce85f65c8..d5f3526ec7 100644 --- a/nltk/app/rdparser_app.py +++ b/nltk/app/rdparser_app.py @@ -867,8 +867,8 @@ def app(): Create a recursive descent parser demo, using a simple grammar and text. """ - from nltk.grammar import ContextFreeGrammar - grammar = ContextFreeGrammar.fromstring(""" + from nltk.grammar import CFG + grammar = CFG.fromstring(""" # Grammatical productions. S -> NP VP NP -> Det N PP | Det N diff --git a/nltk/app/srparser_app.py b/nltk/app/srparser_app.py index 7e1d35b6b0..b3ff44a5fa 100644 --- a/nltk/app/srparser_app.py +++ b/nltk/app/srparser_app.py @@ -772,7 +772,7 @@ def app(): text. """ - from nltk.grammar import Nonterminal, Production, ContextFreeGrammar + from nltk.grammar import Nonterminal, Production, CFG nonterminals = 'S VP NP PP P N Name V Det' (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] @@ -796,7 +796,7 @@ def app(): Production(Det, ['my']), ) - grammar = ContextFreeGrammar(S, productions) + grammar = CFG(S, productions) # tokenize the sentence sent = 'my dog saw a man in the park with a statue'.split() diff --git a/nltk/data.py b/nltk/data.py index 09bf9826ba..86d6e3e435 100644 --- a/nltk/data.py +++ b/nltk/data.py @@ -803,10 +803,10 @@ def load(resource_url, format='auto', cache=True, verbose=False, if format == 'text': resource_val = string_data elif format == 'cfg': - resource_val = nltk.grammar.ContextFreeGrammar.fromstring( + resource_val = nltk.grammar.CFG.fromstring( string_data, encoding=encoding) elif format == 'pcfg': - resource_val = nltk.grammar.WeightedGrammar.fromstring( + resource_val = nltk.grammar.PCFG.fromstring( string_data, encoding=encoding) elif format == 'fcfg': resource_val = nltk.grammar.FeatureGrammar.fromstring( diff --git a/nltk/draw/cfg.py b/nltk/draw/cfg.py index 
208c3996d3..1458e6358f 100644 --- a/nltk/draw/cfg.py +++ b/nltk/draw/cfg.py @@ -54,7 +54,7 @@ from tkinter import (Button, Canvas, Entry, Frame, IntVar, Label, Scrollbar, Text, Tk, Toplevel) -from nltk.grammar import (ContextFreeGrammar, _read_cfg_production, +from nltk.grammar import (CFG, _read_cfg_production, Nonterminal, nonterminals) from nltk.tree import Tree from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment @@ -157,7 +157,7 @@ class CFGEditor(object): def __init__(self, parent, cfg=None, set_cfg_callback=None): self._parent = parent if cfg is not None: self._cfg = cfg - else: self._cfg = ContextFreeGrammar(Nonterminal('S'), []) + else: self._cfg = CFG(Nonterminal('S'), []) self._set_cfg_callback = set_cfg_callback self._highlight_matching_nonterminals = 1 @@ -482,7 +482,7 @@ def _ok(self, *e): def _apply(self, *e): productions = self._parse_productions() start = Nonterminal(self._start.get()) - cfg = ContextFreeGrammar(start, productions) + cfg = CFG(start, productions) if self._set_cfg_callback is not None: self._set_cfg_callback(cfg) @@ -666,7 +666,7 @@ def mainloop(self, *args, **kwargs): self._top.mainloop(*args, **kwargs) def demo2(): - from nltk import Nonterminal, Production, ContextFreeGrammar + from nltk import Nonterminal, Production, CFG nonterminals = 'S VP NP PP P N Name V Det' (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] @@ -691,7 +691,7 @@ def demo2(): Production(N, ['dog']), Production(N, ['statue']), Production(Det, ['my']), ) - grammar = ContextFreeGrammar(S, productions) + grammar = CFG(S, productions) text = 'I saw a man in the park'.split() d=CFGDemo(grammar, text) @@ -702,12 +702,12 @@ def demo2(): ###################################################################### def demo(): - from nltk import Nonterminal, ContextFreeGrammar + from nltk import Nonterminal, CFG nonterminals = 'S VP NP PP P N Name V Det' (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in 
nonterminals.split()] - grammar = ContextFreeGrammar.fromstring(""" + grammar = CFG.fromstring(""" S -> NP VP PP -> P NP NP -> Det N diff --git a/nltk/grammar.py b/nltk/grammar.py index bac7896ed0..05d26f912f 100644 --- a/nltk/grammar.py +++ b/nltk/grammar.py @@ -22,8 +22,8 @@ tokens; and the node values are phrasal categories, such as ``NP`` and ``VP``. -The ``ContextFreeGrammar`` class is used to encode context free grammars. Each -``ContextFreeGrammar`` consists of a start symbol and a set of productions. +The ``CFG`` class is used to encode context free grammars. Each +``CFG`` consists of a start symbol and a set of productions. The "start symbol" specifies the root node value for parse trees. For example, the start symbol for syntactic parsing is usually ``S``. Start symbols are encoded using the ``Nonterminal`` class, which is discussed @@ -48,7 +48,7 @@ The ``Nonterminal`` class is used to distinguish node values from leaf values. This prevents the grammar from accidentally using a leaf value (such as the English word "A") as the node of a subtree. Within -a ``ContextFreeGrammar``, all node values are wrapped in the ``Nonterminal`` +a ``CFG``, all node values are wrapped in the ``Nonterminal`` class. Note, however, that the trees that are specified by the grammar do *not* include these ``Nonterminal`` wrappers. @@ -99,7 +99,7 @@ class Nonterminal(object): hashable. Two ``Nonterminals`` are considered equal if their symbols are equal. - :see: ``ContextFreeGrammar``, ``Production`` + :see: ``CFG``, ``Production`` :type _symbol: any :ivar _symbol: The node value corresponding to this ``Nonterminal``. This value must be immutable and hashable. @@ -244,7 +244,7 @@ class Production(object): not a ``Nonterminal``. Typically, terminals are strings representing words, such as ``"dog"`` or ``"under"``. 
- :see: ``ContextFreeGrammar`` + :see: ``CFG`` :see: ``DependencyGrammar`` :see: ``Nonterminal`` :type _lhs: Nonterminal @@ -373,26 +373,26 @@ def __str__(self): @python_2_unicode_compatible -class WeightedProduction(Production, ImmutableProbabilisticMixIn): +class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn): """ A probabilistic context free grammar production. - A PCFG ``WeightedProduction`` is essentially just a ``Production`` that + A PCFG ``ProbabilisticProduction`` is essentially just a ``Production`` that has an associated probability, which represents how likely it is that this production will be used. In particular, the probability of a - ``WeightedProduction`` records the likelihood that its right-hand side is + ``ProbabilisticProduction`` records the likelihood that its right-hand side is the correct instantiation for any given occurrence of its left-hand side. :see: ``Production`` """ def __init__(self, lhs, rhs, **prob): """ - Construct a new ``WeightedProduction``. + Construct a new ``ProbabilisticProduction``. - :param lhs: The left-hand side of the new ``WeightedProduction``. + :param lhs: The left-hand side of the new ``ProbabilisticProduction``. :type lhs: Nonterminal - :param rhs: The right-hand side of the new ``WeightedProduction``. + :param rhs: The right-hand side of the new ``ProbabilisticProduction``. :type rhs: sequence(Nonterminal and terminal) - :param prob: Probability parameters of the new ``WeightedProduction``. + :param prob: Probability parameters of the new ``ProbabilisticProduction``. """ ImmutableProbabilisticMixIn.__init__(self, **prob) Production.__init__(self, lhs, rhs) @@ -417,7 +417,7 @@ def __hash__(self): ################################################################# @python_2_unicode_compatible -class ContextFreeGrammar(object): +class CFG(object): """ A context-free grammar. A grammar consists of a start state and a set of productions. 
The set of terminals and nonterminals is @@ -510,13 +510,13 @@ def _calculate_leftcorners(self): @classmethod def fromstring(cls, input, encoding=None): """ - Return the ``ContextFreeGrammar`` corresponding to the input string(s). + Return the ``CFG`` corresponding to the input string(s). :param input: a grammar, either in the form of a string or as a list of strings. """ start, productions = read_grammar(input, standard_nonterm_parser, encoding=encoding) - return ContextFreeGrammar(start, productions) + return CFG(start, productions) def start(self): """ @@ -712,10 +712,10 @@ def __str__(self): return result -class FeatureGrammar(ContextFreeGrammar): +class FeatureGrammar(CFG): """ A feature-based grammar. This is equivalent to a - ``ContextFreeGrammar`` whose nonterminals are all + ``CFG`` whose nonterminals are all ``FeatStructNonterminal``. A grammar consists of a start state and a set of @@ -732,7 +732,7 @@ def __init__(self, start, productions): :param productions: The list of productions that defines the grammar :type productions: list(Production) """ - ContextFreeGrammar.__init__(self, start, productions) + CFG.__init__(self, start, productions) # The difference with CFG is that the productions are # indexed on the TYPE feature of the nonterminals. 
@@ -982,7 +982,7 @@ def __repr__(self): @python_2_unicode_compatible -class StatisticalDependencyGrammar(object): +class ProbabilisticDependencyGrammar(object): """ """ @@ -1011,7 +1011,7 @@ def contains(self, head, mod): def __str__(self): """ - Return a verbose string representation of the ``StatisticalDependencyGrammar`` + Return a verbose string representation of the ``ProbabilisticDependencyGrammar`` :rtype: str """ @@ -1028,21 +1028,21 @@ def __str__(self): def __repr__(self): """ - Return a concise string representation of the ``StatisticalDependencyGrammar`` + Return a concise string representation of the ``ProbabilisticDependencyGrammar`` """ return 'Statistical Dependency grammar with %d productions' % len(self._productions) -class WeightedGrammar(ContextFreeGrammar): +class PCFG(CFG): """ - A probabilistic context-free grammar. A Weighted Grammar consists - of a start state and a set of weighted productions. The set of + A probabilistic context-free grammar. A PCFG consists of a + start state and a set of productions with probabilities. The set of terminals and nonterminals is implicitly specified by the productions. - PCFG productions should be ``WeightedProductions``. - ``WeightedGrammars`` impose the constraint that the set of - productions with any given left-hand-side must have probabilities - that sum to 1. + PCFG productions use the ``ProbabilisticProduction`` class. + ``PCFGs`` impose the constraint that the set of productions with + any given left-hand-side must have probabilities that sum to 1 + (allowing for a small margin of error). If you need efficient key-based access to productions, you can use a subclass to implement it. @@ -1057,7 +1057,7 @@ class WeightedGrammar(ContextFreeGrammar): def __init__(self, start, productions, calculate_leftcorners=True): """ Create a new context-free grammar, from the given start state - and set of ``WeightedProductions``. + and set of ``ProbabilisticProductions``. 
:param start: The start symbol :type start: Nonterminal @@ -1070,7 +1070,7 @@ def __init__(self, start, productions, calculate_leftcorners=True): leftcorner relation. In that case, some optimized chart parsers won't work. :type calculate_leftcorners: bool """ - ContextFreeGrammar.__init__(self, start, productions, calculate_leftcorners) + CFG.__init__(self, start, productions, calculate_leftcorners) # Make sure that the probabilities sum to one. probs = {} @@ -1078,15 +1078,15 @@ def __init__(self, start, productions, calculate_leftcorners=True): probs[production.lhs()] = (probs.get(production.lhs(), 0) + production.prob()) for (lhs, p) in probs.items(): - if not ((1-WeightedGrammar.EPSILON) < p < - (1+WeightedGrammar.EPSILON)): + if not ((1-PCFG.EPSILON) < p < + (1+PCFG.EPSILON)): raise ValueError("Productions for %r do not sum to 1" % lhs) @classmethod def fromstring(cls, input, encoding=None): """ - Return a probabilistic ``WeightedGrammar`` corresponding to the + Return a probabilistic ``PCFG`` corresponding to the input string(s). 
:param input: a grammar, either in the form of a string or else @@ -1094,7 +1094,7 @@ def fromstring(cls, input, encoding=None): """ start, productions = read_grammar(input, standard_nonterm_parser, probabilistic=True, encoding=encoding) - return WeightedGrammar(start, productions) + return PCFG(start, productions) ################################################################# @@ -1128,10 +1128,10 @@ def induce_pcfg(start, productions): lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1 pcount[prod] = pcount.get(prod, 0) + 1 - prods = [WeightedProduction(p.lhs(), p.rhs(), + prods = [ProbabilisticProduction(p.lhs(), p.rhs(), prob=float(pcount[p]) / lcount[p.lhs()]) for p in pcount] - return WeightedGrammar(start, prods) + return PCFG(start, prods) ################################################################# @@ -1146,7 +1146,7 @@ def _read_cfg_production(input): def _read_pcfg_production(input): """ - Return a list of PCFG ``WeightedProductions``. + Return a list of PCFG ``ProbabilisticProductions``. """ return _read_production(input, standard_nonterm_parser, probabilistic=True) @@ -1213,7 +1213,7 @@ def _read_production(line, nonterm_parser, probabilistic=False): rhsides[-1].append(nonterm) if probabilistic: - return [WeightedProduction(lhs, rhs, prob=probability) + return [ProbabilisticProduction(lhs, rhs, prob=probability) for (rhs, probability) in zip(rhsides, probabilities)] else: return [Production(lhs, rhs) for rhs in rhsides] @@ -1324,10 +1324,10 @@ def _read_dependency_production(s): def cfg_demo(): """ - A demonstration showing how ``ContextFreeGrammars`` can be created and used. + A demonstration showing how ``CFGs`` can be created and used. 
""" - from nltk import nonterminals, Production, ContextFreeGrammar + from nltk import nonterminals, Production, CFG # Create some nonterminals S, NP, VP, PP = nonterminals('S, NP, VP, PP') @@ -1341,7 +1341,7 @@ def cfg_demo(): print(Production(S, [NP])) # Create some Grammar Productions - grammar = ContextFreeGrammar.fromstring(""" + grammar = CFG.fromstring(""" S -> NP VP PP -> P NP NP -> Det N | NP PP @@ -1359,7 +1359,7 @@ def cfg_demo(): print(repr(grammar.productions()).replace(',', ',\n'+' '*25)) print() -toy_pcfg1 = WeightedGrammar.fromstring(""" +toy_pcfg1 = PCFG.fromstring(""" S -> NP VP [1.0] NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] Det -> 'the' [0.8] | 'my' [0.2] @@ -1370,7 +1370,7 @@ def cfg_demo(): P -> 'with' [0.61] | 'under' [0.39] """) -toy_pcfg2 = WeightedGrammar.fromstring(""" +toy_pcfg2 = PCFG.fromstring(""" S -> NP VP [1.0] VP -> V NP [.59] VP -> V [.40] @@ -1398,7 +1398,7 @@ def cfg_demo(): def pcfg_demo(): """ - A demonstration showing how a ``WeightedGrammar`` can be created and used. + A demonstration showing how a ``PCFG`` can be created and used. 
""" from nltk.corpus import treebank @@ -1507,7 +1507,9 @@ def demo(): demo() __all__ = ['Nonterminal', 'nonterminals', - 'Production', 'DependencyProduction', 'WeightedProduction', - 'ContextFreeGrammar', 'WeightedGrammar', 'DependencyGrammar', - 'StatisticalDependencyGrammar', 'induce_pcfg', 'read_grammar'] + 'CFG', 'Production', + 'PCFG', 'ProbabilisticProduction', + 'DependencyGrammar', 'DependencyProduction', + 'ProbabilisticDependencyGrammar', + 'induce_pcfg', 'read_grammar'] diff --git a/nltk/parse/chart.py b/nltk/parse/chart.py index 2873b88f33..51be1cce5b 100644 --- a/nltk/parse/chart.py +++ b/nltk/parse/chart.py @@ -43,7 +43,7 @@ from nltk import compat from nltk.tree import Tree -from nltk.grammar import WeightedGrammar, is_nonterminal, is_terminal +from nltk.grammar import PCFG, is_nonterminal, is_terminal from nltk.util import OrderedDict from nltk.internals import raise_unorderable_types from nltk.compat import (total_ordering, python_2_unicode_compatible, @@ -1242,7 +1242,7 @@ def __init__(self, grammar, strategy=BU_LC_STRATEGY, trace=0, Create a new chart parser, that uses ``grammar`` to parse texts. - :type grammar: ContextFreeGrammar + :type grammar: CFG :param grammar: The grammar used to parse texts. :type strategy: list(ChartRuleI) :param strategy: A list of rules that should be used to decide @@ -1364,8 +1364,8 @@ class BottomUpChartParser(ChartParser): See ``ChartParser`` for more information. 
""" def __init__(self, grammar, **parser_args): - if isinstance(grammar, WeightedGrammar): - warnings.warn("BottomUpChartParser only works for ContextFreeGrammar, " + if isinstance(grammar, PCFG): + warnings.warn("BottomUpChartParser only works for CFG, " "use BottomUpProbabilisticChartParser instead", category=DeprecationWarning) ChartParser.__init__(self, grammar, BU_STRATEGY, **parser_args) @@ -1548,8 +1548,8 @@ def parse(self, tokens, tree_class=Tree): ######################################################################## def demo_grammar(): - from nltk.grammar import ContextFreeGrammar - return ContextFreeGrammar.fromstring(""" + from nltk.grammar import CFG + return CFG.fromstring(""" S -> NP VP PP -> "with" NP NP -> NP PP @@ -1578,7 +1578,7 @@ def demo(choice=None, A demonstration of the chart parsers. """ import sys, time - from nltk import nonterminals, Production, ContextFreeGrammar + from nltk import nonterminals, Production, CFG # The grammar for ChartParser and SteppingChartParser: grammar = demo_grammar() diff --git a/nltk/parse/earleychart.py b/nltk/parse/earleychart.py index 5c5b11c368..1a4b81ce1a 100644 --- a/nltk/parse/earleychart.py +++ b/nltk/parse/earleychart.py @@ -268,7 +268,7 @@ def __init__(self, grammar, strategy=BU_LC_INCREMENTAL_STRATEGY, Create a new Earley chart parser, that uses ``grammar`` to parse texts. - :type grammar: ContextFreeGrammar + :type grammar: CFG :param grammar: The grammar used to parse texts. 
:type trace: int :param trace: The level of tracing that should be used when diff --git a/nltk/parse/featurechart.py b/nltk/parse/featurechart.py index 283e94fa12..90ff716f8e 100644 --- a/nltk/parse/featurechart.py +++ b/nltk/parse/featurechart.py @@ -17,7 +17,7 @@ from nltk.featstruct import FeatStruct, unify, TYPE, find_variables from nltk.sem import logic from nltk.tree import Tree -from nltk.grammar import (Nonterminal, Production, ContextFreeGrammar, +from nltk.grammar import (Nonterminal, Production, CFG, FeatStructNonterminal, is_nonterminal, is_terminal) from nltk.parse.chart import (TreeEdge, Chart, ChartParser, EdgeI, diff --git a/nltk/parse/generate.py b/nltk/parse/generate.py index 4f89c75c91..ab5ae1f22d 100644 --- a/nltk/parse/generate.py +++ b/nltk/parse/generate.py @@ -64,11 +64,11 @@ def _generate_one(grammar, item, depth): """ def demo(N=23): - from nltk.grammar import ContextFreeGrammar + from nltk.grammar import CFG print('Generating the first %d sentences for demo grammar:' % (N,)) print(demo_grammar) - grammar = ContextFreeGrammar.fromstring(demo_grammar) + grammar = CFG.fromstring(demo_grammar) for n, sent in enumerate(generate(grammar, n=N), 1): print('%3d. %s' % (n, ' '.join(sent))) diff --git a/nltk/parse/pchart.py b/nltk/parse/pchart.py index 8dd670c89c..cacfd3ff42 100644 --- a/nltk/parse/pchart.py +++ b/nltk/parse/pchart.py @@ -40,7 +40,7 @@ from functools import reduce from nltk.tree import Tree, ProbabilisticTree -from nltk.grammar import Nonterminal, WeightedGrammar +from nltk.grammar import Nonterminal, PCFG from nltk.parse.api import ParserI from nltk.parse.chart import Chart, LeafEdge, TreeEdge, AbstractChartRule @@ -173,8 +173,8 @@ def __init__(self, grammar, beam_size=0, trace=0): and higher numbers will produce more verbose tracing output. 
""" - if not isinstance(grammar, WeightedGrammar): - raise ValueError("The grammar must be probabilistic WeightedGrammar") + if not isinstance(grammar, PCFG): + raise ValueError("The grammar must be probabilistic PCFG") self._grammar = grammar self.beam_size = beam_size self._trace = trace diff --git a/nltk/parse/projectivedependencyparser.py b/nltk/parse/projectivedependencyparser.py index 54bc76e440..3398cd75ae 100644 --- a/nltk/parse/projectivedependencyparser.py +++ b/nltk/parse/projectivedependencyparser.py @@ -11,7 +11,7 @@ from collections import defaultdict from nltk.grammar import (DependencyProduction, DependencyGrammar, - StatisticalDependencyGrammar) + ProbabilisticDependencyGrammar) from nltk.parse.dependencygraph import DependencyGraph, conll_data2 from nltk.internals import raise_unorderable_types from nltk.compat import total_ordering, python_2_unicode_compatible @@ -334,7 +334,7 @@ def concatenate(self, span1, span2): def train(self, graphs): """ - Trains a StatisticalDependencyGrammar based on the list of input + Trains a ProbabilisticDependencyGrammar based on the list of input DependencyGraphs. This model is an implementation of Eisner's (1996) Model C, which derives its statistics from head-word, head-tag, child-word, and child-tag relationships. @@ -390,7 +390,7 @@ def train(self, graphs): mod_event = '(mods (%s, %s, %s) right))' % (prev_tag, head_word, head_tag) events[head_event] += 1 events[mod_event] += 1 - self._grammar = StatisticalDependencyGrammar(productions, events, tags) + self._grammar = ProbabilisticDependencyGrammar(productions, events, tags) # print self._grammar def compute_prob(self, dg): diff --git a/nltk/parse/recursivedescent.py b/nltk/parse/recursivedescent.py index be768997f1..540dfde414 100644 --- a/nltk/parse/recursivedescent.py +++ b/nltk/parse/recursivedescent.py @@ -56,7 +56,7 @@ def __init__(self, grammar, trace=0): Create a new ``RecursiveDescentParser``, that uses ``grammar`` to parse texts. 
- :type grammar: ContextFreeGrammar + :type grammar: CFG :param grammar: The grammar used to parse texts. :type trace: int :param trace: The level of tracing that should be used when @@ -630,9 +630,9 @@ def demo(): A demonstration of the recursive descent parser. """ - from nltk import parse, ContextFreeGrammar + from nltk import parse, CFG - grammar = ContextFreeGrammar.fromstring(""" + grammar = CFG.fromstring(""" S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP diff --git a/nltk/parse/shiftreduce.py b/nltk/parse/shiftreduce.py index f8863a3912..f1f7ecbbb2 100644 --- a/nltk/parse/shiftreduce.py +++ b/nltk/parse/shiftreduce.py @@ -435,9 +435,9 @@ def demo(): A demonstration of the shift-reduce parser. """ - from nltk import parse, ContextFreeGrammar + from nltk import parse, CFG - grammar = ContextFreeGrammar.fromstring(""" + grammar = CFG.fromstring(""" S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP diff --git a/nltk/parse/util.py b/nltk/parse/util.py index 18ce5d4537..b670cd989e 100644 --- a/nltk/parse/util.py +++ b/nltk/parse/util.py @@ -12,7 +12,7 @@ """ from __future__ import print_function -from nltk.grammar import ContextFreeGrammar, FeatureGrammar, WeightedGrammar +from nltk.grammar import CFG, FeatureGrammar, PCFG from nltk.data import load from nltk.parse.chart import Chart, ChartParser @@ -28,9 +28,9 @@ def load_parser(grammar_url, trace=0, on properties of the grammar itself. The following grammar formats are currently supported: - - ``'cfg'`` (CFGs: ``ContextFreeGrammar``) - - ``'pcfg'`` (probabilistic CFGs: ``WeightedGrammar``) - - ``'fcfg'`` (feature-based CFGs: ``ContextFreeGrammar``) + - ``'cfg'`` (CFGs: ``CFG``) + - ``'pcfg'`` (probabilistic CFGs: ``PCFG``) + - ``'fcfg'`` (feature-based CFGs: ``CFG``) :type grammar_url: str :param grammar_url: A URL specifying where the grammar is located. @@ -54,10 +54,10 @@ def load_parser(grammar_url, trace=0, See ``data.load`` for more information.
""" grammar = load(grammar_url, **load_args) - if not isinstance(grammar, ContextFreeGrammar): - raise ValueError("The grammar must be a ContextFreeGrammar, " + if not isinstance(grammar, CFG): + raise ValueError("The grammar must be a CFG, " "or a subclass thereof.") - if isinstance(grammar, WeightedGrammar): + if isinstance(grammar, PCFG): if parser is None: parser = InsideChartParser return parser(grammar, trace=trace, beam_size=beam_size) @@ -69,7 +69,7 @@ def load_parser(grammar_url, trace=0, chart_class = FeatureChart return parser(grammar, trace=trace, chart_class=chart_class) - else: # Plain ContextFreeGrammar. + else: # Plain CFG. if parser is None: parser = ChartParser if chart_class is None: diff --git a/nltk/parse/viterbi.py b/nltk/parse/viterbi.py index 3abbc6d2b3..9e7c102f0d 100644 --- a/nltk/parse/viterbi.py +++ b/nltk/parse/viterbi.py @@ -67,7 +67,7 @@ class ViterbiParser(ParserI): | MLC[start, start+width, prod.lhs] = new_tree | Return MLC[0, len(text), start_symbol] - :type _grammar: WeightedGrammar + :type _grammar: PCFG :ivar _grammar: The grammar used to parse sentences. :type _trace: int :ivar _trace: The level of tracing output that should be generated @@ -78,7 +78,7 @@ def __init__(self, grammar, trace=0): Create a new ``ViterbiParser`` parser, that uses ``grammar`` to parse texts. - :type grammar: WeightedGrammar + :type grammar: PCFG :param grammar: The grammar used to parse texts. :type trace: int :param trace: The level of tracing that should be used when diff --git a/nltk/test/data.doctest b/nltk/test/data.doctest index f333ff9540..de09e52177 100644 --- a/nltk/test/data.doctest +++ b/nltk/test/data.doctest @@ -309,7 +309,7 @@ internal use by NLTK's corpus readers. >>> # Show that it's now been loaded: >>> object.__repr__(ll) # doctest: +ELLIPSIS - '<nltk.grammar.ContextFreeGrammar object at ...>' + '<nltk.grammar.CFG object at ...>' >>> # Test that accessing an attribute also loads it: @@ -317,7 +317,7 @@ internal use by NLTK's corpus readers.
>>> ll.start() S >>> object.__repr__(ll) # doctest: +ELLIPSIS - '<nltk.grammar.ContextFreeGrammar object at ...>' + '<nltk.grammar.CFG object at ...>' Buffered Gzip Reading and Writing --------------------------------- diff --git a/nltk/test/generate.doctest b/nltk/test/generate.doctest index d723c4361e..4840599cab 100644 --- a/nltk/test/generate.doctest +++ b/nltk/test/generate.doctest @@ -8,8 +8,8 @@ Generating sentences from context-free grammars An example grammar: >>> from nltk.parse.generate import generate, demo_grammar - >>> from nltk import ContextFreeGrammar - >>> grammar = ContextFreeGrammar.fromstring(demo_grammar) + >>> from nltk import CFG + >>> grammar = CFG.fromstring(demo_grammar) >>> print(grammar) Grammar with 13 productions (start state = S) S -> NP VP diff --git a/nltk/test/grammar.doctest b/nltk/test/grammar.doctest index ba2eaf8523..0a9f3943ad 100644 --- a/nltk/test/grammar.doctest +++ b/nltk/test/grammar.doctest @@ -7,8 +7,8 @@ Grammar Parsing Grammars can be parsed from strings: - >>> from nltk import ContextFreeGrammar - >>> grammar = ContextFreeGrammar.fromstring(""" + >>> from nltk import CFG + >>> grammar = CFG.fromstring(""" ... S -> NP VP ... PP -> P NP ... NP -> Det N | NP PP @@ -29,8 +29,8 @@ Grammars can be parsed from strings: Probabilistic CFGs: - >>> from nltk import WeightedGrammar - >>> toy_pcfg1 = WeightedGrammar.fromstring(""" + >>> from nltk import PCFG + >>> toy_pcfg1 = PCFG.fromstring(""" ... S -> NP VP [1.0] ... NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] ...
Det -> 'the' [0.8] | 'my' [0.2] @@ -43,6 +43,6 @@ Probabilistic CFGs: Chomsky Normal Form grammar (Test for bug 474) - >>> g = ContextFreeGrammar.fromstring("VP^ -> VBP NP^") + >>> g = CFG.fromstring("VP^ -> VBP NP^") >>> g.productions()[0].lhs() VP^ diff --git a/nltk/test/parse.doctest b/nltk/test/parse.doctest index 1818350381..e47f25105e 100644 --- a/nltk/test/parse.doctest +++ b/nltk/test/parse.doctest @@ -8,7 +8,7 @@ Unit tests for the Context Free Grammar class --------------------------------------------- - >>> from nltk import Nonterminal, nonterminals, Production, ContextFreeGrammar + >>> from nltk import Nonterminal, nonterminals, Production, CFG >>> nt1 = Nonterminal('NP') >>> nt2 = Nonterminal('VP') @@ -40,7 +40,7 @@ Unit tests for the Context Free Grammar class >>> prod1 == prod2 False - >>> grammar = ContextFreeGrammar.fromstring(""" + >>> grammar = CFG.fromstring(""" ... S -> NP VP ... PP -> P NP ... NP -> 'the' N | N PP | 'the' N PP @@ -529,11 +529,11 @@ Unit tests for the Probabilistic CFG class >>> from nltk.corpus import treebank >>> from itertools import islice - >>> from nltk.grammar import WeightedGrammar, induce_pcfg, toy_pcfg1, toy_pcfg2 + >>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2 -Create a set of probabilistic CFG productions. +Create a set of PCFG productions. - >>> grammar = WeightedGrammar.fromstring(""" + >>> grammar = PCFG.fromstring(""" ... A -> B B [.3] | C B C [.7] ... B -> B D [.5] | C [.5] ... C -> 'a' [.1] | 'b' [0.9] diff --git a/nltk/test/simple.doctest b/nltk/test/simple.doctest index 53d0f66f64..71e8c40435 100644 --- a/nltk/test/simple.doctest +++ b/nltk/test/simple.doctest @@ -62,8 +62,8 @@ Parsing ------- >>> from nltk.parse.recursivedescent import RecursiveDescentParser - >>> from nltk.grammar import ContextFreeGrammar - >>> grammar = ContextFreeGrammar.fromstring(""" + >>> from nltk.grammar import CFG + >>> grammar = CFG.fromstring(""" ... S -> NP VP ... PP -> P NP ... 
NP -> 'the' N | N PP | 'the' N PP