Skip to content

Commit

Permalink
add TransitionBasedParser.v1 (#11)
Browse files Browse the repository at this point in the history
* add TransitionBasedParser.v1

* expand on actual code

* fix registry
  • Loading branch information
svlandeg authored Jul 6, 2021
1 parent 6f2b82e commit fe81535
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 10 deletions.
11 changes: 6 additions & 5 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,16 @@ setup_requires =

[options.entry_points]
spacy_architectures =
spacy-legacy.Tok2Vec.v1 = spacy_legacy.architectures.tok2vec:Tok2Vec_v1
spacy-legacy.CharacterEmbed.v1 = spacy_legacy.architectures.tok2vec:CharacterEmbed_v1
spacy-legacy.HashEmbedCNN.v1 = spacy_legacy.architectures.tok2vec:HashEmbedCNN_v1
spacy-legacy.MaxoutWindowEncoder.v1 = spacy_legacy.architectures.tok2vec:MaxoutWindowEncoder_v1
spacy-legacy.MishWindowEncoder.v1 = spacy_legacy.architectures.tok2vec:MishWindowEncoder_v1
spacy-legacy.TextCatCNN.v1 = spacy_legacy.architectures.textcat:TextCatCNN_v1
spacy-legacy.MultiHashEmbed.v1 = spacy_legacy.architectures.tok2vec:MultiHashEmbed_v1
spacy-legacy.TextCatBOW.v1 = spacy_legacy.architectures.textcat:TextCatBOW_v1
spacy-legacy.TextCatCNN.v1 = spacy_legacy.architectures.textcat:TextCatCNN_v1
spacy-legacy.TextCatEnsemble.v1 = spacy_legacy.architectures.textcat:TextCatEnsemble_v1
spacy-legacy.HashEmbedCNN.v1 = spacy_legacy.architectures.tok2vec:HashEmbedCNN_v1
spacy-legacy.MultiHashEmbed.v1 = spacy_legacy.architectures.tok2vec:MultiHashEmbed_v1
spacy-legacy.CharacterEmbed.v1 = spacy_legacy.architectures.tok2vec:CharacterEmbed_v1
spacy-legacy.Tok2Vec.v1 = spacy_legacy.architectures.tok2vec:Tok2Vec_v1
spacy-legacy.TransitionBasedParser.v1 = spacy_legacy.architectures.parser:TransitionBasedParser_v1
spacy_loggers =
spacy-legacy.WandbLogger.v1 = spacy_legacy.loggers:wandb_logger_v1
thinc_layers =
Expand Down
50 changes: 50 additions & 0 deletions spacy_legacy/architectures/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Optional, List
from thinc.types import Floats2d
from thinc.api import Model, zero_init, use_ops

from spacy.tokens import Doc
from spacy.compat import Literal
from spacy.errors import Errors
from spacy.util import registry

# TODO: replace with registered layers after spacy is released with the update
from spacy.ml._precomputable_affine import PrecomputableAffine
from spacy.ml.tb_framework import TransitionModel


def TransitionBasedParser_v1(
    tok2vec: Model[List[Doc], List[Floats2d]],
    state_type: Literal["parser", "ner"],
    extra_state_tokens: bool,
    hidden_width: int,
    maxout_pieces: int,
    use_upper: bool = True,
    nO: Optional[int] = None,
) -> Model:
    """Assemble the legacy (v1) transition-based parser / NER model.

    tok2vec (Model[List[Doc], List[Floats2d]]): Sub-network that is chained
        with a list2array layer and a Linear layer of width `hidden_width`.
    state_type (str): Either "parser" or "ner". Selects how many state
        tokens feed the lower layer.
    extra_state_tokens (bool): If truthy, use the larger token count
        (13 instead of 8 for "parser", 6 instead of 3 for "ner").
    hidden_width (int): Output width of the Linear projection, and of the
        lower layer when `use_upper` is set.
    maxout_pieces (int): Passed to the lower layer as `nP`.
    use_upper (bool): If set, add a zero-initialised Linear layer with `nO`
        outputs on top of the lower layer; otherwise the lower layer gets
        `nO` outputs itself and no upper layer is created.
    nO (Optional[int]): Number of outputs; may be None.
    RETURNS (Model): TransitionModel built from the projected tok2vec
        layer, the lower layer and the optional upper layer.
    RAISES (ValueError): Error E917 if `state_type` is neither "parser"
        nor "ner".
    """
    # Layer factories are looked up through the registry rather than
    # imported directly.
    make_chain = registry.get("layers", "chain.v1")
    make_list2array = registry.get("layers", "list2array.v1")
    make_linear = registry.get("layers", "Linear.v1")

    # (without extra tokens, with extra tokens) per supported state type.
    if state_type == "parser":
        token_counts = (8, 13)
    elif state_type == "ner":
        token_counts = (3, 6)
    else:
        raise ValueError(Errors.E917.format(value=state_type))
    n_feature_tokens = token_counts[1] if extra_state_tokens else token_counts[0]

    # The tok2vec width may not be known yet; in that case pass None on.
    input_width = None
    if tok2vec.has_dim("nO"):
        input_width = tok2vec.get_dim("nO")

    projected = make_chain(
        tok2vec,
        make_list2array(),
        make_linear(hidden_width, input_width),
    )
    projected.set_dim("nO", hidden_width)

    # Without an upper layer, the lower layer produces the final outputs.
    if use_upper:
        lower_width = hidden_width
    else:
        lower_width = nO
    lower_layer = PrecomputableAffine(
        nO=lower_width,
        nF=n_feature_tokens,
        nI=projected.get_dim("nO"),
        nP=maxout_pieces,
    )

    upper_layer = None
    if use_upper:
        with use_ops("numpy"):
            # Initialize weights at zero, as it's a classification layer.
            upper_layer = make_linear(nO=nO, init_W=zero_init)

    return TransitionModel(projected, lower_layer, upper_layer)
10 changes: 5 additions & 5 deletions spacy_legacy/tests/test_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@

# Distribution names referenced by this test module; how they are used is
# outside this excerpt.
PACKAGES = ["spacy", "spacy-legacy"]
# Pairs of (registry name, registered function name) -- presumably the legacy
# ".v1" functions the tests expect to find; verify against the test body.
# NOTE(review): several pairs occur more than once below (for example
# ("architectures", "Tok2Vec.v1") and ("loggers", "WandbLogger.v1")). This
# looks like the old and new sides of a reordering diff shown interleaved --
# confirm against the actual file before relying on the order or the count.
FUNCTIONS = [
("architectures", "Tok2Vec.v1"),
("architectures", "CharacterEmbed.v1"),
("architectures", "HashEmbedCNN.v1"),
("architectures", "MaxoutWindowEncoder.v1"),
("architectures", "MishWindowEncoder.v1"),
("architectures", "MultiHashEmbed.v1"),
("architectures", "TextCatBOW.v1"),
("architectures", "TextCatCNN.v1"),
("architectures", "TextCatEnsemble.v1"),
("architectures", "HashEmbedCNN.v1"),
("architectures", "MultiHashEmbed.v1"),
("architectures", "CharacterEmbed.v1"),
("loggers", "WandbLogger.v1"),
("architectures", "Tok2Vec.v1"),
("layers", "StaticVectors.v1"),
("loggers", "WandbLogger.v1"),
]


Expand Down

0 comments on commit fe81535

Please sign in to comment.