Skip to content

Commit

Permalink
neurlux example
Browse files Browse the repository at this point in the history
  • Loading branch information
dtrizna committed Jan 19, 2024
1 parent d858d80 commit d167b34
Show file tree
Hide file tree
Showing 5 changed files with 10,054 additions and 4 deletions.
9 changes: 6 additions & 3 deletions nebula/models/neurlux/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,13 @@ def encode(self, tokenized):
         """Encodes tokenized text."""
         return [self.vocab[token] for token in tokenized]

-    def preprocess(self, text):
+    def preprocess(self, text, pad=True, tokenize=True):
         """Preprocesses text for NeurLux."""
-        tokenized = self.tokenize(text)
-        encoded = self.encode(tokenized)
+        if tokenize:
+            text = self.tokenize(text)
+        encoded = self.encode(text)
+        if pad:
+            encoded = self.pad(encoded)
         return encoded

     def preprocess_sequence(self, sequence):
Expand Down
Binary file added nebula/objects/neurlux_whitespace.model
Binary file not shown.
Loading

0 comments on commit d167b34

Please sign in to comment.