flairNLPGH-856: Fixed deprecated references to TaggedCorpus in experiments and data_fetcher.py docs, in line with the Corpus update that addressed flairNLPGH-232
aychang95 committed Jul 3, 2019
1 parent 34f2490 commit a670990
Showing 2 changed files with 24 additions and 24 deletions.
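The change itself is mechanical: the remaining documentation references to the old `TaggedCorpus` class become `Corpus`, the name introduced by the flairNLPGH-232 refactor. From a caller's point of view the rename looks roughly like the sketch below (a hedged illustration, not code taken from the diff):

```python
# Before the flairNLPGH-232 rename (old class name):
# from flair.data import TaggedCorpus
# corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')

# After the rename, as the updated docs in this commit show:
from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask

corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')
```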
16 changes: 8 additions & 8 deletions flair/data_fetcher.py
@@ -122,13 +122,13 @@ def load_corpora(
@deprecated(version="0.4.1", reason="Use 'flair.datasets' instead.")
def load_corpus(task: Union[NLPTask, str], base_path: [str, Path] = None) -> Corpus:
"""
- Helper function to fetch a TaggedCorpus for a specific NLPTask. For this to work you need to first download
+ Helper function to fetch a Corpus for a specific NLPTask. For this to work you need to first download
and put into the appropriate folder structure the corresponding NLP task data. The tutorials on
https://github.com/zalandoresearch/flair give more info on how to do this. Alternatively, you can use this
code to create your own data fetchers.
:param task: specification of the NLPTask you wish to get
:param base_path: path to data folder containing tasks sub folders
- :return: a TaggedCorpus consisting of train, dev and test data
+ :return: a Corpus consisting of train, dev and test data
"""

# first, try to fetch dataset online
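Worth noting: the decorator above already marks this helper as deprecated in favour of `flair.datasets`. A hedged sketch of the old call next to the direction it points to (the `flair.datasets.CONLL_03` class name is an assumption based on later flair releases):

```python
from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask

# deprecated but still working: load an NLPTask the old way
corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')

# the replacement suggested by the deprecation message (class name assumed from later flair releases)
import flair.datasets
corpus = flair.datasets.CONLL_03()
```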
@@ -257,15 +257,15 @@ def load_column_corpus(
tag_to_biloes=None,
) -> Corpus:
"""
- Helper function to get a TaggedCorpus from CoNLL column-formatted task data such as CoNLL03 or CoNLL2000.
+ Helper function to get a Corpus from CoNLL column-formatted task data such as CoNLL03 or CoNLL2000.
:param data_folder: base folder with the task data
:param column_format: a map specifying the column format
:param train_file: the name of the train file
:param test_file: the name of the test file
:param dev_file: the name of the dev file, if None, dev data is sampled from train
:param tag_to_biloes: whether to convert to BILOES tagging scheme
- :return: a TaggedCorpus with annotated train, dev and test data
+ :return: a Corpus with annotated train, dev and test data
"""

if type(data_folder) == str:
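For reference, a hedged usage sketch of `load_column_corpus` built only from the parameters documented above; the folder layout, file names, and column map are hypothetical:

```python
from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher

# hypothetical column map: column 0 holds the token text, column 1 the NER tag
columns = {0: 'text', 1: 'ner'}

corpus: Corpus = NLPTaskDataFetcher.load_column_corpus(
    'resources/tasks/my_conll_data',  # data_folder (hypothetical path)
    columns,                          # column_format
    train_file='train.txt',
    test_file='test.txt',
    dev_file=None,                    # dev data is sampled from train when None
    # tag_to_biloes is left at its default; per the docstring it converts tags to the BILOES scheme
)
```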
@@ -353,13 +353,13 @@ def load_ud_corpus(
data_folder: Union[str, Path], train_file=None, test_file=None, dev_file=None
) -> Corpus:
"""
- Helper function to get a TaggedCorpus from CoNLL-U column-formatted task data such as the UD corpora
+ Helper function to get a Corpus from CoNLL-U column-formatted task data such as the UD corpora
:param data_folder: base folder with the task data
:param train_file: the name of the train file
:param test_file: the name of the test file
:param dev_file: the name of the dev file, if None, dev data is sampled from train
- :return: a TaggedCorpus with annotated train, dev and test data
+ :return: a Corpus with annotated train, dev and test data
"""
# automatically identify train / test / dev files
if train_file is None:
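Along the same lines, a hedged sketch of `load_ud_corpus`; since the function identifies the train/test/dev files itself when they are not given, pointing it at a folder of CoNLL-U files is usually enough (the path is hypothetical):

```python
from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher

# hypothetical folder holding CoNLL-U files, e.g. an unpacked UD treebank
corpus: Corpus = NLPTaskDataFetcher.load_ud_corpus('resources/tasks/ud_english')
# train_file / test_file / dev_file are omitted, so they are identified automatically
```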
@@ -400,13 +400,13 @@ def load_classification_corpus(
max_tokens_per_doc=-1,
) -> Corpus:
"""
- Helper function to get a TaggedCorpus from text classification-formatted task data
+ Helper function to get a Corpus from text classification-formatted task data
:param data_folder: base folder with the task data
:param train_file: the name of the train file
:param test_file: the name of the test file
:param dev_file: the name of the dev file, if None, dev data is sampled from train
- :return: a TaggedCorpus with annotated train, dev and test data
+ :return: a Corpus with annotated train, dev and test data
"""

if type(data_folder) == str:
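Finally, a hedged sketch of `load_classification_corpus`; the path and file names are illustrative, and the token cap is arbitrary:

```python
from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher

# hypothetical folder with train/test/dev files in flair's text-classification format
corpus: Corpus = NLPTaskDataFetcher.load_classification_corpus(
    'resources/tasks/my_text_classification_data',
    train_file='train.txt',
    test_file='test.txt',
    dev_file='dev.txt',
    max_tokens_per_doc=512,  # the signature above defaults this to -1; 512 is arbitrary
)
```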
32 changes: 16 additions & 16 deletions resources/docs/EXPERIMENTS.md
@@ -30,23 +30,23 @@ This allows the `NLPTaskDataFetcher` class to read the data into our data struct
the dataset, as follows:

```python
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')
```

- This gives you a `TaggedCorpus` object that contains the data. Now, select `ner` as the tag you wish to predict and init the embeddings you wish to use.
+ This gives you a `Corpus` object that contains the data. Now, select `ner` as the tag you wish to predict and init the embeddings you wish to use.
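Concretely, those two steps look roughly like the following (a sketch that assumes the `corpus` variable from the snippet above; the embedding choices are placeholders, not the recommended configuration, which follows below):

```python
from flair.embeddings import WordEmbeddings, StackedEmbeddings, PooledFlairEmbeddings

# 1. pick the tag type to predict and build its label inventory from the corpus
tag_type = 'ner'
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

# 2. init some embeddings (model names here are placeholders)
embeddings = StackedEmbeddings(embeddings=[
    WordEmbeddings('glove'),
    PooledFlairEmbeddings('news-forward'),
    PooledFlairEmbeddings('news-backward'),
])
```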

#### Best Known Configuration

The full code to get a state-of-the-art model for English NER is as follows:

```python
- from flair.data import TaggedCorpus
+ from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, PooledFlairEmbeddings
from typing import List

# 1. get the corpus
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')

# 2. what tag do we want to predict?
tag_type = 'ner'
@@ -108,13 +108,13 @@ Once you have the data, reproduce our experiments exactly like for CoNLL-03, jus
FastText word embeddings and German contextual string embeddings. The full code then is as follows:

```python
- from flair.data import TaggedCorpus
+ from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, PooledFlairEmbeddings
from typing import List

# 1. get the corpus
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03_GERMAN, base_path='resources/tasks')
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03_GERMAN, base_path='resources/tasks')

# 2. what tag do we want to predict?
tag_type = 'ner'
@@ -163,13 +163,13 @@ Once you have the data, reproduce our experiments exactly like for CoNLL-03, jus
FastText word embeddings and German contextual string embeddings. The full code then is as follows:

```python
- from flair.data import TaggedCorpus
+ from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, PooledFlairEmbeddings
from typing import List

# 1. get the corpus
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03_DUTCH, base_path='resources/tasks')
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03_DUTCH, base_path='resources/tasks')

# 2. what tag do we want to predict?
tag_type = 'ner'
@@ -218,13 +218,13 @@ Once you have the data, reproduce our experiments exactly like for CoNLL-03, jus
FastText word embeddings and German contextual string embeddings. The full code then is as follows:

```python
- from flair.data import TaggedCorpus
+ from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, FlairEmbeddings
from typing import List

# 1. get the corpus
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03_DUTCH, base_path='resources/tasks')
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03_DUTCH, base_path='resources/tasks')

# 2. what tag do we want to predict?
tag_type = 'ner'
@@ -286,13 +286,13 @@ Once you have the data, reproduce our experiments exactly like for CoNLL-03, jus
FastText embeddings (they work better on this dataset). The full code then is as follows:

```python
- from flair.data import TaggedCorpus
+ from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, FlairEmbeddings
from typing import List

# 1. get the corpus
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.ONTONER, base_path='resources/tasks')
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.ONTONER, base_path='resources/tasks')

# 2. what tag do we want to predict?
tag_type = 'ner'
@@ -354,13 +354,13 @@ so the algorithm knows that POS tags and not NER are to be predicted from this d
#### Best Known Configuration

```python
- from flair.data import TaggedCorpus
+ from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, FlairEmbeddings
from typing import List

# 1. get the corpus
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.PENN, base_path='resources/tasks')
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.PENN, base_path='resources/tasks')

# 2. what tag do we want to predict?
tag_type = 'pos'
@@ -410,13 +410,13 @@ Run the code with extvec embeddings and our proposed contextual string embedding
so the algorithm knows that chunking tags and not NER are to be predicted from this data.

```python
- from flair.data import TaggedCorpus
+ from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, FlairEmbeddings
from typing import List

# 1. get the corpus
- corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_2000)
+ corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_2000)

# 2. what tag do we want to predict?
tag_type = 'np'
