From e6ecf4679194cf28ece8ad8c9035de6140470ff0 Mon Sep 17 00:00:00 2001
From: Alexis Conneau
Date: Mon, 9 Sep 2019 18:27:34 -0700
Subject: [PATCH] Fixes tokenization of XNLI training file

---
Why the pattern changed: the label column is isolated by `cut -f3` before
the last sed runs, so that sed only ever sees the bare label with no tab in
front of it. The old expression `s/\tcontradictory/\tcontradiction/g`
demanded a leading tab (GNU sed reads `\t` as a tab) and so never matched;
the new expression matches the bare word.

Reviewer note: the line breaks in this patch were restored from a copy that
had been flattened onto a single line. The hunk header counts 7 lines per
side but only 6 survived, so the last (blank) context line is assumed.
Runs of spaces inside context lines may also have been collapsed, and the
From: header carries no e-mail address. Verify against get-data-xnli.sh
before applying.

 get-data-xnli.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/get-data-xnli.sh b/get-data-xnli.sh
index 41a60d1a..86600b56 100755
--- a/get-data-xnli.sh
+++ b/get-data-xnli.sh
@@ -46,7 +46,7 @@ echo "*** Preparing English train set ****"
 echo -e "premise\thypo\tlabel" > $XNLI_PATH/en.train
 sed '1d' $OUTPATH/XNLI-MT-1.0/multinli/multinli.train.en.tsv | cut -f1 | python $LOWER_REMOVE_ACCENT > $XNLI_PATH/train.f1
 sed '1d' $OUTPATH/XNLI-MT-1.0/multinli/multinli.train.en.tsv | cut -f2 | python $LOWER_REMOVE_ACCENT > $XNLI_PATH/train.f2
-sed '1d' $OUTPATH/XNLI-MT-1.0/multinli/multinli.train.en.tsv | cut -f3 | sed 's/\tcontradictory/\tcontradiction/g' > $XNLI_PATH/train.f3
+sed '1d' $OUTPATH/XNLI-MT-1.0/multinli/multinli.train.en.tsv | cut -f3 | sed 's/contradictory/contradiction/g' > $XNLI_PATH/train.f3
 paste $XNLI_PATH/train.f1 $XNLI_PATH/train.f2 $XNLI_PATH/train.f3 >> $XNLI_PATH/en.train
 rm $XNLI_PATH/train.f1 $XNLI_PATH/train.f2 $XNLI_PATH/train.f3
 