
Commit bac370b
Update chnsenticorp examples for qianyan dataset modification (PaddleP…
Steffy-zxf authored Jun 3, 2021
1 parent 07d414a commit bac370b
Showing 4 changed files with 6 additions and 40 deletions.
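All four examples stop requesting the chnsenticorp test split, presumably because the qianyan revision of the dataset no longer provides labeled test data, and load only the labeled train and dev splits. A minimal sketch of the pattern every diff below converges on, assuming the examples' paddlenlp.datasets.load_dataset API:

    from paddlenlp.datasets import load_dataset

    # After this commit, only the labeled splits are requested;
    # the "test" split is no longer part of the training scripts.
    train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])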
examples/sentiment_analysis/skep/train_sentence.py (7 changes: 3 additions & 4 deletions)
@@ -172,13 +172,12 @@ def create_dataloader(dataset,
     set_seed(args.seed)
     if args.model_name == "skep_ernie_1.0_large_ch":
         dataset_name = "chnsenticorp"
-        train_ds, dev_ds, test_ds = load_dataset(
-            dataset_name, splits=["train", "dev", "test"])
+        train_ds, dev_ds = load_dataset(dataset_name, splits=["train", "dev"])
 
     else:
         dataset_name = "sst-2"
-        train_ds, dev_ds, test_ds = load_dataset(
-            "glue", dataset_name, splits=["train", "dev", "test"])
+        train_ds, dev_ds = load_dataset(
+            "glue", dataset_name, splits=["train", "dev"])
     label_map = {0: 'negative', 1: 'positive'}
 
     model = SkepForSequenceClassification.from_pretrained(
examples/text_classification/pretrained_models/train.py (13 changes: 1 addition & 12 deletions)
@@ -152,8 +152,7 @@ def do_train():
 
     set_seed(args.seed)
 
-    train_ds, dev_ds, test_ds = load_dataset(
-        "chnsenticorp", splits=["train", "dev", "test"])
+    train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])
 
     # If you want to use a bert/roberta/electra pretrained model,
     # model = ppnlp.transformers.BertForSequenceClassification.from_pretrained('bert-base-chinese', num_class=2)
@@ -191,12 +190,6 @@ def do_train():
         batch_size=args.batch_size,
         batchify_fn=batchify_fn,
         trans_fn=trans_func)
-    test_data_loader = create_dataloader(
-        test_ds,
-        mode='test',
-        batch_size=args.batch_size,
-        batchify_fn=batchify_fn,
-        trans_fn=trans_func)
 
     if args.init_from_ckpt and os.path.isfile(args.init_from_ckpt):
         state_dict = paddle.load(args.init_from_ckpt)
@@ -254,10 +247,6 @@ def do_train():
             model._layers.save_pretrained(save_dir)
             tokenizer.save_pretrained(save_dir)
 
-    if rank == 0:
-        print('Evaluating on test data.')
-        evaluate(model, criterion, metric, test_data_loader)
-
 
 if __name__ == "__main__":
     do_train()
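With test_data_loader and the final test pass gone, the dev loader built just above the deleted block is the only held-out evaluation left in this script. A sketch of the end-of-training check that remains possible, assuming the script names its validation loader dev_data_loader and keeps the evaluate(model, criterion, metric, data_loader) helper seen in the deleted lines:

    # Sketch: the final evaluation can now only target the dev split.
    # dev_data_loader is an assumed name; the visible diff truncates it.
    if rank == 0:
        print('Evaluating on dev data.')
        evaluate(model, criterion, metric, dev_data_loader)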
examples/text_classification/rnn/train.py (13 changes: 1 addition & 12 deletions)
@@ -94,8 +94,7 @@ def create_dataloader(dataset,
     vocab = Vocab.load_vocabulary(
         args.vocab_path, unk_token='[UNK]', pad_token='[PAD]')
     # Loads the dataset.
-    train_ds, dev_ds, test_ds = load_dataset(
-        "chnsenticorp", splits=["train", "dev", "test"])
+    train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])
 
     # Constructs the network.
     network = args.network.lower()
@@ -180,12 +179,6 @@ def create_dataloader(dataset,
         batch_size=args.batch_size,
         mode='validation',
         batchify_fn=batchify_fn)
-    test_loader = create_dataloader(
-        test_ds,
-        trans_fn=trans_fn,
-        batch_size=args.batch_size,
-        mode='test',
-        batchify_fn=batchify_fn)
 
     optimizer = paddle.optimizer.Adam(
         parameters=model.parameters(), learning_rate=args.lr)
@@ -208,7 +201,3 @@ def create_dataloader(dataset,
         epochs=args.epochs,
         save_dir=args.save_dir,
         callbacks=callback)
-
-    # Finally tests model.
-    results = model.evaluate(test_loader)
-    print("Finally test acc: %.5f" % results['acc'])
examples/word_embedding/train.py (13 changes: 1 addition & 12 deletions)
@@ -151,8 +151,7 @@ def forward(self, text, seq_len=None):
     if '[PAD]' not in vocab:
         vocab['[PAD]'] = len(vocab)
     # Loads the dataset.
-    train_ds, dev_ds, test_ds = load_dataset(
-        "chnsenticorp", splits=["train", "dev", "test"])
+    train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])
 
     # Constructs the network.
     model = BoWModel(
@@ -187,12 +186,6 @@ def forward(self, text, seq_len=None):
         batch_size=args.batch_size,
         mode='validation',
         pad_token_id=vocab['[PAD]'])
-    test_loader = create_dataloader(
-        test_ds,
-        trans_fn=trans_fn,
-        batch_size=args.batch_size,
-        mode='test',
-        pad_token_id=vocab['[PAD]'])
 
     optimizer = paddle.optimizer.Adam(
         parameters=model.parameters(), learning_rate=args.lr)
@@ -219,7 +212,3 @@ def forward(self, text, seq_len=None):
         epochs=args.epochs,
         save_dir=args.save_dir,
         callbacks=callback)
-
-    # Finally tests model.
-    results = model.evaluate(test_loader, callbacks=callback)
-    print("Finally test acc: %.5f" % results['acc'])
