Code for the EMNLP 2022 paper "Learning Semantic Textual Similarity via Topic-informed Discrete Latent Variables" (DisBert).
Set up the environment:

conda create -n dis python=3.7
conda activate dis
conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia
pip install -e .
pip install gensim jieba matplotlib overrides pyhocon allennlp accelerate tensorboard pandas datasets
Train the neural topic model (here, 30 topics on MRPC for 500 epochs):

python3 topic_model/GSM_run.py --taskname mrpc --n_topic 30 --num_epochs 500
Pretrain the vector-quantization (VQ) module (replace /home/XXX/DisBert/ with your own path):

python run_double_sentences.py \
--pretrain_vq 1 \
--topic_num 30 \
--task_name mrpc \
--home_dir /home/XXX/DisBert/
Train and evaluate the full model, pointing --pretrain_vq_model at the checkpoint produced in the previous step:

python run_double_sentences.py \
--model_name_or_path bert-base-uncased \
--max_length 128 \
--per_device_train_batch_size 32 \
--topic_num 30 \
--pretrain_vq_model path_to_vq_model \
--task_name mrpc \
--home_dir /home/XXX/DisBert/
If you find this code useful, please cite:

@inproceedings{yu2022DisBert,
  title={Learning Semantic Textual Similarity via Topic-informed Discrete Latent Variables},
  author={Yu, Erxin and Du, Lan and Jin, Yuan and Wei, Zhepei and Chang, Yi},
  booktitle={Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  pages={4937--4948},
  year={2022}
}