update

jklj077 · Sep 26, 2020 · 7e0a5a4 · 7e0a5a4
1 parent c8d3366
commit 7e0a5a4
Show file tree

Hide file tree

Showing 6 changed files with 57 additions and 4 deletions.
diff --git a/config/_default/menus.toml b/config/_default/menus.toml
@@ -30,7 +30,7 @@
 
 # Link to a PDF of your resume/CV from the menu.
 # To enable, copy your resume/CV to `static/files/resume.pdf` and uncomment the lines below.
-[[main]]
-  name = "Resume"
-  url = "files/resume.pdf"
-  weight = 70
+# [[main]]
+#  name = "Resume"
+#  url = "files/resume.pdf"
+#  weight = 70
diff --git a/content/publication/emnlp2020-regularizing/index.md b/content/publication/emnlp2020-regularizing/index.md
@@ -0,0 +1,12 @@
+---
+title: "Regularizing Dialogue Generation by Imitating Implicit Scenarios"
+date: 2020-11-16
+publishDate: 2020-09-26T15:30:29.981130Z
+authors: ["Shaoxiong Feng", "**Xuancheng Ren**", "Hongshen Chen", "Bin Sun", "Kan Li", "Xu Sun"]
+publication_types: ["1"]
+abstract: "Human dialogues are scenario-based and appropriate responses generally relate to the latent context knowledge entailed by the specific scenario. To enable responses that are more meaningful and context-specific, we propose to improve generative dialogue systems from the scenario perspective, where both dialogue history and future conversation are taken into account to implicitly reconstruct the scenario knowledge. More importantly, the conversation scenarios are further internalized using an imitation learning framework, where the conventional dialogue model that has no access to future conversations is effectively regularized by transferring the scenario knowledge contained in hierarchical supervising signals from the scenario-based dialogue model, so that the future conversation is not required in actual inference. Extensive evaluations show that our approach significantly outperforms state-of-the-art baselines on diversity and relevance, and expresses scenario-specific knowledge."
+featured: false
+publication: "*Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, **EMNLP 2020** (to appear)*"
+publication_short: "**EMNLP 2020** (to appear)"
+---
+
diff --git a/content/publication/neurips2020-prophet/index.md b/content/publication/neurips2020-prophet/index.md
@@ -0,0 +1,12 @@
+---
+title: "Prophet Attention: Predicting Attention with Future Attention for Improved Image Captioning"
+date: 2020-12-07
+publishDate: 2020-09-26T08:04:29.940124Z
+authors: ["Fenglin Liu", "**Xuancheng Ren**", "Xian Wu", "Shen Ge", "Wei Fan", "Yuexian Zou", "Xu Sun"]
+publication_types: ["1"]
+abstract: "Recently, attention based models have been used extensively in image captioning and are expected to ground correct image regions with proper generated words. However, for each time step in the decoding process, the attention based models usually use the hidden state of current input to attend to the image regions. Under this setting, these attention models have a “deviated focus” problem, that they calculate the attention weights based on previous words instead of the one to be generated, impairing the performance of both grounding and captioning. In this paper, we propose the Prophet Attention, similar to the form of self-supervision. In the training stage, this module utilizes the future information to calculate the “ideal” attention weights towards image regions. These calculated weights are further used to regularize the “deviated” attention. In this manner, image regions are grounded with the correct words. Prophet Attention does not introduce additional model parameters or inference computations, making it easily incorporated into any existing systems. The experiments on the Flickr30k Entities and MSCOCO datasets show that the proposed Prophet Attention consistently outperforms baselines in both automatic metrics and human evaluations. It is worth noting that we set new state-of-the-art results on the two benchmark datasets and achieve the 1st place on the leaderboard of the online MSCOCO benchmark."
+featured: false
+publication: "*Advances in Neural Information Processing Systems 33, **NeurIPS 2020** (to appear)*"
+publication_short: "**NeurIPS 2020** (to appear)"
+---
+
diff --git a/content/publication/neurocomputing2020-memorized/cite.bib b/content/publication/neurocomputing2020-memorized/cite.bib
@@ -0,0 +1,14 @@
+@article{zhang2020memorized,
+  author =  {Zhiyuan Zhang and
+             Pengcheng Yang and
+             Xuancheng Ren and
+             Qi Su and
+             Xu Sun},
+  journal = {Neurocomputing},
+  pages   = {397--407},
+  volume  = {415},
+  title   = {Memorized Sparse Backpropagation},
+  doi     = {10.1016/j.neucom.2020.08.055},
+  year    = {2020}
+}
+
diff --git a/content/publication/neurocomputing2020-memorized/index.md b/content/publication/neurocomputing2020-memorized/index.md
@@ -0,0 +1,15 @@
+---
+title: "Memorized Sparse Backpropagation"
+date: 2020-11-20
+publishDate: 2020-09-26T08:04:30.047125Z
+authors: ["Zhiyuan Zhang", "Pengcheng Yang", "**Xuancheng Ren**", "Qi Su", "Xu Sun"]
+publication_types: ["2"]
+abstract: "Neural network learning is usually time-consuming since backpropagation needs to compute full gradients and backpropagate them across multiple layers. Despite the success of existing works in accelerating propagation through sparseness, the relevant theoretical characteristics remain under-researched and empirical studies found that they suffer from the loss of information contained in unpropagated gradients. To tackle these problems, this paper presents a unified sparse backpropagation framework and provides a detailed analysis of its theoretical characteristics. Analysis reveals that when applied to a multilayer perceptron, our framework essentially performs gradient descent using an estimated gradient similar enough to the true gradient, resulting in convergence in probability under certain conditions. Furthermore, a simple yet effective algorithm named memorized sparse backpropagation (MSBP) is proposed to remedy the problem of information loss by storing unpropagated gradients in memory for learning in the next steps. Experimental results demonstrate that the proposed MSBP is effective in alleviating the information loss in traditional sparse backpropagation while achieving comparable acceleration."
+featured: false
+publication: "*Neurocomputing*"
+publication_short: "**Neurocomputing** 415"
+url_pdf: "https://doi.org/10.1016/j.neucom.2020.08.055"
+url_arxiv: "https://arxiv.org/abs/1905.10194"
+doi: "10.1016/j.neucom.2020.08.055"
+---
+
diff --git a/static/files/resume.pdf b/static/files/resume.pdf