Skip to content

Commit

Permalink
update tkde
Browse files Browse the repository at this point in the history
  • Loading branch information
jklj077 committed Jan 13, 2020
1 parent 09135ed commit eb4cf27
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 17 deletions.
31 changes: 15 additions & 16 deletions content/publication/tkde2018-training/cite.bib
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
@article{sun2018training,
  author  = {Sun, Xu and
             Ren, Xuancheng and
             Ma, Shuming and
             Wei, Bingzhen and
             Li, Wei and
             Xu, Jingjing and
             Wang, Houfeng and
             Zhang, Yi},
  title   = {Training Simplification and Model Simplification for Deep Learning: A Minimal Effort Back Propagation Method},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  volume  = {32},
  number  = {2},
  pages   = {374--387},
  year    = {2020},
  doi     = {10.1109/TKDE.2018.2883613},
}

2 changes: 1 addition & 1 deletion content/publication/tkde2018-training/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ publication_types: ["2"]
abstract: "We propose a simple yet effective technique to simplify the training and the resulting model of neural networks. In back propagation, only a small subset of the full gradient is computed to update the model parameters. The gradient vectors are sparsified in such a way that only the top-k elements (in terms of magnitude) are kept. As a result, only k rows or columns (depending on the layout) of the weight matrix are modified, leading to a linear reduction in the computational cost. Based on the sparsified gradients, we further simplify the model by eliminating the rows or columns that are seldom updated, which will reduce the computational cost both in the training and decoding, and potentially accelerate decoding in real-world applications. Surprisingly, experimental results demonstrate that most of the time we only need to update fewer than 5% of the weights at each back propagation pass. More interestingly, the accuracy of the resulting models is actually improved rather than degraded, and a detailed analysis is given. The model simplification results show that we could adaptively simplify the model which could often be reduced by around 9x, without any loss on accuracy or even with improved accuracy."
featured: true
publication: "*IEEE Transactions on Knowledge and Data Engineering (**TKDE**)*"
publication_short: "**TKDE** (*in press*)"
publication_short: "**TKDE** 32(2)"
url_pdf: "https://doi.org/10.1109/TKDE.2018.2883613"
url_arxiv: "https://arxiv.org/abs/1711.06528"
url_code: "https://github.com/lancopku/meSimp"
Expand Down

0 comments on commit eb4cf27

Please sign in to comment.