% arXiv preprint 2103.04037, recorded under CoRR; metadata exported from DBLP
% (see the biburl/bibsource fields for the originating record).
@article{Shin2021Perspectives,
  author        = {Shin, Andrew and
                   Ishii, Masato and
                   Narihira, Takuya},
  title         = {Perspectives and Prospects on Transformer Architecture for Cross-Modal Tasks with Language and Vision},
  journal       = {CoRR},
  volume        = {abs/2103.04037},
  year          = {2021},
  eprint        = {2103.04037},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2103.04037},
  timestamp     = {Mon, 15 Mar 2021 17:30:55 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2103-04037.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}