Abstract
Sign languages use multiple asynchronous information channels (articulators), not just the hands but also the face and body, which computational approaches often ignore. In this paper we tackle the multi-articulatory sign language translation task and propose a novel multi-channel transformer architecture. The proposed architecture allows both the inter- and intra-contextual relationships between different sign articulators to be modelled within the transformer network itself, while also maintaining channel-specific information. We evaluate our approach on the RWTH-PHOENIX-Weather-2014T dataset and report competitive translation performance. Importantly, we overcome the reliance on the gloss annotations which underpin other state-of-the-art approaches, thereby removing the need for expensive curated datasets.
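To make the channel idea concrete, below is a minimal PyTorch sketch of one way such an attention step could look: each articulator keeps its own stream, attends to itself (intra-channel context) and to the other articulators (inter-channel context), and the two are fused back into a channel-specific output. This is an illustrative sketch under our own assumptions about module names and shapes, not the authors' released implementation; the paper embeds the analogous mechanism inside its transformer layers.

import torch
import torch.nn as nn

class MultiChannelAttention(nn.Module):
    """Sketch of per-articulator intra- and inter-channel attention.

    All names and dimensions are illustrative assumptions, not the
    paper's actual code.
    """

    def __init__(self, dim: int, num_heads: int, num_channels: int):
        super().__init__()
        # One self-attention per channel: intra-channel context.
        self.intra = nn.ModuleList(
            [nn.MultiheadAttention(dim, num_heads, batch_first=True)
             for _ in range(num_channels)]
        )
        # One cross-attention per channel over the other channels:
        # inter-channel context.
        self.inter = nn.ModuleList(
            [nn.MultiheadAttention(dim, num_heads, batch_first=True)
             for _ in range(num_channels)]
        )
        # Fuse the two contexts while keeping a separate stream per channel.
        self.fuse = nn.ModuleList(
            [nn.Linear(2 * dim, dim) for _ in range(num_channels)]
        )

    def forward(self, channels: list[torch.Tensor]) -> list[torch.Tensor]:
        # channels: one (batch, time, dim) tensor per articulator,
        # e.g. hands, face, body.
        out = []
        for i, x in enumerate(channels):
            intra, _ = self.intra[i](x, x, x)
            # Context from all other articulators, concatenated along time.
            others = torch.cat(
                [c for j, c in enumerate(channels) if j != i], dim=1
            )
            inter, _ = self.inter[i](x, others, others)
            out.append(self.fuse[i](torch.cat([intra, inter], dim=-1)))
        return out

The key design point this sketch tries to capture is that the channels are never collapsed into a single sequence: fusing attention outputs per channel is what preserves the channel-specific information the abstract refers to.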
BibTeX
@inproceedings{surrey858587,
  title     = {Multi-channel Transformers for Multi-articulatory Sign Language Translation},
  author    = {Necati Cihan Camgöz and Oscar Koller and Simon Hadfield and Richard Bowden},
  url       = {http://epubs.surrey.ac.uk/858587/},
  doi       = {10.1007/978-3-030-58621-8},
  year      = {2020},
  date      = {2020-08-23},
  booktitle = {16th European Conference on Computer Vision (ECCV), ACVR Workshop, 2020},
  journal   = {Proceedings of the 16th European Conference on Computer Vision (ECCV 2020) Part XI},
  publisher = {Springer International Publishing},
  abstract  = {Sign languages use multiple asynchronous information channels (articulators), not just the hands but also the face and body, which computational approaches often ignore. In this paper we tackle the multi-articulatory sign language translation task and propose a novel multi-channel transformer architecture. The proposed architecture allows both the inter- and intra-contextual relationships between different sign articulators to be modelled within the transformer network itself, while also maintaining channel-specific information. We evaluate our approach on the RWTH-PHOENIX-Weather-2014T dataset and report competitive translation performance. Importantly, we overcome the reliance on the gloss annotations which underpin other state-of-the-art approaches, thereby removing the need for expensive curated datasets.},
  note      = {Series Volume: 12356},
  keywords  = {Sign language translation; Multi-channel; Sequence-to-sequence},
  pubstate  = {published},
  tppubtype = {inproceedings}
}