@inproceedings{wu-monz-2023-beyond,
title = "Beyond Shared Vocabulary: Increasing Representational Word Similarities across Languages for Multilingual Machine Translation",
author = "Wu, Di and
Monz, Christof",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.605",
doi = "10.18653/v1/2023.emnlp-main.605",
pages = "9749--9764",
abstract = "Using a shared vocabulary is common practice in Multilingual Neural Machine Translation (MNMT). In addition to its simple design, shared tokens play an important role in positive knowledge transfer, which manifests naturally when the shared tokens refer to similar meanings across languages. However, when words overlap is small, e.g., using different writing systems, transfer is inhibited. In this paper, we propose a re-parameterized method for building embeddings to alleviate this problem. More specifically, we define word-level information transfer pathways via word equivalence classes and rely on graph networks to fuse word embeddings across languages. Our experiments demonstrate the advantages of our approach: 1) the semantics of embeddings are better aligned across languages, 2) our method achieves evident BLEU improvements on high- and low-resource MNMT, and 3) only less than 1.0{\%} additional trainable parameters are required with a limited increase in computational costs, while the inference time is identical to baselines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-monz-2023-beyond">
<titleInfo>
<title>Beyond Shared Vocabulary: Increasing Representational Word Similarities across Languages for Multilingual Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Di</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Using a shared vocabulary is common practice in Multilingual Neural Machine Translation (MNMT). In addition to its simple design, shared tokens play an important role in positive knowledge transfer, which manifests naturally when the shared tokens refer to similar meanings across languages. However, when words overlap is small, e.g., using different writing systems, transfer is inhibited. In this paper, we propose a re-parameterized method for building embeddings to alleviate this problem. More specifically, we define word-level information transfer pathways via word equivalence classes and rely on graph networks to fuse word embeddings across languages. Our experiments demonstrate the advantages of our approach: 1) the semantics of embeddings are better aligned across languages, 2) our method achieves evident BLEU improvements on high- and low-resource MNMT, and 3) only less than 1.0% additional trainable parameters are required with a limited increase in computational costs, while the inference time is identical to baselines.</abstract>
<identifier type="citekey">wu-monz-2023-beyond</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.605</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.605</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>9749</start>
<end>9764</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Shared Vocabulary: Increasing Representational Word Similarities across Languages for Multilingual Machine Translation
%A Wu, Di
%A Monz, Christof
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F wu-monz-2023-beyond
%X Using a shared vocabulary is common practice in Multilingual Neural Machine Translation (MNMT). In addition to its simple design, shared tokens play an important role in positive knowledge transfer, which manifests naturally when the shared tokens refer to similar meanings across languages. However, when words overlap is small, e.g., using different writing systems, transfer is inhibited. In this paper, we propose a re-parameterized method for building embeddings to alleviate this problem. More specifically, we define word-level information transfer pathways via word equivalence classes and rely on graph networks to fuse word embeddings across languages. Our experiments demonstrate the advantages of our approach: 1) the semantics of embeddings are better aligned across languages, 2) our method achieves evident BLEU improvements on high- and low-resource MNMT, and 3) only less than 1.0% additional trainable parameters are required with a limited increase in computational costs, while the inference time is identical to baselines.
%R 10.18653/v1/2023.emnlp-main.605
%U https://aclanthology.org/2023.emnlp-main.605
%U https://doi.org/10.18653/v1/2023.emnlp-main.605
%P 9749-9764
Markdown (Informal)
[Beyond Shared Vocabulary: Increasing Representational Word Similarities across Languages for Multilingual Machine Translation](https://aclanthology.org/2023.emnlp-main.605) (Wu & Monz, EMNLP 2023)
ACL