% Conference paper from the Fifth Workshop on the Use of Computational Methods
% in the Study of Endangered Languages (2022). Proper nouns in the title are
% brace-protected as whole words so sentence-casing styles keep their capitals.
@inproceedings{gongora-etal-2022-use,
  author    = {G{\'o}ngora, Santiago and
               Giossa, Nicol{\'a}s and
               Chiruzzo, Luis},
  title     = {Can We Use Word Embeddings for Enhancing {Guarani}-{Spanish} Machine Translation?},
  editor    = {Moeller, Sarah and
               Anastasopoulos, Antonios and
               Arppe, Antti and
               Chaudhary, Aditi and
               Harrigan, Atticus and
               Holden, Josh and
               Lachler, Jordan and
               Palmer, Alexis and
               Rijhwani, Shruti and
               Schwartz, Lane},
  booktitle = {Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages     = {127--132},
  doi       = {10.18653/v1/2022.computel-1.16},
  url       = {https://aclanthology.org/2022.computel-1.16},
  abstract  = {Machine translation for low-resource languages, such as Guarani, is a challenging task due to the lack of data. One way of tackling it is using pretrained word embeddings for model initialization. In this work we try to check if currently available data is enough to train rich embeddings for enhancing MT for Guarani and Spanish, by building a set of word embedding collections and training MT systems using them. We found that the trained vectors are strong enough to slightly improve the performance of some of the translation models and also to speed up the training convergence.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper: authors are listed on the item itself,
     while editors, publisher and place are recorded on the host
     (proceedings) relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="gongora-etal-2022-use">
    <titleInfo>
      <title>Can We Use Word Embeddings for Enhancing Guarani-Spanish Machine Translation?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Santiago</namePart>
      <namePart type="family">Góngora</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nicolás</namePart>
      <namePart type="family">Giossa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Luis</namePart>
      <namePart type="family">Chiruzzo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sarah</namePart>
        <namePart type="family">Moeller</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonios</namePart>
        <namePart type="family">Anastasopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antti</namePart>
        <namePart type="family">Arppe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aditi</namePart>
        <namePart type="family">Chaudhary</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Atticus</namePart>
        <namePart type="family">Harrigan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Josh</namePart>
        <namePart type="family">Holden</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Lachler</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexis</namePart>
        <namePart type="family">Palmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shruti</namePart>
        <namePart type="family">Rijhwani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lane</namePart>
        <namePart type="family">Schwartz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Machine translation for low-resource languages, such as Guarani, is a challenging task due to the lack of data. One way of tackling it is using pretrained word embeddings for model initialization. In this work we try to check if currently available data is enough to train rich embeddings for enhancing MT for Guarani and Spanish, by building a set of word embedding collections and training MT systems using them. We found that the trained vectors are strong enough to slightly improve the performance of some of the translation models and also to speed up the training convergence.</abstract>
    <identifier type="citekey">gongora-etal-2022-use</identifier>
    <identifier type="doi">10.18653/v1/2022.computel-1.16</identifier>
    <location>
      <url>https://aclanthology.org/2022.computel-1.16</url>
    </location>
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>127</start>
        <end>132</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Can We Use Word Embeddings for Enhancing Guarani-Spanish Machine Translation?
%A Góngora, Santiago
%A Giossa, Nicolás
%A Chiruzzo, Luis
%Y Moeller, Sarah
%Y Anastasopoulos, Antonios
%Y Arppe, Antti
%Y Chaudhary, Aditi
%Y Harrigan, Atticus
%Y Holden, Josh
%Y Lachler, Jordan
%Y Palmer, Alexis
%Y Rijhwani, Shruti
%Y Schwartz, Lane
%S Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F gongora-etal-2022-use
%X Machine translation for low-resource languages, such as Guarani, is a challenging task due to the lack of data. One way of tackling it is using pretrained word embeddings for model initialization. In this work we try to check if currently available data is enough to train rich embeddings for enhancing MT for Guarani and Spanish, by building a set of word embedding collections and training MT systems using them. We found that the trained vectors are strong enough to slightly improve the performance of some of the translation models and also to speed up the training convergence.
%R 10.18653/v1/2022.computel-1.16
%U https://aclanthology.org/2022.computel-1.16
%U https://doi.org/10.18653/v1/2022.computel-1.16
%P 127-132
Markdown (Informal)
[Can We Use Word Embeddings for Enhancing Guarani-Spanish Machine Translation?](https://aclanthology.org/2022.computel-1.16) (Góngora et al., ComputEL 2022)
ACL