@inproceedings{cruz-2023-samsung,
title = "{S}amsung {R}{\&}{D} Institute {P}hilippines at {WMT} 2023",
author = "Cruz, Jan Christian Blaise",
editor = "Koehn, Philipp and
Haddow, Barry and
Kocmi, Tom and
Monz, Christof",
booktitle = "Proceedings of the Eighth Conference on Machine Translation",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wmt-1.6",
doi = "10.18653/v1/2023.wmt-1.6",
pages = "103--109",
abstract = "In this paper, we describe the constrained submission systems of Samsung R{\&}D Institute Philippines to the WMT 2023 General Translation Task for two directions: en-{\textgreater}he and he-{\textgreater}en. Our systems comprise of Transformer-based sequence-to-sequence models that are trained with a mix of best practices: comprehensive data preprocessing pipelines, synthetic backtranslated data, and the use of noisy channel reranking during online decoding. Our models perform comparably to, and sometimes outperform, strong baseline unconstrained systems such as mBART50 M2M and NLLB 200 MoE despite having significantly fewer parameters on two public benchmarks: FLORES-200 and NTREX-128.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cruz-2023-samsung">
<titleInfo>
<title>Samsung R&D Institute Philippines at WMT 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">Christian</namePart>
<namePart type="given">Blaise</namePart>
<namePart type="family">Cruz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe the constrained submission systems of Samsung R&D Institute Philippines to the WMT 2023 General Translation Task for two directions: en-\textgreaterhe and he-\textgreateren. Our systems comprise of Transformer-based sequence-to-sequence models that are trained with a mix of best practices: comprehensive data preprocessing pipelines, synthetic backtranslated data, and the use of noisy channel reranking during online decoding. Our models perform comparably to, and sometimes outperform, strong baseline unconstrained systems such as mBART50 M2M and NLLB 200 MoE despite having significantly fewer parameters on two public benchmarks: FLORES-200 and NTREX-128.</abstract>
<identifier type="citekey">cruz-2023-samsung</identifier>
<identifier type="doi">10.18653/v1/2023.wmt-1.6</identifier>
<location>
<url>https://aclanthology.org/2023.wmt-1.6</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>103</start>
<end>109</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Samsung R&D Institute Philippines at WMT 2023
%A Cruz, Jan Christian Blaise
%Y Koehn, Philipp
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Monz, Christof
%S Proceedings of the Eighth Conference on Machine Translation
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F cruz-2023-samsung
%X In this paper, we describe the constrained submission systems of Samsung R&D Institute Philippines to the WMT 2023 General Translation Task for two directions: en-\textgreaterhe and he-\textgreateren. Our systems comprise of Transformer-based sequence-to-sequence models that are trained with a mix of best practices: comprehensive data preprocessing pipelines, synthetic backtranslated data, and the use of noisy channel reranking during online decoding. Our models perform comparably to, and sometimes outperform, strong baseline unconstrained systems such as mBART50 M2M and NLLB 200 MoE despite having significantly fewer parameters on two public benchmarks: FLORES-200 and NTREX-128.
%R 10.18653/v1/2023.wmt-1.6
%U https://aclanthology.org/2023.wmt-1.6
%U https://doi.org/10.18653/v1/2023.wmt-1.6
%P 103-109
Markdown (Informal)
[Samsung R&D Institute Philippines at WMT 2023](https://aclanthology.org/2023.wmt-1.6) (Cruz, WMT 2023)
ACL