@inproceedings{wrobel-nowak-2022-transformer,
title = "Transformer-based Part-of-Speech Tagging and Lemmatization for {L}atin",
author = "Wr{\'o}bel, Krzysztof and
Nowak, Krzysztof",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lt4hala-1.31",
pages = "193--197",
abstract = "The paper presents a submission to the EvaLatin 2022 shared task. Our system places first for lemmatization, part-of-speech and morphological tagging in both closed and open modalities. The results for cross-genre and cross-time sub-tasks show that the system handles the diachronic and diastratic variation of Latin. The architecture employs state-of-the-art transformer models. For part-of-speech and morphological tagging, we use XLM-RoBERTa large, while for lemmatization a ByT5 small model was employed. The paper features a thorough discussion of part-of-speech and lemmatization errors which shows how the system performance may be improved for Classical, Medieval and Neo-Latin texts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wrobel-nowak-2022-transformer">
<titleInfo>
<title>Transformer-based Part-of-Speech Tagging and Lemmatization for Latin</title>
</titleInfo>
<name type="personal">
<namePart type="given">Krzysztof</namePart>
<namePart type="family">Wróbel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krzysztof</namePart>
<namePart type="family">Nowak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The paper presents a submission to the EvaLatin 2022 shared task. Our system places first for lemmatization, part-of-speech and morphological tagging in both closed and open modalities. The results for cross-genre and cross-time sub-tasks show that the system handles the diachronic and diastratic variation of Latin. The architecture employs state-of-the-art transformer models. For part-of-speech and morphological tagging, we use XLM-RoBERTa large, while for lemmatization a ByT5 small model was employed. The paper features a thorough discussion of part-of-speech and lemmatization errors which shows how the system performance may be improved for Classical, Medieval and Neo-Latin texts.</abstract>
<identifier type="citekey">wrobel-nowak-2022-transformer</identifier>
<location>
<url>https://aclanthology.org/2022.lt4hala-1.31</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>193</start>
<end>197</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Transformer-based Part-of-Speech Tagging and Lemmatization for Latin
%A Wróbel, Krzysztof
%A Nowak, Krzysztof
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F wrobel-nowak-2022-transformer
%X The paper presents a submission to the EvaLatin 2022 shared task. Our system places first for lemmatization, part-of-speech and morphological tagging in both closed and open modalities. The results for cross-genre and cross-time sub-tasks show that the system handles the diachronic and diastratic variation of Latin. The architecture employs state-of-the-art transformer models. For part-of-speech and morphological tagging, we use XLM-RoBERTa large, while for lemmatization a ByT5 small model was employed. The paper features a thorough discussion of part-of-speech and lemmatization errors which shows how the system performance may be improved for Classical, Medieval and Neo-Latin texts.
%U https://aclanthology.org/2022.lt4hala-1.31
%P 193-197
Markdown (Informal)
[Transformer-based Part-of-Speech Tagging and Lemmatization for Latin](https://aclanthology.org/2022.lt4hala-1.31) (Wróbel & Nowak, LT4HALA 2022)
ACL