@inproceedings{hammarstrom-2021-measuring,
title = "Measuring Prefixation and Suffixation in the Languages of the World",
author = {Hammarstr{\"o}m, Harald},
editor = {Vylomova, Ekaterina and
Salesky, Elizabeth and
Mielke, Sabrina and
Lapesa, Gabriella and
Kumar, Ritesh and
Hammarstr{\"o}m, Harald and
Vuli{\'c}, Ivan and
Korhonen, Anna and
Reichart, Roi and
Ponti, Edoardo Maria and
Cotterell, Ryan},
booktitle = "Proceedings of the Third Workshop on Computational Typology and Multilingual NLP",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.sigtyp-1.8",
doi = "10.18653/v1/2021.sigtyp-1.8",
pages = "81--89",
abstract = "It has long been recognized that suffixing is more common than prefixing in the languages of the world. More detailed statistics on this tendency are needed to sharpen proposed explanations for this tendency. The classic approach to gathering data on the prefix/suffix preference is for a human to read grammatical descriptions (948 languages), which is time-consuming and involves discretization judgments. In this paper we explore two machine-driven approaches for prefix and suffix statistics which are crude approximations, but have advantages in terms of time and replicability. The first simply searches a large collection of grammatical descriptions for occurrences of the terms {`}prefix{'} and {`}suffix{'} (4 287 languages). The second counts substrings from raw text data in a way indirectly reflecting prefixation and suffixation (1 030 languages, using New Testament translations). The three approaches largely agree in their measurements but there are important theoretical and practical differences. In all measurements, there is an overall preference for suffixation, albeit only slightly, at ratios ranging between 0.51 and 0.68.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hammarstrom-2021-measuring">
<titleInfo>
<title>Measuring Prefixation and Suffixation in the Languages of the World</title>
</titleInfo>
<name type="personal">
<namePart type="given">Harald</namePart>
<namePart type="family">Hammarström</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Computational Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabrina</namePart>
<namePart type="family">Mielke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriella</namePart>
<namePart type="family">Lapesa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harald</namePart>
<namePart type="family">Hammarström</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vulić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roi</namePart>
<namePart type="family">Reichart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edoardo</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Ponti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It has long been recognized that suffixing is more common than prefixing in the languages of the world. More detailed statistics on this tendency are needed to sharpen proposed explanations for this tendency. The classic approach to gathering data on the prefix/suffix preference is for a human to read grammatical descriptions (948 languages), which is time-consuming and involves discretization judgments. In this paper we explore two machine-driven approaches for prefix and suffix statistics which are crude approximations, but have advantages in terms of time and replicability. The first simply searches a large collection of grammatical descriptions for occurrences of the terms ‘prefix’ and ‘suffix’ (4 287 languages). The second counts substrings from raw text data in a way indirectly reflecting prefixation and suffixation (1 030 languages, using New Testament translations). The three approaches largely agree in their measurements but there are important theoretical and practical differences. In all measurements, there is an overall preference for suffixation, albeit only slightly, at ratios ranging between 0.51 and 0.68.</abstract>
<identifier type="citekey">hammarstrom-2021-measuring</identifier>
<identifier type="doi">10.18653/v1/2021.sigtyp-1.8</identifier>
<location>
<url>https://aclanthology.org/2021.sigtyp-1.8</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>81</start>
<end>89</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring Prefixation and Suffixation in the Languages of the World
%A Hammarström, Harald
%Y Vylomova, Ekaterina
%Y Salesky, Elizabeth
%Y Mielke, Sabrina
%Y Lapesa, Gabriella
%Y Kumar, Ritesh
%Y Hammarström, Harald
%Y Vulić, Ivan
%Y Korhonen, Anna
%Y Reichart, Roi
%Y Ponti, Edoardo Maria
%Y Cotterell, Ryan
%S Proceedings of the Third Workshop on Computational Typology and Multilingual NLP
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F hammarstrom-2021-measuring
%X It has long been recognized that suffixing is more common than prefixing in the languages of the world. More detailed statistics on this tendency are needed to sharpen proposed explanations for this tendency. The classic approach to gathering data on the prefix/suffix preference is for a human to read grammatical descriptions (948 languages), which is time-consuming and involves discretization judgments. In this paper we explore two machine-driven approaches for prefix and suffix statistics which are crude approximations, but have advantages in terms of time and replicability. The first simply searches a large collection of grammatical descriptions for occurrences of the terms ‘prefix’ and ‘suffix’ (4 287 languages). The second counts substrings from raw text data in a way indirectly reflecting prefixation and suffixation (1 030 languages, using New Testament translations). The three approaches largely agree in their measurements but there are important theoretical and practical differences. In all measurements, there is an overall preference for suffixation, albeit only slightly, at ratios ranging between 0.51 and 0.68.
%R 10.18653/v1/2021.sigtyp-1.8
%U https://aclanthology.org/2021.sigtyp-1.8
%U https://doi.org/10.18653/v1/2021.sigtyp-1.8
%P 81-89
Markdown (Informal)
[Measuring Prefixation and Suffixation in the Languages of the World](https://aclanthology.org/2021.sigtyp-1.8) (Hammarström, SIGTYP 2021)
ACL