@inproceedings{olabisi-etal-2022-analyzing,
title = "Analyzing the Dialect Diversity in Multi-document Summaries",
author = "Olabisi, Olubusayo and
Hudson, Aaron and
Jetter, Antonie and
Agrawal, Ameeta",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.542",
pages = "6208--6221",
abstract = "Social media posts provide a compelling, yet challenging source of data of diverse perspectives from many socially salient groups. Automatic text summarization algorithms make this data accessible at scale by compressing large collections of documents into short summaries that preserve salient information from the source text. In this work, we take a complementary approach to analyzing and improving the quality of summaries generated from social media data in terms of their ability to represent salient as well as diverse perspectives. We introduce a novel dataset, DivSumm, of dialect diverse tweets and human-written extractive and abstractive summaries. Then, we study the extent of dialect diversity reflected in human-written reference summaries as well as system-generated summaries. The results of our extensive experiments suggest that humans annotate fairly well-balanced dialect diverse summaries, and that cluster-based pre-processing approaches seem beneficial in improving the overall quality of the system-generated summaries without loss in diversity.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="olabisi-etal-2022-analyzing">
<titleInfo>
<title>Analyzing the Dialect Diversity in Multi-document Summaries</title>
</titleInfo>
<name type="personal">
<namePart type="given">Olubusayo</namePart>
<namePart type="family">Olabisi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Hudson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonie</namePart>
<namePart type="family">Jetter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ameeta</namePart>
<namePart type="family">Agrawal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hansaem</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Key-Sun</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pum-Mo</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Donatelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Younggyun</namePart>
<namePart type="family">Hahm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tony</namePart>
<namePart type="given">Kyungil</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Bond</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-Hoon</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Social media posts provide a compelling, yet challenging source of data of diverse perspectives from many socially salient groups. Automatic text summarization algorithms make this data accessible at scale by compressing large collections of documents into short summaries that preserve salient information from the source text. In this work, we take a complementary approach to analyzing and improving the quality of summaries generated from social media data in terms of their ability to represent salient as well as diverse perspectives. We introduce a novel dataset, DivSumm, of dialect diverse tweets and human-written extractive and abstractive summaries. Then, we study the extent of dialect diversity reflected in human-written reference summaries as well as system-generated summaries. The results of our extensive experiments suggest that humans annotate fairly well-balanced dialect diverse summaries, and that cluster-based pre-processing approaches seem beneficial in improving the overall quality of the system-generated summaries without loss in diversity.</abstract>
<identifier type="citekey">olabisi-etal-2022-analyzing</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.542</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>6208</start>
<end>6221</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analyzing the Dialect Diversity in Multi-document Summaries
%A Olabisi, Olubusayo
%A Hudson, Aaron
%A Jetter, Antonie
%A Agrawal, Ameeta
%Y Calzolari, Nicoletta
%Y Huang, Chu-Ren
%Y Kim, Hansaem
%Y Pustejovsky, James
%Y Wanner, Leo
%Y Choi, Key-Sun
%Y Ryu, Pum-Mo
%Y Chen, Hsin-Hsi
%Y Donatelli, Lucia
%Y Ji, Heng
%Y Kurohashi, Sadao
%Y Paggio, Patrizia
%Y Xue, Nianwen
%Y Kim, Seokhwan
%Y Hahm, Younggyun
%Y He, Zhong
%Y Lee, Tony Kyungil
%Y Santus, Enrico
%Y Bond, Francis
%Y Na, Seung-Hoon
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F olabisi-etal-2022-analyzing
%X Social media posts provide a compelling, yet challenging source of data of diverse perspectives from many socially salient groups. Automatic text summarization algorithms make this data accessible at scale by compressing large collections of documents into short summaries that preserve salient information from the source text. In this work, we take a complementary approach to analyzing and improving the quality of summaries generated from social media data in terms of their ability to represent salient as well as diverse perspectives. We introduce a novel dataset, DivSumm, of dialect diverse tweets and human-written extractive and abstractive summaries. Then, we study the extent of dialect diversity reflected in human-written reference summaries as well as system-generated summaries. The results of our extensive experiments suggest that humans annotate fairly well-balanced dialect diverse summaries, and that cluster-based pre-processing approaches seem beneficial in improving the overall quality of the system-generated summaries without loss in diversity.
%U https://aclanthology.org/2022.coling-1.542
%P 6208-6221
Markdown (Informal)
[Analyzing the Dialect Diversity in Multi-document Summaries](https://aclanthology.org/2022.coling-1.542) (Olabisi et al., COLING 2022)
ACL
- Olubusayo Olabisi, Aaron Hudson, Antonie Jetter, and Ameeta Agrawal. 2022. Analyzing the Dialect Diversity in Multi-document Summaries. In Proceedings of the 29th International Conference on Computational Linguistics, pages 6208–6221, Gyeongju, Republic of Korea. International Committee on Computational Linguistics.