% Conference paper record; ACL Anthology ID 2023.emnlp-main.657 (see the url and doi fields).
@inproceedings{wang-etal-2023-goal,
  title     = {Goal-Driven Explainable Clustering via Language Descriptions},
  author    = {Wang, Zihan and Shang, Jingbo and Zhong, Ruiqi},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-main.657},
  doi       = {10.18653/v1/2023.emnlp-main.657},
  pages     = {10626--10649},
  abstract  = {Unsupervised clustering is widely used to explore large corpora, but existing formulations neither consider the users{'} goals nor explain clusters{'} meanings. We propose a new task formulation, {``}Goal-Driven Clustering with Explanations{''} (GoalEx), which represents both the goal and the explanations as free-form language descriptions. For example, to categorize the errors made by a summarization system, the input to GoalEx is a corpus of annotator-written comments for system-generated summaries and a goal description {``}cluster the comments based on why the annotators think the summary is imperfect.{''}; the outputs are text clusters each with an explanation ({``}this cluster mentions that the summary misses important context information.{''}), which relates to the goal and accurately explains which comments should (not) belong to a cluster. To tackle GoalEx, we prompt a language model with {``}[corpus subset] + [goal] + Brainstorm a list of explanations each representing a cluster.{''}; then we classify whether each sample belongs to a cluster based on its explanation; finally, we use integer linear programming to select a subset of candidate clusters to cover most samples while minimizing overlaps. Under both automatic and human evaluation on corpora with or without labels, our method produces more accurate and goal-related explanations than prior methods.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record: the paper itself, with the proceedings volume as its host relatedItem. -->
  <mods ID="wang-etal-2023-goal">
    <titleInfo>
      <title>Goal-Driven Explainable Clustering via Language Descriptions</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Zihan</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jingbo</namePart>
      <namePart type="family">Shang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ruiqi</namePart>
      <namePart type="family">Zhong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Unsupervised clustering is widely used to explore large corpora, but existing formulations neither consider the users’ goals nor explain clusters’ meanings. We propose a new task formulation, “Goal-Driven Clustering with Explanations” (GoalEx), which represents both the goal and the explanations as free-form language descriptions. For example, to categorize the errors made by a summarization system, the input to GoalEx is a corpus of annotator-written comments for system-generated summaries and a goal description “cluster the comments based on why the annotators think the summary is imperfect.”; the outputs are text clusters each with an explanation (“this cluster mentions that the summary misses important context information.”), which relates to the goal and accurately explains which comments should (not) belong to a cluster. To tackle GoalEx, we prompt a language model with “[corpus subset] + [goal] + Brainstorm a list of explanations each representing a cluster.”; then we classify whether each sample belongs to a cluster based on its explanation; finally, we use integer linear programming to select a subset of candidate clusters to cover most samples while minimizing overlaps. Under both automatic and human evaluation on corpora with or without labels, our method produces more accurate and goal-related explanations than prior methods.</abstract>
    <identifier type="citekey">wang-etal-2023-goal</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-main.657</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-main.657</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>10626</start>
        <end>10649</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Goal-Driven Explainable Clustering via Language Descriptions
%A Wang, Zihan
%A Shang, Jingbo
%A Zhong, Ruiqi
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F wang-etal-2023-goal
%X Unsupervised clustering is widely used to explore large corpora, but existing formulations neither consider the users’ goals nor explain clusters’ meanings. We propose a new task formulation, “Goal-Driven Clustering with Explanations” (GoalEx), which represents both the goal and the explanations as free-form language descriptions. For example, to categorize the errors made by a summarization system, the input to GoalEx is a corpus of annotator-written comments for system-generated summaries and a goal description “cluster the comments based on why the annotators think the summary is imperfect.”; the outputs are text clusters each with an explanation (“this cluster mentions that the summary misses important context information.”), which relates to the goal and accurately explains which comments should (not) belong to a cluster. To tackle GoalEx, we prompt a language model with “[corpus subset] + [goal] + Brainstorm a list of explanations each representing a cluster.”; then we classify whether each sample belongs to a cluster based on its explanation; finally, we use integer linear programming to select a subset of candidate clusters to cover most samples while minimizing overlaps. Under both automatic and human evaluation on corpora with or without labels, our method produces more accurate and goal-related explanations than prior methods.
%R 10.18653/v1/2023.emnlp-main.657
%U https://aclanthology.org/2023.emnlp-main.657
%U https://doi.org/10.18653/v1/2023.emnlp-main.657
%P 10626-10649
Markdown (Informal)
[Goal-Driven Explainable Clustering via Language Descriptions](https://aclanthology.org/2023.emnlp-main.657) (Wang et al., EMNLP 2023)
ACL