% Workshop paper in the TrustNLP 2023 proceedings (co-located with ACL 2023).
% Each editor is listed exactly once; all names use the "Last, First" form.
@inproceedings{541643d6a5584a01b9e60fea2f89cd54,
  author    = {Wightman, {Gwenyth Portillo} and DeLucia, Alexandra and Dredze, Mark},
  title     = {Strength in Numbers: Estimating Confidence of Large Language Models by Prompt Agreement},
  booktitle = {3rd Workshop on Trustworthy Natural Language Processing, TrustNLP 2023 - Proceedings of the Workshop},
  editor    = {Ovalle, Anaelia and Chang, Kai-Wei and Mehrabi, Ninareh and Pruksachatkun, Yada and Galystan, Aram and Dhamala, Jwala and Verma, Apurv and Cao, Trista and Kumar, Anoop and Gupta, Rahul},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  pages     = {326--362},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2023},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2023 Proceedings of the Annual Meeting of the Association for Computational Linguistics. All rights reserved.; 3rd Workshop on Trustworthy Natural Language Processing, TrustNLP 2023, co-located with ACL 2023 ; Conference date: 14-07-2023},
  abstract  = {Large language models have achieved impressive few-shot performance on a wide variety of tasks. However, in many settings, users require confidence estimates for model predictions. While traditional classifiers produce scores for each label, language models instead produce scores for the generation which may not be well calibrated. We compare generations across diverse prompts and show that these can be used to create confidence scores. By utilizing more prompts we can get more precise confidence estimates and use response diversity as a proxy for confidence. We evaluate this approach across ten multiple-choice question-answering datasets using three models: T0, FLAN-T5, and GPT-3. In addition to analyzing multiple human written prompts, we automatically generate more prompts using a language model in order to produce finer-grained confidence estimates. Our method produces more calibrated confidence estimates compared to the log probability of the answer to a single prompt. These improvements could benefit users who rely on prediction confidence for integration into a larger system or in decision-making processes.},
}