import copy
from dataclasses import dataclass
from typing import TYPE_CHECKING, NamedTuple
import pandas as pd
from ..inference.response_generation import (
ChoiceResponseGenerationMethod,
JSONResponseGenerationMethod,
JSONVerbalizedDistribution,
LogprobResponseGenerationMethod,
ResponseGenerationMethod,
constrain_json_response_options,
copy_json_response_generation_method,
)
from ..utilities import placeholder, prompt_templates
if TYPE_CHECKING:
from ..prompt_builder import LLMPrompt
from ..utilities import constants
[docs]
@dataclass
class AnswerTexts:
"""Represents the answer choices for a questionnaire item.
This class manages the different formats of answer texts, including
lists of options and scales. It can handle answers with or without
all_answers.
Attributes:
full_answers (List[str]): A list of the complete answer strings,
including indices and separators if provided.
answer_texts (Optional[List[str]]): The text of the answer options.
indices (Optional[List[str]]): The indices corresponding to the
answer options.
index_answer_seperator (str): The separator between an index and
its corresponding answer text. Defaults to ": ".
option_seperators (Tuple[str, ...]): The separators used to join
multiple answer options into a single string. Defaults to (", ",).
only_scale (bool): If True, the answers represent a scale, and
only the first and last answer texts are used to create a
range of options. Defaults to False.
"""
full_answers: list[str]
answer_texts: list[str] | None = None
indices: list[str] | None = None
index_answer_seperator: str = ": "
option_seperators: str = (", ",)
only_scale: bool = (False,)
def __init__(
self,
answer_texts: list[str],
indices: list[str] | None = None,
index_answer_seperator: str = ": ",
option_seperators: str = ", ",
only_scale: bool = False,
):
"""Initializes the AnswerTexts object.
Args:
answer_texts (List[str]): The text of the answer options.
indices (Optional[List[str]]): The indices corresponding to the
answer options. Defaults to None.
index_answer_seperator (str): The separator between an index and
its corresponding answer text. Defaults to ": ".
option_seperators (str): The separators used to join
multiple answer options into a single string. Defaults to ", ".
only_scale (bool): If True, the answers represent a scale.
Defaults to False.
Raises:
ValueError: If neither answer_texts nor indices are provided.
"""
self.answer_texts = answer_texts
self.indices = indices
self.index_answer_seperator = index_answer_seperator
self.option_seperators = option_seperators
self.only_scale = only_scale
if self.only_scale:
full_indices = []
dummy_answer_texts = []
for index in range(int(self.indices[0]), int(self.indices[-1]) + 1):
index = str(index)
if index == self.indices[0]:
dummy_answer_texts.append(self.answer_texts[0])
elif index == self.indices[-1]:
dummy_answer_texts.append(self.answer_texts[-1])
else:
dummy_answer_texts.append("")
full_indices.append(index)
self.indices = full_indices
if len(self.answer_texts) == 2:
self.answer_texts = dummy_answer_texts
if self.answer_texts and self.indices:
self.full_answers = []
for answer_text, index in zip(self.answer_texts, self.indices):
if answer_text == "":
self.full_answers.append(f"{index}")
else:
self.full_answers.append(f"{index}{self.index_answer_seperator}{answer_text}")
elif self.answer_texts and self.indices is None:
self.full_answers = [f"{answer_text}" for answer_text in self.answer_texts]
elif self.answer_texts is None and self.indices:
self.full_answers = [f"{index}" for index in self.indices]
else:
raise ValueError("Invalid Answer Text, because neither text nor indices were given.")
[docs]
def get_list_answer_texts(self):
"""Returns the answer texts as a single string, joined by the option separators.
Returns:
str: A string representation of the list of answers.
"""
return self.option_seperators.join(self.full_answers)
[docs]
def get_scale_answer_texts(self):
"""Returns the first and last answer texts for a scale.
Returns:
Tuple[str, str]: A tuple containing the first and last answer
texts.
"""
return self.full_answers[0], self.full_answers[-1]
[docs]
@dataclass
class AnswerOptions:
"""
Stores answer options for a single question or a full questionnaire.
Args:
answer_texts (list): A list of possible answer strings.
index (list | None): Optionally store answer option indices separately,
e.g., for structured outputs.
from_to_scale (bool): If True, treat answer_text as a scale [start, ..., end].
list_prompt_template (str): A format string for list-based options.
Must contain an '{options}' placeholder.
scale_prompt_template (str): A format string for scale-based options.
Must contain '{start}' and '{end}' placeholders.
"""
answer_texts: AnswerTexts
from_to_scale: bool = False
list_prompt_template: str = prompt_templates.LIST_OPTIONS_DEFAULT
scale_prompt_template: str = prompt_templates.SCALE_OPTIONS_DEFAULT
response_generation_method: ResponseGenerationMethod | None = None
_response_generation_method: ResponseGenerationMethod | None = None
def _response_generation_options(
self,
response_generation_method: ResponseGenerationMethod | None = None,
) -> list[str]:
if response_generation_method is None:
response_generation_method = object.__getattribute__(
self,
"_response_generation_method",
)
if self.answer_texts.indices is not None and response_generation_method is not None:
if response_generation_method.output_index_only:
return list(self.answer_texts.indices)
return list(self.answer_texts.full_answers)
def _response_generation_options_text(
self,
response_generation_method: ResponseGenerationMethod | None = None,
) -> str:
if response_generation_method is None:
response_generation_method = object.__getattribute__(
self,
"_response_generation_method",
)
if self.from_to_scale:
if self.scale_prompt_template is None:
return ", ".join(self._response_generation_options(response_generation_method))
if self.answer_texts.indices is not None and response_generation_method is not None:
if response_generation_method.output_index_only:
start_option = self.answer_texts.indices[0]
end_option = self.answer_texts.indices[-1]
return self.scale_prompt_template.format(start=start_option, end=end_option)
start_option, end_option = self.answer_texts.get_scale_answer_texts()
return self.scale_prompt_template.format(start=start_option, end=end_option)
if self.list_prompt_template is None:
return ", ".join(self._response_generation_options(response_generation_method))
return self.list_prompt_template.format(
options=self.answer_texts.option_seperators.join(
self._response_generation_options(response_generation_method)
)
)
def _response_generation_scale_range_text(
self,
response_generation_method: ResponseGenerationMethod | None = None,
) -> str:
if not self.from_to_scale:
return ""
return self._response_generation_options_text(response_generation_method)
def _response_generation_prompt_formatter(
self,
response_generation_method: ResponseGenerationMethod | None = None,
) -> dict[str, str]:
return {
placeholder.PROMPT_OPTIONS: self._response_generation_options_text(
response_generation_method
),
placeholder.SCALE_RANGE: self._response_generation_scale_range_text(
response_generation_method
),
}
def _prepare_response_generation_method(
self,
response_generation_method: ResponseGenerationMethod | None,
) -> ResponseGenerationMethod | None:
prepared_method = copy.deepcopy(response_generation_method)
if prepared_method:
if isinstance(prepared_method, JSONVerbalizedDistribution):
prepared_method.set_verbalized_options(
self._response_generation_options(prepared_method),
prompt_formatter=self._response_generation_prompt_formatter(prepared_method),
)
elif isinstance(prepared_method, JSONResponseGenerationMethod):
prepared_method = copy_json_response_generation_method(
prepared_method,
prompt_formatter=self._response_generation_prompt_formatter(prepared_method),
options=self._response_generation_options_text(prepared_method),
)
if prepared_method.constrain_answer_options:
prepared_method.json_object = constrain_json_response_options(
json_object=prepared_method.json_object,
response_field=prepared_method.response_field,
options=self._response_generation_options(prepared_method),
)
elif isinstance(prepared_method, ChoiceResponseGenerationMethod) or isinstance(
prepared_method, LogprobResponseGenerationMethod
):
if prepared_method.allowed_choices_template is not None:
if prepared_method.allowed_choices_template != "{options}":
raise ValueError(
"`allowed_choices_template` currently only supports '{options}'."
)
prepared_method.allowed_choices = self._response_generation_options(
prepared_method
)
return prepared_method
def __setattr__(self, name: str, value: object) -> None:
if name == "response_generation_method":
value = self._prepare_response_generation_method(value)
object.__setattr__(self, "_response_generation_method", value)
return
object.__setattr__(self, name, value)
def __getattribute__(self, name: str) -> object:
if name == "response_generation_method":
return object.__getattribute__(self, "_response_generation_method")
return object.__getattribute__(self, name)
def __init__(
self,
answer_texts: AnswerTexts,
from_to_scale: bool = False,
list_prompt_template: str = prompt_templates.LIST_OPTIONS_DEFAULT,
scale_prompt_template: str = prompt_templates.SCALE_OPTIONS_DEFAULT,
response_generation_method: ResponseGenerationMethod | None = None,
):
self.answer_texts = answer_texts
self.from_to_scale = from_to_scale
self.list_prompt_template = list_prompt_template
self.scale_prompt_template = scale_prompt_template
self.response_generation_method = response_generation_method
[docs]
def create_options_str(self) -> str:
if self.from_to_scale:
if self.scale_prompt_template is None:
return None
if len(self.answer_texts.answer_texts) < 2:
raise ValueError(
"From-To scale requires at least a start and end value, "
f"but answer_text was set to {self.answer_texts}."
)
start_option, end_option = self.answer_texts.get_scale_answer_texts()
return self.scale_prompt_template.format(start=start_option, end=end_option)
else:
if self.list_prompt_template is None:
return None
return self.list_prompt_template.format(
options=self.answer_texts.get_list_answer_texts()
)
[docs]
class QuestionLLMResponseTuple(NamedTuple):
"""Contains the question, llm_response and optionally logprobs and built-in reasoning."""
question: str
llm_response: str
logprobs: dict[str, float] | None
reasoning: str | None
[docs]
@dataclass
class InferenceResult:
"""Contains a prompt and the corresponding responses by the LLM.
Can return results as a dataframe or return the transcript of all questions and answers.
"""
questionnaire: "LLMPrompt"
results: dict[int, QuestionLLMResponseTuple]
[docs]
def to_dataframe(self) -> pd.DataFrame:
answers = []
for item_id, question_llm_response_tuple in self.results.items():
answers.append((item_id, *question_llm_response_tuple))
return pd.DataFrame(
answers,
columns=[constants.QUESTIONNAIRE_ITEM_ID, *question_llm_response_tuple._fields],
)
[docs]
def get_questions_transcript(self) -> str:
parts = []
for i, (_, question_llm_response_tuple) in enumerate(self.results.items()):
if hasattr(self.questionnaire, "get_question"):
question_obj = self.questionnaire.get_question(i)
else:
question_obj = self.questionnaire._questions[i]
parts.append(self.questionnaire.generate_question_prompt(question_obj))
parts.append(question_llm_response_tuple.llm_response)
return "\n".join(parts)
[docs]
@dataclass
class QuestionnaireItem:
"""Represents a single questionnaire item."""
item_id: str
question_content: str | int
question_stem: str | None = None
answer_options: AnswerOptions | None = None
prefilled_response: str | None = None