import copy
import random
import warnings
from collections.abc import Sequence
from dataclasses import dataclass, replace
from enum import StrEnum
from string import ascii_lowercase, ascii_uppercase
from typing import Any, Literal, Self, overload
import pandas as pd
from ._questionnaire_loader import (
QuestionnaireLoaderColumn,
optional_bool,
optional_int,
optional_list,
optional_row_value,
optional_template,
row_has_value,
)
from .inference.response_generation import (
ChoiceResponseGenerationMethod,
JSONReasoningResponseGenerationMethod,
JSONSingleResponseGenerationMethod,
JSONVerbalizedDistribution,
LogprobResponseGenerationMethod,
ResponseGenerationMethod,
resolve_battery_response_generation_method,
)
from .utilities import constants, placeholder, prompt_templates
from .utilities.constants import QuestionnairePresentation
from .utilities.survey_objects import AnswerOptions, AnswerTexts, QuestionnaireItem
from .utilities.utils import safe_format_with_regex
[docs]
class ResponseGenerationPreset(StrEnum):
"""Named response-generation methods supported by questionnaire loading."""
NONE = "none"
CHOICE = "choice"
LOGPROB = "logprob"
JSON_SINGLE = "json_single"
JSON_REASONING = "json_reasoning"
JSON_DISTRIBUTION = "json_distribution"
[docs]
@dataclass(frozen=True)
class BaseModelPromptTemplate:
"""Template used to render chat-style turns for base-model prompts."""
user_prefix: str | None = "User:"
assistant_prefix: str | None = "Assistant:"
separator: str = "\n"
system_prefix: str | None = None
def _render_prefixed(prefix: str | None, content: str) -> str:
"""Render a single prompt block, preserving empty prefixes and content."""
if prefix is None:
return content
return f"{prefix}\n{content}"
[docs]
def messages_to_base_model_prompt(
    messages: Sequence[dict[str, str]],
    prompt_template: BaseModelPromptTemplate | None = None,
) -> str:
    """Flatten chat-style messages into one plain-text prompt for a base model.

    Args:
        messages: Mappings with a ``"role"`` and a ``"content"`` entry. Supported
            roles are ``"system"``, ``"user"`` and ``"assistant"``.
        prompt_template: Prefixes and separator to use. A default
            ``BaseModelPromptTemplate`` is created when omitted.

    Returns:
        The rendered blocks joined by the template separator. When the template
        has an assistant prefix, it is appended as a final block to cue the
        model's reply.

    Raises:
        ValueError: If a message carries an unsupported role.
    """
    template = prompt_template if prompt_template else BaseModelPromptTemplate()

    rendered: list[str] = []
    for message in messages:
        role, content = message["role"], message["content"]
        # Pick the prefix first so that the block is rendered in a single place.
        if role == "system":
            prefix = template.system_prefix
        elif role == "user":
            prefix = template.user_prefix
        elif role == "assistant":
            prefix = template.assistant_prefix
        else:
            raise ValueError(f"Unsupported message role for base-model rendering: {role}")
        rendered.append(_render_prefixed(prefix, content))

    # A trailing assistant prefix invites the model to continue as the assistant.
    reply_cue = template.assistant_prefix
    if reply_cue is not None:
        rendered.append(reply_cue)
    return template.separator.join(rendered)
def _build_response_generation_method(
row: pd.Series,
item_id: Any,
) -> ResponseGenerationMethod | None:
column = QuestionnaireLoaderColumn.RESPONSE_GENERATION_METHOD
value = optional_row_value(row, column)
if value is None:
return None
if isinstance(value, ResponseGenerationMethod):
return value
preset_value = str(value).strip().lower()
try:
preset = ResponseGenerationPreset(preset_value)
except ValueError as exc:
supported = ", ".join(preset.value for preset in ResponseGenerationPreset)
raise ValueError(
f"Unsupported response_generation_method '{value}' for questionnaire_item_id "
f"'{item_id}'. Supported presets are: {supported}."
) from exc
if preset == ResponseGenerationPreset.NONE:
return None
output_index_only = optional_bool(
row,
QuestionnaireLoaderColumn.OUTPUT_INDEX_ONLY,
item_id,
default=False,
)
constrain_answer_options = optional_bool(
row,
QuestionnaireLoaderColumn.CONSTRAIN_ANSWER_OPTIONS,
item_id,
default=True,
)
if preset == ResponseGenerationPreset.CHOICE:
return ChoiceResponseGenerationMethod(
allowed_choices_template="{options}",
output_index_only=output_index_only,
)
if preset == ResponseGenerationPreset.LOGPROB:
return LogprobResponseGenerationMethod(
allowed_choices_template="{options}",
output_index_only=output_index_only,
)
if preset == ResponseGenerationPreset.JSON_SINGLE:
return JSONSingleResponseGenerationMethod(
output_index_only=output_index_only,
constrain_answer_options=constrain_answer_options,
)
if preset == ResponseGenerationPreset.JSON_REASONING:
return JSONReasoningResponseGenerationMethod(
output_index_only=output_index_only,
constrain_answer_options=constrain_answer_options,
)
if preset == ResponseGenerationPreset.JSON_DISTRIBUTION:
return JSONVerbalizedDistribution(output_index_only=output_index_only)
return None
def _has_likert_config(row: pd.Series) -> bool:
    """Report whether any ``likert_*`` loader column carries a value in ``row``."""
    for column in QuestionnaireLoaderColumn:
        if column.value.startswith("likert_") and row_has_value(row, column):
            return True
    return False
def _build_answer_options_from_row(row: pd.Series, item_id: Any) -> AnswerOptions | None:
    """Build the ``AnswerOptions`` described by one questionnaire row.

    Rows with at least one populated ``likert_*`` column are delegated to
    ``generate_likert_options``. All other rows are assembled directly from the
    answer texts and/or answer codes.

    Args:
        row: Questionnaire row to read the loader columns from.
        item_id: Identifier of the item; used in error messages and forwarded to
            the loader helpers.

    Returns:
        The constructed ``AnswerOptions``, or ``None`` when the row has no Likert
        configuration, no answer texts, no answer codes and no response
        generation method.

    Raises:
        ValueError: If the Likert size cannot be determined, the Likert index
            type is unknown, a response generation method is configured without
            any answers, or answer texts and codes differ in length.
    """
    texts = optional_list(row, QuestionnaireLoaderColumn.ANSWER_TEXTS, item_id)
    codes = optional_list(row, QuestionnaireLoaderColumn.ANSWER_CODES, item_id)
    generation_method = _build_response_generation_method(row, item_id)
    list_template = optional_template(
        row,
        QuestionnaireLoaderColumn.LIST_PROMPT_TEMPLATE,
        prompt_templates.LIST_OPTIONS_DEFAULT,
    )
    scale_template = optional_template(
        row,
        QuestionnaireLoaderColumn.SCALE_PROMPT_TEMPLATE,
        prompt_templates.SCALE_OPTIONS_DEFAULT,
    )
    index_separator = optional_template(
        row,
        QuestionnaireLoaderColumn.INDEX_ANSWER_SEPARATOR,
        ": ",
    )
    option_separator = optional_template(row, QuestionnaireLoaderColumn.OPTIONS_SEPARATOR, ", ")

    if not _has_likert_config(row):
        # ---- Plain answer list (no Likert configuration) ----
        if texts is None and codes is None:
            if generation_method is None:
                return None
            raise ValueError(
                f"questionnaire_item_id '{item_id}' defines a response_generation_method "
                "but no answer_texts or answer_codes."
            )

        both_provided = texts is not None and codes is not None
        if both_provided and len(texts) != len(codes):
            raise ValueError(
                f"answer_texts and answer_codes must have the same length for "
                f"questionnaire_item_id '{item_id}'."
            )

        return AnswerOptions(
            answer_texts=AnswerTexts(
                answer_texts=texts,
                indices=codes,
                index_answer_seperator=index_separator,
                option_seperators=option_separator,
            ),
            list_prompt_template=list_template,
            scale_prompt_template=scale_template,
            response_generation_method=generation_method,
        )

    # ---- Likert configuration ----
    def likert_flag(column) -> bool:
        """Read an optional boolean Likert column that defaults to ``False``."""
        return optional_bool(row, column, item_id, default=False)

    from_to_only = likert_flag(QuestionnaireLoaderColumn.LIKERT_ONLY_FROM_TO_SCALE)

    scale_size = optional_int(row, QuestionnaireLoaderColumn.LIKERT_N, item_id)
    if scale_size is None:
        # Without an explicit size, the number of answer texts defines the scale.
        if from_to_only:
            raise ValueError(
                f"Column '{QuestionnaireLoaderColumn.LIKERT_N}' is required for "
                f"from-to Likert scales on questionnaire_item_id '{item_id}'."
            )
        if texts is None:
            raise ValueError(
                f"Column '{QuestionnaireLoaderColumn.LIKERT_N}' is required when "
                f"'{QuestionnaireLoaderColumn.ANSWER_TEXTS}' is missing for "
                f"questionnaire_item_id '{item_id}'."
            )
        scale_size = len(texts)

    index_style = str(
        optional_row_value(row, QuestionnaireLoaderColumn.LIKERT_IDX_TYPE, "integer")
    )
    if index_style not in {"char_lower", "char_upper", "integer", "no_index"}:
        raise ValueError(
            f"Column '{QuestionnaireLoaderColumn.LIKERT_IDX_TYPE}' for "
            f"questionnaire_item_id '{item_id}' must be one of: "
            "char_lower, char_upper, integer, no_index."
        )

    # The remaining columns are read in the same order as the keyword arguments below.
    shuffle_options = likert_flag(QuestionnaireLoaderColumn.LIKERT_RANDOM_ORDER)
    reverse_options = likert_flag(QuestionnaireLoaderColumn.LIKERT_REVERSED_ORDER)
    drop_middle = likert_flag(QuestionnaireLoaderColumn.LIKERT_EVEN_ORDER)
    insert_middle = likert_flag(QuestionnaireLoaderColumn.LIKERT_ADD_MIDDLE_CATEGORY)
    middle_label = str(
        optional_row_value(
            row,
            QuestionnaireLoaderColumn.LIKERT_MIDDLE_CATEGORY,
            "Neutral",
        )
    )
    with_refusal = likert_flag(QuestionnaireLoaderColumn.LIKERT_ADD_REFUSAL)
    refusal_code = str(
        optional_row_value(row, QuestionnaireLoaderColumn.LIKERT_REFUSAL_CODE, "-99")
    )
    first_index = optional_int(
        row,
        QuestionnaireLoaderColumn.LIKERT_START_IDX,
        item_id,
        default=1,
    )

    return generate_likert_options(
        n=scale_size,
        answer_texts=texts,
        only_from_to_scale=from_to_only,
        random_order=shuffle_options,
        reversed_order=reverse_options,
        even_order=drop_middle,
        add_middle_category=insert_middle,
        str_middle_cat=middle_label,
        add_refusal=with_refusal,
        refusal_code=refusal_code,
        start_idx=first_index,
        list_prompt_template=list_template,
        scale_prompt_template=scale_template,
        index_answer_separator=index_separator,
        options_separator=option_separator,
        idx_type=index_style,
        response_generation_method=generation_method,
    )
[docs]
class LLMPrompt:
    """
    Central object for configuring the prompt of an LLM questionnaire experiment.

    It holds the questionnaire items together with the system prompt and the
    user-prompt structure, and renders them into prompts for the supported
    questionnaire presentations.
    """

    # Class-level defaults.
    DEFAULT_QUESTIONNAIRE_ID: str = "Questionnaire"
    DEFAULT_SYSTEM_PROMPT: str = (
        "You will be given questions and possible answer options for each. "
        "Please reason about each question before answering."
    )
    DEFAULT_TASK_INSTRUCTION: str = ""
    DEFAULT_JSON_STRUCTURE: list[str] = ["reasoning", "answer"]
    DEFAULT_PROMPT_STRUCTURE: str = f"{placeholder.PROMPT_QUESTIONS}\n{placeholder.PROMPT_OPTIONS}"

    def __init__(
        self,
        questionnaire_source: str | pd.DataFrame = None,
        questionnaire_name: str = DEFAULT_QUESTIONNAIRE_ID,
        system_prompt: str | None = DEFAULT_SYSTEM_PROMPT,
        prompt: str = DEFAULT_PROMPT_STRUCTURE,
        verbose: bool = False,
        seed: int = 42,
    ):
        """
        Create a prompt object, optionally pre-loaded with a questionnaire.

        The questionnaire may be given as a path to a CSV file or as a pandas
        dataframe. Questions can be changed afterwards with `insert_questions`,
        `replace_question`, and `remove_question`.

        Args:
            questionnaire_source (str/pd.DataFrame): Path to the CSV file, or a
                dataframe, describing the questionnaire. Nothing is loaded when
                it is `None`, an empty string, or an empty dataframe.
            questionnaire_name (str): Name/ID for the questionnaire.
            system_prompt (str | None): System prompt for all questions.
                Set to `None` to omit a system message.
            prompt (str): Prompt for all questions.
            verbose (bool): Deprecated. Use `qstn.logger.configure_logging`
                to enable logging output.
            seed (int): Seed passed to `random.seed` for reproducibility.
        """
        if verbose:
            warnings.warn(
                "`verbose` is deprecated and will be removed in a future release. "
                "Use `qstn.logger.configure_logging` to enable logging output.",
                DeprecationWarning,
                stacklevel=2,
            )

        # Seeds the module-level random generator, i.e. process-wide state.
        random.seed(seed)

        self._questions: list[QuestionnaireItem] = []
        source_is_usable = self._check_valid_questionnaire(questionnaire_source)
        if source_is_usable:
            self.load_questionnaire_format(questionnaire_source=questionnaire_source)

        # Plain configuration attributes; assigned after loading, as before.
        self.verbose: bool = verbose
        self.questionnaire_name: str = questionnaire_name
        self.system_prompt: str | None = system_prompt
        self.prompt: str = prompt
        self.base_model_prompt_template: BaseModelPromptTemplate | None = None
def _check_valid_questionnaire(self, questionnaire_source: str | pd.DataFrame = None) -> bool:
# No Object
if questionnaire_source is None:
return False
# Empty String
if isinstance(questionnaire_source, str) and not questionnaire_source:
return False
# Empty Dataframe
if isinstance(questionnaire_source, pd.DataFrame):
if questionnaire_source.empty:
warnings.warn(
"The provided Dataframe is empty! No questions are created.", stacklevel=2
)
return False
# Optional check if the correct columns are provided?
# Would probably be nice to have that warning here.
return True
[docs]
def duplicate(self):
    """
    Return an independent deep copy of this object.

    Returns:
        LLMPrompt: A copy that shares no mutable state with the original.
    """
    clone = copy.deepcopy(self)
    return clone
[docs]
def set_base_model_prompt_template(
    self,
    template: BaseModelPromptTemplate | None = None,
    user_prefix: str | None = "User:",
    assistant_prefix: str | None = "Assistant:",
    separator: str = "\n",
    system_prefix: str | None = None,
) -> Self:
    """Store the template used to render prompts for base-model completion mode.

    Args:
        template (BaseModelPromptTemplate | None): Ready-made template to store.
            When given, the remaining arguments are ignored.
        user_prefix (str | None): Prefix placed before each user turn.
        assistant_prefix (str | None): Prefix placed before assistant turns and
            used as the final cue.
        separator (str): Text inserted between rendered conversation blocks.
        system_prefix (str | None): Optional prefix placed before the system prompt.

    Returns:
        LLMPrompt: This object, to allow fluent configuration.
    """
    if template is None:
        # No ready-made template: assemble one from the individual pieces.
        template = BaseModelPromptTemplate(
            user_prefix=user_prefix,
            assistant_prefix=assistant_prefix,
            separator=separator,
            system_prefix=system_prefix,
        )
    self.base_model_prompt_template = template
    return self
[docs]
def render_base_model_prompt(
    self,
    system_message: str | None,
    prompts: list[str],
    assistant_messages: list[str] | None = None,
) -> str:
    """Render chat-style turns into the prompt text used for base-model generation.

    Args:
        system_message (str | None): Optional system text placed before the turns.
        prompts (list[str]): User turns to render, in order.
        assistant_messages (list[str] | None): Assistant replies; the reply at
            index ``i`` is placed directly after the user turn at index ``i``.
            Replies beyond the number of user turns are not rendered.

    Returns:
        str: Rendered base-model prompt.
    """
    replies = assistant_messages or []

    conversation = (
        [] if system_message is None else [{"role": "system", "content": system_message}]
    )
    for turn, user_text in enumerate(prompts):
        conversation.append({"role": "user", "content": user_text})
        if turn < len(replies):
            conversation.append({"role": "assistant", "content": replies[turn]})

    return messages_to_base_model_prompt(conversation, self.base_model_prompt_template)
[docs]
def get_prompt_for_questionnaire_type(
    self,
    questionnaire_type: QuestionnairePresentation = QuestionnairePresentation.SINGLE_ITEM,
    item_id: str | int | None = None,
    item_position: int | None = 0,
    item_separator: str = "\n",
    inference_type: Literal["chat", "generation"] = "chat",
) -> tuple[str | None, str]:
    """
    Generate the full prompt for a given questionnaire presentation.

    Args:
        questionnaire_type (QuestionnairePresentation):
            The type of questionnaire prompt to generate.
        item_id (str | int | None):
            The id of the questionnaire item that should be shown.
            If both item_id and item_position are provided, only item_id is considered.
            `None` (or an empty string) means "not provided".
        item_position (int | None): The question at that position will be shown.
            Ignored when item_id is provided. Defaults to the first question.
        item_separator (str): For QuestionnairePresentation.BATTERY, the string
            that separates the individual questions.
        inference_type (str): If "chat", return system and user messages.
            If "generation", return the exact rendered base-model prompt.

    Returns:
        Tuple(str | None, str): The first element corresponds to the system prompt,
        the second element to the prompt. For "generation" the first element is
        always `None` and the second is the rendered base-model prompt.

    Raises:
        ValueError: If item_id does not exist, if neither item_id nor
            item_position is usable, if item_position is beyond the last
            question, if questionnaire_type is not supported, or if
            inference_type is neither "chat" nor "generation".
    """

    def options_text(item) -> str:
        """Return the rendered answer options of ``item`` ('' when it has none)."""
        if item.answer_options:
            return item.answer_options.create_options_str()
        return ""

    # --- Resolve the reference question -------------------------------------
    # `is not None` (rather than truthiness) keeps falsy-but-valid ids such as 0
    # usable; an empty string keeps its previous meaning of "no id given".
    id_provided = item_id is not None and item_id != ""
    if id_provided:
        question_map = {question.item_id: question for question in self._questions}
        if item_id not in question_map:
            raise ValueError("item_id does not exist.")
        question_item = question_map[item_id]
        reference_item_position = next(
            i for i, question in enumerate(self._questions) if question.item_id == item_id
        )
    else:
        if item_position is None:
            raise ValueError("Either item_id or item_position must be provided.")
        if item_position >= len(self._questions):
            raise ValueError("item_order_id is bigger than the number of questions")
        question_item = self._questions[item_position]
        reference_item_position = item_position

    # --- Build the placeholder values for the requested presentation --------
    if questionnaire_type in (
        QuestionnairePresentation.SINGLE_ITEM,
        QuestionnairePresentation.SEQUENTIAL,
    ):
        questions_text = self.generate_question_prompt(question_item)
        options = ""
        automatic_output_instructions = ""
        if question_item.answer_options:
            options = question_item.answer_options.create_options_str()
            rgm = question_item.answer_options.response_generation_method
            # By default, no response generation method is required.
            if rgm is not None:
                automatic_output_instructions = rgm.get_automatic_prompt()
    elif questionnaire_type == QuestionnairePresentation.BATTERY:
        rendered_items: list[str] = []
        for item in self._questions:
            item_prompt = self.generate_question_prompt(item)
            # Fill an options placeholder that is still open in the item text.
            item_prompt = safe_format_with_regex(
                item_prompt, {placeholder.PROMPT_OPTIONS: options_text(item)}
            )
            rendered_items.append(item_prompt)
        questions_text = item_separator.join(rendered_items)

        # The prompt-level options are those of the reference question.
        options = options_text(question_item)
        rgm = resolve_battery_response_generation_method(
            questions=list(self._questions),
            item_position=reference_item_position,
        )
        # By default, no response generation method is required.
        automatic_output_instructions = "" if rgm is None else rgm.get_automatic_prompt()
    else:
        # Previously this fell through and failed later on an unbound variable.
        raise ValueError(f"Unsupported questionnaire_type: {questionnaire_type!r}")

    format_dict = {
        placeholder.PROMPT_QUESTIONS: questions_text,
        placeholder.PROMPT_OPTIONS: options,
        placeholder.PROMPT_AUTOMATIC_OUTPUT_INSTRUCTIONS: automatic_output_instructions,
    }

    # --- Fill the templates ---------------------------------------------------
    if self.system_prompt is None:
        system_prompt = None
    else:
        system_prompt = safe_format_with_regex(self.system_prompt, format_dict)
    prompt = safe_format_with_regex(self.prompt, format_dict)

    if inference_type == "generation":
        return None, self.render_base_model_prompt(system_prompt, [prompt])
    if inference_type != "chat":
        raise ValueError("`inference_type` must be either 'chat' or 'generation'.")
    return system_prompt, prompt
def _get_token_counter(
    self,
    model_id: str,
    tokenizer_backend: Literal["tiktoken", "transformers"],
):
    """Return a ``count_tokens(text)`` callable for the requested tokenizer backend.

    The backend package is imported lazily, so it is only needed when that
    backend is actually selected.

    Args:
        model_id: Model name passed to ``tiktoken.encoding_for_model`` or
            ``AutoTokenizer.from_pretrained``.
        tokenizer_backend: Either ``"tiktoken"`` or ``"transformers"``.

    Returns:
        A callable mapping ``str | None`` to the number of tokens; ``None``
        counts as zero tokens.

    Raises:
        ImportError: If the package for the selected backend is not installed.
        ValueError: If ``tokenizer_backend`` is not a supported value.
    """
    if tokenizer_backend == "tiktoken":
        # Mirror the transformers branch: fail with an actionable message.
        try:
            import tiktoken
        except ImportError as exc:
            raise ImportError(
                "Token estimation with tokenizer_backend='tiktoken' requires "
                "the 'tiktoken' package."
            ) from exc

        encoding = tiktoken.encoding_for_model(model_id)

        def count_tokens(text: str | None) -> int:
            if text is None:
                return 0
            # disallowed_special=() lets special-token-looking text be encoded
            # as ordinary text instead of raising.
            return len(encoding.encode(text, disallowed_special=()))

        return count_tokens

    if tokenizer_backend == "transformers":
        try:
            from transformers import AutoTokenizer
        except ImportError as exc:
            raise ImportError(
                "Token estimation with tokenizer_backend='transformers' requires "
                "the optional 'transformers' package."
            ) from exc

        tokenizer = AutoTokenizer.from_pretrained(model_id)

        def count_tokens(text: str | None) -> int:
            if text is None:
                return 0
            return len(tokenizer.encode(text, add_special_tokens=False))

        return count_tokens

    raise ValueError("`tokenizer_backend` must be either 'tiktoken' or 'transformers'.")
@staticmethod
def _count_chat_input_tokens(
system_prompt: str | None,
prompt: str,
count_tokens,
tokenizer_backend: Literal["tiktoken", "transformers"],
) -> int:
"""Count chat message content with a small OpenAI chat wrapper estimate."""
message_count = 1 + (1 if system_prompt is not None else 0)
content_tokens = count_tokens(system_prompt) + count_tokens(prompt)
if tokenizer_backend == "tiktoken":
# OpenAI chat APIs add structural tokens around each message plus a reply cue.
return content_tokens + message_count * 3 + 3
return content_tokens
[docs]
def get_questions(self) -> tuple[QuestionnaireItem, ...]:
    """
    Return the loaded questions as a tuple.

    The tuple is a snapshot: later changes to the questionnaire do not affect it.

    Returns:
        Tuple[QuestionnaireItem, ...]: Loaded questions.
    """
    snapshot = (*self._questions,)
    return snapshot
@property
def questions(self) -> tuple[QuestionnaireItem, ...]:
    """Questionnaire items as a tuple snapshot (read-only)."""
    return (*self._questions,)
[docs]
def get_question(self, position: int) -> QuestionnaireItem:
    """Look up the question stored at index ``position``."""
    question = self._questions[position]
    return question
[docs]
def replace_question(self, position: int, questionnaire_item: QuestionnaireItem) -> None:
    """Overwrite the question stored at index ``position`` with ``questionnaire_item``."""
    stored_questions = self._questions
    stored_questions[position] = questionnaire_item
[docs]
def remove_question(self, position: int) -> None:
    """Delete the question stored at index ``position``."""
    stored_questions = self._questions
    del stored_questions[position]
[docs]
def get_question_item_id(self, position: int) -> Any:
    """Look up the item id of the question stored at index ``position``."""
    question = self._questions[position]
    return question.item_id
# TODO Item order could be given by ids
@overload
def prepare_prompt(
    self,
    question_stem: str | None = None,
    answer_options: AnswerOptions | None = None,
    prefilled_responses: dict[int, str] | None = None,
    randomized_item_order: bool = False,
) -> Self: ...


@overload
def prepare_prompt(
    self,
    question_stem: list[str] | None = None,
    answer_options: dict[str, AnswerOptions] | None = None,
    prefilled_responses: dict[int, str] | None = None,
    randomized_item_order: bool = False,
) -> Self: ...


def prepare_prompt(
    self,
    question_stem: str | list[str] | None = None,
    answer_options: AnswerOptions | dict[str, AnswerOptions] | None = None,
    prefilled_responses: dict[int, str] | None = None,
    randomized_item_order: bool = False,
) -> Self:
    """
    Prepare the interview by assigning question stems, answer options, and prefilled responses.

    Args:
        question_stem (str or List[str], optional): One stem applied to every
            question, or a list with exactly one stem per question (matched by
            position). When no stem is given, each question keeps its own.
        answer_options (AnswerOptions or Dict[item_id, AnswerOptions], optional):
            One set of answer options for all questions, or a dictionary that is
            looked up by each question's item id.
        prefilled_responses (Dict[item_id, str], optional):
            If you provide prefilled responses, they will be used
            to fill the answers instead of prompting the LLM for that question.
            Looked up by each question's item id.
        randomized_item_order (bool): If True, randomize the order of questions.

    Returns:
        Self: The updated instance with prepared questions.

    Raises:
        AssertionError: If a list of question stems is given whose length differs
            from the number of questions.
    """
    questionnaire_questions: list[QuestionnaireItem] = self._questions

    stems_per_question = isinstance(question_stem, list)
    if stems_per_question and len(question_stem) != len(questionnaire_questions):
        # Raised explicitly (not via `assert`) so the check survives `python -O`;
        # the exception type is kept for backward compatibility.
        raise AssertionError(
            "If a list of question stems is given, length of prompt "
            "and survey questions have to be the same"
        )

    options_per_question = isinstance(answer_options, dict)
    prefilled = prefilled_responses or {}

    # NOTE(review): answer options and prefilled responses are always overwritten,
    # so a question that has no entry (or when `answer_options` is None) ends up
    # with `None` for that field. Confirm that clearing is intended.
    updated_questions: list[QuestionnaireItem] = []
    for index, question in enumerate(questionnaire_questions):
        if stems_per_question:
            stem = question_stem[index]
        else:
            # An empty or missing shared stem keeps the question's own stem.
            stem = question_stem if question_stem else question.question_stem

        if options_per_question:
            options = answer_options.get(question.item_id)
        else:
            options = answer_options

        updated_questions.append(
            replace(
                question,
                question_stem=stem,
                answer_options=options,
                prefilled_response=prefilled.get(question.item_id),
            )
        )

    if randomized_item_order:
        random.shuffle(updated_questions)

    self._questions = updated_questions
    return self
[docs]
def generate_question_prompt(self, questionnaire_items: QuestionnaireItem) -> str:
    """
    Build the prompt text for a single questionnaire item.

    A stem containing the question-content placeholder has the content
    substituted into it; any other non-empty stem is followed by a space and the
    content; without a stem the content stands alone. If the item has answer
    options that render to a string, the options placeholder in the resulting
    text is filled as well.

    Args:
        questionnaire_items (QuestionnaireItem): The question to prompt.

    Returns:
        str: The formatted prompt for the question.
    """
    item = questionnaire_items
    stem = item.question_stem

    if not stem:
        prompt_text = f"{item.question_content}"
    elif placeholder.QUESTION_CONTENT in stem:
        prompt_text = safe_format_with_regex(
            stem, {placeholder.QUESTION_CONTENT: item.question_content}
        )
    else:
        prompt_text = f"{stem} {item.question_content}"

    if item.answer_options:
        rendered_options = item.answer_options.create_options_str()
        if rendered_options is not None:
            prompt_text = safe_format_with_regex(
                prompt_text, {placeholder.PROMPT_OPTIONS: rendered_options}
            )
    return prompt_text
def __len__(self) -> int:
    """
    Number of questions currently held by this LLMPrompt.

    Returns:
        int: The number of questions.
    """
    question_count = len(self._questions)
    return question_count
def __str__(self) -> str:
    """
    Human-readable rendering of the questionnaire name, the system prompt and
    the user prompt, using the Battery presentation.
    """
    system_text, user_text = self.get_prompt_for_questionnaire_type(
        questionnaire_type=QuestionnairePresentation.BATTERY
    )
    sections = [
        f"=== {self.questionnaire_name} ===",
        f"=== SYSTEM_PROMPT ===\n{system_text}",
        f"=== USER_PROMPT_WITH_ALL_QUESTIONS ===\n{user_text}",
    ]
    return "\n".join(sections)
[docs]
def insert_questions(
    self,
    items: QuestionnaireItem | list[QuestionnaireItem],
    position: int = None,
) -> None:
    """Insert one or more questions into the questionnaire.

    Args:
        items (Union[QuestionnaireItem, List[QuestionnaireItem]]): A single
            QuestionnaireItem, or a list/tuple of items, to insert.
        position (int): The index before which the questions are inserted.
            The default, None, appends them at the end.
    """
    insert_at = len(self._questions) if position is None else position
    new_items = items if isinstance(items, (list, tuple)) else [items]
    # Slice assignment with an empty slice splices all items in at once.
    self._questions[insert_at:insert_at] = new_items
# Index styles accepted by `generate_likert_options` through its `idx_type` parameter.
_IDX_TYPES = Literal["char_lower", "char_upper", "integer", "no_index"]
[docs]
def generate_likert_options(
    n: int,
    answer_texts: list[str] | None,
    only_from_to_scale: bool = False,
    random_order: bool = False,
    reversed_order: bool = False,
    even_order: bool = False,
    add_middle_category: bool = False,
    str_middle_cat: str = "Neutral",
    add_refusal: bool = False,
    refusal_code: str = "-99",
    start_idx: int = 1,
    list_prompt_template: str = prompt_templates.LIST_OPTIONS_DEFAULT,
    scale_prompt_template: str = prompt_templates.SCALE_OPTIONS_DEFAULT,
    index_answer_separator: str = ": ",
    options_separator: str = ", ",
    idx_type: _IDX_TYPES = "integer",
    response_generation_method: ResponseGenerationMethod | None = None,
) -> AnswerOptions:
    """Generates a set of options and a prompt for a Likert-style scale.

    This function creates a numeric or alphabetic scale of a specified size (n),
    optionally attaching textual labels to the scale. It provides
    extensive control over ordering, formatting, and the final prompt string.

    The list passed as `answer_texts` is never modified; all reordering and
    extending happens on an internal copy.

    Args:
        n (int): The number of options to generate (e.g., 5 for a 5-point scale).
        answer_texts (Optional[List[str]]): A list of text labels for each option.
            Unless `only_from_to_scale` is set, a non-empty list must have
            exactly `n` entries.
        only_from_to_scale (bool, optional): If True, the prompt will only show the
            min and max of the scale (e.g., "1 to 5"). Requires
            `idx_type="integer"`. Defaults to False.
        random_order (bool, optional): If True, the options are randomized. Defaults to False.
        reversed_order (bool, optional): If True, the options are in reversed input order.
            Defaults to False.
        even_order (bool, optional): If True, the center option is removed from
            an odd scale. E.g., for n=5: 1, 2, 4, 5
        add_middle_category (bool, optional): If True, a middle category is added
            to an even scale. The name can be specified,
            by default it is "Neutral". E.g., for n=4: 1, 2, 3: Neutral, 4, 5
        str_middle_cat (str, optional): The label for the middle category
            if `add_middle_category` is True.
            Defaults to "Neutral".
        add_refusal (bool, optional): If True, an additional option
            "Don't know / Refuse to answer" is appended.
            Defaults to False.
        refusal_code (str, optional): The code assigned to the refusal option
            if `add_refusal` is True and `idx_type` is "integer".
            Defaults to "-99".
        start_idx (int, optional): The starting index for the scale (usually 0 or 1).
            Defaults to 1.
        list_prompt_template (str, optional): The template for prompts that list all options.
        scale_prompt_template (str, optional): The template for prompts that only show the range.
        index_answer_separator (str, optional): The string used to separate an index from its
            text label (e.g., "1: Strongly Agree"). Defaults to ": ".
        options_separator (str, optional): The string used to separate options when listed
            in the prompt. Defaults to ", ".
        idx_type (_IDX_TYPES, optional): The type of index to use: "integer",
            "char_upper" (A, B, C), "char_lower" (a, b, c), or "no_index" for no
            indices at all. Defaults to "integer".
        response_generation_method (Optional[ResponseGenerationMethod], optional): An object
            controlling how the final response object is generated. Defaults to None.

    Raises:
        ValueError: If a from-to scale is requested with a non-integer `idx_type`;
            if `answer_texts` is provided and its length does not match `n`;
            if `even_order` is used on an even scale or `add_middle_category` on
            an odd scale; if fewer than two options are to be reordered; or if
            `answer_texts` is None while an option that edits the labels
            (`even_order`, `add_middle_category`, `random_order`,
            `reversed_order`, `add_refusal`) is enabled.

    Returns:
        AnswerOptions: An object containing the generated list of option strings and the
        final formatted prompt ready for display.

    Example:
        .. code-block:: python

            # Generate a classic 5-point "Strongly Disagree" to "Strongly Agree" scale
            labels = [
                "Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"
            ]
            options = generate_likert_options(n=5, answer_texts=labels)
    """
    # Work on a private copy: shuffling and appending below must never leak
    # into the list owned by the caller.
    if answer_texts is not None:
        answer_texts = list(answer_texts)

    if only_from_to_scale:
        # NOTE: the number of labels of a from-to scale is not validated here.
        if idx_type != "integer":
            raise ValueError(
                "From-To scales require an integer scale index, but "
                f"idx_type was set to '{idx_type}'."
            )
    elif answer_texts and len(answer_texts) != n:
        raise ValueError(
            "answer_texts and n need to be the same length, but "
            f"answer_texts has length {len(answer_texts)} "
            f"and n was given as {n}."
        )

    # Every option below edits the labels, so it cannot work without them.
    if answer_texts is None:
        label_editing_flags = {
            "even_order": even_order,
            "add_middle_category": add_middle_category,
            "random_order": random_order,
            "reversed_order": reversed_order,
            "add_refusal": add_refusal,
        }
        requested = [name for name, enabled in label_editing_flags.items() if enabled]
        if requested:
            raise ValueError(
                "answer_texts must be provided when using: " + ", ".join(requested) + "."
            )

    if even_order:
        if n % 2 == 0:
            raise ValueError("If you want to turn a scale even, it should be odd before.")
        middle_index = n // 2
        answer_texts = answer_texts[:middle_index] + answer_texts[middle_index + 1 :]
        n -= 1

    if add_middle_category:
        if n % 2 != 0:
            raise ValueError("If you want to add a middle category, it should be even before.")
        middle_index = n // 2
        answer_texts = answer_texts[:middle_index] + [str_middle_cat] + answer_texts[middle_index:]
        n += 1

    if random_order:
        if len(answer_texts) < 2:
            raise ValueError("There must be at least two answer options to reorder randomly.")
        random.shuffle(answer_texts)  # shuffles the private copy in place

    if reversed_order:
        if len(answer_texts) < 2:
            raise ValueError("There must be at least two answer options to reorder in reverse.")
        answer_texts = answer_texts[::-1]

    if add_refusal:
        answer_texts.append("Don't know / Refuse to answer")
        n += 1

    # --- Build the indices that accompany the labels --------------------------
    if idx_type == "no_index":
        # No index, just the answer options directly.
        answer_option_indices = None
    elif idx_type == "integer":
        # The refusal option (always last) gets the dedicated refusal code
        # instead of the next running number.
        numbered = n - 1 if add_refusal else n
        answer_option_indices = [str(start_idx + offset) for offset in range(numbered)]
        if add_refusal:
            answer_option_indices.append(refusal_code)
    else:
        # TODO @Jens add these to constants.py
        # NOTE(review): `start_idx` is an offset into the alphabet, so the
        # default of 1 starts at "b"/"B", and the refusal option receives a
        # letter rather than `refusal_code`. Confirm that this is intended.
        alphabet = {"char_lower": ascii_lowercase, "char_upper": ascii_uppercase}.get(idx_type)
        if alphabet is None:
            answer_option_indices = []
        else:
            answer_option_indices = [alphabet[(start_idx + offset) % 26] for offset in range(n)]

    answer_texts_object = AnswerTexts(
        answer_texts=answer_texts,
        indices=answer_option_indices,
        index_answer_seperator=index_answer_separator,
        option_seperators=options_separator,
        only_scale=only_from_to_scale,
    )
    return AnswerOptions(
        answer_texts=answer_texts_object,
        from_to_scale=only_from_to_scale,
        list_prompt_template=list_prompt_template,
        scale_prompt_template=scale_prompt_template,
        response_generation_method=response_generation_method,
    )