from typing import Dict, List
from haystack.schema import Document
from api.embeddingsServiceCaller import EmbeddingServiceCaller
from helper.openai import (
    openai_doc_reference_prompt_v1,
    openai_doc_citation_prompt_v2,
    openai_wpm_recommendation,
    MAX_GPT4_TOKENS,
    GPT4_COMPLETION_TOKENS,
    MAX_GPT35_TURBO_TOKENS,
    RERANKING_TOKENS,
    count_prompt_tokens_gpt4,
    count_prompt_tokens_gpt35,
)
import json

AUTHOR_MAPPING = {
    "Wolf": "Prof. Dr. Ivo Wolf",
    "Hummel": "Prof. Dr. Oliver Hummel",
    "Fimmel": "Prof. Dr. Elena Fimmel",
    "Eckert": "Prof. Dr. rer. nat. Kai Eckert",
    "Fischer": "Prof. Dr. Jörn Fischer",
    "Gröschel": "Prof. Dr. Michael Gröschel",
    "Gumbel": "Prof. Dr. Markus Gumbel",
    "Nagel": "Prof. Dr. Till Nagel",
    "Specht": "Prof. Dr. Thomas Specht",
    "Steinberger": "Prof. Dr. Jessica Steinberger",
    "Dietrich": "Prof. Dr. Gabriele Roth-Dietrich",
    "Dopatka": "Prof. Dr. rer. nat. Frank Dopatka",
    "Kraus": "Prof. Dr. Stefan Kraus",
    "Leuchter": "Prof. Dr.-Ing. Sandro Leuchter",
    "Paulus": "Prof. Dr. Sachar Paulus",
}


class Reader:
    THRESHOLD = 0.5

    def __init__(self, caller: EmbeddingServiceCaller) -> None:
        """
        Initializes the Reader class for generating answers.

        NOTE: The BERT reader lives in question_answering.py.

        Args:
            caller (EmbeddingServiceCaller): Client used to call the model service.
        """
        self.caller = caller

    def get_gpt_wpm_recommendation(self, top_k_wpms: List[Document], query: str):
        """
        Generates GPT-based recommendations for WPMs from the provided top-k documents.
        The prompt is also built here and its tokens are counted.
        """
        current_token_count = count_prompt_tokens_gpt4(openai_wpm_recommendation)
        reference = ""
        picked_references = []
        for doc in top_k_wpms:
            # Only keep a document if it still fits into the GPT-4 context window,
            # leaving room for the completion tokens.
            current_token_count += count_prompt_tokens_gpt4(doc.content)
            if current_token_count < MAX_GPT4_TOKENS - GPT4_COMPLETION_TOKENS:
                meta = doc.meta
                title = meta.get("name_de")
                description = meta.get("inhalte_de")
                profs = meta.get("dozenten")
                reference += f'"""\nCourse Title:\n{title}\nCourse Description:\n{description}\nProfessors:\n{profs}\n"""\n\n'
                picked_references.append(doc)

        payload = json.dumps(
            {
                "reference": reference,
                "question": query,
                "model": "GPT",
                "prompt": openai_wpm_recommendation,
            }
        )
        return self.caller.get_answer(payload=payload), picked_references

    def get_gpt_expert_search_answer(
        self,
        top_k_passages: List[Document],
        query: str,
        prompt: str = openai_doc_reference_prompt_v1,
    ):
        """
        Generates an answer with GPT for the expert search based on the provided top-k passages.
        The prompt is also built here and its tokens are counted.

        Args:
            top_k_passages (List[Document]): Top-k documents retrieved from the search.
            query (str): User query string.
            prompt (str, optional): System prompt with general instructions.
        """
        current_token_count = count_prompt_tokens_gpt4(prompt)
        reference = ""
        picked_references = []
        for doc in top_k_passages:
            current_token_count += count_prompt_tokens_gpt4(doc.content)
            if current_token_count < MAX_GPT4_TOKENS - GPT4_COMPLETION_TOKENS:
                title = doc.meta.get("title", "")
                abstract = doc.meta.get("abstract", "")
                author = AUTHOR_MAPPING.get(doc.meta.get("author", ""), "unknown")
                reference += f'"""\nTitle:\n{title}\nAuthor:\n{author}\nAbstract:\n{abstract}\n"""\n\n'
                picked_references.append(doc)

        payload = json.dumps(
            {
                "reference": reference,
                "question": query,
                "model": "GPT",
                "prompt": prompt,
            }
        )
        return self.caller.get_answer(payload=payload), picked_references

    def get_gpt_answer(
        self,
        top_k_passages: List[Document],
        query: str,
        prompt: str = openai_doc_reference_prompt_v1,
    ):
        """
        Generates a generic GPT-based answer from the provided top-k passages,
        e.g. for questions about the StuPO (study regulations) or crawled web data.

        Args:
            top_k_passages (List[Document]): Top-k documents retrieved from the search.
            query (str): User query string.
            prompt (str, optional): System prompt with general instructions. Defaults to openai_doc_reference_prompt_v1.

        Returns:
            Tuple: The answer from the GPT model and the documents used to generate it.
        """
        current_token_count = count_prompt_tokens_gpt4(prompt)
        reference = ""
        picked_references = []
        for doc in top_k_passages:
            current_token_count += count_prompt_tokens_gpt4(doc.content)
            if current_token_count < MAX_GPT4_TOKENS - GPT4_COMPLETION_TOKENS:
                reference += f'"""\n{doc.content}\n"""\n\n'
                picked_references.append(doc)

        payload = json.dumps(
            {
                "reference": reference,
                "question": query,
                "model": "GPT",
                "prompt": prompt,
            }
        )
        return self.caller.get_answer(payload=payload), picked_references

    def generate_llama_answer(self, top_k_passages: List[Document], query: str):
        """
        Generates an answer using the Llama model based on the provided top-k passages.
        """
        picked_references = []
        reference = ""
        # Fallback so the payload can always be built, even if no passage passes the threshold.
        prompt = ""
        if top_k_passages:
            # Use at most the two best passages and only those above the score threshold.
            for doc in top_k_passages[:2]:
                if doc.score >= self.THRESHOLD:
                    picked_references.append(doc)
                    reference += f'"""\n{doc.content}\n"""\n\n'
        if reference:
            prompt = f"""
            Your Task is to use the provided articles delimited by triple quotes to answer questions. If the answer cannot be found in the articles, write "I could not find an answer.". If you find a suitable answer, then reply to the question as if you already knew the answer and do not mention the provided articles in your response. Answer in German.
            [INST] User: {query}\n\nArticles:{reference}[/INST]\n\nAssistant:"""
        payload = json.dumps(
            {
                "reference": reference,
                "question": query,
                "model": "HF",
                "prompt": prompt,
            }
        )
        return self.caller.get_answer(payload=payload, llama=True), picked_references

    def get_answers(
        self,
        top_k_passages: List[Document],
        query: str,
        index: str,
        model: str = "",
    ):
        """
        Retrieves answers based on the specified model (GPT or Llama) and the top-k passages.

        Args:
            top_k_passages (List[Document]): Top-k documents retrieved from the search.
            query (str): User query string.
            index (str): The index the documents were retrieved from. This clarifies whether
                this is an expert search, a WPM recommendation, or anything else.
            model (str, optional): The model to use for generating answers (GPT or Llama). Defaults to an empty string.

        Returns:
            Dict: The response containing the answer.
        """
        if index in ["stupo", "crawled_hsma"]:
            if model == "GPT":
                return self.get_gpt_answer(top_k_passages=top_k_passages, query=query)
            elif model == "Llama":
                return self.generate_llama_answer(
                    top_k_passages=top_k_passages, query=query
                )
        else:
            return {"choices": [{"text": "Ich weiß die Antwort nicht"}]}
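

# --- Usage sketch (illustrative only) -----------------------------------------
# A minimal sketch of how this Reader might be wired up, assuming the surrounding
# service constructs an EmbeddingServiceCaller elsewhere. The constructor call and
# the example document below are hypothetical and only illustrate the expected
# shapes; they are not part of the original module.
if __name__ == "__main__":
    # Hypothetical: the real EmbeddingServiceCaller may require configuration arguments.
    caller = EmbeddingServiceCaller()
    reader = Reader(caller)

    # A Haystack document as it would come back from the retriever (illustrative content).
    docs = [
        Document(
            content="§ 5 Regelstudienzeit ...",
            meta={"title": "StuPO Informatik"},
            score=0.87,
        )
    ]

    # Route the query to the GPT path for the "stupo" index.
    answer, used_docs = reader.get_answers(
        top_k_passages=docs,
        query="Wie lange ist die Regelstudienzeit?",
        index="stupo",
        model="GPT",
    )
    print(answer)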