BA-Chatbot/backend/evaluation/eval_retriever/convert_data_to_squad.py

13 lines
661 B
Python
Raw Permalink Normal View History

2023-11-15 14:28:48 +01:00
import json
# Default file locations, resolved relative to the current working directory.
INPUT_PATH = 'gold_standard_retriever_stupo.json'
OUTPUT_PATH = 'squad_format.json'


def convert_to_squad(data_input):
    """Build a SQuAD-style dictionary from the gold-standard retriever items.

    Each input item becomes one document titled ``doc_<idx>``. Every context
    of the item becomes one paragraph, and every question of the item is
    attached to each of those paragraphs. The answer of a question is the
    whole context (``answer_start`` is always 0).

    Args:
        data_input: Iterable of mappings, each with a ``"context"`` entry
            (a list of context strings, or a single string) and a
            ``"questions"`` entry (a list of question strings).

    Returns:
        A dict of the form ``{"data": [{"title": ..., "paragraphs": [...]}]}``.

    Raises:
        KeyError: If an item lacks the ``"context"`` or ``"questions"`` key.
    """
    documents = []
    for idx, item in enumerate(data_input):
        contexts = item["context"]
        # A bare string would otherwise be iterated character by character,
        # yielding one paragraph per character; treat it as a single context.
        if isinstance(contexts, str):
            contexts = [contexts]
        questions = item["questions"]

        paragraphs = []
        for context in contexts:
            # NOTE(review): ids are "<item index>_<question index>", so they
            # repeat across paragraphs when an item has several contexts.
            # Kept as-is for compatibility; confirm consumers accept this.
            qas = [
                {
                    "question": question,
                    "id": f"{idx}_{qid}",
                    "answers": [{"text": context, "answer_start": 0}],
                    "is_impossible": False,
                }
                for qid, question in enumerate(questions)
            ]
            paragraphs.append({"context": context, "qas": qas})

        documents.append({"title": f"doc_{idx}", "paragraphs": paragraphs})
    return {"data": documents}


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read the gold-standard JSON file and write its SQuAD-style conversion.

    Args:
        input_path: Path of the JSON file holding the gold-standard items.
        output_path: Path of the JSON file to (over)write with the result.
    """
    # Read explicitly as UTF-8 so decoding does not depend on the platform's
    # locale encoding; the output below is written as UTF-8 as well.
    with open(input_path, 'r', encoding='utf-8') as f:
        data_input = json.load(f)

    squad_format = convert_to_squad(data_input)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(squad_format, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()