13 lines
661 B
Python
13 lines
661 B
Python
|
import json
|
||
|
INPUT_PATH = 'gold_standard_retriever_stupo.json'
OUTPUT_PATH = 'squad_format.json'


def convert_to_squad(data_input):
    """Convert retriever gold-standard records into a SQuAD-style dict.

    Each record becomes one document titled ``doc_<index>``. Every context of
    the record becomes a paragraph, and every question of the record is
    attached to every paragraph, with the whole context as the answer text
    (``answer_start`` 0) and ``is_impossible`` set to ``False``.

    Args:
        data_input: Iterable of mappings, each providing a ``"context"``
            entry (an iterable of contexts, or a single string) and a
            ``"questions"`` entry (an iterable of questions).

    Returns:
        A dict of the form ``{"data": [{"title": ..., "paragraphs": [...]}]}``.

    Raises:
        KeyError: If a record lacks ``"context"``, or lacks ``"questions"``
            while having at least one context.
    """
    squad_format = {"data": []}

    for idx, item in enumerate(data_input):
        contexts = item["context"]
        if isinstance(contexts, str):
            # A bare string would otherwise be iterated character by
            # character, producing one bogus paragraph per character.
            contexts = [contexts]

        paragraphs = []
        for context in contexts:
            # NOTE(review): ids are "<record>_<question>", so they repeat
            # across paragraphs when a record has several contexts. Kept
            # as-is to leave the emitted ids unchanged; confirm whether
            # downstream consumers need globally unique ids.
            qas = [
                {
                    "question": question,
                    "id": f"{idx}_{qid}",
                    "answers": [{"text": context, "answer_start": 0}],
                    "is_impossible": False,
                }
                for qid, question in enumerate(item["questions"])
            ]
            paragraphs.append({"context": context, "qas": qas})

        squad_format["data"].append(
            {"title": f"doc_{idx}", "paragraphs": paragraphs}
        )

    return squad_format


def main():
    """Read the gold-standard JSON file and write its SQuAD-style conversion."""
    # Decode explicitly as UTF-8 rather than the platform locale, so that
    # non-ASCII text is read the same way it is written below.
    with open(INPUT_PATH, 'r', encoding='utf-8') as f:
        data_input = json.load(f)

    squad_format = convert_to_squad(data_input)

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(squad_format, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
|