|
api
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
converter
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
crawler
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
data
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
parser
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
pipeline
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
preprocessing
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
data_indexer.py
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
parsed_pdfs.json
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
requirements.txt
|
initial
|
2023-11-15 14:28:48 +01:00 |
|
tei_xml.txt
|
initial
|
2023-11-15 14:28:48 +01:00 |