forked from 1827133/BA-Chatbot
54 lines
2.0 KiB
Python
Executable File
54 lines
2.0 KiB
Python
Executable File
|
|
"""
|
|
This script includes a SQL to JSON converter specifically designed for converting data from a university's module database into a JSON format.
|
|
It connects to a MySQL database, retrieves module data using a SQL query, and processes this data into a structured JSON format.
|
|
NOTE: You have to run MySQL Database on localhost at the default PORT.
|
|
The script removes unnecessary timestamp columns, combines relevant fields to create a content field for each module, and identifies elective modules (Wahlpflichtmodule) based on specific criteria.
|
|
The processed data is saved in a JSON file.
|
|
"""
|
|
import pymysql
|
|
import pandas as pd
|
|
import json
|
|
# Database connection setup
|
|
db_connection = pymysql.connect(
|
|
host='localhost',
|
|
user='maydane',
|
|
password='1234',
|
|
db='mydatabase')
|
|
|
|
# SQL query execution
|
|
query = 'SELECT * FROM modul'
|
|
df = pd.read_sql(query, con=db_connection)
|
|
|
|
timestamp_cols = ['changed']
|
|
|
|
# Remove timestamps, cause they are irrelevant
|
|
df = df.drop(columns=timestamp_cols)
|
|
# DataFrame in JSON konvertieren
|
|
json_data = df.to_dict(orient='records')
|
|
|
|
# Write JSON to file
|
|
with open('data.json', 'w', encoding="utf-8") as f:
|
|
json.dump(json_data, f, ensure_ascii=False)
|
|
# Verbindung schließen
|
|
db_connection.close()
|
|
|
|
#---------------------------------------------------------
|
|
# This file is from a notebook. So this part is antoher script for parsing the json to a suitable format.
|
|
# TODO: This can be refactored into the upper script
|
|
import json
|
|
# Load the data from a JSON file for converting to right format
|
|
with open('data.json', 'r') as f:
|
|
data = json.load(f)
|
|
|
|
# Iterate over the data, combining the fields
|
|
for dic in data:
|
|
combined_str = dic.get("name_de", "") + " " + dic.get("inhalte_de", "") + " " + dic.get("kompetenzen_de", "")
|
|
dic["content"] = combined_str
|
|
dic["is_wpm"]= dic.get("semester") == "6/7" and dic.get("pflichtmodul") == 0
|
|
|
|
|
|
|
|
# If you want to save the updated data back to the JSON file:
|
|
with open('converted_data.json', 'w') as f:
|
|
json.dump(data, f, ensure_ascii=False, indent=4) |