""" This script includes a SQL to JSON converter specifically designed for converting data from a university's module database into a JSON format. It connects to a MySQL database, retrieves module data using a SQL query, and processes this data into a structured JSON format. NOTE: You have to run MySQL Database on localhost at the default PORT. The script removes unnecessary timestamp columns, combines relevant fields to create a content field for each module, and identifies elective modules (Wahlpflichtmodule) based on specific criteria. The processed data is saved in a JSON file. """ import pymysql import pandas as pd import json # Database connection setup db_connection = pymysql.connect( host='localhost', user='maydane', password='1234', db='mydatabase') # SQL query execution query = 'SELECT * FROM modul' df = pd.read_sql(query, con=db_connection) timestamp_cols = ['changed'] # Remove timestamps, cause they are irrelevant df = df.drop(columns=timestamp_cols) # DataFrame in JSON konvertieren json_data = df.to_dict(orient='records') # Write JSON to file with open('data.json', 'w', encoding="utf-8") as f: json.dump(json_data, f, ensure_ascii=False) # Verbindung schließen db_connection.close() #--------------------------------------------------------- # This file is from a notebook. So this part is antoher script for parsing the json to a suitable format. # TODO: This can be refactored into the upper script import json # Load the data from a JSON file for converting to right format with open('data.json', 'r') as f: data = json.load(f) # Iterate over the data, combining the fields for dic in data: combined_str = dic.get("name_de", "") + " " + dic.get("inhalte_de", "") + " " + dic.get("kompetenzen_de", "") dic["content"] = combined_str dic["is_wpm"]= dic.get("semester") == "6/7" and dic.get("pflichtmodul") == 0 # If you want to save the updated data back to the JSON file: with open('converted_data.json', 'w') as f: json.dump(data, f, ensure_ascii=False, indent=4)