# DAT_Projekt/data/combine_hourly.py

import glob
import os

import pandas as pd


def main():
    """Combine the hourly-count CSV files and export the Mannheim subset.

    Reads every ``*.csv`` file in ``./stundenwerte`` (relative to the current
    working directory), adds a ``year`` column parsed from the file name,
    concatenates all files and writes the rows whose ``domain_name`` equals
    ``"Stadt Mannheim"`` to ``../data/processed/hourly_bikes_mannheim.csv``.

    The year is taken from the third underscore-separated token of the file
    name, i.e. names of the form ``<a>_<b>_<year>.csv``.

    Raises:
        FileNotFoundError: If no input file matches the glob pattern.
    """
    path = "./stundenwerte/*.csv"
    out_path = "../data/processed/hourly_bikes_mannheim.csv"

    # glob yields files in arbitrary, filesystem-dependent order; sorting
    # keeps the row order of the exported CSV reproducible between runs.
    files = sorted(glob.glob(path))
    if not files:
        # pd.concat([]) would otherwise fail with an unhelpful message.
        raise FileNotFoundError(f"No input files match {path!r}")

    dfs = []
    print("Working", end="", flush=True)
    for file in files:
        # Parse the year once as an int instead of casting the column later.
        year = int(os.path.basename(file).split("_")[2].split(".")[0])
        df_temp = pd.read_csv(file, low_memory=False)
        df_temp["year"] = year
        dfs.append(df_temp)
        print(".", end="", flush=True)
    print()  # terminate the progress line

    max_df = pd.concat(dfs, ignore_index=True)

    # Mannheim subset
    df_ma = max_df[max_df["domain_name"] == "Stadt Mannheim"]
    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    df_ma.to_csv(out_path, index=False)


# Run the export only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()