"""Merge per-year hourly bike-count CSV files into combined CSV exports."""

import glob
import os

import pandas as pd


def _year_from_filename(file: str) -> int:
    """Return the year encoded in the third ``_``-separated part of the file name.

    The base name is split on ``_``; the third piece, with everything from
    its first ``.`` onwards removed, is converted to ``int``.

    Raises:
        IndexError: If the base name has fewer than two underscores.
        ValueError: If the extracted piece is not an integer.
    """
    return int(os.path.basename(file).split("_")[2].split(".")[0])


def main(
    input_pattern: str = "./stundenwerte/*.csv",
    output_dir: str = "../data/processed",
) -> None:
    """Concatenate all matching CSV files and write the combined results.

    Every file matching ``input_pattern`` is read, tagged with an integer
    ``year`` column derived from its file name, and appended to one frame.
    Two files are written into ``output_dir``:

    * ``hourly_bikes_all.csv`` - all rows.
    * ``hourly_bikes_mannheim.csv`` - rows whose ``domain_name`` equals
      ``"Stadt Mannheim"``.

    Args:
        input_pattern: Glob pattern selecting the input CSV files.
        output_dir: Directory receiving the output files; created if missing.

    Raises:
        ValueError: If no file matches ``input_pattern``.
        KeyError: If the combined data has no ``domain_name`` column.
    """
    # glob returns matches in arbitrary, OS-dependent order; sorting keeps the
    # row order of the generated files reproducible between runs and machines.
    files = sorted(glob.glob(input_pattern))
    if not files:
        # pd.concat would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No input files matched {input_pattern!r}")

    dfs = []
    print("Working", end="", flush=True)
    for file in files:
        df_temp = pd.read_csv(file, low_memory=False)
        df_temp["year"] = _year_from_filename(file)
        dfs.append(df_temp)
        # One dot per processed file, flushed so progress is visible live.
        print(".", end="", flush=True)
    print()  # terminate the progress line

    max_df = pd.concat(dfs, ignore_index=True)

    # to_csv does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    max_df.to_csv(os.path.join(output_dir, "hourly_bikes_all.csv"), index=False)

    # Mannheim subset
    df_ma = max_df[max_df["domain_name"] == "Stadt Mannheim"]
    df_ma.to_csv(
        os.path.join(output_dir, "hourly_bikes_mannheim.csv"), index=False
    )


if __name__ == "__main__":
    main()