"""Combine hourly bike-count CSV files and export the rows of one domain.

Every CSV matched by the input pattern is loaded, tagged with a ``year``
column parsed from its file name, concatenated, filtered on the
``domain_name`` column and written to a single CSV file.
"""
import glob
import os

import pandas as pd

DEFAULT_INPUT_PATTERN = "./stundenwerte/*.csv"
DEFAULT_OUTPUT_PATH = "../data/processed/hourly_bikes_mannheim.csv"
DEFAULT_DOMAIN = "Stadt Mannheim"


def _year_from_filename(file: str) -> int:
    """Return the year encoded in the base name of *file*.

    The year is the third ``_``-separated token of the base name, cut at the
    first ``.`` (e.g. ``a_b_2021.csv`` -> ``2021``).

    Raises:
        ValueError: If the name has fewer than three tokens or the token is
            not an integer.
    """
    name = os.path.basename(file)
    try:
        return int(name.split("_")[2].split(".")[0])
    except (IndexError, ValueError) as err:
        raise ValueError(
            f"Cannot extract a year from file name {name!r}: expected the "
            "third '_'-separated token to be an integer"
        ) from err


def main(
    input_pattern: str = DEFAULT_INPUT_PATTERN,
    output_path: str = DEFAULT_OUTPUT_PATH,
    domain: str = DEFAULT_DOMAIN,
) -> None:
    """Merge all matching CSV files and write the subset for one domain.

    Args:
        input_pattern: Glob pattern selecting the input CSV files.
        output_path: Destination CSV file; missing parent directories are
            created.
        domain: Value of the ``domain_name`` column to keep.

    Raises:
        FileNotFoundError: If no file matches *input_pattern*.
        ValueError: If a file name does not contain a parsable year.
        KeyError: If the combined data has no ``domain_name`` column.
    """
    # glob returns matches in arbitrary order; sort so the output row order
    # is reproducible across machines and runs.
    files = sorted(glob.glob(input_pattern))
    if not files:
        # pd.concat([]) would otherwise fail with an unhelpful message.
        raise FileNotFoundError(f"No CSV files match {input_pattern!r}")

    frames = []
    print("Working", end="", flush=True)
    for file in files:
        # Parse the year first so a badly named file fails before the read.
        year = _year_from_filename(file)
        frame = pd.read_csv(file, low_memory=False)
        frame["year"] = year
        frames.append(frame)
        # Flushed on purpose: the dots are a live progress indicator.
        print(".", end="", flush=True)
    print()  # terminate the progress line

    combined = pd.concat(frames, ignore_index=True)
    subset = combined[combined["domain_name"] == domain]

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    subset.to_csv(output_path, index=False)


if __name__ == "__main__":
    main()