DAT_Projekt/data/combine_hourly.py

27 lines
666 B
Python

import glob
import os
import pandas as pd
def main(
    input_pattern: str = "./stundenwerte/*.csv",
    output_path: str = "../data/processed/hourly_bikes_mannheim.csv",
) -> None:
    """Combine the hourly CSV files and export the Mannheim subset.

    Every file matching *input_pattern* is read, tagged with a ``year``
    column taken from its file name, and concatenated into one frame.
    Rows whose ``domain_name`` equals ``"Stadt Mannheim"`` are then
    written to *output_path* (without the index).

    Args:
        input_pattern: Glob pattern selecting the input CSV files.
        output_path: Destination CSV file; its parent directory is
            created if it does not exist yet.

    Raises:
        ValueError: If no file matches *input_pattern*, or if the year
            field of a file name is not an integer.
        IndexError: If a file name has fewer than three
            underscore-separated fields.
        KeyError: If the combined data has no ``domain_name`` column.
    """
    # glob returns matches in arbitrary order; sorting keeps the row
    # order of the exported CSV reproducible between runs.
    files = sorted(glob.glob(input_pattern))
    if not files:
        # pd.concat([]) would fail with an opaque "No objects to
        # concatenate"; report the actual cause instead.
        raise ValueError(f"No input files match {input_pattern!r}")

    dfs = []
    print("Working", end="", flush=True)
    for file in files:
        df_temp = pd.read_csv(file, low_memory=False)
        # The year is the third underscore-separated field of the base
        # name, with everything from the first dot onwards removed.
        year_field = os.path.basename(file).split("_")[2].split(".")[0]
        df_temp["year"] = int(year_field)
        dfs.append(df_temp)
        print(".", end="", flush=True)
    print()  # terminate the progress line

    max_df = pd.concat(dfs, ignore_index=True)

    # Mannheim subset
    df_ma = max_df[max_df["domain_name"] == "Stadt Mannheim"]

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df_ma.to_csv(output_path, index=False)
# Script entry point: run the combine step only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()