198 lines
42 KiB
Plaintext
198 lines
42 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "d6fa6fc8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import seaborn as sns\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"import holidays"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "a2f9a292",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/tmp/ipykernel_17243/1257658190.py:2: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
|
|
" df = df.replace([\"na\", \"NA\", \"Na\"], np.nan)\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"operator_name 0\n",
|
|
"domain_name 0\n",
|
|
"domain_id 0\n",
|
|
"counter_site 0\n",
|
|
"counter_site_id 0\n",
|
|
"counter_serial 0\n",
|
|
"longitude 0\n",
|
|
"latitude 0\n",
|
|
"timezone 0\n",
|
|
"iso_timestamp 0\n",
|
|
"channels_in 0\n",
|
|
"channels_out 0\n",
|
|
"channels_unknown 698715\n",
|
|
"channels_all 0\n",
|
|
"site_temperature 8846\n",
|
|
"site_rain_accumulation 8846\n",
|
|
"site_snow_accumulation 698715\n",
|
|
"year 0\n",
|
|
"dtype: int64\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load the hourly bike-counter export. The textual markers na/NA/Na are declared as\n",
"# missing values at parse time instead of through a later DataFrame.replace call,\n",
"# which triggered pandas' silent-downcasting FutureWarning.\n",
"df = pd.read_csv(\n",
"    \"../data/processed/hourly_bikes_mannheim.csv\",\n",
"    low_memory=False,\n",
"    na_values=[\"na\", \"NA\", \"Na\"],\n",
")\n",
"\n",
"# Number of missing values per column (quick sanity check of the load).\n",
"print(df.isna().sum())\n",
"\n",
"# Make sure the weather measurements are numeric (float) columns.\n",
"weather_columns = [\"site_temperature\", \"site_rain_accumulation\", \"site_snow_accumulation\"]\n",
"df = df.astype({column: float for column in weather_columns})\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "623979f9",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/tmp/ipykernel_17243/678675454.py:17: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
|
" agg_df = df_ma.groupby('time_of_day')['channels_all'].sum().reset_index()\n"
|
|
]
|
|
},
|
|
{
|
|
"ename": "AttributeError",
|
|
"evalue": "Text.set() got an unexpected keyword argument 'useOffset'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
"\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)",
|
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[83]\u001b[39m\u001b[32m, line 33\u001b[39m\n\u001b[32m 30\u001b[39m ax.set_xticks(np.arange(-\u001b[32m0.5\u001b[39m, n + \u001b[32m0.5\u001b[39m, \u001b[32m1\u001b[39m))\n\u001b[32m 32\u001b[39m \u001b[38;5;66;03m# Tick-Labels: 0..24 (Grenzwerte)\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m33\u001b[39m \u001b[43max\u001b[49m\u001b[43m.\u001b[49m\u001b[43mset_xticklabels\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mn\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrotation\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43museOffset\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 34\u001b[39m \u001b[38;5;66;03m# Add some labels and title to the plot\u001b[39;00m\n\u001b[32m 35\u001b[39m ax.set_title(\u001b[33m'\u001b[39m\u001b[33mAnzahl Fahrradpassagen nach Stunden über den Beobachtungszeitraum (Mannheim Renzstraße ab 2014)\u001b[39m\u001b[33m'\u001b[39m, fontsize=\u001b[32m16\u001b[39m)\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/WS26/DAT_Projekt/.venv/lib/python3.12/site-packages/matplotlib/axes/_base.py:74\u001b[39m, in \u001b[36m_axis_method_wrapper.__set_name__.<locals>.wrapper\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 73\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mwrapper\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args, **kwargs):\n\u001b[32m---> \u001b[39m\u001b[32m74\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mget_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/WS26/DAT_Projekt/.venv/lib/python3.12/site-packages/matplotlib/axis.py:2141\u001b[39m, in \u001b[36mAxis.set_ticklabels\u001b[39m\u001b[34m(self, labels, minor, fontdict, **kwargs)\u001b[39m\n\u001b[32m 2139\u001b[39m \u001b[38;5;66;03m# deal with label1\u001b[39;00m\n\u001b[32m 2140\u001b[39m tick.label1.set_text(tick_label)\n\u001b[32m-> \u001b[39m\u001b[32m2141\u001b[39m \u001b[43mtick\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlabel1\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_internal_update\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2142\u001b[39m \u001b[38;5;66;03m# deal with label2\u001b[39;00m\n\u001b[32m 2143\u001b[39m tick.label2.set_text(tick_label)\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/WS26/DAT_Projekt/.venv/lib/python3.12/site-packages/matplotlib/artist.py:1233\u001b[39m, in \u001b[36mArtist._internal_update\u001b[39m\u001b[34m(self, kwargs)\u001b[39m\n\u001b[32m 1226\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_internal_update\u001b[39m(\u001b[38;5;28mself\u001b[39m, kwargs):\n\u001b[32m 1227\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1228\u001b[39m \u001b[33;03m Update artist properties without prenormalizing them, but generating\u001b[39;00m\n\u001b[32m 1229\u001b[39m \u001b[33;03m errors as if calling `set`.\u001b[39;00m\n\u001b[32m 1230\u001b[39m \n\u001b[32m 1231\u001b[39m \u001b[33;03m The lack of prenormalization is to maintain backcompatibility.\u001b[39;00m\n\u001b[32m 1232\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1233\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_update_props\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1234\u001b[39m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{cls.__name__}\u001b[39;49;00m\u001b[33;43m.set() got an unexpected keyword argument \u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m 1235\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{prop_name!r}\u001b[39;49;00m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/WS26/DAT_Projekt/.venv/lib/python3.12/site-packages/matplotlib/artist.py:1206\u001b[39m, in \u001b[36mArtist._update_props\u001b[39m\u001b[34m(self, props, errfmt)\u001b[39m\n\u001b[32m 1204\u001b[39m func = \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mset_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m 1205\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(func):\n\u001b[32m-> \u001b[39m\u001b[32m1206\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[32m 1207\u001b[39m errfmt.format(\u001b[38;5;28mcls\u001b[39m=\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m), prop_name=k),\n\u001b[32m 1208\u001b[39m name=k)\n\u001b[32m 1209\u001b[39m ret.append(func(v))\n\u001b[32m 1210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ret:\n",
|
|
"\u001b[31mAttributeError\u001b[39m: Text.set() got an unexpected keyword argument 'useOffset'"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 2000x1000 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Parse the ISO timestamps into timezone-aware datetimes normalised to UTC.\n",
"df['timestamp'] = pd.to_datetime(df['iso_timestamp'], utc=True)\n",
"\n",
"# Hour of day (0-23) taken from the UTC timestamp.\n",
"# NOTE(review): these are UTC hours, not local time - confirm this is what the\n",
"# 'Uhrzeit' axis below is supposed to show.\n",
"df['hour'] = df['timestamp'].dt.hour\n",
"\n",
"# Edges of the 24 one-hour bins: [0, 1), [1, 2), ..., [23, 24).\n",
"bins = list(range(25))\n",
"\n",
"# Assign every row to its one-hour bin (left-closed, right-open intervals).\n",
"df['time_of_day'] = pd.cut(df['hour'], bins=bins, right=False)\n",
"\n",
"# Keep only the Renzstraße counter and sum the passages per hour bin.\n",
"# observed=False spells out the current pandas default for categorical keys\n",
"# (every bin is kept, even an empty one) and silences the FutureWarning.\n",
"df_ma = df[df['counter_site'] == \"Renzstraße\"]\n",
"agg_df = df_ma.groupby('time_of_day', observed=False)['channels_all'].sum().reset_index()\n",
"\n",
"# Bar plot of the aggregated passages, one bar per hour bin.\n",
"plt.figure(figsize=(20, 10))\n",
"ax = sns.barplot(x='time_of_day', y='channels_all', data=agg_df)\n",
"\n",
"n = len(agg_df)  # number of hour bins (bars)\n",
"\n",
"# Put the ticks on the bar boundaries: -0.5, 0.5, 1.5, ..., n - 0.5\n",
"ax.set_xlim(-0.5, n - 0.5)\n",
"ax.set_xticks(np.arange(-0.5, n + 0.5, 1))\n",
"\n",
"# Label the boundaries 0..n. Extra keyword arguments of set_xticklabels are applied\n",
"# to the Text labels, so useOffset (a formatter option) must not be passed here;\n",
"# doing so raised AttributeError.\n",
"ax.set_xticklabels([str(i) for i in range(n + 1)], rotation=0)\n",
"\n",
"# Show plain numbers on the y axis (no scientific notation, no offset).\n",
"ax.ticklabel_format(axis='y', style='plain', useOffset=False)\n",
"\n",
"# Title and axis labels.\n",
"ax.set_title('Anzahl Fahrradpassagen nach Stunden über den Beobachtungszeitraum (Mannheim Renzstraße ab 2014)', fontsize=16)\n",
"plt.xlabel('Uhrzeit', fontsize=16)\n",
"plt.ylabel('Fahrradpassagen', fontsize=16)\n",
"\n",
"# Show the plot\n",
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "0364d24a",
|
|
"metadata": {
|
|
"vscode": {
|
|
"languageId": "python"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Calendar features derived from the timestamp column.\n",
"df['hour'] = df['timestamp'].dt.hour\n",
"df['day_of_week'] = df['timestamp'].dt.dayofweek  # 0=Monday, 6=Sunday\n",
"df['month'] = df['timestamp'].dt.month\n",
"df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)  # 1 if weekend, 0 if weekday\n",
"\n",
"# Build the holiday calendar for every year that occurs in the data. A fixed\n",
"# years=2023 would leave is_holiday at 0 for the holidays of all other years.\n",
"# NOTE(review): no subdivision is passed, so presumably only nationwide holidays\n",
"# are flagged - confirm whether state-specific holidays for Mannheim are wanted.\n",
"holiday_years = df['timestamp'].dt.year.dropna().astype(int).unique().tolist()\n",
"de_holidays = holidays.Germany(years=holiday_years)\n",
"df['is_holiday'] = df['timestamp'].dt.date.isin(de_holidays.keys()).astype(int)\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "dat_projekt",
|
|
"language": "python",
|
|
"name": "dat_projekt"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|