DSA_SS24/notebooks/demographic_plots.ipynb

350 lines
178 KiB
Plaintext
Raw Normal View History

2024-05-08 17:45:29 +02:00
{
"cells": [
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Demographic Plots\n",
2024-06-05 09:53:25 +02:00
"This notebook is used to read the data from the pickle files and to create a dataframe with the demographic data.\n",
2024-05-15 20:20:01 +02:00
"With this data we can create plots that show the distribution of the demographic data."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-02 16:37:52 +02:00
"execution_count": 1,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
2024-06-02 16:27:39 +02:00
"import pickle\n",
"from scipy.stats import chi2_contingency"
2024-05-15 20:20:01 +02:00
]
},
{
"cell_type": "code",
2024-06-02 16:37:52 +02:00
"execution_count": 2,
2024-05-15 20:20:01 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Set path to data\n",
"path = \"C:/Studium/dsa/data\"\n",
2024-06-05 09:53:25 +02:00
"#path = \"C:/Users/Nils/Documents/HS-Mannheim/0000_MASTER/DSA/EKG_Prog/data\"\n",
"#C:\\Users\\klara\\projects\\DSA\\a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0"
2024-05-15 20:20:01 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data"
2024-05-08 17:45:29 +02:00
]
},
{
"cell_type": "code",
2024-06-02 16:37:52 +02:00
"execution_count": 5,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reading SB\n",
2024-05-29 09:25:12 +02:00
"Length of SB: 50\n",
2024-05-08 17:45:29 +02:00
"Reading AFIB\n",
2024-05-29 09:25:12 +02:00
"Length of AFIB: 27\n",
2024-05-08 17:45:29 +02:00
"Reading GSVT\n",
2024-05-29 09:25:12 +02:00
"Length of GSVT: 0\n",
2024-05-08 17:45:29 +02:00
"Reading SR\n",
2024-05-29 09:25:12 +02:00
"Length of SR: 13\n"
2024-05-08 17:45:29 +02:00
]
}
],
"source": [
"\n",
2024-05-29 09:25:12 +02:00
"#path = \"C:/Studium/dsa/data\"\n",
2024-05-08 17:45:29 +02:00
"#path = \"C:/Users/Nils/Documents/HS-Mannheim/0000_MASTER/DSA/EKG_Prog/data\"\n",
2024-05-29 09:25:12 +02:00
"path = \"C:/Users/klara/projects/DSA/data\"\n",
2024-05-08 17:45:29 +02:00
"\n",
"categories_dict = {\n",
"'SB': [426177001],\n",
"'AFIB': [164889003, 164890007],\n",
"'GSVT': [426761007, 713422000, 233896004, 233897008, 713422000],\n",
"'SR': [426783006, 427393009]\n",
"}\n",
"\n",
"data = {}\n",
"for cat_name in categories_dict.keys():\n",
" print(f\"Reading {cat_name}\")\n",
" with open(f'{path}/{cat_name}.pkl', 'rb') as f:\n",
" records = pickle.load(f)\n",
" data[cat_name] = records\n",
" print(f\"Length of {cat_name}: {len(records)}\")\n",
"\n",
"data_demographic = {'age':[], 'diag':[], 'gender':[]}\n",
"for cat_name, records in data.items():\n",
" for record in records:\n",
" age = record.comments[0].split(' ')[1]\n",
" sex = record.comments[1].split(' ')[1]\n",
" if age == 'NaN' or sex == 'NaN':\n",
" continue\n",
" # age is the token after the 'Age:' label (split off above); store it as an integer\n",
" data_demographic['age'].append(int(age))\n",
" data_demographic['diag'].append(cat_name)\n",
" data_demographic['gender'].append(sex)\n",
"\n",
"df_dgc = pd.DataFrame(data_demographic)\n",
"\n",
"# Bin the ages into ten-year groups (categorical intervals)\n",
2024-05-12 13:31:54 +02:00
"age_categories = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]\n",
2024-05-08 17:45:29 +02:00
"df_dgc['age_group'] = pd.cut(df_dgc['age'], bins=age_categories)"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot Data"
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-02 16:37:52 +02:00
"execution_count": 6,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-02 16:27:39 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmUAAAHJCAYAAADEuU4vAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAACDcUlEQVR4nO3deVhU1f8H8PcMywCyi2yKgKK4hAq4oeVKoqJiaoiZIuIaamqZUe5ppplLWpZJYqml9XVfwwWXpFxxw13cATcQEWQ9vz/8MTmyCONlFn2/nuc+j3Pucj5zuXPn4znnnpEJIQSIiIiISKvk2g6AiIiIiJiUEREREekEJmVEREREOoBJGREREZEOYFJGREREpAOYlBERERHpACZlRERERDqASRkRERGRDmBSRkRERKQDyp2Uubm5QSaTITo6utj1d+/eRePGjSGTyVC/fn3cunXrZWPUiilTpkAmk2HKlCkaq7Pw3F69elVjdVLxZDIZZDKZtsPQO3PnzlWeu4ULF5a6bWxsLGQyGdq0aaOZ4Ii0RJ17+4ABA5SfpcLF1NQUDg4OaNKkCYYMGYKNGzciLy+vxGNo43uMXo6kLWU3btzAW2+9haNHj6Jp06bYt28fqlatKmUVeqvwA1ZSMkvSYDKlXVFRUcp///zzz2ofh39Hoqdq1qyJ0NBQhIaGomfPnmjcuDHu3LmDn376CUFBQfDw8MCuXbu0HSZJxFCqA50/fx5vv/02bty4AX9/f6xbtw7m5uZSHf61sGvXLuTm5jKR1QFnz57Vdgh6559//kFCQgKsra2Rm5uL+Ph4HDt2DD4+PtoOjUhvvfnmm8X+Z/7EiRP49NNPsX37dgQEBGDdunXo2rWryjYjRoxASEgI7OzsNBQtvSxJWsqOHTuGt956Czdu3EDPnj2xZcsWJmRqqFmzJurUqQMjIyNth/Laq1OnDurUqaPtMPRKYStZnz598O6776qUEZG0GjZsiK1bt6J3797Iz89HaGgo0tPTVbaxs7NDnTp1mJTpkZdOyvbt24e2bdvi7t27GDx4MNasWQNjY+Nit/3999/Rvn172NraQqFQwNXVFQMHDsSFCxeK3f7ZfvgNGzagXbt2sLW1hUwmQ2xsLADVbo5ly5bBz88PVlZWRfrvb9++jbFjx6Ju3bowMzODhYUFmjRpgkWLFpXaJ/+83NxcrFixAn379kWdOnVgaWkJU1NTeHp6YtSoUbh9+7bK9levXoVMJsPy5csBAGFhYSpjBJ7t6y9t3EFmZia++uor+Pj4wMLCAmZmZqhfvz4mTJiA1NTUItsX1uvm5gYhBJYsWQJfX19UqlQJVlZW6NChA+Li4op9jxcvXsTAgQPh7u4OhUIBc3NzuLq6IjAwEMuWLVPZNjo6GjKZDAMGDMDDhw8xduxYuLm5wcTEBLVq1cKsWbNQUFAAALh16xaGDh0KFxcXKBQKeHp6ljju6Nq1a5g1axbatWuH6tWrQ6FQwNraGm+++SZ+/PFH5TELFY6dKPT8WIzCc/psvA8ePMDo0aNRs2ZNKBQKlbFNxXWfffPNN5DJZKhduzYePXpUJOaffvoJMpkMLi4uuHfvXrHv61l+fn6QyWT4/fffS9xm0aJFkMlkeOedd4qse5nP0549e9ChQwfY2NjA1NQUPj4++OWXX14Yc0keP36M1atXAwDCw8MRHh4OAFi1ahWePHlS5uOU9e9Y6MKFCxg6dChq1qwJExMTWFlZoVWrVlixYkWxx2/Tpo3y/rF//3507doVVapUgVwuV7ZGFBQUYMmSJWjZsiWsra1hZGQEe3t7NGzYECNHjiz283nt2jUMGDAAjo6Oymt/8uTJePLkiUqd5Y3lRcMenr2eSyq/f/8+IiIilJ8jV1dXjBkzptj7xrPj/DIzM/HZZ5/Bw8MDJiYmcHZ2Rnh4eKnjhFNTUzF58mQ0atRIeZ/y8vLC9OnTkZmZWWT7Z8c83b17FxEREXBxcYGxsT
FcXFwwcuRIpKWllVhfec5JoWfvjSWVl/eeCQAJCQl49913YWdnB1NTU7zxxhuYM2cO8vPzyxV/echkMnz33XcwNTVFamoqfvrpJ5X1JY0pK+/32LPu37+PUaNGqVxPo0ePRlpaWonX67PliYmJ6NevHxwdHaFQKFCzZk1MmDAB2dnZJdZZ3ntdUlISPvzwQ9SuXRsmJiYwMzODi4sL2rdvjzlz5hS7T3lzhJd9TyUS5eTq6ioAiGXLlolNmzYJExMTAUCMHz++xH0KCgpE//79BQBhaGgo2rVrJ0JCQkTt2rUFAGFmZia2bdtWYl0jRowQAETjxo1Fnz59ROvWrcW+ffuEEEIAUG4jl8vFm2++Kfr06SOaNWsmrl69KoQQYu/evcLGxkYAEG5ubqJbt24iICBAWdahQweRk5OjUvfkyZMFADF58mSV8hs3bggAwsrKSjRv3ly8++67onPnzsLZ2VkAEFWqVBEXL15Ubn/37l0RGhoqatasKQCIli1bitDQUOWybt26Iu83MTFRpc779++LRo0aCQDC0tJSdOvWTfTs2VPY2dkJAMLd3b3IPomJiQKAcHV1FaGhocLIyEi0a9dOBAcHK8+7QqEQ//zzj8p+p06dEpaWlgKA8PT0FD169BDvvvuu8PPzE+bm5qJhw4Yq2y9btkwAEEFBQaJu3brC3t5e9OzZU3To0EGYmpoq/zaXLl0Sjo6OwsXFRQQHB4u2bdsKAwMDAUB89dVXRf72X3zxhfK9tW/fXoSEhIjWrVsLY2NjAUD06NFDFBQUKLdft26dCA0NVV4Pz57j0NBQcffuXZV4AwMDhbu7u7CxsRHdunUT7777rujbt6/yeIXHeV63bt0EABESEqJSHh8fL0xMTIShoaH4+++/i+xXnB9//FEAEAEBASVu4+PjIwCIjRs3Kste9vM0ceJEIZPJhK+vrwgJCRHNmzdXvt958+aVKfbnRUVFCQCiQYMGyrLCeFauXFnsPnv27BEAROvWrZVlZf07CiHEmjVrlPefOnXqiHfeeUe0a9dOVKpUSQAQYWFhReps3bq1ACA++OADIZfLRb169URISIjo0KGDWLVqlRBCiLCwMAFAmJiYCH9/f9GnTx8REBAgatWqJQCofGaFEOLMmTPKz6Kzs7MIDg4WgYGBolKlSuLNN98ULVq0EADEnj17yh1L4blYtmxZseew8HoODQ0ttrxbt26iZs2awtraWnTv3l288847yvuep6enuHPnTrF/Ez8/P9G8eXNhZmYmOnfuLN59913h5OQkAAhHR0dx4cKFIrGcOXNGuLi4CADCyclJdOzYUXTt2lU4ODgIAKJRo0YiLS1NZZ/C++zAgQNFtWrVhIODg+jRo4fo3LmzsLKyEgBEkyZNityfS1PSOSn07L2xpPLy3DOFEGL//v3K665GjRoiJCRE+Pv7CyMjI9GzZ88S7+2lKfzbl/Q+nvXOO+8Uey+R6nus0O3bt5XfZba2tqJHjx6ie/fuwsbGRnh6eoru3bsXe70WvpcPP/xQWFpaCldXVxEcHCz8/f2V3xPdu3cvUp8697qkpCTl+6hevboICgoSvXv3Fm+99ZawtbUVVlZWRepRJ0dQ9z29iNpJWWBgoDA0NBQAxNdff13qPosXLxYAhJ2dnTh+/LiyvKCgQHnRWFtbF7lBFNZlYGAgNmzYUPwb+P+bt6WlpYiLiyuyPikpSVSuXFnIZDLx/fffi/z8fOW6e/fuiXbt2gkAYurUqSr7lXQxp6eniw0bNojs7GyV8pycHBEZGSkAiM6dOxeJ40U312ff7/Mf3N69ewsAolmzZuLevXvK8kePHolOnToJAKJFixYq+xTeYApvMufPn1euy8vLEwMHDlRebM8q/EKaPn16kfgyMzPF3r17VcoKb4AARNeuXcXjx4+V644ePSoMDQ2VXzjDhg0Tubm5yvXr169X/u2e3U8IIQ4dOiROnTpVJIZbt26Jhg0bCgBizZo1RdaXlEwVF2
/79u3Fw4cPi92upOOkpqYKNzc3AUAsXrxYCPH0mij8wn7RZ+FZDx8+FGZmZkIul4ubN28WWX/ixAkBQDg4OKict5f
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Contingency table (record counts) of age group vs. diagnosis\n",
"corr_matrix_age_diag= pd.crosstab(df_dgc['age_group'], df_dgc['diag'])\n",
"\n",
"# Plot the contingency table as a heatmap\n",
"sns.heatmap(corr_matrix_age_diag, annot=True, cmap='coolwarm', fmt='d')\n",
2024-05-15 20:20:01 +02:00
"plt.title('Korrelationsmatrix von Altersgruppen und Diagnosen', fontsize=16)\n",
2024-05-08 17:45:29 +02:00
"plt.xlabel('Diagnose')\n",
"plt.ylabel('Altersgruppe')\n",
"plt.show()"
]
},
2024-06-02 16:27:39 +02:00
{
"cell_type": "code",
2024-06-02 16:37:52 +02:00
"execution_count": 11,
2024-06-02 16:27:39 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Chi-Square Statistic: 38.266574797751275\n",
"P-value: 0.0004730210823940083\n",
"Chi-Square Statistic for SB in 60-70 vs others: 1.4858035714285718\n",
"P-value for SB in 60-70 vs others: 0.22286870264719977\n"
]
}
],
"source": [
"\n",
"# Chi-square test\n",
"chi2, p, _, _ = chi2_contingency(corr_matrix_age_diag)\n",
"\n",
"# The chi-square statistic measures the deviation of the observed from the expected frequencies\n",
"print(f\"Chi-Square Statistic: {chi2}\")\n",
"print(f\"P-value: {p}\")\n",
"\n",
"# Check whether SB (sinus bradycardia) has a significantly higher frequency in the (60, 70] age group\n",
"sb_60_70 = corr_matrix_age_diag.loc[pd.Interval(60, 70, closed='right'), 'SB']\n",
"sb_other = corr_matrix_age_diag.drop(pd.Interval(60, 70, closed='right')).sum()['SB']\n",
"total_60_70 = corr_matrix_age_diag.loc[pd.Interval(60, 70, closed='right')].sum()\n",
"total_other = corr_matrix_age_diag.drop(pd.Interval(60, 70, closed='right')).sum().sum()\n",
"\n",
"# Frequency table for the specific Chi-Square test\n",
"observed = [[sb_60_70, total_60_70 - sb_60_70], [sb_other, total_other - sb_other]]\n",
"chi2_sb, p_sb = chi2_contingency(observed)[:2]\n",
"\n",
"\n",
"print(f\"Chi-Square Statistic for SB in 60-70 vs others: {chi2_sb}\")\n",
"print(f\"P-value for SB in 60-70 vs others: {p_sb}\")"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Heatmap of age groups and diagnoses (a contingency table of counts rather than a correlation matrix in the statistical sense). It shows the diagnosis groupings (up to four) on the horizontal axis and the age groupings in ten-year steps on the vertical axis. The color scale from blue (low) to red (high) encodes the number of records in each combination of the two categorization types."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-02 16:37:52 +02:00
"execution_count": 12,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-02 16:27:39 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkMAAAHJCAYAAACG+j24AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABajklEQVR4nO3deVxU1f8/8NcdlmEHEREUxAXFfUMlxQURRTFXXNPEJbfUUssMM7c0i8qsby71ybXCpXJJP6WpuS+5C2WiEoobigoqiCPL+f0xP+7HkQFRBi7MfT0fj/t4MOcu87537tx5c86550pCCAEiIiIildIoHQARERGRkpgMERERkaoxGSIiIiJVYzJEREREqsZkiIiIiFSNyRARERGpGpMhIiIiUjUmQ0RERKRqTIaIiIhI1Z47GapatSokScLKlSuNzk9OTkazZs0gSRLq1auHa9euFTVGRcyaNQuSJGHWrFkl9p65x/bSpUsl9p5knCRJkCRJ6TDKlOvXr2PWrFlo06YNPDw8YG1tDUdHR9SsWRN9+vTBt99+i9TUVKXDLLQX/T4OHTq0wGtkcVP6/UuLPXv2QJIkBAUFPdd6ud/93Emj0cDZ2Rk+Pj4IDQ3F9OnTcfbs2QK3wWt52WPSmqErV66gTZs2OHHiBFq0aIF9+/ahcuXKpnyLMosXqJLBJEYZn376KapXr47Zs2fjxIkT8PPzQ+/evREaGopy5cph06ZNGDlyJHx8fHDgwAGlwyUjVq5cCUmSMHToUKVDKRVCQ0MRERGBIUOGoFOnTqhatSoOHTqEefPmoV69eggPD8etW7eUDpNMxNJUG4qLi0PHjh1x5coVhISEYOPGjXBwcDDV5lVh165dyMzMZAJZCvzzzz9Kh1BmTJ06FVFRUbC2tkZUVBTGjx8PW1tbg2Xu3buHFStW4OOPP8bVq1cVipSo8N599908tUpZWVlYv349Jk+ejA0bNuDs2bM4dOgQypUrZ7Acr+Vlj0mSoZMnT6Jz585ITk5GeHg4oqOjYW1tbYpNq0qNGjWUDoH+v9q1aysdQpmwc+dOREVFAQDWr1+PHj16GF3O2dkZEydOREREBO7cuVOSIRKZjKWlJV555RW0bt0a/v7+OHfuHN5++20sW7bMYDley8ueIjeT7du3D+3bt0dycjJGjhyJ9evX55sIrV27Fh06dICrqyu0Wi18fHwwfPhwnD9/3ujyT7a7bt68GcHBwXB1dYUkSdizZw8Aw2aRFStWoGXLlnB2ds7TXnv9+nVMnjwZderUgZ2dHRwdHdG8eXN89dVXyMrKKvT+ZmZm4vvvv8egQYNQu3ZtODk5wdbWFn5+fnjjjTdw/fp1g+UvXboESZKwatUqAMCwYcMM2qOf7JNUUDvzw4cP8dFHH6Fp06ZwdHSEnZ0d6tWrh+nTpyMlJSXP8rnvW7VqVQgh8M0338Df3x/29vZwdnZGp06dcPjwYaP7eOHCBQwfPhzVqlWDVquFg4MDfHx80LVrV6xYscJg2Ser1u/du4fJkyejatWqsLGxQc2aNfHxxx8jJycHAHDt2jWMHj0a3t7e0Gq18PPzw//93/8ZjeHy5cv4+OOPERwcjCpVqkCr1cLFxQWtW7fG119/LW8zV24fr1xPt/vnHtMn47179y4mTpyIGjVqQKvVGvwXaKy57bPPPoMkSahVqxYePHiQJ+b//Oc/kCQJ3t7euH37ttH9elLLli0hSRLWrl2b7zJfffUVJElCr1698swryvdp9+7d6NSpE8qVKwdbW1s0bdoUq1evfmbMT5s7dy4AoFevXvkmQk8qV64cfH19jc47f/48Ro8ejRo1asDGxgbOzs5o27Ytvv/+e6PL37t3D9OnT0eDBg1gb28PrVaLSpUqITAwEDNmzEBmZmaedVJSUjBnzhw0a9YMzs7OsLW1RfXq1dGvXz/89ttv+cZtquMFACdOnMCgQYPk89rV1RWhoaH49ddf810nKysLy5
cvR0hICNzc3KDVauHl5YWQkJB8v0MAkJCQgFdffRUeHh7QarWoUaMGpk+fDp1OZ7Bc1apVMWzYMADAqlWrDL47he1zExQUZHBtflp+/TCfLE9OTsa4cePg7e0Na2treHt7Y8KECQX2NVu9ejWaN28OOzs7uLq6onPnzti/f3+hYn5RVapUwezZs+X3v3nzpsH8/K7lz3tde9KBAwfQuXNnuLi4wMHBAc2bN5fPwfy6BzxZ/vPPP6N169ZwcnKCvb09AgMDCzzn7t69i2nTpqFevXryb6a/vz+ioqKQkZFhdJ2dO3eiW7duqFixIqysrFCuXDnUrFkTgwcPxr59+4yus2vXLvTu3Ruenp6wtraGu7s7evXqle/vU1H2qUDiOfn4+AgAYsWKFWLLli3CxsZGABBTp07Nd52cnBwxZMgQAUBYWlqK4OBgMWDAAFGrVi0BQNjZ2Ynffvst3/caP368ACCaNWsmBg4cKNq1ayf27dsnhBACgLyMRqMRrVu3FgMHDhQBAQHi0qVLQggh9u7dK8qVKycAiKpVq4ru3buL0NBQuaxTp07i8ePHBu89c+ZMAUDMnDnToPzKlSsCgHB2dhYvvfSS6Nu3rwgLCxOVKlUSAESFChXEhQsX5OWTk5NFRESEqFGjhgAgAgMDRUREhDxt3Lgxz/4mJCQYvOedO3dE48aNBQDh5OQkunfvLsLDw4Wbm5sAIKpVq5ZnnYSEBAFA+Pj4iIiICGFlZSWCg4NFv3795OOu1WrFkSNHDNaLjY0VTk5OAoDw8/MTvXv3Fn379hUtW7YUDg4OolGjRgbLr1ixQgAQPXr0EHXq1BHu7u4iPDxcdOrUSdja2sqfzcWLF4WHh4fw9vYW/fr1E+3btxcWFhYCgPjoo4/yfPYffPCBvG8dOnQQAwYMEO3atRPW1tYCgOjdu7fIycmRl9+4caOIiIiQz4cnj3FERIRITk42iLdr166iWrVqoly5cqJ79+6ib9++YtCgQfL2crfztO7duwsAYsCAAQblp0+fFjY2NsLS0lIcPHgwz3rGfP311wKACA0NzXeZpk2bCgDil19+kcuK+n16//33hSRJwt/fXwwYMEC89NJL8v5+/vnnhYpdCCHu3r0rNBqNACA2bNhQ6PWMWb9+vXwtqV27tujVq5cIDg4W9vb2AoAYNmyYwfLp6emifv368neuW7duYsCAASIoKEh4eHgIACIlJcVgndOnT4vKlSvL39+wsDDRv39/0bJlS2FrayvatWtnsPyLHq/c83DFihV55i1cuFA+Zo0bNxZ9+vQRrVu3ls/r2bNn51knNTVVtG7dWgAQVlZWol27dmLgwIGiffv2okKFCnnO09z3f/PNN4WTk5Pw8fER/fr1EyEhIfJ3smfPngbrvPXWWyIwMFAAEDVq1DD47syfPz//D+4J7dq1EwDE7t27jc7P75qaWz58+HDh5eUlKlasKHr37i3CwsKEs7OzACCaN2+e5xothBBvvPGGACA0Go1o27atGDBggKhbt67QaDTizTffFADyfK7PkvvZ5rcfuVJSUoQkSQKAWLNmjcG8/K7lz3tdy7VmzRr5vGnQoIEYOHCgaNu2rdBoNGLq1Kn5Xq9yy2fMmCEkSRKBgYGif//+olGjRgKAkCTJ6Hc3Pj5e3ocKFSqI8PBw0b17d+Ho6CgAiKZNm4q7d+8arLNy5UohSZKQJEkEBASI/v37i+7du4umTZsKCwsL8eabb+Z5n7feekv+/Fq0aCH69u0rAgIChCRJwsLCQixfvtxk+/QsL5wMde3aVVhaWgoA4pNPPilwnSVLlggAws3NTZw6dUouz8nJkb8ILi4u4tatW0bfy8LCQmzevNn4Dvz/A+Pk5CQOHz6cZ/6NGzdE+fLlhSRJYvHixSI7O1ued/v2bREcHGz0IpTfF/f+/fti8+bNQqfTGZQ/fvxYREZGCgAiLCwsTxwFXSCf3t+nv0D9+/cXAERAQIC4ffu2XP7gwQPRpUsXAU
C0atXKYJ3cZCg3IYqLi5PnZWVlieHDh8uJ4JOGDRsmAIi5c+fmie/hw4di7969BmW5yQUA0a1bN5Geni7PO3HihLC
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# drop records whose gender is 'Unknown' (there is only one occurrence)\n",
"df_dgc_bineary = df_dgc[df_dgc['gender'] != 'Unknown']\n",
"# Contingency table (record counts) of gender vs. diagnosis\n",
"corr_matrix_sex_diag = pd.crosstab(df_dgc_bineary['gender'], df_dgc_bineary['diag'])\n",
"\n",
"# Plot the contingency table as a heatmap\n",
"sns.heatmap(corr_matrix_sex_diag, annot=True, cmap='coolwarm', fmt='d')\n",
2024-05-15 20:20:01 +02:00
"plt.title('Korrelationsmatrix von Geschlecht und Diagnosen', fontsize=16)\n",
2024-05-08 17:45:29 +02:00
"plt.xlabel('Diagnose')\n",
"plt.ylabel('Geschlecht')\n",
"plt.show()"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Heatmap of genders and diagnoses (a contingency table of counts rather than a correlation matrix in the statistical sense). It shows the diagnosis groupings (up to four) on the horizontal axis and the genders on the vertical axis. The color scale from blue (low) to red (high) encodes the number of records in each combination of the two categorization types."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-02 16:27:39 +02:00
"execution_count": 8,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-02 16:27:39 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnUAAAHbCAYAAACtCWxXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABsl0lEQVR4nO3deXxM1/8/8NdkmySVxZpFNkskiNhFQouKhCpiaygVoVqK1ieo6sdabaOqSlulJehi/3xQVJGmwkdFEdS+a0MlQSwhYYzM+f3hl/s1ZrLPevN6Ph7zYM499973uXfuyXvuckYhhBAgIiIiIqtmY+4AiIiIiKjimNQRERERyQCTOiIiIiIZYFJHREREJANM6oiIiIhkgEkdERERkQwwqSMiIiKSASZ1RERERDLApI6IiIhIBpjUkV4BAQFQKBRYsWJFsfU6duwIhUKBGTNmaJWnpqZCoVCgY8eORouRrNfQoUNL9fkyl3nz5kGhUEChUODLL78sti4/69ZlxYoVUCgUGDp0qFb5X3/9BYVCgYCAALPERWQITOrIohXVARMZU1JSkvT/ZcuWlXs5hYkhlV3hF8bU1FRzh0JkNezMHQDJU5s2bXD69Gk4OzubOxSiMtm/fz9OnToFd3d3qNVqHD16FIcPH0aLFi3MHRoZQO/evdG2bVu4ubmZOxQig2NSR0bh7OyM4OBgc4dBVGaFZ+kGDhyIBw8eYMWKFUhKSmJSJxNubm5M6Ei2ePmVjKK4+4zS09MRGxsLHx8fODg4wNXVFXXr1kXfvn3x008/SfUCAgIQHx8PAPjuu++kS1n6lpufn4/Zs2ejRYsWcHFxgbOzMxo3bowpU6bg9u3bRca5d+9edO3aFe7u7qhSpQpat26N77//HkDRl86eLl++fDnCw8Ph5uYGhUKBv/76CwDw999/45NPPsGLL74IPz8/KJVKuLu7o3379vjmm2+g0Wh0lvv0PT0ajQZffPEFQkND4ezsDC8vL4wcORK3bt0CAKhUKsyaNQvBwcFwcnKCt7c33nnnHeTl5eksd8aMGdJ9j9euXcPrr78Ob29vODk5ISQkROtS45kzZ/Dqq6/C09MTjo6OaNq0KdauXVvk9ivOrVu3MG7cOPj7+0OpVMLPzw9jxoyR2lCclJQU9OnTB15eXnBwcECtWrXQu3dvpKWl6a1fmn1SGnl5eVJ7hw8fjuHDhwMAVq1ahYcPH5Z6OYXb/Nn4Cl/PxnTu3Dm8+eabqFevHhwdHeHm5oYXXngBP/74o97lP31p8n//+x969OiBmjVrwsbGRrpPUaPR4Ntvv0W7du3g7u4Oe3t71KpVC02bNsXYsWOlGO7cuQMnJyfY2trin3/+KbJN/fr1g0KhwIIFC3Sm/ec//0HXrl1Rs2ZNODg4oHbt2hg8eDBOnTqlU/fpz3lBQQHmzZuH5s2bo0qVKlKbFAoFdu/eDQDo1KmT1rZ79j7M27dvY/r06WjWrJl07Ddp0gQffvgh8vPzddZf1ls6SnOvXeE9yM/u16fLd+3ahaioKFStWhVOTk5o0aKF1Nfok5OTg7ffflvqP/z9/TFu3DjcuXPH4u9JJTMSRHr4+/sLAGL58uXF1uvQoYMAIKZPn65VvmvXLgFAdOjQQav8119/Ffb29gKAaNq0qejXr5/o3bu3aNOmjVAqlaJXr15S3fHjx4t27doJAKJevXoiLi5OeiUmJkr1cnJyRLNmzQQA4erqKnr27Cn69u0ratSoIQCIOnXqiMuXL+vEvnr1amFjYyMAiCZNmoiBAweKF154QdjY2IhJkyYJAELfIVJYPmbMGGFjYyPat28vBg4cKMLCwsRff/0lhBBi1qxZ0ro7d+4sBgwYIDp06CAcHBwEANGnTx+h0Wi0lnv58mUBQPj7+4uBAwcKJycn0bVrVxETEyNq1aolAIjmzZuL+/fvi/bt20ttffnll4Wbm5sAILp166YT7/Tp0wUAER8fLzw9PYWfn5
945ZVXRKdOnYStra0AIObOnSvS0tKEi4uLCAoKEgMGDBDh4eFSW9esWVPs5+BZWVlZIjAwUAAQVatWFX369BExMTHC3d1d1KtXT/Ts2bPIz9f48eMFAGFjYyPatGkj+vfvL8LCwoRCoRC2trZi2bJl5donpZGUlCQAiNDQUKmsQYMGAoBYuXKl3nn0fdY3btwo4uLipLie/uzGxcWJGzduSHXXrVsnHB0dBQARHBwsevfuLV588UXx3HPPSfvtWYXH3VtvvSVsbGxEo0aNxIABA0RUVJRYtWqVEEKI+Ph4AUA4OjqKyMhIMXDgQBEdHS3tl40bN0rLGzhwoACgdVw97ebNm8LBwUE4ODiImzdvSuVqtVq88sorAoBQKpUiIiJC9O/fXzRt2lQAEE5OTuKXX37RWlbh59zPz0/07NlTODg4iM6dO4uBAweK0NBQcfr0aREXFyc8PDwEABEdHa217f73v/9Jyzp58qTw9fUVAISXl5fo2rWr6NGjhzRvs2bNxJ07d7TWv3z5cmmf6IvL39+/VOVPK+wvn+1nCsunTp0qFAqFaNmypRgwYIBo27at9Nn4/PPPdZZ37do1Ua9ePQFAVKtWTTp+qlatKoKCgkRMTEyp+meqfJjUkV7GSuo6deokAIgff/xRZ1l37twRaWlpWmVFdcBPi42NFQBEWFiY1h+ce/fuiW7dugkAIiIiQmuef/75R1SpUkUAEAsWLNCatnv3bukPanFJnaurq068hQ4cOCCOHz+uU/7PP/9If/DWrVunNa3wj0dhEvt0MnLz5k3pj3GTJk1EmzZttNp66dIlUbVqVQFA7N27V2u5hUkdADFy5EihVqulaZs3bxYAhIuLi/D39xcffvihVrI5f/58AUDUr19fbzuL0q9fPwFAPP/881p/VHNyckRYWJgUz7Ofr2+//VZa359//qk1bffu3cLFxUU4ODiIc+fOaU0rzT4pjYiICAFAzJ8/XypLTEwUAMSLL76od56iPutPx1WUY8eOCaVSKRwdHcV///tfrWl//fWXaNKkiQAgvvvuO61phccdALFw4UKd5f79998CgPDx8RGZmZk600+dOiX+/vtv6X1ycrKUVOqzYMECAUD07dtXq/z999+Xjr1Lly5pTVu/fr2wtbUVVatWFbdv35bKn/6c+/j4iLNnz+pdZ2Ebd+3apXd6fn6+lPhMmTJFqFQqaVpeXp6UqD6bFJsjqbO3txdbtmzRG4ebm5vIz8/Xmta7d28BQHTs2FHcvXtXKr99+7Zo3759kccPEZM60quwMyrtq7RJXaNGjQQAcevWrVLFUVJS9/fffwsbGxuhUCh0kgAhhLh69ap0FuT333+Xyj/44AMBQISHh+td7oQJE0pM6j744INSteFZO3bsEABE//79tcqf/mP3888/68w3b948AUAoFAq9CePYsWMFADFz5kyt8sKkzs/PTzx48EBnvtDQUAFAtGnTRufsoVqtFtWqVRMAtJKA4mRkZEj75OTJkzrTjxw5ovePUkFBgfD29hYAxKFDh/Que86cOQKAGD9+vFZ5RfeJEEKcPn1aANA5G3Xt2jVha2srFAqFTuIiRMWSusIvJHPnztU7/cCBAwKAaNmypVZ5YcJTVKJZOF/Pnj2LXPfTNBqNdMzv27dPZ3rhmfCtW7dKZTk5OcLJyUk4OjqKq1ev6l3uW2+9JQCIL7/8Uip7+nP+/fffFxlTSUndokWLBADx8ssv651+7949UatWLWFnZ6fV35gjqUtISNA7X3BwsAAg9uzZI5X99ddfQqFQCBsbG3H69GmdeY4fPy4UCgWTOtKL99RRsdq1a4e4uLgiXx4eHmVaXps2bQAAgwYNwt69e/H48eMKxbdnzx5oNBo0b94coaGhOtNr166N6OhoAMCuXbuk8sL7dQYNGqR3uUWVP61fv37FTlepVNiyZQumTZuGkSNHIj4+HkOHDsU333wDADh79qze+ezs7BAVFaVTHhgYCADw8/NDSE
hIkdOvXbumd7mdOnWCo6NjkfN169ZN5x5COzs76V6iopb7rMJ90qJFCzRq1EhnerNmzfTuqyNHjuDatWuoV68eWrZ
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 2x2 grid of subplots: one age histogram per diagnosis\n",
"fig, axs = plt.subplots(2, 2)\n",
2024-05-15 20:20:01 +02:00
"fig.suptitle('Histogramm der Altersverteilung', fontsize=16)\n",
2024-05-08 17:45:29 +02:00
"for i, cat_name in enumerate(categories_dict.keys()):\n",
" ax = axs[i // 2, i % 2]\n",
" df_dgc[df_dgc['diag'] == cat_name]['age'].hist(ax=ax)\n",
" ax.set_title(cat_name)\n",
" ax.set_xlabel('Alter')\n",
" ax.set_ylabel('Anzahl')\n",
2024-05-12 13:31:54 +02:00
" # add some space between the subplots\n",
"plt.tight_layout()\n",
2024-05-08 17:45:29 +02:00
"plt.show()"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Histograms of the age distribution, one per diagnosis grouping. Each panel shows the age on the horizontal axis and the absolute number of records on the vertical axis."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-02 16:27:39 +02:00
"execution_count": 9,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-02 16:27:39 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABImElEQVR4nO3deXgNd///8deRXSIJEZIQ+77edmpfk1hqSW23VuzlRkuq2vSunSpd0N5F7xZp3VRbtZWi1qqttVNaJQ3R1k4WWyLJ/P7wc76OBAmJk+H5uK65LjPzmc+8zzkjeWXmM3MshmEYAgAAMKFc9i4AAADgYRFkAACAaRFkAACAaRFkAACAaRFkAACAaRFkAACAaRFkAACAaRFkAACAaRFkAACAaRFk8ERr0qSJKlWq9MB2J06ckMViUWRk5EPtJzIyUhaLRSdOnHio7bPb2LFjZbFY7F0GMmnz5s2yWCxavHixvUvJNrePzQsXLjywbbFixdSrV69sq8VisWjIkCHZ1j+yB0EGWWLmzJmyWCyqU6eOvUt54t0OTbcnV1dXBQQEKCgoSB988IESEhLsXSJymOjoaA0ZMkRlypRR7ty5lTt3blWoUEGDBw/WwYMH7V2e6Wzfvl1jx45VbGysvUuBCDLIIgsWLFCxYsX0888/6/jx4/Yu56kwfvx4zZ8/X7NmzdLQoUMlScOGDVPlypXT/HJ68803df36dXuUCTtbuXKlKlWqpPnz56tFixaaNm2aZsyYoZCQEH333Xf6xz/+oZMnT9q7TFPZvn27xo0bR5DJIRztXQDMLzo6Wtu3b9eSJUv04osvasGCBRozZoy9yzK1q1evyt3d/b5tQkJCVLNmTet8RESENm7cqLZt2+rZZ5/Vr7/+Kjc3N0mSo6OjHB357/60iYqKUrdu3VS0aFFt2LBB/v7+NuunTJmimTNnKlcu/qaFeXH04pEtWLBAefPmVZs2bfTcc89pwYIFadrcHoPy7rvv6r///a9KliwpFxcX1apVS7t27bK2uz0mIL2pWLFi1nbLly9XmzZtFBAQIBcXF5UsWVITJkxQSkpKujUeOXJETZs2Ve7cuVWoUCFNnTr1oV/v4cOH1axZM7m5ualw4cKaOHGiUlNT0227evVqNWzYUO7u7sqTJ4/atGmjw4cP27Tp1auXPDw8FBUVpdatWytPnjzq0aPHQ9XWrFkzjRo1SidPntT//vc/6/L0xsjMmzdPzZo1U4ECBeTi4qIKFSpo1qxZafpMTU3V2LFjFRAQoNy5c6tp06Y6cuRImvEKty95bdu2TeHh4fL19ZW7u7s6duyo8+fPp+l35syZqlixolxcXBQQEKDBgwen+Qv32LFjCg0NlZ+fn1xdXVW4cGF169ZNcXFxNu3+97//qUaNGnJzc1O+fPnUrVs3nTp1yqbN7fFSD3ss3B4/sWzZMlWqVEkuLi6qWLGi1qxZY9Pu5MmT+te//qWyZcvKzc1NPj4+6ty5c7rjp2JjYzV8+HAVK1ZMLi4uKly4sHr27JlmvEhqaqomTZqkwoULy9XVVc2bN8/Qmc+pU6fq6tWrmjdvXpoQI90KuC+99JICAwNtlv/222967rnnlC9fPrm6uqpmzZpasWKFTZubN29q3LhxKl26tFxdXeXj46MGDRpo3bp1afrq0qWLfH195ebmprJly+rf//53uu9Fr1695O3tLS8vL/Xu3VvXrl174GuMjY3VsGHDFBgYKBcXF5UqVUpTpkxJ838yNTVVM2bMUOXKleXq6ipfX18FBwdr9+7dafq832c8duxYvfrqq5Kk4sWLW38+5dTxcU8D/kTDI1uwYIE6deokZ2dnde/eXbNmzdKuXbtUq1atNG0XLlyohIQEvfjii7JYLJo6dao6deqkP/74Q05OTipfvrzmz59vs01sbKzCw8NVoEAB67LIyEh5eHgoPDxcHh4e2rhxo0aPHq34+Hi98847NttfvnxZwcHB6tSpk7
p06aLFixfrtddeU+XKlRUSEpKp13rmzBk1bdpUycnJev311+Xu7q7//ve/1jMfd5o/f77CwsIUFBSkKVOm6Nq1a5o1a5YaNGigffv22QSz5ORkBQUFqUGDBnr33XeVO3fuTNV1pxdeeEFvvPGGvv/+e/Xv3/+e7WbNmqWKFSvq2WeflaOjo7799lv961//UmpqqgYPHmxtFxERoalTp6pdu3YKCgrSgQMHFBQUpBs3bqTb79ChQ5U3b16NGTNGJ06c0PTp0zVkyBB9+eWX1jZjx47VuHHj1KJFCw0aNEhHjx61Hjfbtm2Tk5OTkpKSFBQUpMTERA0dOlR+fn7666+/tHLlSsXGxsrLy0uSNGnSJI0aNUpdunRRv379dP78eX344Ydq1KiR9u3bJ29vb+t+H/VY2Lp1q5YsWaJ//etfypMnjz744AOFhoYqJiZGPj4+kqRdu3Zp+/bt6tatmwoXLqwTJ05o1qxZatKkiY4cOWL9bK9cuaKGDRvq119/VZ8+fVS9enVduHBBK1as0J9//qn8+fNb9/v2228rV65cGjFihOLi4jR16lT16NFDP/30033rXblypUqVKpWpsWuHDx9W/fr1VahQIesx/tVXX6lDhw765ptv1LFjR+tnOHnyZPXr10+1a9dWfHy8du/erb1796ply5aSpIMHD6phw4ZycnLSgAEDVKxYMUVFRenbb7/VpEmTbPbbpUsXFS9eXJMnT9bevXv16aefqkCBApoyZco9a7127ZoaN26sv/76Sy+++KKKFCmi7du3KyIiQqdPn9b06dOtbfv27avIyEiFhISoX79+Sk5O1o8//qidO3fanNl80GfcqVMn/f777/riiy80bdo06+fk6+ub4fcYWcwAHsHu3bsNSca6desMwzCM1NRUo3DhwsbLL79s0y46OtqQZPj4+BiXLl2yLl++fLkhyfj222/T7T81NdVo27at4eHhYRw+fNi6/Nq1a2navvjii0bu3LmNGzduWJc1btzYkGR8/vnn1mWJiYmGn5+fERoamqa+efPm3ff1Dhs2zJBk/PTTT9Zl586dM7y8vAxJRnR0tGEYhpGQkGB4e3sb/fv3t9n+zJkzhpeXl83ysLAwQ5Lx+uuv33fft82bN8+QZOzateuebby8vIxq1apZ58eMGWPc/d89vfcwKCjIKFGihE29jo6ORocOHWzajR071pBkhIWFpamrRYsWRmpqqnX58OHDDQcHByM2NtYwjFvvl7Ozs9GqVSsjJSXF2u4///mPIcmYO3euYRiGsW/fPkOS8fXXX9/zdZ44ccJwcHAwJk2aZLP80KFDhqOjo83yjB4L9yLJcHZ2No4fP25dduDAAUOS8eGHH1qXpfe+7tixI82+R48ebUgylixZkqb97fdv06ZNhiSjfPnyRmJionX9jBkzDEnGoUOH7llvXFycISnNZ2cYhnH58mXj/Pnz1unOmps3b25UrlzZ5v9Ramqq8cwzzxilS5e2LqtatarRpk2be+7fMAyjUaNGRp48eYyTJ0+m+/oM4/+OzT59+ti06dixo+Hj42OzrGjRojbH3IQJEwx3d3fj999/t2n3+uuvGw4ODkZMTIxhGIaxceNGQ5Lx0ksvpanxzloy+hm/8847Nv/fYV9cWsIjWbBggQoWLKimTZtKunX6vWvXrlq0aFG6l3m6du2qvHnzWucbNmwoSfrjjz/S7X/ChAlauXKlIiMjVaFCBevyO8+AJCQk6MKFC2rYsKGuXbum3377zaYPDw8PPf/889Z5Z2dn1a5d+577vJ/vvvtOdevWVe3ata3LfH1901wKWrdunWJjY9W9e3dduHDBOjk4OKhOnTratGlTmr4HDRqU6XruxcPD44F3L935HsbFxenChQtq3Lix/vjjD+ulmw0bNig5OVn/+te/bLa9Pbg4PQMGDLC5jNWwYUOlpKRYB5SuX79eSUlJGjZsmM3YjP79+8vT01OrVq2SJOsZl7Vr197zEsOSJUuUmp
qqLl262LzPfn5+Kl26dJr3+VGPhRYtWqhkyZLW+SpVqsjT09Nm+zvf15s3b+rixYsqVaqUvL29tXfvXuu6b775RlW
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# bar plot of record counts per diagnosis, with the genders grouped side by side\n",
"fig, ax = plt.subplots()\n",
"sns.countplot(data=df_dgc_bineary, x='diag', hue='gender', ax=ax)\n",
"plt.title('Anzahl der Diagnosen nach Geschlecht')\n",
"plt.xlabel('Diagnose')\n",
"plt.ylabel('Anzahl')\n",
"plt.show()"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Bar plot visualizing the distribution of diagnoses by gender, where blue is female and orange is male."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
2024-05-12 13:31:54 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAABhoUlEQVR4nO3dd1gU1/s28HtpS5PeRIooRsUuRkGiWFA0xBJJNMaCPRrUqIn6JVEsiTGxGxtJRFFjT2wRu4gVGxG7WAJqFNCggJV63j/y7vxcFxRkYdW5P9e118WceWbmPLM7uw+zc2YVQggBIiIiIhnT03UHiIiIiHSNBRERERHJHgsiIiIikj0WRERERCR7LIiIiIhI9lgQERERkeyxICIiIiLZY0FEREREsseCiIiIiGSPBRHJTosWLVC7du2XxiUnJ0OhUCAqKuqVthMVFQWFQoHk5ORXWr6sTZw4EQqFQtfdoBKKjY2FQqHA77//Xi7bUygUmDhxYrlsi0iXWBBRmVm4cCEUCgWaNGmi66689VTFl+phbGwMZ2dnBAYG4qeffsKDBw903UV6Db3KMXrkyBFMnDgRGRkZZdcxIh1gQURlZuXKlahcuTKOHz+Oq1ev6ro7sjB58mSsWLECixYtwrBhwwAAI0aMQJ06dXDmzBm12HHjxuHJkye66Ca9Jl7lGD1y5AgmTZrEgojeOiyIqEwkJSXhyJEjmDVrFuzt7bFy5Updd+mN9+jRo5fGtG/fHj179kTfvn0RFhaGnTt3Ys+ePbhz5w46duyoVgAZGBjA2Ni4LLtMr7HX7Rh9/PhxuWynOMcRyRMLIioTK1euhLW1NYKCgvDRRx8V+marukZnxowZ+OWXX1C1alUolUq8++67OHHihBSnumaisEflypWluM2bNyMoKAjOzs5QKpWoWrUqvv32W+Tn5xfaxwsXLqBly5YwNTVFpUqVMG3atFfO9/z582jVqhVMTEzg4uKC7777DgUFBYXGbt++Hc2aNYOZmRkqVKiAoKAgnD9/Xi2mT58+MDc3x7Vr1/D++++jQoUK6NGjxyv1rVWrVhg/fjyuX7+O3377TWov7BqipUuXolWrVnBwcIBSqYSXlxcWLVqksc6CggJMnDgRzs7OMDU1RcuWLXHhwgVUrlwZffr0keJUX+UdPnwYo0aNgr29PczMzPDhhx/i7t27GutduHAhatWqBaVSCWdnZ4SGhmqcibhy5QqCg4Ph5OQEY2NjuLi44JNPPkFmZqZa3G+//QZvb2+YmJjAxsYGn3zyCW7evKkWo7qe7FVfCwqFAkOHDsWmTZtQu3ZtKJVK1KpVCzt27FCLu379Oj7//HNUr14dJiYmsLW1xccff1zo9WUZGRkYOXIkKleuDKVSCRcXF/Tu3Rv//vuvWlxBQQGmTJkCFxcXGBsbo3Xr1iU6E1ucY/R5EydOxOjRowEAHh4e0nH4bB4l2e/x8fFo3rw5TE1N8fXXXwMATp48icDAQNjZ2cHExAQeHh7o16+f2vLp6eno1asXLCwsYGVlhZCQEJw+fVrjmr8XHUfPv1af7VuLFi2kadX7z9q1a/H111/DyckJZmZm6Nix4wvzatq0qdT/iIgIje1kZ2djwoQJ8PT0hFKphKurK8aMGYPs7Gy1uOK+xqj0DHTdAXo7rVy5El26dIGRkRG6d++ORYsW4cSJE3j33Xc1YletWoUHDx7gs88+g0KhwLRp09ClSxf8/fffMDQ0RM2aNbFixQq1ZTIyMjBq1Cg4ODhIbVFRUTA3N8eoUaNgbm6OmJgYhIeHIysrC9OnT1db/v79+2jXrh26dOmCrl274vfff8fYsWNRp04dtG/fvkS5pqamomXLlsjLy8P//vc/mJmZ4ZdffoGJiYlG7IoVKxASEoLAwED8+OOPePz4MRYtWoT33nsPp06dUivw8vLyEBgYiPfeew8zZsyAqalpifr1rF69euHrr7/Grl27MHDgwCLjFi1ahF
q1aqFjx44wMDDAn3/+ic8//xwFBQUIDQ2V4sLCwjBt2jR06NABgYGBOH36NAIDA/H06dNC1zts2DBYW1tjwoQJSE5Oxpw5czB06FCsXbtWipk4cSImTZqEgIAADBkyBImJidLr5vDhwzA0NEROTg4CAwORnZ2NYcOGwcnJCbdu3cLWrVuRkZEBS0tLAMCUKVMwfvx4dO3aFQMGDMDdu3cxb948NG/eHKdOnYKVlZW03dK+Fg4dOoQNGzbg888/R4UKFfDTTz8hODgYN27cgK2tLQDgxIkTOHLkCD755BO4uLggOTkZixYtQosWLXDhwgXpuX348CGaNWuGixcvol+/fmjYsCH+/fdfbNmyBf/88w/s7Oyk7f7www/Q09PDV199hczMTEybNg09evTAsWPHXtpnoGTHqEqXLl1w+fJlrF69GrNnz5b6Y29vX+L9np6ejvbt2+OTTz5Bz5494ejoiDt37qBt27awt7fH//73P1hZWSE5ORkbNmyQlisoKECHDh1w/PhxDBkyBDVq1MDmzZsREhJSaJ+1dRxNmTIFCoUCY8eOxZ07dzBnzhwEBAQgISFB7Vi/f/8+3n//fXTt2hXdu3fHunXrMGTIEBgZGUmFXUFBATp27IhDhw5h0KBBqFmzJs6ePYvZs2fj8uXL2LRpk9q2i/MaIy0QRFp28uRJAUDs3r1bCCFEQUGBcHFxEV988YVaXFJSkgAgbG1txb1796T2zZs3CwDizz//LHT9BQUF4oMPPhDm5ubi/PnzUvvjx481Yj/77DNhamoqnj59KrX5+/sLAGL58uVSW3Z2tnBychLBwcEa/Vu6dOkL8x0xYoQAII4dOya13blzR1haWgoAIikpSQghxIMHD4SVlZUYOHCg2vKpqanC0tJSrT0kJEQAEP/73/9euG2VpUuXCgDixIkTRcZYWlqKBg0aSNMTJkwQz78FFLYPAwMDRZUqVdT6a2BgIDp37qwWN3HiRAFAhISEaPQrICBAFBQUSO0jR44U+vr6IiMjQwjx3/4yMjISbdu2Ffn5+VLc/PnzBQCxZMkSIYQQp06dEgDE+vXri8wzOTlZ6OvriylTpqi1nz17VhgYGKi1F/e1UBQAwsjISFy9elVqO336tAAg5s2bJ7UVtl/j4uI0th0eHi4AiA0bNmjEq/bfvn37BABRs2ZNkZ2dLc2fO3euACDOnj370n4X9xhV5ThhwgRpevr06Wqva5VX2e8RERFqsRs3bnzp6/iPP/4QAMScOXOktvz8fNGqVSuN4/VFx5G7u7vaa/XZvvn7+0vTqv1dqVIlkZWVJbWvW7dOABBz587VyGvmzJlSW3Z2tqhfv75wcHAQOTk5QgghVqxYIfT09MTBgwfVth0RESEAiMOHD0ttxX2NUenxKzPSupUrV8LR0REtW7YE8N8p327dumHNmjWFfn3VrVs3WFtbS9PNmjUDAPz999+Frv/bb7/F1q1bERUVBS8vL6n92f/SHjx4gH///RfNmjXD48ePcenSJbV1mJubo2fPntK0kZERGjduXOQ2X2Tbtm3w8fFB48aNpTZ7e3uNr7h2796NjIwMdO/eHf/++6/00NfXR5MmTbBv3z6NdQ8ZMqTE/SmKubn5S0ebPbsPMzMz8e+//8Lf3x9///239JXU3r17kZeXh88//1xtWdVF3IUZNGiQ2tdzzZo1Q35+Pq5fvw4A2LNnD3JycjBixAjo6f3f29LAgQNhYWGB6OhoAJDOAO3cubPIa042bNiAgoICdO3aVW0/Ozk5oVq1ahr7ubSvhYCAAFStWlWarlu3LiwsLNSWf3a/5ubmIj09HZ6enrCyssJff/0lzfvjjz9Qr149fPjhhxrbef7rzb59+8LIyEiaftlx86ySHqPFUdL9rlQq0bdvX7U21RmkrVu3Ijc3t9Dt7NixA4aGhmpnOvX09NTOYD5PG8dR7969UaFCBWn6o48+QsWKFbFt2za1OAMDA3z22WfStJGRET777DPcuX
MH8fHxAID169ejZs2aqFGjhtq+atWqFQBo7KvivMao9FgQkVbl5+djzZo1aNmyJZKSknD16lVcvXoVTZo0QVpaGvb
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# bar plot of how many diagnoses fall into each age group\n",
"fig, ax = plt.subplots()\n",
"sns.countplot(data=df_dgc, x='age_group', hue='diag', ax=ax)\n",
"plt.title('Anzahl der Diagnosen nach Altersgruppen')\n",
"plt.xlabel('Altersgruppe')\n",
"plt.ylabel('Anzahl')\n",
"plt.show()"
]
2024-05-15 20:20:01 +02:00
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Barplot to show the distribution of diagnoses based on the age groupings. The difference in the incidence of the various diseases can be clearly seen here."
]
2024-05-08 17:45:29 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-05-29 09:25:12 +02:00
"version": "3.11.9"
2024-05-08 17:45:29 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}