DSA_SS24/notebooks/demographic_plots.ipynb

382 lines
232 KiB
Plaintext
Raw Permalink Normal View History

2024-05-08 17:45:29 +02:00
{
"cells": [
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Demographic Plots\n",
2024-06-05 09:53:25 +02:00
"This notebook is used to read the data from the pickle files and to create a dataframe with the demographic data.\n",
2024-06-11 19:04:23 +02:00
"With this data we can create plots to show the distribution of the demographic data."
2024-05-15 20:20:01 +02:00
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 1,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
2024-06-11 19:04:23 +02:00
"from scipy.stats import chi2_contingency\n",
"import sys\n",
"import json\n",
"\n",
"sys.path.append('../scripts')\n",
"import data_helper"
2024-05-15 20:20:01 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data"
2024-05-08 17:45:29 +02:00
]
},
{
"cell_type": "code",
2024-06-11 19:04:23 +02:00
"execution_count": 2,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reading GSVT\n",
2024-06-11 19:04:23 +02:00
"Reading AFIB\n",
2024-05-08 17:45:29 +02:00
"Reading SR\n",
2024-06-11 19:04:23 +02:00
"Reading SB\n"
2024-05-08 17:45:29 +02:00
]
}
],
"source": [
2024-06-11 19:04:23 +02:00
"data_demographic = data_helper.load_data(only_demographic=True)\n",
2024-05-08 17:45:29 +02:00
"\n",
"df_dgc = pd.DataFrame(data_demographic)\n",
"\n",
"# Bin the ages into 10-year age groups (right-closed intervals)\n",
2024-05-12 13:31:54 +02:00
"age_categories = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]\n",
2024-05-08 17:45:29 +02:00
"df_dgc['age_group'] = pd.cut(df_dgc['age'], bins=age_categories)"
]
},
2024-06-26 18:33:03 +02:00
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 3,
2024-06-26 18:33:03 +02:00
"metadata": {},
2024-06-26 18:44:50 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Average age: 59.58733889924617\n",
"Std Dev age: 18.29087120360519\n",
"Average age group: age_group\n",
"(0, 10] 6.715503\n",
"(10, 20] 16.360606\n",
"(20, 30] 26.066710\n",
"(30, 40] 35.847409\n",
"(40, 50] 46.229902\n",
"(50, 60] 55.403579\n",
"(60, 70] 65.557701\n",
"(70, 80] 75.208785\n",
"(80, 90] 84.706091\n",
"Name: age, dtype: float64\n",
"Std Dev age group: age_group\n",
"(0, 10] 1.883777\n",
"(10, 20] 2.817185\n",
"(20, 30] 2.968634\n",
"(30, 40] 2.878519\n",
"(40, 50] 2.749121\n",
"(50, 60] 2.936383\n",
"(60, 70] 2.884971\n",
"(70, 80] 2.945118\n",
"(80, 90] 2.749137\n",
"Name: age, dtype: float64\n",
"Male Ratio: 0.5733970981600065\n",
"Female Ratio: 0.42657588284564046\n"
]
}
],
2024-06-26 18:33:03 +02:00
"source": [
"# avg age and std dev overall and for each group\n",
"avg_age = df_dgc['age'].mean()\n",
"std_age = df_dgc['age'].std()\n",
"avg_age_group = df_dgc.groupby('age_group')['age'].mean()\n",
"std_age_group = df_dgc.groupby('age_group')['age'].std()\n",
"\n",
"# print \n",
"print(\"Average age: \", avg_age)\n",
"print(\"Std Dev age: \", std_age)\n",
"print(\"Average age group: \", avg_age_group)\n",
"print(\"Std Dev age group: \", std_age_group)\n",
"\n",
"# female and male ratio\n",
"count_male = df_dgc[df_dgc['gender'] == 'Male'].shape[0]\n",
"count_female = df_dgc[df_dgc['gender'] == 'Female'].shape[0]\n",
"count_total = df_dgc.shape[0]\n",
"male_ratio = count_male / count_total\n",
"female_ratio = count_female / count_total\n",
"\n",
"# print\n",
"print('Male Ratio: ', male_ratio)\n",
"print('Female Ratio:', female_ratio)\n"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot Data"
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-11 19:04:23 +02:00
"execution_count": 5,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-11 19:04:23 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHJCAYAAACYMw0LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAADLMklEQVR4nOzdd1gUxxsH8O91eu9KEyygWMCGvYKKXWONYoslaiyJhcTEFiU/7C3RxIKJGjUmmth7jdjFAnZRROm9c2V+f1xYPTnkTg8P8f08zz3K7Ozs7O7t7Xszs3M8xhgDIYQQQgh5I76+K0AIIYQQ8iGgoIkQQgghRAMUNBFCCCGEaICCJkIIIYQQDVDQRAghhBCiAQqaCCGEEEI0QEETIYQQQogGKGgihBBCCNEABU2EEEIIIRoo16Dp6NGjGD58OGrUqAEzMzNIJBI4OjqiY8eOWLZsGZKTk8tz8+/FnDlzwOPxMGfOnPe2TTc3N/B4PDx58uS9bfNjMWzYMPB4PISHh+u7KqV6/vw5hgwZAicnJwiFQvB4PAwbNkzrcpYuXQoejwcej4dVq1bpvqIfibd5z4SHh3PHvvglFothY2MDb29vDBo0CD///DOysrJKLePUqVPg8Xho06bNu+/ER+zJkyfg8Xhwc3N7b9t88eIF5syZg5YtW8LBwQFisRimpqaoXr06+vbti/Xr1yMjI+O91YdorlyCppSUFHTs2BEBAQEIDw+HVCpF27Zt0adPH3h5eeH8+fOYOnUqqlWrhosXL5ZHFT5YH8JNuyL6WI4bYwy9e/fGli1bYGlpif79+yM4OBgtWrTQuqwNGzZw/9+4caMuq0k0ZGxsjODgYAQHB2PAgAFo3rw5BAIBduzYgTFjxsDJyQkrV64E/drV26toXzIXL16MatWqYe7cubh69Spq1qyJ3r17IzAwEJaWltizZw8+++wzuLq64ty5c/quLnmNUNcFZmZmokWLFrh37x5q1aqFn3/+GS1btlTJU1hYiM2bN2P27NmIj4/XdRUqvePHj0MqlaJKlSr6rkqlExoaipkzZ8LR0VHfVVHr6dOnuHTpElxcXHDjxg0IhW93CV+4cAHR0dGwsLCAVCpFZGQkrl27Bl9fXx3XmLyJjY2N2kA/Pj4eYWFhWLFiBSZNmoS4uDiEhYWp5GncuDHu3LkDIyOj91TbyqlKlSq4c+cORCJRuW9rxowZCAsLg1gsRlhYGCZMmABDQ0OVPJmZmdi0aRP+97//IS4urtzrRLSj86Bp4sSJuHfvHtzc3PDvv//CysqqRB6JRILRo0ejR48e1AT5Fjw8PPRdhUrL0dGxwgZMABAbGwsAcHd3f+uACXjZyjRw4EDk5+cjPDwcGzZsoKCpgnB0dMSyZctQvXp1jB8/HosWLUK3bt1UvoAaGRmhVq1aeqxl5SASid7LcTx27BgX+O7cuRM9evRQm8/c3ByTJ09GcHAwUlNTy71eREtMhx49esQEAgEDwP7666+3Luf3339n7dq1Y5aWlkwsFjMXFxc2fPhwdu/ePbX5XV1dGQAWExPD9uzZw9q2bcssLS0ZAHby5EnGGGMAWPHubty4kTVt2pSZmZlx6xV7/vw5mzJlCqtVqxYzNDRkJiYmrGHDhmzVqlVMKpWW2Pbs2bMZADZ79myV9KKiIvbbb7+xQYMGsZo1azJTU1NmYGDAatSowSZOnMieP3+ukj8mJoaro7rXq+W/ur+vy83NZaGhoaxBgwbMxMSEGRoaMm9vb/bNN9+wtLS0EvmLt+vq6soUCgVbt24d8/X1ZUZGRszMzIx17NiRnT9/Xu1xf/WY/vbbb6xRo0bM2NiY2djYsAEDBrCnT58yxhhTKBRs1apVrF69eszIyIhZW1uz4OBglpiYWKLM8jxumrwHgoODGQC2adMmbr1Hjx4xc3NzxuPx2IEDB0rU+fnz58zW1pYBYNu3b1d7rEpz8eJF9sknnzBHR0cmEomYra0t69
q1Kzty5IhW+6nuvVCanJwcZmpqygCwK1eusLNnzzIAzMLCguXn55e6nkKhYBs2bGB+fn7M0NCQWVlZsU6dOrF///2XnTx5kgFgrVu3VruuttfVm2RlZbGff/6Z9erVi3l6ejIjIyNmZGTE6tSpw77++muWnp6udr1Xr5sTJ06wjh07MgsLC2ZgYMAaNGjANm/eXOo2U1NT2aRJk5iLiwsTi8XM2dmZjR8/nqWmpqp9z5Rl06ZN3HVXlkaNGjEArGvXrirpbzrmR48eZRMmTGD16tVj1tbWTCwWsypVqrB+/fqxS5culbotqVTKFi9ezGrXrs0kEgmztbVlffv2ZVFRUVydg4OD1e5LcHAwy8nJYTNnzmQeHh5MLBYze3t7NnToUBYXF1fqNjW9BooVFBSwsLAw5uvry0xMTJhIJGL29vasYcOGbNq0aSw1NVWlXqW9iu8Nr34GqpObm8uWLVvGmjdvziwsLLh7UteuXdnWrVtL3a/XtW7dmgFgvXr10nid1716rIvfk9WqVWNisVjlfSCVStlPP/3E/P39mZmZGZNIJMzT05NNnDhR7bko6xgwVvp959X0v/76izVv3pyZmpoyExMT1rp1a7Z//3615WVkZLBvvvmG1alThxkZGTGxWMwcHR1Zs2bN2LfffsuKiore5hCVO50GTStWrOA+fGUymdbrKxQKNnToUAaACYVC1q5dOzZgwABWo0YNBoAZGRmxgwcPlliv+KRNmDCBAWANGzZkAwcOZK1bt2ZnzpxhjL28YU6YMIHx+XzWokULNnDgQNakSRP25MkTxhhjp0+f5oItNzc31r17dxYYGMilBQQElDiRpQVNz549YwCYubk5a9q0Kfvkk09Yly5dmJOTEwPAbG1t2YMHD7j8ycnJLDg4mHl4eDAArHnz5iw4OJh77d69u8T+vv7mTU1NZfXr12cAmJmZGevevTvr06cPs7GxYQCYu7t7iXVevViCg4OZSCRi7dq1Y/369eOOu0QiYRcuXChx3IuP6cyZM7nz1bdvX+bi4sIAMGdnZ5aWlsb69evHDAwMWKdOnVivXr2YnZ0dA8Dq1q3LCgsL39tx0+Q9UNoN8M8//2QAmI2NDXv27BmXLpPJWMuWLRkA9vnnn5c4Rm/y888/Mz6fzwCwBg0asIEDB7JmzZpx9ZwzZ06J/QwMDGQAmL29vcp+Jicna7zdDRs2cMe/WPG5ftNNYNy4cQwA4/P5rHXr1mzAgAGsdu3aTCAQsC+//LLUG/jbXFdvUhzk2drashYtWrD+/fuzgIAAZm1tzQAwT09PlpKSUmK94uvm22+/ZTwej/n5+bEBAwawpk2bcsd82bJlJdZLSEhg1atXZwCYpaUl6927N+vZsyezsLBgHh4erHv37uUaNBV/rpqYmKgEmG8KmoqDlgYNGrDu3buz3r17M29vb+6zddeuXSXWkcvlrGvXrgwAE4vFLCAggPXv359Vq1aNGRkZcZ+vpQVNPXv2ZHXr1mUWFhasW7durEePHty17urqyjIyMkpsU5troLiO7du35z7jOnfuzAYOHMg6dOjAnd/r168zxpTvk+DgYGZsbMwAsD59+qhcM3fu3GGMvTlgiI2N5Y6bkZER69ixIxswYABr2bIlMzc31+j8McZYWloat5/v0qBQfKyDgoKYu7s7s7S0ZN27d2effPIJGzx4MGNMGVR26NCBAWAGBgasc+fOrH///szZ2Zn7DLt69apKuboImqZMmaJy/23cuDF3HleuXKmyTm5uLqtTpw53HXfr1o0NGDCAtWnThjk4ODAApX750TedBk1DhgxhAFi7du3eav2ffvqJO6nFb3zGlMFUcXBiYWHBkpKSVNYrPmkCgYD9/fffassuPnlmZmYsIiKixPL4+HhmbW3NeDwe+/HHH5lcLueWpaSksHbt2jEAbO7cuSrrlRY0ZWVlsb///rtEUFBUVMRCQkIYANalS5cS9dDkW2tpb97+/fszAKxJkyYqN43s7G
zWuXNnBoA1a9ZMZZ1XWzBcXV1VWvNkMhkbMWIEd2N7XfF61tbWLDIykkvPy8tjLVq0YACYj48P8/Dw4IISxpQBgKe
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Correlation matrix\n",
"corr_matrix_age_diag= pd.crosstab(df_dgc['age_group'], df_dgc['diag'])\n",
"\n",
"# Plot the correlation matrix\n",
"sns.heatmap(corr_matrix_age_diag, annot=True, cmap='coolwarm', fmt='d')\n",
2024-06-11 19:04:23 +02:00
"plt.title('Correlationmatrix of Age and Diagnostic Groups ', fontsize=16)\n",
"plt.xlabel('Diagnostic Group')\n",
"plt.ylabel('Age Group')\n",
2024-05-08 17:45:29 +02:00
"plt.show()"
]
},
2024-06-11 19:04:23 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Chi-Square Statistical Test\n",
"\n",
"With this test we want to check whether the distribution of diagnoses differs significantly between the age groups."
]
},
2024-06-02 16:27:39 +02:00
{
"cell_type": "code",
2024-06-11 19:04:23 +02:00
"execution_count": 6,
2024-06-02 16:27:39 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-11 19:04:23 +02:00
"Chi-Square Statistic: 11503.925605129169\n",
"P-value: 0.0\n",
"Chi-Square Statistic for SB in 60-70 vs others: 401.0271328859626\n",
"P-value for SB in 60-70 vs others: 3.291102770159182e-89\n"
2024-06-02 16:27:39 +02:00
]
}
],
"source": [
"\n",
"# Chi-square test\n",
"chi2, p, _, _ = chi2_contingency(corr_matrix_age_diag)\n",
"\n",
"# Difference between observed and expected frequencies\n",
"print(f\"Chi-Square Statistic: {chi2}\")\n",
"print(f\"P-value: {p}\")\n",
"\n",
"# Check if the frequency of SB (sinus bradycardia) in the 60-70 age group differs significantly from the other age groups\n",
"sb_60_70 = corr_matrix_age_diag.loc[pd.Interval(60, 70, closed='right'), 'SB']\n",
"sb_other = corr_matrix_age_diag.drop(pd.Interval(60, 70, closed='right')).sum()['SB']\n",
"total_60_70 = corr_matrix_age_diag.loc[pd.Interval(60, 70, closed='right')].sum()\n",
"total_other = corr_matrix_age_diag.drop(pd.Interval(60, 70, closed='right')).sum().sum()\n",
"\n",
"# Frequency table for the specific Chi-Square test\n",
"observed = [[sb_60_70, total_60_70 - sb_60_70], [sb_other, total_other - sb_other]]\n",
"chi2_sb, p_sb = chi2_contingency(observed)[:2]\n",
"\n",
"\n",
"print(f\"Chi-Square Statistic for SB in 60-70 vs others: {chi2_sb}\")\n",
"print(f\"P-value for SB in 60-70 vs others: {p_sb}\")"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Correlation matrix (cross-tabulation) of age groups and diagnoses. This matrix shows the four diagnosis groups on the horizontal axis and the age groups in ten-year steps on the vertical axis. The color scale from blue (low) to red (high) encodes the number of patients in each combination of age group and diagnosis."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-11 19:04:23 +02:00
"execution_count": 7,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-11 19:04:23 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAHJCAYAAABKYwdTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB2pklEQVR4nO3dd1gURx8H8O/Rjl6VJogoiL03xC6KNRasMbGhxhp7jVFjNBq7xp5Y81qTqFGjxq6xVxQVsaHYAKWIIFLn/QNZPTiU09UD7/t5nnsemJndnd273fvdzOysQgghQERERES5pqftChARERHlNwygiIiIiDTEAIqIiIhIQwygiIiIiDTEAIqIiIhIQwygiIiIiDTEAIqIiIhIQwygiIiIiDTEAIqIiIhIQx8UQO3btw89evRA8eLFYWlpCaVSCScnJzRq1Ahz587FkydP5KrnO02aNAkKhQKTJk36ZNt8H4cPH4ZCoUC9evW0XZXPkkKhgEKh0HY13mrHjh2oXbs2LC0tpfoePnxYo3UcPnwYvXv3RqlSpWBjYwNDQ0PY2dmhWrVqGDhwIPbv34/88JCBIkWKQKFQ4O7du9quSp70Pp/nevXqSctlvszMzODk5AQfHx8MGjQIBw8efOvno3v37lAoFFi9evUH7oFu08b30udybcgPDN5noadPn6Jz587Yv38/gIyLYP369WFmZobw8HCcOHEC+/fvx4QJE7B//35Ur15d1krnZZkXO35ANaMrxy0wMBD+/v5IT09HgwYN4OTkBIVCAUdHx1wt//TpU3Tp0gV79+4FABQqVAg+Pj6wsrLCs2fPcOXKFSxatAiLFi1CxYoVceHChY+5O5SHlS9fHhUqVAAAJCcnIyoqCpcuXcKJEyewcOFClCtXDqtXr0bFihW1W9F86vDhw6hfvz7q1q2r8Q+gj4HXBi0QGoqNjRVeXl4CgChRooQ4evRotjIvX74Uy5YtE46OjmLr1q2abuK9TJw4UQAQEydO/CTbywkA8bbDmpCQIIKDg8W9e/c+Ya3yvncdt9wKDg4WwcHBMtTo48j8nI4bN07jZWNiYlTOvYMHD6otFxQUJHr27ClMTU0/tLofnZubmwAgQkNDtV2VPOl9zou6deu+9Vp49OhRUa1aNQFAmJqairNnz2Yr8+jRIxEcHCxiY2Pfp9o64dChQwKAqFu3bo5lnjx5IoKDg8WTJ08+al0+x2tDfqBxC9SgQYMQEhKCIkWK4Pjx47C1tc1WRqlUok+fPmjVqhViY2PfK7D7XJmamqJEiRLarsZnK68f27CwMACAp6enxstmnntFixbFiRMnYGNjo7ZcmTJlsGLFCnzzzTcfVFf6PNWuXRv//fcfGjZsiGPHjuHLL79EcHAw9PX1pTJOTk5wcnLSYi0/DwUKFECBAgU++nZ4bdASTaKt27dvC319fQFAbNmy5b2jtg0bNogGDRoIGxsbYWRkJAoXLix69OghQkJC1JZ/81fqtm3bRP369YWNjY0AIA4dOiSEeHcLVEhIiOjTp48oWrSoUCqVwtLSUtSuXVv8/vvvasvfvXtXTJ8+XdSvX1+4uroKIyMjYWVlJXx8fMTSpUtFWlqaSvnM7ef0yvyF/a5fLcHBwaJ79+6icOHCwsjISNjY2IgGDRqITZs2qS3/5n5HRkaK/v37CxcXF2FoaChcXFzEwIEDRUxMTLblVq1aJQCIbt26idjYWDF06FDh5uYmlEql8PDwENOnT5f28cGDB6JPnz7CxcVFGBkZieLFi4sFCxZ80uP2Zn2joqLE4MGDRdGiRYWRkZHKsYSaX+yzZs0SAISnp6eIi4vLVufly5cLAMLFxUWjX4opKSliyZIlwtvbW1haWkrHbtCgQeLBgwe53s+3/YLNdOvWLaGnpycAiL///jvXdczJH3/8Ifz8/ESBAgWEoaGhcHZ2Fl26dBFXr17NVjY0NFQAEG5ubiI9PV0sW7ZMVKpUSZiamgpLS0
vRqFEjceLEiRy3dfXqVdGuXTthZ2cnjI2NRenSpcXMmTNFamrqW1ugUlJSxK+//irq1q0rXSuKFCki+vbtK8LCwrKVf/PcSkhIEN9//70oUaKEMDExEW5ubrk6LlevXhUTJkwQNWvWFM7OzsLQ0FDY2tqKhg0b5ngOvrnd5ORkMX36dFGqVClhbGwsbG1tRZs2bcS1a9dy3OaJEydEkyZNhJWVlTAzMxOVK1cWK1asEEJ8nBaoTFeuXJHW/+eff6rkdevWTQAQq1atUkmPi4sTy5cvF23atBEeHh7C1NRUmJqaijJlyohx48apvdZkunv3rujWrZtwcHCQzpUJEyaIxMREqc6Z1/Os+3Lo0CFx8eJF0aZNG2FnZyeMjIxEyZIlxaxZs0R6erra7Wlyfma6ceOG6NGjhyhSpIgwMjISZmZmonDhwqJZs2Zi5cqV2eql7vXmZy0330v9+vUTxYsXFyYmJsLCwkKULFlS9OvXTwQFBeV4LN8k17XhzWN99OhR0aJFC1GgQAGhUChUPgf3798XAwcOFB4eHtJ3ac2aNcXSpUtFampqtvW+6xjk9J2Y9XweO3asKFasmFAqlcLJyUn07Nkzx/fxU9HozJw/f74AIKytrdUeqHdJT08XXbt2FQCEgYGBaNCggejUqZMoXry41Jy8e/fubMtlXmQHDhwoAIgqVaqIzp07i7p160pdiG97kzZv3iyMjY2l5s02bdqIBg0aCDMzMwFA9OjRI9syP/74owAg3N3dRcOGDUWnTp1E3bp1hZGRkQAg2rZtq3Libt26VbroZH7Rv/nK/GJ+WwC1c+dOqZ5eXl6iU6dOokGDBlLQ2rNnz2zLZO53z549hYuLi3BwcBBt27YVzZo1E1ZWVgKAqFq1qkhOTlZZLjMgadWqlShZsqSwt7cX/v7+onHjxsLExEQ63rdu3RKOjo7C1dVVdOjQQdSvX1+qz/Tp0z/Zccusb/PmzYW7u7uwsbERX3zxhWjfvr3o0qWLtL6cvnC++OILAUB06tRJJT0wMFAYGxsLAwMDcfz48WzL5eTly5fC19dXABDGxsaiadOmomPHjsLV1VUAEAUKFBDnz5/Ptp/FihUTAISPj4+0j9OmTXvn9ubNmycACBsbm2xBqCZSUlJEhw4dBAChVCpFzZo1Rfv27UX58uUFAGFiYpLtHHwzgOrWrZswNDQUDRo0EB06dJDOXaVSKU6dOpVte//99590nhUtWlR06tRJ+Pr6CkNDQ+Hv759jABUXFyfq1asnAAhzc3NRt25d0a5dO6mbws7OTly4cEFlmcxzq3r16qJq1arCzMxMel98fX1zdXwCAgKk64Sfn5/o2LGj8Pb2lr6ghg4dmm2ZzO3WrFlT+Pr6ClNTU9GkSRPh7+8vfR6sra3VBombN2+WzqcyZcqIzp07i1q1agmFQiGGDRv2UQMoIYSoWLGiACC++eYblfScAqj//vtPABAFCxYUtWrVEh07dhSNGzcWdnZ2AoDw8PAQT58+zbadq1evigIFCggAwtnZWXTo0EE0b95cmJmZiVq1aomaNWu+NYAaM2aMFDRlXlMyj9vgwYOzbU/T81OIjO4tS0tL6frbtm1b0b59e+Ht7S3Mzc1F+fLlpbLTpk0Tfn5+AoBwcHBQuWYNHz5cKve276V169YJpVIpAIjChQsLf39/0aZNG1G+fHmhUChyPRxFrmtD5rHu37+/0NPTE6VKlRKdOnUSjRs3FuvXrxdCCHHmzBlha2sr1bljx46iSZMm0veWn5+fSEpKUlnvhwZQ3t7eokaNGsLU1FQ0a9ZMtG/fXjg5OQkAwtHRUdy4ceO99/lDaXRmfv311wKAaNCgwXttbMmSJdKH9+LFi1J6enq6dJCtra1FZGSkynKZF1l9ff0cI+yc3qTLly8LpVIpjI2NxV9//aWSd/fuXVG2bFkBQKxZs0Yl78yZM2p/ATx8+FD6stm8eXO2/Hdd8HL6sISHh0sBz5QpU1SCjLNnz0
otbsuXL1e73wBE9+7dxcuXL6W8sLAwUahQIQFAOgEyZQYkAETLli1FQkKClHf+/HlhYGAgnUR9+/YVKSkpUv62bds
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# exclude gender 'Unknown' (because there is only one occurrence)\n",
"df_dgc_bineary = df_dgc[df_dgc['gender'] != 'Unknown']\n",
"# Correlation matrix\n",
"corr_matrix_sex_diag = pd.crosstab(df_dgc_bineary['gender'], df_dgc_bineary['diag'])\n",
"\n",
"# Plot the correlation matrix\n",
"sns.heatmap(corr_matrix_sex_diag, annot=True, cmap='coolwarm', fmt='d')\n",
2024-06-11 19:04:23 +02:00
"plt.title('Corleationmatrix of Gender and Diagnostic Group', fontsize=16)\n",
"plt.xlabel('Diagnostic Group')\n",
"plt.ylabel('Gender')\n",
2024-05-08 17:45:29 +02:00
"plt.show()"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Correlation matrix (cross-tabulation) of genders and diagnoses. This matrix shows the four diagnosis groups on the horizontal axis and the gender on the vertical axis. The color scale from blue (low) to red (high) encodes the number of patients in each combination of gender and diagnosis."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-11 19:04:23 +02:00
"execution_count": 14,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-11 19:04:23 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHbCAYAAABGPtdUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAACHw0lEQVR4nO3de1zO9/8/8MfV6apQyaFEJYeV86GUnCklbRg+HwzLeXzCyOYwc2b5GDPGmIm2D81hwwyjltOQU+Qw5JTFKKMlFXXV9fr94df761JxXbquroPH/Xa7brrer9f1ej/fXdf19Ox9eL1lQggBIiIiIjJ6ZvoOgIiIiIi0g4UdERERkYlgYUdERERkIljYEREREZkIFnZEREREJoKFHREREZGJYGFHREREZCJY2BERERGZCBZ2RERERCaChR2hdu3akMlkiI6Ofmm/Tp06QSaTYfbs2SrLDx48CJlMhk6dOuksRtKvS5cuoVevXqhevTrMzc1L/By8LplMBplMppWxtCk/Px/VqlWDTCaDs7MzCgoK9B2S2oq+00UPMzMzVKpUCbVq1ULnzp3x0Ucf4eTJky8do+j7fvDgwfIJ+hWKtunWrVsqyw0tTgAYMmSIWjmVSBdY2JHBiI6Ohkwmw5AhQ/QdCj0nJycHoaGh+Pnnn+Hu7o733nsPYWFhaN68+Stfa8z/wf3888948OABACA9PR27d+/Wc0Saa9u2LcLCwvD++++je/fu8PT0xLlz57BkyRL4+fmhU6dOuHnzpk5jKK0gM1bMU2ToLPQdABk/X19fXL58Gba2tvoOhXTg1KlTuHXrFtq0aYOjR4/qO5xyExUVBQCoWbMm/vrrL0RFRaFnz556jkozI0aMKFaACCHw66+/YsKECTh06BDatGmDhIQEeHh4qPT7/vvvkZubCzc3t3KMuHTx8fFQKBSoWbOmvkN5pcjISEydOhU1atTQdyj0BuIeOyozW1tbeHl5Gcx/AKRdqampAID69evrOZLyc/v2bcTFxcHc3BxbtmyBTCbDnj17cO/ePX2HVmYymQzdu3fHyZMnUb9+faSnp2PEiBHF+rm5ucHLy8tg/mCrW7cuvLy8YGlpqe9QXqlGjRrw8vKCvb29vkOhNxALOyqzl51jl5iYiH79+qFWrVqwsrKCnZ0d6tSpgz59+uDnn3+W+tWuXRtDhw4FAHz33Xcq5we9OG5ubi4WLlyIli1bolKlSrC1tUWjRo3w6aef4p9//ik1ziNHjqBbt25wcHBAxYoV0apVK3z//fcASj/P6/nl69evh7+/P+zt7VUOLf3555/473//iy5dusDNzQ1yuRwODg5o164dvvnmGyiVymLj3rp1CzKZDLVr14ZSqcTy5cvRtGlT2NraokaNGhg9ejQyMjIAAHl5eZg3bx68vLxgY2MDFxcXfPjhh8jJySn9TXmJffv24e2330b16tVhZWUFFxcX9OvXD6dPn1bpV/S+hoWFASj+vrxM0fZ99913AIChQ4eqvLa08/N++ukntGvXDnZ2dqhQoQLatm2LPXv2lLqegoICrF27Fp06dYKjoyPkcjk8PDwwZswY3L59W4Pfiqp169ZBqVQiJCQEbdq0QZcuXVBYWChtT2n+/PNPDBkyBM7OzrC2tkb9+vUxa9YsPH369JXngv3444/o1q0bqlWrBisrK9SsWRODBg3CpUuXXns7XsbBwQFffvklAGD//v1ITExUaS8t3ry8PHz++efw9vZGpUqVYGVlBWdnZ7Rq1QqTJ0+WPrdFhyz//PNPAICHh4fKZ6Bo3OfzR25uLmbOnIkGDRrA1tYWtWvXltarziHdQ4cOISgoCI6OjrC1tYWvry/+97//ldj3Ve/H7Nmzi31W1c1TrzoFYdOmTQgICJA+s+7u7hg2bBiuXr1aYv/nt/3AgQMICgpC5cqVYWNjg5YtW0p5jAjgoVjSofj4eISEhEChUKBZs2bw9/dHYWEh/vrrL+zevR
uFhYXSoa2+ffvi+PHjOHr0KOrWrYt27dpJ43h5eUk/Z2RkICAgAElJSbCzs0OXLl1gaWmJQ4cOYcGCBYiJicH+/ftV/kMAniXSgQMHQqlUokmTJmjcuDH++usvDB06VK3/OMeNG4evv/4abdq0QWhoKG7evCkVN//73/8wY8YMeHh44K233kLbtm1x7949JCQk4OjRo4iNjcWPP/5YajE0aNAg7NixAx07dkTdunVx7NgxfPPNNzh58iR+//13dOvWDefPn0enTp1Qv359/P7771i+fDmuXbv20qKnJDNmzMD8+fMhk8nQpk0buLm54fLly9iyZQt++uknrFmzBsOGDQMAODs7IywsDNevXy/xfXmZihUrIiwsDEeOHMGNGzfQtm1b1KtXT2ov6fy8WbNmYd68eWjTpg26d++OK1eu4NixY3j77bfx008/4d1331Xp//jxY/To0QMHDx5ExYoV4e3tjWrVquHChQtYvXo1tm7diri4OLRo0UKj35EQAuvXrwcA6XcxbNgwxMfHY/369Zg6dWqJr7t06RI6duyIBw8ewMXFBT179kROTg6WLFmC/fv3l1jgA8+K04EDB2LLli2Qy+Xw9vZGzZo1cfXqVWzcuBHbtm3Dtm3b0K1bN422Qx0hISFwdHRERkYG4uLi4O3t/dL+SqUSoaGhiI+Ph52dHdq3bw8HBwf8/fffuHbtGj7//HO89957cHR0RL169RAWFoYff/wROTk56NOnDypWrCiN5ezsrDJ2UfF76dIldOjQAc2aNcPDhw/V3pbt27djxYoV8PLyQnBwMO7evYsjR47g/fffR1JSEpYsWaLZL6cE6uap0gghMGTIEHz//fewsLBAhw4dUL16dZw5cwbr16/H5s2b8dNPP5X6Xq9btw7z589Hy5Yt0a1bN9y6dQvHjx9HWFgYMjIyMGHChDJvI5kAQW88d3d3AUCsX7/+pf06duwoAIhZs2apLD9w4IAAIDp27KiyvHPnzgKA2LBhQ7GxMjMzRUJCgsqy9evXCwAiLCys1Bj69esnAAg/Pz/x4MEDafnjx49FSEiIACDatGmj8pq//vpLVKxYUQAQy5YtU2k7dOiQqFChggAgSvo6FC23s7MrFm+RkydPigsXLhRb/tdff4lmzZoJAGLLli0qbSkpKdLYdevWFbdu3ZLaHjx4IOrXry8AiCZNmghfX1+Vbb1586aoXLmyACCOHDlS6u/qRb/++qsAIKytrUVsbKxK29q1awUAYWlpKS5evKjSps77UpqwsLBXfraKfg8ODg7i+PHjKm2zZs0SAMRbb71V7HXvvfeeACDefvttkZ6ertK2dOlSAUDUr19fFBQUaBRzbGysACCqV68u8vPzhRBCPHnyRDg4OAgA4vDhwyW+rmXLlgKA6N+/v3j69Km0/M6dO8LT01PazgMHDqi87pNPPpE+0zdv3lRp27p1qzA3NxeVK1cW//zzj9rboO53WgghAgMDBQAxaNAgleVF3/fn4z106JAAIFq0aCGysrKKjXXq1CmVz+rzsaSkpJS4/qL8AUA0bdpU3Lt376Xb9OI4RXECEJ999plK28GDB4WNjY0AIPbu3fvK7Xte0WfvxXynzvehtM/9qlWrBABRtWpVcfbsWWm5UqmU1ufg4CDu379f4rZbWlqKX375pcR47O3tRW5ubqkx0ZuDh2JJ8uLhshcfhw4d0mi89PR0AED37t2Ltdnb26N169YajZeamoqtW7dCJpNhzZo1qFKlitRWsWJFfPvtt7C2tsaxY8dw7NgxqS0qKgrZ2dnw9/fH+PHjVcbs0KEDxowZ88p1f/TRR6XG26pVKzRu3LjYchcXFyxatAgAsHXr1lLHXr58Odzd3aXnVapUkWK6ePEioqKiVLbVw8MDgwYNAvBsr6i6Fi9eDAD4z3/+g65du6q0DR8+HG+//TYUCgWWLVum9pjaNHfuXPj5+aksmzZtGuzt7XH16lWVQ6uXL1/GDz/8ABcXF8TExK
B69eoqr5swYQK6d++Oa9eu4ddff9UojqKLJgYPHiydz2VtbY333ntPpf15v//+O86cOYOKFSti5cqVkMvlUlvNmjV
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-06-11 19:04:23 +02:00
"settings = json.load(open('../settings.json'))\n",
"categories_dict = settings['labels']\n",
2024-05-08 17:45:29 +02:00
"# 2x2 grid of subplots: one age histogram per diagnosis\n",
"fig, axs = plt.subplots(2, 2)\n",
2024-06-11 19:04:23 +02:00
"fig.suptitle('Histogram of the Age Distribution', fontsize=16)\n",
2024-05-08 17:45:29 +02:00
"for i, cat_name in enumerate(categories_dict.keys()):\n",
" ax = axs[i // 2, i % 2]\n",
" df_dgc[df_dgc['diag'] == cat_name]['age'].hist(ax=ax)\n",
" ax.set_title(cat_name)\n",
2024-06-11 19:04:23 +02:00
" ax.set_xlabel('Age')\n",
" ax.set_ylabel('Amount of Patients')\n",
2024-05-12 13:31:54 +02:00
" # add some space between the subplots\n",
"plt.tight_layout()\n",
2024-05-08 17:45:29 +02:00
"plt.show()"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Histogram of the age distribution. Breakdown of grouped diagnoses by age group and absolute incidence of diagnoses."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-11 19:04:23 +02:00
"execution_count": 11,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-11 19:04:23 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAABdwklEQVR4nO3dd3xN9/8H8NfNuDdLpkySCLFiz4gURYi9W1oliNE29tZhj9aepZSEllKrVRQRq4gtRmmsEC1JkE3mzef3h1/O15WEc7lxr3o9H488Hs7nfM4573NPxsvnnPu5CiGEABERERG9kJG+CyAiIiJ6GzA0EREREcnA0EREREQkA0MTERERkQwMTUREREQyMDQRERERycDQRERERCQDQxMRERGRDAxNRERERDIwNBH9h8yZMwdly5aFsbExatasqe9y0KdPH5QpU0bfZbwzFAoFJk+erO8y3qgyZcqgT58++i6D3hEMTWRwvvvuOygUCvj6+uq7lGKxYcMGLFy4UOf73bdvH8aOHQt/f3+EhoZi5syZRfbt06cPFAqF9GVtbY0aNWpg3rx5yMrK0uq49+7dw+TJkxEVFfWaZ/Bqrly5gsmTJ+P27dt6OX5xKVOmjHR9jIyMYGtri2rVqmHgwIE4efKkvst7o44fP47JkycjOTm52I+Vl5eHdevWoUWLFihZsiRMTU3h5OSEli1bYuXKlVr/fNB/i4KfPUeGxt/fH/fu3cPt27dx/fp1eHt767sknWrXrh0uX76s8z/y48ePx5w5c5CRkQGlUvnCvn369MHGjRvxww8/AACSk5OxdetWHDp0CN27d8fGjRtlH/fMmTOoV68eQkNDC/yPPycnB3l5eVCpVFqfj1xbtmzBBx98gIMHD+L9998vtuO8aWXKlIGdnR1GjRoFAEhLS8PVq1exefNmxMXFYcSIEZg/f77GNpmZmTAxMYGJiYk+Si42c+fOxZgxYxATE1Ng5DIrKwtGRkYwNTV97eNkZGSgc+fO2Lt3Lxo2bIj27dvD2dkZiYmJOHz4MHbv3o2goCCsXr36tY9Fb6f/1k8WvfViYmJw/PhxbNu2DYMGDcL69esxadIkfZf1VkhISIC5uflLA1M+ExMTfPLJJ9Ly559/Dl9fX2zatAnz58+Hm5vba9ekiz9k/1W5ubnIy8t74fUqVaqUxjUCgG+//RYff/wxFixYgPLly+Ozzz6T1pmZmRVbvYZKl4F8xIgR2Lt3LxYuXIhhw4ZprBs1ahSuX7+O8PDwF+5DznWlt5ggMiDTpk0TdnZ2IisrS3z22WeifPnyBfrExMQIAGLOnDli6dKlwsvLS5ibm4sWLVqI2NhYkZeXJ6ZOnSpKlSolzMzMRIcOHcSjR48K7GfZsmXCx8dHKJVK4erqKj7//HORlJSk0cfT01MEBQUV2LZJkyaiSZMm0vLBgwcFALFp0yYxffp0UapUKaFSqUSzZs3E9evXNbYDoPHl6en5wtckJydHTJ06VZQtW1YolUrh6ekpJkyYIDIzM6U+z+8TgAgNDS1yn0FBQcLS0rJA++jRowUAcezYMfHo0SMxatQoUbVqVWFpaSlKlCghWrVqJaKiogqcd1HHDgoKKnB+arVaLFiwQPj4+AiVSiWcnJzEwIEDRWJiokY/T09P0bZtW/Hnn3+KevXqCZVKJby8vMTatWulPqGhoYUe/+DBg0IIIU6fPi1atmwpHBwchJmZmShTpozo27fvC1/vZ4+9d+9eUaNGDaFSqUTlypXF1q1bC/RNSkoSw4YNE6VLlxZKpVKUK1dOfPPNN0KtVkt9nv2eXbBggShbtqwwMjIS58+ff2kNhUlLSxP29vaiVKlSIi8vT2oHICZNmiQt3759W3z22WeiQoUKwszMTNjb24tu3bqJmJiYAvu8cOGCaNy4sTAzMxOlSpUS06ZNE2vWrBEANPrLuS75bt68Kbp16ybs7OyEubm58PX1FTt37izQb/HixcLHx0
eYm5sLW1tbUadOHbF+/XohhBCTJk0q9Brn11TYz2hSUpIYPny48PT0FEqlUpQqVUr06tVLPHjwoIhXW4jY2FhhbGwsWrVqVWSf573sukZERIj33ntPWFhYCBsbG9GhQwdx5coVjX0U9jPy7Hk/C4AICQkRP/30k6hQoYJQqVSidu3a4vDhw7JrptfDkSYyKOvXr0eXLl2gVCrx0UcfYfny5Th9+jTq1atXaN/s7GwMGTIEiYmJmD17Nj788EM0a9YMhw4dwrhx43Djxg0sWbIEo0ePxpo1a6RtJ0+ejClTpiAgIACfffYZoqOjpWMdO3bslUdIvvnmGxgZGWH06NFISUnB7Nmz0bNnT+kZlC+//BIpKSn4559/sGDBAgCAlZXVC/fZv39/rF27Ft26dcOoUaNw8uRJzJo1C1evXsX27dsBAD/++CNWrlyJU6dOSbfcGjZsqHX9N2/eBAA4ODjg1q1b+PXXX/HBBx/Ay8sL8fHx+P7779GkSRNcuXIFbm5uqFy5MqZOnYqJEydi4MCBaNSo0UuPPWjQIISFhaFv374YOnQoYmJisHTpUpw/f77Aa3/jxg1069YNwcHBCAoKwpo1a9CnTx/UqVMHVapUQePGjTF06FAsXrwYX3zxBSpXrgwAqFy5MhISEtCyZUs4Ojpi/PjxsLW1xe3bt7Ft2zZZr8X169fRvXt3fPrppwgKCkJoaCg++OAD7NmzBy1atAAAPHnyBE2aNMG///6LQYMGwcPDA8ePH8eECRNw//79As+uhYaGIjMzEwMHDoRKpYK9vb3sa/MsKysrdO7cGatXr8aVK1dQpUqVQvudPn0ax48fR48ePVC6dGncvn0by5cvx/vvv48rV67AwsICAPDvv/+iadOmUCgUmDBhAiwtLfHDDz8UOYrzsusCAPHx8WjYsCGePHmCoUOHwsHBAWvXrkWHDh2wZcsWdO7cGQCwatUqDB06FN26dcOwYcOQmZmJixcv4uTJk/j444/RpUsXXLt2DT///DMWLFiAkiVLAgAcHR0LrS09PR2NGjXC1atX0a9fP9SuXRsPHz7Ejh078M8//0jbP++PP/6AWq0uMLInR2HXdf/+/WjdujXKli2LyZMnIyMjA0uWLIG/vz/OnTv3ym+QOHz4MDZt2oShQ4dCpVLhu+++Q6tWrXDq1ClUrVr1lfZJWtB3aiPKd+bMGQFAhIeHCyGEyMvLE6VLlxbDhg3T6Jf/vztHR0eRnJwstU+YMEEAEDVq1BA5OTlS+0cffSSUSqU0MpOQkCCUSqVo2bKlxmjA0qVLBQCxZs0aqU3bkabKlSuLrKwsqX3RokUCgLh06ZLU1rZt25eOLuWLiooSAET//v012vNHhA4cOCC1FTV6VJj8vg8ePBAPHjwQN27cEDNnzhQKhUJUr15dCCFEZmamxusjxNPXXqVSialTp0ptp0+fLnJk6/n/Rf/5558CgDSKkG/Pnj0F2j09PQUAceTIEaktISFBqFQqMWrUKKlt8+bNGqNL+bZv3y4AiNOnT8t6TZ6Vf+xnR5ZSUlKEq6urqFWrltQ2bdo0YWlpKa5du6ax/fjx44WxsbGIjY0VQvzve9ba2lokJCTIrqGokSYhhFiwYIEAIH777TepDc+NND158qTAdpGRkQKAWLdundQ2ZMgQoVAoNEa+Hj16JOzt7QsdaZJzXYYPHy4AiD///FNqS0tLE15eXqJMmTLS91bHjh1FlSpVXvhazJkzp0Adz9bz7M/oxIkTBQCxbdu2An2fHZV73ogRIwQAjZFUIYTIysqSfk4ePHggHj58KK170XWtWbOmcHJy0hjlvnDhgjAyMhK9e/eW2rQdaQIgzpw5I7XduXNHmJmZic6dOxd5bqQ7fPccGYz169fD2dkZTZs2BfD07dP5DyWr1eoC/T/44APY2NhIy/nvtvvkk080HoT19fVFdnY2/v33XwDA/v37kZ2djeHDh8PI6H8/AgMGDIC1tTV27dr1yufQt29fjW
cZ8kdebt269Ur72717NwBg5MiRGu35Dwe/Tq2PHz+Go6MjHB0d4e3tjS+++AJ+fn7S6JVKpZJeH7VajUePHsHKygo
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# a bar plot for each diagnosis with the genders grouped side by side\n",
"fig, ax = plt.subplots()\n",
"sns.countplot(data=df_dgc_bineary, x='diag', hue='gender', ax=ax)\n",
2024-06-11 19:04:23 +02:00
"plt.title('Amount of Patients per Diagnostic Group')\n",
"plt.xlabel('Diagnostic Group')\n",
"plt.ylabel('Amount of Patients')\n",
2024-05-08 17:45:29 +02:00
"plt.show()"
]
},
2024-05-15 20:20:01 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Bar plot visualizing the distribution of diagnoses by gender, where blue is female and orange is male."
]
},
2024-05-08 17:45:29 +02:00
{
"cell_type": "code",
2024-06-11 19:04:23 +02:00
"execution_count": 13,
2024-05-08 17:45:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-06-11 19:04:23 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAABsVklEQVR4nO3dd3xN9/8H8NfNutmDLCGJELOIihX5Ejs2bZSqvSlVoyi1qjVKiV1VI/ZsjdqbllB7FLFiFIk2ZFiZ798fHvf8XBlyk3sT3Nfz8biPR87nfM457/e55577zllXJSICIiIiIiNmkt8BEBEREeU3FkRERERk9FgQERERkdFjQURERERGjwURERERGT0WRERERGT0WBARERGR0WNBREREREaPBREREREZPRZE9M6YOnUqihUrBlNTU1SsWDG/w0GXLl1QtGjR/A6D6K1y8OBBqFQqHDx4UO/zHjduHFQqld7n+zYLCwuDSqXCrVu38juU9x4Lojw2b948qFQqVKtWLb9DMYhVq1ZhxowZep/v7t27MWzYMAQGBmLJkiWYOHFipn27dOkClUqlvOzt7eHn54dp06YhMTFRp+Xev38f48aNw9mzZ3OZQc5cunQJ48aNe293hqmpqfDw8IBKpcKOHTvyOxwAwPnz59G1a1f4+PjA0tIStra2qFixIoYNG4abN2/md3jvDc0XveZlaWkJDw8PBAcHY9asWUhISMjvEPPUxIkTsWnTpjxZVmRkJPr374+SJUvC2toa1tbWKFu2LPr164fz58/nSQxvJaE8VaNGDSlatKgAkGvXruV3OHrXtGlT8fb21vt8hw8fLiYmJpKYmPjGvp07dxa1Wi3Lly+X5cuXy+zZs6V27doCQNq2bavTck+cOCEAZMmSJenGJSUlyYsXL3San67Wr18vAOTAgQMGXU5+2b17twCQokWLSvv27fM7HFmwYIGYmpqKm5ubDB48WBYsWCDz5s2Tzz//XNzc3MTc3FxSUlLyO8y32oEDB7K1zS5ZskQAyPjx42X58uWyePFimThxojRs2FBUKpV4e3vLuXPntKZJTk6W58+fGzD6/GNjYyOdO3dO156SkiLPnz+XtLQ0vSzn999/F2tra7G3t5e+ffvK/PnzZcGCBTJ48GApWrSoqFQquXXrll6W9a4xy8dazOhERkbi6NGj+O2339C7d2+sXLkSY8eOze+w3gkPHz6ElZUVLCwsstXfzMwMHTp0UIY///xzVKtWDWvXrsX06dPh4eGR65jMzc1zPY/3VUpKCtLS0t74fq1YsQKVKlVC586dMXLkSDx9+hQ2NjZ5FKW2o0ePom/fvggMDMTWrVthZ2enNX7atGmYMGHCG+fz7NkzWFtbGyrM907jxo1RuXJlZXjEiBHYv38/mjVrhhYtWuDy5cuwsrIC8PJzbWZmXF9bpqamMDU11cu8bty4gU8//RTe3t7Yt28fChUqpDX+hx9+wLx582BikvXJo/z8nBpUfldkxuS7774TJycnSUxMlL59+0qJEiXS9YmMjBQAMnXqVJkzZ474+PiIlZWVNGjQQO7cuSNpaWkyfvx4KVy4sFhaWkqLFi0kJiYm3Xzmzp0rZcuWFQsLCylUqJB8/vnn8vjxY60+3t7eGf5HEhQUJEFBQcqw5j++tWvXyvfffy+FCxcWtVotdevW1TrKFRQUJAC0Xm86WpScnCzjx4+XYsWKiYWFhXh7e8uIESO0jry8Pk9kcsRGo3PnzmJjY5Ou/auvvhIAcuTIEYmJiZEhQ4ZIuXLlxMbGRuzs7KRRo0Zy9uzZdHlntuzOnTunyy81NVVCQ0OlbNmyolarxdXVVXr16iWPHj3S6uft7S1NmzaVP/74Q6pUqSJqtVp8fHxk6dKlSh/Nf9CvvzT/eZ84cUIaNmwoBQsWFEtLSylatKh07do1y/X96rJ37dolfn5+olarpUyZMvLrr7+m6/v48WP58ssvpUiRImJhYSHFix
eXyZMnS2pqqtLn1W02NDRUihUrJiYmJnLmzJks43j27JnY2dnJlClT5MGDB2JiYiIrV67MsO+6deukTJkyolar5YMPPpDffvstV+s/Iw0bNhQzMzO5e/fuG/tqBAUFyQcffCAnT56UmjVripWVlXz55ZciIhIdHS3dunUTV1dXUavVUqFCBQkLC9OaPrOjKZp1+up2rtmub9y4IQ0bNhRra2spVKiQfPvtt9k6erBp0yZp0qSJFCpUSCwsLKRYsWIyfvz4dEe8NDn9/fffUrt2bbGyshIPDw/54Ycf0s3z7t270rJlS7G2thYXFxcZOHCg7Ny5U6cjRCdOnMhw/MSJEwWALFiwQGkbO3asvP61tXjxYqlTp464uLiIhYWFlClTRubNm5dufqmpqTJ27FgpVKiQWFlZSe3ateXvv/9Otx/UxPXnn3/KoEGDxNnZWaytraVVq1by8OHDdPPNzr726tWr8vHHH4ubm5uo1WopXLiwtG3bVmJjY0Uk432cJiZNPJGRkVrz3L59u9SqVUtsbW3Fzs5OKleunOnnR6NXr14CQI4dO5Zlv1dptrvr169L48aNxdbWVlq2bCkiIk+ePJHBgwcr+4eSJUvK1KlTtbbHjLZlDQAyduxYZVjz/l6+fFk++eQTsbOzkwIFCsiAAQPy5MggC6I8VLp0aenevbuIiBw+fFgAyF9//aXVR7PxVKxYUcqWLSvTp0+XUaNGiYWFhVSvXl1GjhwpNWrUkFmzZsmAAQNEpVKl+xLUbFT169eX2bNnS//+/cXU1FSqVKkiSUlJSj9dC6IPP/xQ/P39JTQ0VMaNGyfW1tZStWpVpd/u3bulYsWK4uzsrJyu2rhxY5brpHPnzgJAWrduLXPnzpVOnToJAGnVqpXSZ/ny5VKzZk2t02A3btzIcp4ZFUQfffSRAJArV67IiRMnpHjx4vL111/Lzz//rBSZDg4Ocu/ePRERiYqKkvHjxwsA6dWrV7plZ/SF3KNHDzEzM5OePXvK/PnzZfjw4WJjY5Phui9VqpS4ubnJyJEjZc6cOVKpUiVRqVRy8eJFERG5ceOGDBgwQADIyJEjleVHRUVJdHS0ODk5KTugX375Rb755hspU6ZMlutbs+ySJUuKo6OjfP311zJ9+nQpX768mJiYyO7du5V+T58+lQoVKkjBggVl5MiRMn/+fOnUqZOoVCrlS1/k/7fZsmXLSrFixWTy5MkSGhoqt2/fzjKONWvWiEqlkjt37oiISN26daVJkybp+m3dulVUKpVUqFBBpk+fLqNHjxYnJycpV65cjtf/654+fSpmZmZSv379N66/VwUFBYm7u7u4uLjIF198IT///LNs2rRJnj17JmXKlBFzc3MZNGiQzJo1S2rWrCkAZMaMGcr0uhZElpaWUqJECenYsaPMmTNHmjVrJgBk9OjRb4y1VatW0qZNG5k6dar89NNP8sknnwgA+eqrr9Ll5OHhIZ6envLll1/KvHnzpG7dugJAtm/frvR79uyZlCxZUiwtLWXYsGEyY8YM8ff3lwoVKuilILp7966yb9DIqCCqUqWKdOnSRUJDQ2X27NnSsGFDASBz5szR6jds2DABIM2bN5c5c+ZIz549pUiRIuLs7JxhQfThhx9K3bp1Zfbs2TJkyBAxNTWVNm3aaM0zO/vaxMRE8fHxEQ8PD/n+++9l4cKF8u2330qVKlWUU1PLly8XtVotNWvWVD7nR48e1Yrn1YJoyZIlolKppFy5cjJhwgSZO3eu9OjRQzp27JjlOvfw8BBfX98s+7xOcwlC8eLFpXPnzjJ//nxZtmyZpKWlSd26dUWlUkmPHj1kzpw50rx5cwEgAwcOVKbPSUFUvnx55X3q0KGDAHhjbvrAgiiPnDx5UgDInj17REQkLS1NihQpovXFIvL/G4+Li4vy34OIyIgRIwSA+Pn5SXJystLerl07sbCwUI6oPHz4UCwsLKRhw4Za/8XPmTNHAMjixYuVNl0LojJlymhdwz
Nz5kwBIBcuXFDadLmG6OzZswJAevToodWuOZKzf/9+pS2zIicjmr7//vuv/Pvvv3L9+nWZOHGi8qUqIvLixQut9SP
2024-05-08 17:45:29 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# bar plot of how many diagnoses of each type fall into each age group\n",
"fig, ax = plt.subplots()\n",
"sns.countplot(data=df_dgc, x='age_group', hue='diag', ax=ax)\n",
2024-06-11 19:04:23 +02:00
"plt.title('Amount of Patients per Age Group and Diagnostic Group')\n",
"plt.xlabel('Age Group')\n",
"plt.ylabel('Amount of Patients')\n",
2024-05-08 17:45:29 +02:00
"plt.show()"
]
2024-05-15 20:20:01 +02:00
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Barplot to show the distribution of diagnoses based on the age groupings. The difference in the incidence of the various diseases can be clearly seen here."
]
2024-05-08 17:45:29 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-06-11 19:04:23 +02:00
"version": "3.10.4"
2024-05-08 17:45:29 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}