Erweiterungen
parent
2a8916acdd
commit
fedf321bc7
|
@ -5,7 +5,7 @@
|
|||
"metadata": {},
|
||||
"source": [
|
||||
"# Demographic Plots\n",
|
||||
"This Notebook is used to read the data from the pickle files and to create a dataframe with the demographic data.\n",
|
||||
"This notebook is used to read the data from the pickle files and to create a dataframe with the demographic data.\n",
|
||||
"With this data we can create a plots to show the distribution of the demographic data."
|
||||
]
|
||||
},
|
||||
|
@ -30,7 +30,8 @@
|
|||
"source": [
|
||||
"# Set path to data\n",
|
||||
"path = \"C:/Studium/dsa/data\"\n",
|
||||
"#path = \"C:/Users/Nils/Documents/HS-Mannheim/0000_MASTER/DSA/EKG_Prog/data\""
|
||||
"#path = \"C:/Users/Nils/Documents/HS-Mannheim/0000_MASTER/DSA/EKG_Prog/data\"\n",
|
||||
"#C:\\Users\\klara\\projects\\DSA\\a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hypothesis\n",
|
||||
"This notebook is used to read the data from the pickle files and to test the hypothesis that in the age group of 60-70 the frequency of a sinus bradycardia is significantly higher than in the other age groups.\n",
|
||||
"For that instance the chi-squared test is used."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import pickle\n",
|
||||
"from scipy.stats import chi2_contingency\n",
|
||||
"from data_helper import *\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Reading SB\n",
|
||||
"Length of SB: 50\n",
|
||||
"Reading AFIB\n",
|
||||
"Length of AFIB: 27\n",
|
||||
"Reading GSVT\n",
|
||||
"Length of GSVT: 0\n",
|
||||
"Reading SR\n",
|
||||
"Length of SR: 13\n",
|
||||
"Chi-Square Statistic: 38.266574797751275\n",
|
||||
"P-value: 0.0004730210823940083\n",
|
||||
"Chi-Square Statistic for SB in 60-70 vs others: 1.4858035714285718\n",
|
||||
"P-value for SB in 60-70 vs others: 0.22286870264719977\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#path = \"C:/Studium/dsa/data\"\n",
|
||||
"#path = \"C:/Users/Nils/Documents/HS-Mannheim/0000_MASTER/DSA/EKG_Prog/data\"\n",
|
||||
"path = \"C:/Users/klara/projects/DSA/data\"\n",
|
||||
"\n",
|
||||
"categories_dict = {\n",
|
||||
"'SB': [426177001],\n",
|
||||
"'AFIB': [164889003, 164890007],\n",
|
||||
"'GSVT': [426761007, 713422000, 233896004, 233897008, 713422000],\n",
|
||||
"'SR': [426783006, 427393009]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"data = {}\n",
|
||||
"for cat_name in categories_dict.keys():\n",
|
||||
" print(f\"Reading {cat_name}\")\n",
|
||||
" with open(f'{path}/{cat_name}.pkl', 'rb') as f:\n",
|
||||
" records = pickle.load(f)\n",
|
||||
" data[cat_name] = records\n",
|
||||
" print(f\"Length of {cat_name}: {len(records)}\")\n",
|
||||
"\n",
|
||||
"data_demographic = {'age':[], 'diag':[], 'gender':[]}\n",
|
||||
"for cat_name, records in data.items():\n",
|
||||
" for record in records:\n",
|
||||
" age = record.comments[0].split(' ')[1]\n",
|
||||
" sex = record.comments[1].split(' ')[1]\n",
|
||||
" if age == 'NaN' or sex == 'NaN':\n",
|
||||
" continue\n",
|
||||
" # cut Age: from alter string \n",
|
||||
" data_demographic['age'].append(int(age))\n",
|
||||
" data_demographic['diag'].append(cat_name)\n",
|
||||
" data_demographic['gender'].append(sex)\n",
|
||||
"\n",
|
||||
"df_dgc = pd.DataFrame(data_demographic)\n",
|
||||
"\n",
|
||||
"# Change from group to category\n",
|
||||
"age_categories = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]\n",
|
||||
"df_dgc['age_group'] = pd.cut(df_dgc['age'], bins=age_categories)\n",
|
||||
"corr_matrix_age_diag= pd.crosstab(df_dgc['age_group'], df_dgc['diag'])\n",
|
||||
"\n",
|
||||
"# Chi-square test\n",
|
||||
"chi2, p, _, _ = chi2_contingency(corr_matrix_age_diag)\n",
|
||||
"\n",
|
||||
"# Difference between observed and expected frequencies\n",
|
||||
"print(f\"Chi-Square Statistic: {chi2}\")\n",
|
||||
"print(f\"P-value: {p}\")\n",
|
||||
"\n",
|
||||
"# Check if SB (Sinusbradykardie) has a significantly higher frequency in the 60-70 age group\n",
|
||||
"sb_60_70 = corr_matrix_age_diag.loc[pd.Interval(60, 70, closed='right'), 'SB']\n",
|
||||
"sb_other = corr_matrix_age_diag.drop(pd.Interval(60, 70, closed='right')).sum()['SB']\n",
|
||||
"total_60_70 = corr_matrix_age_diag.loc[pd.Interval(60, 70, closed='right')].sum()\n",
|
||||
"total_other = corr_matrix_age_diag.drop(pd.Interval(60, 70, closed='right')).sum().sum()\n",
|
||||
"\n",
|
||||
"# Frequency table for the specific Chi-Square test\n",
|
||||
"observed = [[sb_60_70, total_60_70 - sb_60_70], [sb_other, total_other - sb_other]]\n",
|
||||
"chi2_sb, p_sb = chi2_contingency(observed)[:2]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(f\"Chi-Square Statistic for SB in 60-70 vs others: {chi2_sb}\")\n",
|
||||
"print(f\"P-value for SB in 60-70 vs others: {p_sb}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The results can be interpreted as followed:\n",
|
||||
"\n",
|
||||
"- The first value returned is the Chi-Square Statistic that shows the difference between the observed and the expected frequencies. Here, a bigger number indicates a bigger difference. The p-value shows the probability of this difference being statistically significant. If the p-value is below the significance level of 0.05, the difference is significant.\n",
|
||||
"\n",
|
||||
"- The Chi-Square Statistic for sinus bradycardia in the age group 60-70 compared to the other age groups, is a value that shows whether there is a significant difference in the frequency of sinus bradycardia in the age group 60-70 in comparison to the other age groups. If the p-value is smaller than the significance level of 0.05, the difference in the frequency between the age group 60-70 and the other age groups is significant."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue