# Bin patient ages into right-closed decades, (0, 10] ... (80, 90], and
# cross-tabulate age group against diagnosis to get a table of observed counts.
# NOTE(review): with these edges pd.cut yields NaN for age 0 and for ages
# above 90, so such rows are absent from the table -- confirm this is intended.
age_categories = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]
df_dgc['age_group'] = pd.cut(df_dgc['age'], bins=age_categories)
# Despite its name this is a contingency table of counts, not a correlation
# matrix; the name is kept because other cells may refer to it.
corr_matrix_age_diag = pd.crosstab(df_dgc['age_group'], df_dgc['diag'])

# Omnibus chi-square test of independence between age group and diagnosis.
chi2, p, _, _ = chi2_contingency(corr_matrix_age_diag)

print(f"Chi-Square Statistic: {chi2}")
print(f"P-value: {p}")

# Follow-up 2x2 test: AFIB (atrial fibrillation / atrial flutter) vs. all other
# diagnoses, in the (70, 80] age group vs. all other age groups.
# The chi-square test is two-sided: a small p-value shows the AFIB share
# differs between the two groups, not by itself that it is higher in (70, 80].
interval_70_80 = pd.Interval(70, 80, closed='right')
row_70_80 = corr_matrix_age_diag.loc[interval_70_80]   # raises KeyError if the group is empty
rows_other = corr_matrix_age_diag.drop(interval_70_80)

afib_70_80 = row_70_80['AFIB']
afib_other = rows_other['AFIB'].sum()
total_70_80 = row_70_80.sum()
total_other_70_80 = rows_other.sum().sum()

# Rows: (70, 80] group, every other group. Columns: AFIB count, non-AFIB count.
observed = [
    [afib_70_80, total_70_80 - afib_70_80],
    [afib_other, total_other_70_80 - afib_other],
]
# For a 2x2 table chi2_contingency applies Yates' continuity correction by default.
chi2_afib, p_afib = chi2_contingency(observed)[:2]

print(f"Chi-Square Statistic for AFIB in 70-80 vs others: {chi2_afib}")
print(f"P-value for AFIB in 70-80 vs others: {p_afib}")