DSA_SS24/notebooks/cluster_features.ipynb

551 lines
5.3 MiB
Plaintext
Raw Permalink Normal View History

{
"cells": [
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import matplotlib.cm as cm\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import sqlite3\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"from pandas.plotting import parallel_coordinates\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, silhouette_samples, silhouette_score\n",
"from sklearn.metrics.pairwise import euclidean_distances\n",
"from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Import Data from Database"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# connect to the database\n",
"conn = sqlite3.connect('../features.db')\n",
"c = conn.cursor()\n",
"# get training, validation and test data\n",
"train = pd.read_sql_query(\"SELECT * FROM train\", conn)\n",
"valid = pd.read_sql_query(\"SELECT * FROM validation\", conn)\n",
"test = pd.read_sql_query(\"SELECT * FROM test\", conn)\n",
"# close the connection\n",
"conn.close()"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature names: ['age', 'gender', 'artial_rate', 'ventricular_rate', 'qrs_duration', 'qt_length', 'qrs_count', 'q_peak', 'r_axis', 't_axis']\n",
"Label names: ['GSVT', 'AFIB', 'SR', 'SB']\n"
]
}
],
"source": [
"feature_names = train.columns[2:]\n",
"print('Feature names:', list(feature_names))\n",
"\n",
"with open('../settings.json', 'r') as f:\n",
" settings = json.load(f)\n",
"label_names = list(settings['labels'].keys())\n",
"print('Label names:', label_names)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Prepare Data for CLustering"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train_x shape: (4378, 10)\n",
"features: ['id', 'age', 'gender', 'artial_rate', 'ventricular_rate', 'qrs_duration', 'qt_length', 'qrs_count', 'q_peak', 'r_axis', 't_axis']\n",
"number of classes: 4\n"
]
}
],
"source": [
"# get the target and features\n",
"train_y = train['y']\n",
"train_y = train_y.map({'GSVT': 0, 'AFIB': 1, 'SR': 2, 'SB': 3})\n",
"train_x = train.drop(columns=['y'])\n",
"\n",
"valid_y = valid['y']\n",
"valid_y = valid_y.map({'GSVT': 0, 'AFIB': 1, 'SR': 2, 'SB': 3})\n",
"valid_x = valid.drop(columns=['y'])\n",
"\n",
"test_y = test['y']\n",
"test_y = test_y.map({'GSVT': 0, 'AFIB': 1, 'SR': 2, 'SB': 3})\n",
"test_x = test.drop(columns=['y'])\n",
"\n",
"# add train validation and test data wit concat\n",
"data_x = pd.concat([train_x, valid_x, test_x], axis=0)\n",
"data_y = pd.concat([train_y, valid_y, test_y], axis=0)\n",
"\n",
"# drop id column\n",
"data_x = data_x.drop(columns=['id'])\n",
"print('train_x shape:', data_x.shape)\n",
"\n",
"\n",
"# dealing with missing values\n",
"# Create an imputer object with a mean filling strategy\n",
"imputer = SimpleImputer(strategy='mean')\n",
"data_x = imputer.fit_transform(data_x)\n",
"\n",
"# Scale Data between 0 and 1\n",
"scaler = MinMaxScaler()\n",
"# Fit the scaler to your data and then transform it\n",
"data_x = scaler.fit_transform(data_x)\n",
"# convert to Series\n",
"data_x = pd.DataFrame(data_x)\n",
"\n",
"# print column names\n",
"print('features:', train_x.columns.to_list())\n",
"num_classes= len(set(valid_y.to_list()))\n",
"print('number of classes:', num_classes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cluster Data with K-means"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqUAAAHHCAYAAACGDCH+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAACOB0lEQVR4nOzdd1hT1xsH8G/CCBsEGaKCiDhQcaAibgVFxb1HFfcoah211rq1FfdedVSto+7Rah24F+6Fe4uDpcgQ2bm/P/iRGgEFDdwkfD8+eR5z7snNewfJm3PuOVciCIIAIiIiIiIRScUOgIiIiIiISSkRERERiY5JKRERERGJjkkpEREREYmOSSkRERERiY5JKRERERGJjkkpEREREYmOSSkRERERiY5JKRERERGJTuOS0ocPH6JJkyYwNzeHRCLBnj17VLr+Z8+eQSKRYN26dSpdryZr0KABGjRokO/vm5qaip9++gnFixeHVCpFmzZtvnmdJ06cgEQiwYkTJ755XRnWrVsHiUSCZ8+eKco+3Wd5dV5NnjwZEolEpevMSsY2Xr58Oc/fi+hj+XWOF1R5uX/F+u7Ia8wT8s5XJaWPHz/GwIEDUbJkSRgYGMDMzAy1a9fGwoULkZCQoOoYlfj5+SE4OBi//fYbNmzYgGrVquXp++WnXr16QSKRwMzMLMv9+PDhQ0gkEkgkEsyZMyfX63/9+jUmT56M69evqyDavPfHH39g9uzZ6NChA9avX48RI0ZkW1cul+PPP/+Eh4cHLC0tYWpqitKlS6Nnz544f/58PkYtvunTp6v8x1pe+PfffzF58mSxw8gzGV9cn/6tCoKAgQMHQiKRKLY/48eSRCLBxo0bs1xf7dq1IZFIUKFChbwOPV8kJiZi/vz58PDwgLm5OQwMDFC6dGkMGTIEDx48yLc4Nm/ejAULFuTb+2mqjO+njIeJiQlKliyJDh06YOfOnZDL5WKHqNFu3ryJ3r17w8nJCQYGBjAxMUHlypXx008/4cmTJ2KHl3+EXNq3b59gaGgoWFhYCMOGDRNWrlwpLFmyROjSpYugp6cn9O/fP7erzLEPHz4IAIRx48bl2XvI5XIhISFBSE1NzbP3yI6fn5+gq6sr6OjoCFu3bs20fNKkSYKBgYEAQJg9e3au13/p0iUBgLB27dpcvS4pKUlISkrK9ft9q86dOwtFixbNUV1/f38BgNC6dWth4cKFwtKlS4Vhw4YJLi4uwqRJkxT10tLShISEBCEtLU1lcaampgoJCQmCXC5XlNWvX1+oX7++4nlenVcpKSlCQkKCUpmxsbHg5+en0vdZu3atAEC4dOmSytaZccy01dOnTzP9rcrlcmHQoEECAGHChAmK8uPHjwsABAMDA6FZs2bZrsvAwEAoX758vsSflyIjIwV3d3cBgNCiRQthwYIFwurVq4XRo0cLxYsXF/T09BR1J02alKfnia+vr+Do6Jhn61d3Od2/fn5+gkwmEzZs2CBs2LBBWLlypTBu3DjBzc1NACA0aNBAiImJUXqNWN8deU3Vn+crV64UdHR0BFtbW2HkyJHCypUrhWXLlgnff/+9YGtrK+jp6YmSk4hBNzcJ7NOnT9GlSxc4Ojri2LFjKFKkiGKZv78/Hj16hP3796sgVc5aZGQkAMDCwiLP3kMikcDAwCDP1v8lMpkMtWvXxl9//YVOnTopLdu8eTN8fX2xc+fOfInlw4cPMDIygr6+fr6836ciIiJydKzDw8OxbNky9O/fHytXrlRatmDBAsV5AwBSqVTlx1dHRwc6OjqfraPq8yo+Ph7GxsbQ1dWFrm6u/oxJREOHDsWKFSswbtw4TJ06NdPy5s2b4++//8abN29QuHBhRfnmzZtha2sLFxcXvHv3Lj9DzhO9evXCtWvXsGPHDrRv315p2bRp0zBu3DiRIlMNuVyO5ORkUb9L8oKuri6+++47pbJff/0VM2bMwNixY9G/f39s3bpVsUys7468ps
rP83PnzmHw4MGoXbs29u3bB1NTU6Xlc+fOxW+//fbF9WR8X2u83GSwGb/wz549m6P6KSkpwtSpU4WSJUsK+vr6gqOjozB27FghMTFRqZ6jo6Pg6+srnD59Wqhevbogk8kEJycnYf369Yo6Gb/mPn5k/Lr18/PL8pduVr8ADx8+LNSuXVswNzcXjI2NhdKlSwtjx45VLM9okfi0NfHo0aNCnTp1BCMjI8Hc3Fxo1aqVcOfOnSzf7+HDh4Kfn59gbm4umJmZCb169RLi4+O/uL/8/PwEY2NjYd26dYJMJhPevXunWHbx4kUBgLBz585MrS9v374VRo0aJVSoUEEwNjYWTE1NhaZNmwrXr19X1Mloifn0kbGd9evXF8qXLy9cvnxZqFu3rmBoaCj88MMPimUft/r17NlTkMlkmba/SZMmgoWFhfDq1avPbuf79++FkSNHCsWKFRP09fWF0qVLC7Nnz1a0NGYcg08fx48fz3J9QUFBAgBh3bp1X9jD/+2Hj9eVse03btwQ6tWrJxgaGgrOzs7C9u3bBUEQhBMnTgg1atQQDAwMhNKlSwuBgYFK68xoRXz69KnSOj/eZ1mdVzdu3BD8/PwEJycnQSaTCba2tkLv3r2FN2/eKK0/47y6ffu20LVrV8HCwkKoXLmy0rIMWe03Pz8/4dixYwIAYdeuXZn2yaZNmwQAwrlz57LdbxnbePLkSWHAgAGCpaWlYGpqKvTo0UOIiorKVP/ff/9V/L2YmJgIzZs3F27duqVY7ufnl2WsgiAIVapUEdq2bau0vgoVKggAhBs3bijKtmzZIgBQOg9fvnwp9O7dW7CxsRH09fUFV1dXYc2aNZniS0xMFCZOnCg4OzsL+vr6QrFixYTRo0dn+mwCIPj7+wu7d+8Wypcvr1jngQMHst1XGT5tKR02bJgAQOnzJkPGebl+/XrB2NhYWLZsmdLy8uXLC0OHDlWcq5/asGGDULVqVcHAwEAoVKiQ0LlzZyEkJESpzqlTp4QOHToIxYsXV2zz8OHDhQ8fPijVy/gcevnypdC6dWvB2NhYKFy4sDBq1KhMrTV//fWXULVqVcHExEQwNTUVKlSoICxYsOCz++X8+fMCgBz3qn16jmf3GS0I6cfr456R2NhY4YcffhAcHR0FfX19wdraWvD29hauXLkiCEL632l23yuCkPvzZOPGjYKrq6ugq6sr7N69+6v3kSAIwuzZswVPT0/B0tJSMDAwEKpWrar4TMrqvXNyjp4+fVqoVq2aIJPJhJIlSworVqzIVUupsbFxtsubNGkiSCQS4f79+4qyTz8Hk5KShAkTJghVq1YVzMzMBCMjI6FOnTrCsWPHMq3vzZs3wnfffSeYmpoK5ubmQs+ePYXr169nOva5OV+/9N2T4WvyhNDQUKFXr15C0aJFBX19fcHOzk5o1aqV0vdCdvtNV1dXePHixWfrfexz39fh4eFCnz59BBsbG0Emkwlubm6Zvhuz+h7Mbrsy9u/jx4+FJk2aCEZGRkKRIkWEKVOmZNpvX3uufyxXTSz//PMPSpYsiVq1auWofr9+/bB+/Xp06NABo0aNwoULFxAQEIC7d+9i9+7dSnUfPXqEDh06oG/fvvDz88Mff/yBXr16wd3dHeXLl0e7du1gYWGBESNGoGvXrmjevDlMTExyEz5u376NFi1awM3NDVOnToVMJsOjR49w9uzZz77uyJEjaNasGUqWLInJkycjISEBixcvRu3atXH16lWUKFFCqX6nTp3g5OSEgIAAXL16FatXr4aNjQ1mzpyZozjbtWuHQYMGYdeuXejTpw+A9JaSsmXLomrVqpnqP3nyBHv27EHHjh3h5OSE8PBw/P7776hfvz7u3LkDe3t7lCtXDlOnTsXEiRMxYMAA1K1bFwCUjuXbt2/RrFkzdOnSBd999x1sbW2zjG/hwoU4duwY/Pz8EBQUBB0dHfz+++84fPgwNmzYAHt7+2y3TRAEtGrVCsePH0ffvn1RuXJlHD
p0CKNHj8arV68wf/58WFtbY8OGDfjtt9/w/v17BAQEAADKlSuX5TodHR0BANu3b0fHjh2/6tfiu3fv0KJFC3Tp0gU
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"kmeans = KMeans(n_clusters=num_classes, random_state=42)\n",
"clusters = kmeans.fit_predict(data_x)\n",
"\n",
"corr_ma = confusion_matrix(data_y, clusters)\n",
"sns.heatmap(corr_ma, annot=True, fmt=\"d\", cmap='Blues', yticklabels=label_names)\n",
"plt.xlabel('K-Means Clusters')\n",
"plt.ylabel('Diagnosis Group Labels')\n",
"plt.title('Confusion Matrix of Similiarity between KMeans Clusters and Diagnosis Groups')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cluster 0: 1464 patients\n",
"Cluster 1: 1772 patients\n",
"Cluster 2: 579 patients\n",
"Cluster 3: 563 patients\n"
]
}
],
"source": [
"# Initialize a dictionary to count patients in each cluster\n",
"cluster_patient_count = {i: 0 for i in range(kmeans.n_clusters)}\n",
"\n",
"# Iterate over the assigned clusters and increment the count for each cluster\n",
"for cluster in clusters:\n",
" cluster_patient_count[cluster] += 1\n",
"\n",
"# Print the number of patients in each cluster\n",
"for cluster, count in cluster_patient_count.items():\n",
" print(f\"Cluster {cluster}: {count} patients\")"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABmkAAAJwCAYAAABicYUDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVfvw8e+WbEnvDUIKCb2HXgSEEDoqiFhoAoogiIoK+lOKII9KUZCiIh0boKigiCA8VEU6SoeElpCEVFI3uzvvH3mzD0sSCJgQyv25rlywM2dmzpnZmZ17zpxzVIqiKAghhBBCCCGEEEIIIYQQQog7Sl3RGRBCCCGEEEIIIYQQQgghhHgQSSWNEEIIIYQQQgghhBBCCCFEBZBKGiGEEEIIIYQQQgghhBBCiAoglTRCCCGEEEIIIYQQQgghhBAVQCpphBBCCCGEEEIIIYQQQgghKoBU0gghhBBCCCGEEEIIIYQQQlQAqaQRQgghhBBCCCGEEEIIIYSoAFJJI4QQQgghhBBCCCGEEEIIUQGkkkYIIYQQQgghhBBCCCGEEKICSCWNEGVkyZIlqFQqYmNjKzor5ebflHHr1q2oVCq2bt1a5vm6lkqlYuLEieW6jeuFhITQvXv3Ml1nRZTj1KlTdOrUCTc3N1QqFWvXrr2j2/83QkJCGDRoUEVn41+bOHEiKpWqorMBQGxsLCqViiVLltim3U35Kyt36tpUnIrenxcuXMBgMLBz584Ky0Nx+vXrR9++fSs6G0IIIW7RgxAPiQIVfQ9ztzObzbz++usEBQWhVqt55JFHKjpLpXa/HNviYpmK1K5dO9q1a2f7fLflr6xUVFx+N+zPrl27MmzYsArbfnE2bNiAs7MzSUlJFZ0VUQpSSSPue4XBQuGfwWCgWrVqvPjiiyQkJNzy+t57770Ke3idnZ3NxIkTb/lh4vnz5xk+fDghISHo9Xp8fX155JFH7roHc3dK4Q/49OnTKzord5WBAwdy5MgRpk6dyvLly2ncuDFffvklH3300R3NR7t27Wznq1qtxtXVlerVq9O/f39+++23O5oXcXPlcU0srDgp/NNoNPj6+tKnTx+OHTtWptsqb7d73b4TJk+eTLNmzWjVqpVt2qBBg+z2vaurK/Xr12fGjBnk5eUVWcfBgwd55plnCAoKQq/X4+npSceOHVm8eDEWi6VI+rS0NAwGAyqVqsRj+cYbb7BmzRoOHTpUdoUVQogH2IMcDxXeU6xevdpuuslkonv37qjVahYtWnTDZVUqFStWrCg2TatWrVCpVNSpU+eWynG3ys3NZdasWTRr1gw3Nze778rJkyfvWD4qIgYpS4sWLeLDDz+kT58+LF26lJdffpmjR48yceLEO1qJWdy5HxgYSHR0NLNnz+bq1at3LC/i5n7++edyeVEzJCTE7nvg5ORE06ZNWbZsWZlvq7zdrdeGnTt3snHjRt544w3btOtjWgcHB8LCwhgwYABnz54tso6MjAwmTZpE/fr1cXZ2xmg0UqdOHd544w3i4uKK3W7fvn1RqVR2271W586dCQ8PZ9q0aWVTUFG+FCHuc4sXL1YAZfLkycry5cuVzz//XBk4cKCiVquV0NBQJSsr65bW5+TkpAwcOLDIdLPZrOTk5ChWq7WMcl5UUlKSAigTJkwo9TI7duxQXF1dFVdXV+WVV15RFi5cqEyZMkUJDw9XVCqVMnv27FKv69+U0WKxKDk5OYrFYrnlZW9FafZPTEyMAigffvhhmWwzODhY6datW5msq9CtHud/Kzs7WwGUt956y256t27dlODg4DuWD0VRlLZt2yqVK1dWli9frixfvlxZsGCBMnbsWCUsLEwBlL59+yomk8lumdzc3CLT7kX5+flKTk5ORWdDUZT/nSeLFy+2TSsufyVdE/+NLVu2KIAyevRoZfny5cqiRYuUMWPGKAaDQfHy8lLi4+PLfFtbtmwps3Ve60bX7Yo83omJiYqDg4
Py5Zdf2k0fOHCgotfrbeffnDlzlHbt2imA8sQTT9il/fzzzxWNRqMEBgYqb7zxhrJw4UJl1qxZSvfu3RWVSqVMnTq1yHY/++wzxWAwKP7+/kWuN9dq2rSp0r9//7IprBBCPOAe5Hio8Hd+1apVtmkmk0np0aOHolKplIULF950WYPBoHTp0qXI/MJ7JYPBoNSuXfuWy3K3SUpKUiIjIxVA6d69u/LRRx8pCxcuVF577TUlKChIcXBwsKWdMGGCUp6PkyoiBilLTzzxhFKpUiW7aatWrSrXe87iXH/uL1q0SHnvvfeUTp06KSqVSgkODlYOHTpkt8zdFI/8G1arVcnJyVHMZnNFZ0VRlIIYt23btrbPxeVv5MiR5XJeBQcHKw0aNLDd33/wwQdKtWrVFED57LPPynxbZR0bXquka0NFH+9evXopnTp1sptWXEz74osvKjqdTvH09FQuXbpkS3vmzBklNDRU0Wg0Sr9+/ZRPPvlE+eyzz5QXX3xR8fLyUiIiIopsMz09XTEYDEpISIgSFBRU4m/vvHnzFEdHRyUjI6NsCy3KnPbOVQcJUbG6dOlC48aNARg6dCheXl7MnDmTH374gSeffPJfr1+j0aDRaP71espSamoqffr0wWg0snPnTqpWrWqb98orrxAdHc2YMWOIjIykZcuWJa4nKysLJyenf1VGtVqNwWC4rWVF+Sts/uru7l7u27JarZhMpht+H9zc3HjmmWfspv3nP/9h9OjRzJs3j5CQEN5//33bPL1eX275vZO0Wi1a7d3703yn89emTRv69Olj+1y9enVeeOEFli1bxuuvv37H8lFeKvJ4r1ixAq1WS48ePYrM02q1duffiBEjaNasGd988w0zZ84kMDCQP/74g+HDh9OiRQt+/vlnXFxcbOnHjBnD3r17+fvvv4vdbteuXQkODubLL79kypQpxeavb9++TJgwgXnz5uHs7FwGJRZCCPEgxkPXy8/Pp2/fvqxbt45PP/2UIUOG3HSZrl278uOPP3LlyhW8vb1t07/88kv8/PyIiIggNTW1PLN9RwwaNIgDBw6wevVqevfubTfv3Xff5a233qqgnJWN0sQgZSUxMfGOxFXwv1j9Rq499wHGjx/P77//Tvfu3enZsyfHjh3DaDQCd388UlqFLYfuVnc6f5UqVbK7vx80aBBhYWHMmjXrruui63ZU5PFOTExk/fr1LFiwoNj518a0gwcPplq1aowePZqlS5cyfvx4zGYzjz32GAkJCWzdupXWrVvbLT916lS7Zx+F1qxZg8ViYdGiRTz88MNs27aNtm3bFknXu3dvRo0axapVq3j22WfLoMSivEh3Z+KB9fDDDwMQExMDwPTp02nZsiVeXl4YjUYiIyOLNItXqVRkZWWxdOlSW5PFwv42S+qD+ZdffqFNmzY4OTnh4uJCt27d+Oeff+zSDBo0CGdnZy5dusQjjzyCs7MzPj4+jB071tZdTGxsLD4+PgBMmjTJtv0bNYf99NNPuXz5Mh9++KFdBQ2A0Wi0lWPy5Mm26YXl+O9//8uIESPw9fWlcuXKJZbRarUyceJEAgMDcXR0pH379hw9erRIX6TFjfvQrl076tSpw9GjR2nfvj2Ojo5UqlSJDz74wC6vJpOJd955h8jISNzc3HBycqJNmzZs2bKlxLKXhcWLF/Pwww/j6+uLXq+nVq1azJ8/v8T0GzdupEGDBhgMBmrVqsV3331XJE1aWhpjxoyxdQ0UHh7O+++/j9VqvWFerl69ypgxY+y6rIuKimL//v03XO7cuXOMGDGC6tWrYzQa8fLy4vHHH7c7hhMnTiQ4OBiA1157DZVKRUhICO3atWP9+vWcO3fO9n0LCQmxLZeXl8eECRMIDw9Hr9cTFBTE66+/XqRLJJVKxYsvvsjKlSupXbs2er2eDRs23DDfxdFoNMyePZtatWrxySefkJ6ebpt3/fet8Lu6Y8cORo8ejY+PD+7u7jz//POYTCbS0tIYMGAAHh
4eeHh48Prrr6Moit32rFYrH330EbVr18ZgMODn58fzzz9fJAgvHJNox44dNG3aFIPBQFhYWJHm4/n5+UyaNImIiAg
"text/plain": [
"<Figure size 2000x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Apply PCA to reduce to 2 dimensions\n",
"pca = PCA(n_components=2)\n",
"reduced_data = pca.fit_transform(data_x)\n",
"\n",
"# Create a figure with two subplots\n",
"fig, axs = plt.subplots(1, 2, figsize=(20, 7))\n",
"\n",
"# First subplot for patient labels after PCA reduction\n",
"for i in range(len(set(data_y))):\n",
" cluster_data = reduced_data[data_y == i]\n",
" axs[0].scatter(cluster_data[:, 0], cluster_data[:, 1], label=f'Label {label_names[i]}', alpha=0.7, edgecolors='w')\n",
"\n",
"axs[0].set_title('Patient Original Labels after Dimensionality Reduction (PCA)')\n",
"axs[0].set_xlabel('Reduced Dimension 1')\n",
"axs[0].set_ylabel('Reduced Dimension 2')\n",
"axs[0].legend()\n",
"\n",
"# Second subplot for patient clusters after PCA reduction\n",
"for i in range(kmeans.n_clusters):\n",
" cluster_data = reduced_data[clusters == i]\n",
" axs[1].scatter(cluster_data[:, 0], cluster_data[:, 1], label=f'Cluster {i}', alpha=0.7, edgecolors='w')\n",
"\n",
"axs[1].set_title('Patient K-Means Clusters after Dimensionality Reduction (PCA)')\n",
"axs[1].set_xlabel('Reduced Dimension 1')\n",
"axs[1].set_ylabel('Reduced Dimension 2')\n",
"axs[1].legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABkgAAAKyCAYAAACezlJSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeVxU1f8/8NcdGGaGZRhWWUQWAVHcEsUFTc0Fd/vkki1uqeWSpmWa9s0dyXIpzbTS1NQ2taw0Sy39uLSYqYgbAgoqoCgw7DDb+f3B797PDDPAgDOs7+fj4aOYuXPvubOe932f8z4cY4yBEEIIIYQQQgghhBBCCCGkCRHVdQMIIYQQQgghhBBCCCGEEEJqGyVICCGEEEIIIYQQQgghhBDS5FCChBBCCCGEEEIIIYQQQgghTQ4lSAghhBBCCCGEEEIIIYQQ0uRQgoQQQgghhBBCCCGEEEIIIU0OJUgIIYQQQgghhBBCCCGEENLkUIKEEEIIIYQQQgghhBBCCCFNDiVICCGEEEIIIYQQQgghhBDS5FCChBBCCCGEEEIIIYQQQgghTQ4lSOqpnTt3guM4pKSk1HVTiJUtW7YMHMfVdTPqLY1GgwULFsDPzw8ikQhPP/10XTfJbI3ltU1JSQHHcdi5c2ddNwUA0KdPH/Tp00f4u761z1ICAgIwadKkWj9ufXg+hwwZgmnTptXZ8U355Zdf4OjoiIcPH9Z1UwghhBCzNYW48nHO8eTJk+A4DidPnrR4u/RxHIdly5ZZ9RjlBQQEYNiwYRbdZ12cR2JiIgYOHAhnZ2dwHIeDBw/W6vEfR1315y2tPsW1pmKV+tQ+S6mt7yZT6vr5vHv3LqRSKc6ePVtnbTBl3LhxGDt2bF03g1gJJUiqie988f+kUilCQ0Px6quv4sGDB9Xe3+rVq+vsB76oqAjLli0z+wuX/4Lev3+/we0qlQrDhg2DSCTC559/XuljOY7Dnj17TG4TFRUFjuPQtm3bap1HfVVSUoINGzaga9eucHZ2Nniv3Lx5s9ba8eWXX+KDDz6oteNZ2ueff473338fo0ePxq5duzBv3jxcu3YNy5Ytq9VAz9Rn38fHB9HR0di4cSPy8/NrrS2kaj///LNVgreAgACD94GDgwMiIyPxxRdfWPxY1lZfvxvOnj2Lo0ePYuHChcJt+r8hHMdBLBYjKCgIEyZMwK1bt4z2kZeXh+XLl6NDhw5wdHSETCZD27ZtsXDhQqSnp5s87tixY8FxnMFx9Q0aNAjBwcGIjY21zIkSQghp0ppyXMm7c+cOpk+fjoCAAEgkEnh6euLpp5+udxfFagt/4Xft2rV13ZR6ZeLEiYiPj0dMTAx2796Nzp0710k/tk+fPsLnVSQSQS6Xo1WrVhg/fjyOHTtWq20hVbPGd2L5mMTGxgaenp4YPXo0rl+/btFjWVtNv7drw4oVK9C1a1dERUUJt02aNMnguZfL5ejQoQPWrVuH0tJSo31cunQJL774Ivz8/CCRSODq6or+/ftjx44d0Gq1RtsrlUpIpVJwHFfha7lw4UIcOHAAcXFxljtZUn8wUi07duxgANiKFSvY7t272WeffcYmTpzIRCIRCwwMZIWFhdXan4ODA5s4caLR7RqNhhUXFzOdTmehlht7+PAhA8CWLl1q1vYnTpxgANi+ffuE21QqFRs+fDjjOI5t27atysdKpVI2ePBgo/tv374t3B8eHl7tc6lvHj58yCIiIhgANmzYMPbBBx+wbdu2sTfffJP5+fkxsVgsbLt06VJmzY/i0KFDmb+/v9X2b23PPvss8/X1Nbht3759DAA7ceJErbWj/Gf/888/Z6tXr2YDBw5kHMcxf39/FhcXZ/AYtVrNiouLa62N1qLT6VhxcTHTaDR13RTGGGO9e/dmvXv3Fv421b5Zs2ZZ5XPl7+/POnbsyHbv3s12797N3nvvPRYaGsoAsE8//dTixzL1+2ApFX031PXrPXLkSDZw4E
CD2/jfkDlz5gifv1dffZXZ2dkxV1dXlpaWJmybnJzMAgMDmY2NDRs3bhz76KOP2KeffspeffVV5ubmxkJCQoyOmZuby6RSKQsICGB+fn4V/vZ+/PHHzN7enuXl5Vn2pAkhhDQ5TTmuZIyxM2fOMLlczuRyOXv99dfZtm3b2KpVq1hwcDDjOI5t3LjR7H09zjlqtVpWXFzMtFpttR9bHeY8P3xM/P7771vkmP7+/mzo0KEW2Revuq/z4yoqKmIA2Ntvv21we13EuL1792bNmzcX4oCtW7ey+fPns6CgIAaAjR07lqlUKoPHlJSUGN3WENWnuJb/nOzYsUO4zVT7KvpOfBymYpK5c+cyqVTK3NzcWEZGhsWPZa1rHpV9b9fl652ZmcnEYjH78ssvDW6fOHEik0gkwudv06ZNrE+fPgwAe/bZZw22/eyzz5iNjQ3z8fFhCxcuZNu2bWMbNmxgw4YNYxzHsZiYGKPjfvrpp0wqlTIvLy+j7xt9kZGRbPz48ZY5WVKv2NZeKqZxGTx4MDp37gwAmDp1Ktzc3LB+/Xr88MMPeO655x57/zY2NrCxsXns/ViTWq3G2LFjcejQIXzyySeYMmVKlY8ZMmQIfvzxRzx69Aju7u7C7V9++SWaNWuGkJAQ5OTkWLPZtWLSpEm4ePEi9u/fj1GjRhnct3LlSrz99tt11DLL0Ol0UKlUkEqlVj9WZmYmFAqF1Y8DAIWFhXBwcKh0G/3PPgAsWrQIv//+O4YNG4YRI0bg+vXrkMlkAABbW1vY2jb8r1l+VGN9Vdvt8/X1xYsvvij8PWnSJAQFBWHDhg31rixUTdTl652ZmYnDhw9j69atJu/v1asXRo8eDQCYPHkyQkNDMWfOHOzatQuLFi2CRqPBM888gwcPHuDkyZPo2bOnweNjYmKwZs0ao/0eOHAAWq0Wn3/+OZ566imcOnUKvXv3Ntpu1KhRmD17Nvbt24eXXnrJAmdMCCGkqWuKcWVOTg5Gjx4NmUyGs2fPomXLlsJ9r7/+OqKjozF37lxERESgR48eFe6H77s/zjmKRKJ63c9t6vjSprURD5oT4zo7OxvEAQDw7rvvYs6cOfj4448REBBg0NeUSCRWa29tqu9xbW23Tz8mAYBWrVphxowZ+OKLL7BgwYJaa4e11OXrvWfPHtja2mL48OFG99na2hp8/mbOnImuXbvim2++wfr16+Hj44O//voL06dPR/fu3fHzzz/DyclJ2H7u3Lk4f/48rly5YvK4Q4YMgb+/P7788kusWrXKZPvGjh2LpUuX4uOPP4ajo6MFzpjUF1Riy0KeeuopAMDt27cBAGvXrkWPHj3g5uYGmUyGiIgIo9JUHMehsLAQu3btEqaJ8fUpK6qjeuTIEfTq1QsODg5wcnLC0KFDcfXqVYNtJk2aBEdHR6SlpeHpp5+Go6MjPDw8MH/+fGEqWUpKCjw8PAAAy5cvF45vbkkajUaDcePG4YcffsCWLVvMvig4cuRISCQS7Nu3z+D2L7/8EmPHjq2wY7tnzx5ERERAJpPB1dUV48aNw927dw22OX36NMaMGYMWLVpAIpHAz88P8+bNQ3FxscF25jw/vK+//hoRERFwcnKCXC5Hu3bt8OGHH1Z6jn///TcOHz6MKVOmGCVHgLJOUmXTpiur/1/+NcrPz8fcuXMNpqUPGDAAFy5cAFA2Dfjw4cNITU0VXuOAgADh8aWlpVi6dCmCg4OF52zBggVGUxQ5jsOrr76KvXv3Ijw8HBKJBL/88kuNnyOg6s8I/zycOHECV69eFdq/c+dOjBkzBgDQt29f4Xb9qaHV+ZwkJydjyJAhcHJywgsvvFBlu0156qmn8M477yA1NdWghJyp2p38c7lv3z60adMGMpkM3bt3R3x8PADgk08+QXBwMKRSKfr06WOyjNjff/+NQYMGwdnZGfb29ujdu7dRKQL+2ElJSZg0aRIUCgWcnZ0xefJkFBUVGWx77Ngx9OzZEwqFAo6OjmjVqh
UWL14s3F/Re/L3338XnmeFQoGRI0caTUetTjt27NiBp556Cp6enpBIJGjTpg22bNlS+ZNvon2TJk3C5s2bheeb/8c
"text/plain": [
"<Figure size 2000x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pca = PCA(n_components=3)\n",
"reduced_data = pca.fit_transform(data_x)\n",
"\n",
"fig = plt.figure(figsize=(20, 7))\n",
"\n",
"# First subplot\n",
"ax1 = fig.add_subplot(121, projection='3d')\n",
"for i in range(kmeans.n_clusters):\n",
" cluster_data = reduced_data[clusters == i]\n",
" ax1.scatter(cluster_data[:, 0], cluster_data[:, 1], cluster_data[:, 2], label=f'Cluster {i}', alpha=0.7, edgecolors='w')\n",
"ax1.set_title('Patient K-Means Clusters after Dimensionality Reduction (PCA)')\n",
"ax1.set_xlabel('Reduced Dimension 1')\n",
"ax1.set_ylabel('Reduced Dimension 2')\n",
"ax1.set_zlabel('Reduced Dimension 3')\n",
"ax1.legend()\n",
"\n",
"# Second subplot\n",
"ax2 = fig.add_subplot(122, projection='3d')\n",
"for i in range(len(set(data_y))):\n",
" cluster_data = reduced_data[data_y == i]\n",
" ax2.scatter(cluster_data[:, 0], cluster_data[:, 1], cluster_data[:, 2], label=f'Label {label_names[i]}', alpha=0.7, edgecolors='w') \n",
"ax2.set_title('Patient Original Labels after Dimensionality Reduction (PCA)')\n",
"ax2.set_xlabel('Reduced Dimension 1')\n",
"ax2.set_ylabel('Reduced Dimension 2')\n",
"ax2.set_zlabel('Reduced Dimension 3')\n",
"ax2.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Adjusted Rand Index (ARI): 0.15412707550646423\n",
"Normalized Mutual Information (NMI): 0.24282003848756695\n",
"Silhouette Score: 0.46722973644820026\n"
]
}
],
"source": [
"# Calculate Adjusted Rand Index (ARI)\n",
"ari = adjusted_rand_score(data_y, clusters)\n",
"print(f\"Adjusted Rand Index (ARI): {ari}\")\n",
"\n",
"# Calculate Normalized Mutual Information (NMI)\n",
"nmi = normalized_mutual_info_score(data_y, clusters)\n",
"print(f\"Normalized Mutual Information (NMI): {nmi}\")\n",
"\n",
"# Calculate Silhouette Score\n",
"silhouette_avg = silhouette_score(data_x, clusters)\n",
"print(f\"Silhouette Score: {silhouette_avg}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- The ARI and NMI scores suggest that the clustering algorithm has some effectiveness in mirroring the true label structure, but it's not highly accurate. The moderate scores indicate that while there is some alignment with the true labels, the clustering does not perfectly capture the underlying groupings.\n",
"\n",
"- The Silhouette Score indicates that the clustering has identified groups that are somewhat cohesive internally and separated from each other. This suggests that the clustering algorithm has been somewhat successful in identifying meaningful structures within the data, even if those structures don't perfectly align with the true labels."
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAyUAAALSCAYAAADDdH6KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd7idRbX/P2/dvZx9em/poQQCBAIh9IB0BKQpIAgXC/4sKFyRKnCxo4DoRRGxgHSkd+m9Q+qpSU7vu79tfn/MOZsckkBAEK/ub579PCfvnv3OzJo1a9pa31GEEIIiiiiiiCKKKKKIIooooohPCeqnXYAiiiiiiCKKKKKIIooo4j8bxUVJEUUUUUQRRRRRRBFFFPGporgoKaKIIooooogiiiiiiCI+VRQXJUUUUUQRRRRRRBFFFFHEp4rioqSIIooooogiiiiiiCKK+FRRXJQUUUQRRRRRRBFFFFFEEZ8qiouSIooooogiiiiiiCKKKOJTRXFRUkQRRRRRRBFFFFFEEUV8qiguSooooogiiiiiiCKKKKKITxXFRUkR/xb40Y9+REtLC5qmsWDBgk+7OBvhpJNOoqmpadozRVG44IILPvC3F1xwAYqifDIF+xTR1NTESSed9GkXo4gi/i3xr2g3UqkUFRUV/OlPf/qn5/2vKI8iNg/btqmvr+fqq6/+tItSxD8RxUXJJ4Df//73KIrCSy+9tMnv99hjD7baaqtPtAz33nvvFk14/x3w4IMP8p3vfIddd92V6667jksvvXSzaU866SQURdnkx+/3/xNL/e+FPfbYoyBHVVWJRqPMnj2bz3/+8zz00EMfWz7/SXo9hffqaSgUYt68efzgBz8gk8l8pHc+88wzXHDBBYyNjX28hf0XwoYy03WdRCLBwoUL+frXv84777zzkd+byWS44IILePzxxz++wgKdnZ3TyqxpGg0NDRx++OG89tprH2teH4RPSj+uuOIKIpEIxxxzTOHZ1GJh6hMMBpk3bx7nnnsuExMTH+r9n1TbbCl6enq44IILtri9puYKm/qcffbZn0gZ/6/0fcMw+OY3v8kll1xCLpf7tItTxD8J+qddgCI+Gdx7771cddVV/xETuEcffRRVVfntb3+LaZofmN7n83Httddu9FzTtE+ieJtFNptF1/99umBdXR2XXXYZAOl0mjVr1nDbbbfxxz/+kaOPPpo//vGPGIZRSL9y5UpU9cPti/wn6fWG2HffffnCF74AyN3mJ598ku9///u8/vrr3HzzzR/6fc888wwXXnghJ510EvF4/GMu7b8OpuQmhGB8fJzXX3+d66+/nquvvprLL7+cb37zmx/6nZlMhgsvvBCQi/GPG8ceeyyf+cxncF2X5cuX86tf/Yr77ruP55577kOdAp977rkfeWL7SeiHbdtcccUVfOMb39ikrf3Vr35FOBwmlUrx4IMPcskll/Doo4/y9NNPb/EJx/u1zT8ijy1FT08PF154IU1NTR+qrS666CKam5unPfukNi7/L/X9k08+mbPPPps///nPfPGLX/y0i1PEPwH/PjOiIv5jMTAwQCAQ2KIFCYCu65xwwgmfcKk+GP9uJzOxWGwjuf7P//wPZ555JldffTVNTU1cfvnlhe98Pt8/u4j/ZzFr1qxpsv2v//ovLMvitttuI5fL/dvp0seF98oNpE4efPDBfOtb32LOnDl85jOf+ZRKt2lsv/3208q86667csghh/CrX/2KX//611v8Hl3X/6U2Pe6++24GBwc5+uijN/n9kUceSVlZGSD1+7Of/Sy33XYbzz33HLvssss/nP+/mjw2xAEHHMAOO+zwaRfjH0I6nSYUCn2s74zH4+y33378/ve/Ly5K/kNQdN/6F8If//hHFi5cSCAQIJFIcMwxx7B27dppaZ588kmOOuooGhoa8Pl81NfX841vfINsNltIc9JJJ3HVVVcB010Y4F0XgR//+MdcddVVtLS0EAwG2W+//V
i7di1CCC6++GLq6uoIBAIceuihjIyMTCvDnXfeyYEHHkhNTQ0+n4/W1lYuvvhiXNedlm7KTe3ll19m8eLFBAIBmpubueaaa7ZIHo7jcPHFF9Pa2orP56OpqYn//u//Jp/PF9IoisJ1111HOp0u1PP3v//9Fst8c9ic//HUcXtnZ+e05/fddx9Lly4lEokQjUbZcccd+fOf//y+eWwqpuSpp55ixx13xO/309ra+r6TkI9LX0DqTDgcZv369Rx22GGEw2HKy8v59re/vVG7fhhomsYvfvEL5s2bx5VXXsn4+Hjhu/fGlNi2zYUXXsjMmTPx+/2Ulpay2267Fdy/3k+vAX784x+zePFiSktLCQQCLFy4kFtuuWWjMimKwle/+lXuuOMOttpqK3w+H/Pnz+f+++/fKO369es55ZRTCrre3NzMGWecgWVZhTRjY2P8v//3/6ivr8fn8zFjxgwuv/xyPM+b9q4bb7yRhQsXFnRk66235oorrvhoggWqqqoKrkkb4vnnn2f//fcnFosRDAZZunQpTz/9dOH7Cy64gLPOOguA5ubmghw7Ozs54ogj2H777ae97+CDD0ZRFO66665peSiKwn333feh5eB5Hj//+c+ZP38+fr+fyspKTj/9dEZHR6ela2pq4qCDDuKpp55ip512wu/309LSwh/+8IePLDOA0tJSbrzxRnRd55JLLik8tyyL8847j4ULFxKLxQiFQixZsoTHHnuskKazs5Py8nIALrzwwoLspvrxG2+8wUknnURLSwt+v5+qqiq++MUvMjw8/JHLu9deewHQ0dFReHbzzTcX+n5ZWRknnHAC69evn/a7TdmwLdH999MPgIceeojddtuNeDxOOBxm9uzZ/Pd///cH1uOOO+6gqamJ1tbWD13vj6NtNmfTt8SOTo1l77zzDnvuuSfBYJDa2lp++MMfFtI8/vjj7LjjjoDc4f84x6P77ruPJUuWEAqFiEQiHHjggbz99tvT0myJ7r1f207NDTZV3veOVVOyfOeddzjuuOMoKSlht912K3y/JTJdvXo1n/3sZ6mqqsLv91NXV8cxxxwzbYwAedr51FNPbTQPKeLfE/+a2wb/JhgfH2doaGij57Ztb/Tskksu4fvf/z5HH300p556KoODg/zyl79k991359VXXy0cs958881kMhnOOOMMSktLeeGFF/jlL3/JunXrCm4cp59+Oj09PTz00EPccMMNmyzbn/70JyzL4mtf+xojIyP88Ic/5Oijj2avvfbi8ccf57vf/S5r1qzhl7/8Jd/+9rf53e9+V/jt73//e8LhMN/85jcJh8M8+uijnHfeeUxMTPCjH/1oWj6jo6N85jOf4eijj+bYY4/lr3/9K2eccQamaX7gzsepp57K9ddfz5FHHsm3vvUtnn/+eS677DKWL1/O7bffDsANN9zAb37zG1544YWCS9bixYvf973AJtvFNE2i0egH/va9mNrFmT9/Pueccw7xeJxXX32V+++/n+OOO26L3/Pmm2+y3377UV5ezgUXXIDjOJx//vlUVlZulPbj1JcpuK7LsmXLWLRoET/+8Y95+OGH+clPfkJraytnnHHGh5bLFDRN49hjj+X73/8+Tz31FAceeOAm011wwQVcdtllnHrqqey0005MTEzw0ksv8corr7Dvvvt+oF5fccUVHHLIIRx//PFYlsWNN97IUUcdxd13371Rnk899RS33XYbX/7yl4lEIvziF7/gs5/9LN3d3ZSWlgLSFWOnnXZibGyM0047jTlz5rB+/XpuueUWMpkMpmmSyWRYunQp69ev5/TTT6ehoYFnnnmGc845h97eXn7+858DciJ37LHHsvfeexdOi5YvX87TTz/N17/+9Q+UYS6XK+hsOp3m6aef5vrrr+e4446btih59NFHOeCAA1i4cCHnn38+qqpy3XXXsddee/Hkk0+y0047ccQRR7Bq1Sr+8pe/8LOf/aywO11eXs6SJUu48847mZiYIBqNIo
Tg6aefRlVVnnzySQ455BBALnZVVWXXXXcF2GI5gLRPv//97zn55JM588wz6ejo4Morr+TVV1/l6aefnubit2bNGo4
"text/plain": [
"<Figure size 1000x800 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-26 18:44:50 +02:00
"CPU times: total: 8.11 s\n",
"Wall time: 11.5 s\n"
]
}
],
"source": [
"%%time\n",
"# Compute the distance matrix based on the Euclidean distance between data points\n",
"distance_matrix = euclidean_distances(data_x, data_x)\n",
"\n",
"# Create the heatmap\n",
"plt.figure(figsize=(10, 8))\n",
"sns.heatmap(distance_matrix, cmap='viridis')\n",
"plt.title('Heatmap of Euclidean Distances Between Data Points (Patient Features)')\n",
"plt.xlabel('Data Point Index')\n",
"plt.ylabel('Data Point Index')\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAggAAAK9CAYAAABB+5SlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAACA+ElEQVR4nO3dd3wUdf4G8Ge27ybZTUJ6IYWS0DsISBUEKYoiIKKCYgcs2Cv2dp7iKWfhVDzPOz0VPX+Kep6C/TxUQFSKQBCEQALpm2TLzOf3x8JCCGASksxu9nn7ysvZyezsJ0OSffKdb1FEREBERER0GIPeBRAREVHoYUAgIiKiehgQiIiIqB4GBCIiIqqHAYGIiIjqYUAgIiKiehgQiIiIqB4GBCIiIqqHAYGIiIjqYUAgagXZ2dmYM2dO8PGqVaugKApWrVoV3Ddy5Eh079699YtrI+bMmYPs7Gy9yyBqMxgQiE7A+vXrcfbZZyMrKws2mw3p6ekYO3YsnnzySb1LaxE///wz7rrrLmzfvr3e5/785z9j2bJlzf6aI0eOhKIowY/4+HgMGDAAL7zwAjRNa5bXeOCBB/D22283y7mI2goGBKIm+uqrr9C/f3+sW7cOl1xyCZ566ilcfPHFMBgMeOKJJ+ocu2nTJixdulSnSpvPzz//jLvvvrtVAwIAZGRk4OWXX8bLL7+MO+64A36/H3PnzsWtt97aLOdnQCCqz6R3AUTh6v7774fL5cLq1asRGxtb53NFRUV1Hlut1lasrO1xuVw477zzgo8vu+wy5OXl4amnnsK9994Ls9msY3VEbRNbEIiaaOvWrejWrVu9cAAASUlJdR4f2QfheH7++WeMGjUKDocD6enpeOSRR+odU1RUhLlz5yI5ORk2mw29evXCSy+9VOeYo/VzAIDt27dDUZR6f+1v3LgRZ599NuLj42Gz2dC/f3+88847wc8vW7YM06ZNAwCMGjUq2OS/atUqZGdn46effsKnn34a3D9y5Mjgc8vKynDNNdcgMzMTVqsVHTt2xMMPP9zkWwQOhwMnnXQS3G43iouLj3mc2+3GddddF3zdvLw8PProozh8EVtFUeB2u/HSSy8Fa2/ovxVRW8YWBKImysrKwtdff40ff/yx2ToXlpaWYvz48TjrrLMwffp0vPHGG7jpppvQo0cPnHbaaQCAmpoajBw5Elu2bMH8+fORk5OD119/HXPmzEFZWRmuvvrqRr/uTz/9hKFDhyI9PR0333wzoqKi8M9//hNTpkzBm2++iTPPPBPDhw/HVVddhT/96U+49dZb0aVLFwBAly5dsHjxYixYsADR0dG47bbbAADJyckAgOrqaowYMQK7du3CZZddhvbt2+Orr77CLbfcgsLCQixevLhJ12rbtm0wGo1HDWgAICI4/fTTsXLlSsydOxe9e/fGhx9+iBtuuAG7du3C448/DgB4+eWXcfHFF2PgwIG49NJLAQAdOnRoUk1EbYoQUZP8+9//FqPRKEajUQYPHiw33nijfPjhh+L1eusdm5WVJbNnzw4+XrlypQCQlStXBveNGDFCAMhf//rX4D6PxyMpKSkyderU4L7FixcLAPnb3/4W3Of1emXw4MESHR0tFRUVx3wNEZGCggIBIC+++GJw3ymnnCI9evSQ2tra4D5N02TIkCHSqVOn4L7XX3/9qOcUEenWrZuMGDGi3v57771XoqKiZPPmzXX233zzzWI0GmXHjh31nnO4ESNGSH5+vhQXF0txcbFs2LBBrrrqKgEgkydPDh43e/ZsycrKCj5+++23BYDcd999dc539tlni6IosmXLluC+qKioOv8+RCTCWwxETTR27Fh8/fXXOP3007Fu3To88sgjGDduHNLT0+s0zTdGdHR0nXvtFosFAwcOxLZt24L7VqxYgZSUFMycOTO4z2w246qrrkJVVRU+/fTTRr1mSUkJPvnkE0yfPh2VlZXYt28f9u3bh/3792PcuHH45Z
dfsGvXriZ9PQDw+uuvY9iwYYiLiwuee9++fRgzZgxUVcVnn332u+fYuHEjEhMTkZiYiC5duuDJJ5/ExIkT8cILLxzzOStWrIDRaMRVV11VZ/91110HEcH777/f5K+JKBLwFgPRCRgwYACWL18Or9eLdevW4a233sLjjz+Os88+G2vXrkXXrl0bdb6MjAwoilJnX1xcHH744Yfg419//RWdOnWCwVA33x9s8v/1118b9ZpbtmyBiOCOO+7AHXfccdRjioqKkJ6e3qjzHvTLL7/ghx9+QGJi4jHP/Xuys7OxdOlSKIoCm82GTp061evncaRff/0VaWlpiImJqbO/qdeJKNIwIBA1A4vFggEDBmDAgAHo3LkzLrzwQrz++utYtGhRo85jNBqPul8O61TXUEcGjYNUVa3z+GBHweuvvx7jxo076nM6duzY6Nc//Pxjx47FjTfeeNTPd+7c+XfPERUVhTFjxjS5BiJqPAYEombWv39/AEBhYWGLnD8rKws//PADNE2r04qwcePG4OeBQMsDEBhBcLgj/3LOzc0FELhN8XtvwscKHcf7XIcOHVBVVdXqb/BZWVn4z3/+g8rKyjqtCEdeJ+D4XxdRpGIfBKImWrly5VH/sl+xYgUAIC8vr0Ved8KECdizZw9ee+214D6/348nn3wS0dHRGDFiBIDAG6DRaKx3j//Pf/5zncdJSUkYOXIknn322aOGmsOHEUZFRQGoHzoOfu5o+6dPn46vv/4aH374Yb3PlZWVwe/3H/uLPQETJkyAqqp46qmn6ux//PHHoShKcFQIcOzaiSIZWxCImmjBggWorq7GmWeeifz8fHi9Xnz11Vd47bXXkJ2djQsvvLBFXvfSSy/Fs88+izlz5uC7775DdnY23njjDXz55ZdYvHhx8K9ll8uFadOm4cknn4SiKOjQoQPefffdo97zX7JkCU4++WT06NEDl1xyCXJzc7F37158/fXX+O2337Bu3ToAQO/evWE0GvHwww+jvLwcVqsVo0ePRlJSEvr164enn34a9913Hzp27IikpCSMHj0aN9xwA9555x1MmjQJc+bMQb9+/eB2u7F+/Xq88cYb2L59OxISEpr9Ok2ePBmjRo3Cbbfdhu3bt6NXr17497//jX/961+45ppr6gxl7NevH/7zn//gscceQ1paGnJycjBo0KBmr4korOg7iIIofL3//vty0UUXSX5+vkRHR4vFYpGOHTvKggULZO/evXWObegwx27dutV7nSOH74mI7N27Vy688EJJSEgQi8UiPXr0qDNs8aDi4mKZOnWqOBwOiYuLk8suu0x+/PHHesMcRUS2bt0qF1xwgaSkpIjZbJb09HSZNGmSvPHGG3WOW7p0qeTm5orRaKzzNezZs0cmTpwoMTExAqDOkMfKykq55ZZbpGPHjmKxWCQhIUGGDBkijz766FGHhR7uWNelIdepsrJSrr32WklLSxOz2SydOnWSP/zhD6JpWp3jNm7cKMOHDxe73S4AOOSRSEQUkSb0fiIiIqI2jX0QiIiIqB4GBCIiIqqHAYGIiIjqYUAgIiKiehgQiIiIqB4GBCIiIqonJCZK0jQNu3fvRkxMDKc8JSIiagQRQWVlJdLS0uot4nYiQiIg7N69G5mZmXqXQUREFLZ27tyJjIyMZjtfSASEg1PD7ty5E06nU+dqiEgXbjeQlhbY3r0bOLDuAxEdX0VFBTIzM+stbX6iQiIgHLyt4HQ6GRCIItXhS107nQwIRI3U3Lfo2UmRiIiI6mFAICIionoYEIiIiKgeBgQiIiKqhwGBiIiI6gmJUQxERDAagQkTDm0Tka4YEIgoNNhswHvv6V0FER3AWwxERERUDwMCERER1cOAQEShwe0OzJ4YFRXYJiJdsQ8CEYWO6mq9KyCiA9iCQERERPUwIBAREVE9DAhERERUDwMCERER1cOAQERERPVwFAMRhQaDARgx4tA2EemKP4XNbOTIkbjmmmuCj7Ozs7F48eLgY0
VR8Pbbb7d6XUQhz24HVq0KfNjteldDFPHCOiB8/fXXMBqNmDhxYqu8nqqqeOihh5Cfnw+73Y74+HgMGjQIf/nLX4L
"text/plain": [
"<Figure size 600x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Silhouette plot: one horizontal band per K-Means cluster, one row per sample.\n",
"n_clusters = kmeans.n_clusters\n",
"# Compute the silhouette scores for each sample\n",
"silhouette_vals = silhouette_samples(data_x, clusters)\n",
"\n",
"# Number of empty rows left between the bands of adjacent clusters\n",
"cluster_gap = 10\n",
"\n",
"# Start plotting\n",
"fig, ax = plt.subplots(figsize=(6, 8))\n",
"y_lower = 0\n",
"\n",
"for i, cluster in enumerate(np.unique(clusters)):\n",
"    # Boolean indexing yields a copy, so the in-place sort does not reorder silhouette_vals\n",
"    cluster_silhouette_vals = silhouette_vals[clusters == cluster]\n",
"    cluster_silhouette_vals.sort()\n",
"    # The band spans exactly one row per sample, starting at y_lower. Deriving y_upper from\n",
"    # y_lower (instead of accumulating it separately) keeps the gap between every pair of bands.\n",
"    y_upper = y_lower + len(cluster_silhouette_vals)\n",
"\n",
"    color = cm.nipy_spectral(float(i) / n_clusters)\n",
"    ax.fill_betweenx(np.arange(y_lower, y_upper),\n",
"                     0, cluster_silhouette_vals,\n",
"                     facecolor=color, edgecolor=color, alpha=0.7)\n",
"\n",
"    # Label the silhouette plots with their cluster numbers at the middle\n",
"    ax.text(-0.05, y_lower + 0.5 * len(cluster_silhouette_vals), str(cluster))\n",
"\n",
"    # Next band starts after a fixed gap of empty rows\n",
"    y_lower = y_upper + cluster_gap\n",
"\n",
"# The vertical line for average silhouette score of all the values.\n",
"# silhouette_score is the mean of the per-sample coefficients, so reuse them\n",
"# rather than recomputing all pairwise distances.\n",
"average_score = silhouette_vals.mean()\n",
"ax.axvline(x=average_score, color='red', linestyle='--')\n",
"\n",
"ax.set_yticks([])  # Clear the yaxis clusters / ticks\n",
"ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n",
"ax.set_title('Silhouette Plot')\n",
"ax.set_xlabel('Silhouette Coefficient Values')\n",
"ax.set_ylabel('Cluster Label')\n",
"\n",
"# Add the silhouette score to the plot\n",
"ax.text(0.02, 0.95, f'Avg Silhouette Score: {average_score:.2f}', transform=ax.transAxes)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-06-26 18:44:50 +02:00
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAACVUAAAJOCAYAAAC9NnWuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dbA4d85UzPpkEDoCUV6EwQUadJE7IoXK1ixfKJe27WL9dqxX2yo2BGwI0gREJHee0no6X162d8fkwwZkkACgVDWyzMPM6fuUzNnzdp7a0ophRBCCCGEEEIIIYQQQgghhBBCCCGEEEIIAPTaLoAQQgghhBBCCCGEEEIIIYQQQgghhBBCnEgkqUoIIYQQQgghhBBCCCGEEEIIIYQQQgghypCkKiGEEEIIIYQQQgghhBBCCCGEEEIIIYQoQ5KqhBBCCCGEEEIIIYQQQgghhBBCCCGEEKIMSaoSQgghhBBCCCGEEEIIIYQQQgghhBBCiDIkqUoIIYQQQgghhBBCCCGEEEIIIYQQQgghypCkKiGEEEIIIYQQQgghhBBCCCGEEEIIIYQoQ5KqhBBCCCGEEEIIIYQQQgghhBBCCCGEEKIMSaoSQgghhBBCCCGEEEIIIYQQQgghhBBCiDIkqUoIIYQQJ43Ro0eTnJxc28WoNa+88grNmzfHYDDQpUuX2i5OmD///BNN0/j+++9ruygnreTkZEaPHn3c1rd7926sVisLFy48bussVdvXcq9evXjooYdqbf1CCCGEEEIIcSp7+umn0TTtiOb99NNP0TSNtLS0mi1UGWlpaWiaxqeffnrM1lERTdP4v//7vxpb3rHYjoyMDK688krq1q2LpmmMHz++xpZdE0aPHk1UVFRtF+OkVRq/+/PPP4/bOl9++WXatGlDIBA4busspWkaTz/99HFfL8CGDRswGo2sW7euVtYvhBCi5khSlRBCiBpRGvCo6PWf//znmKzz77//5umnnyY/P/+YLP9olO6PZcuWhQ0vKCigR48eWK1Wfv/990POq2kaf/31V7nxSimaNGmCpmlceOGFx6T8x1thYSHjxo2jc+fOREVFERERQYcOHXj44YfZt2/fcSvHe++9d9wDalU1c+ZMHnroIXr37s3EiRN54YUXartINerge4jVaqVhw4YMHTqUt956i6KioiNe9rG6V5QGiUtfNpuNdu3a8fjjj1NYWFij6zqcIzl3n3nmGXr27Env3r1Dw0aPHh22TTExMXTu3JnXXnsNt9tdreXv27ePp59+mlWrVlVrvpqyYcMGnn766QoD8Q8//DDvvvsu6enpx79gQgghhBBCiFoncayKrV+/nuuuu45GjRphsVho2LAh1157LevXr6/totWK07EC2X333ceMGTN45JFHmDRpEueff35tF6lG9e/fP3St67pOTEwMrVu35vrrr+ePP/44qmUfq7hicnJy2D2qXr169OnTh2nTptX4ug7lSOI8hYWFvPTSSzz88MPo+oGfpMtuj67rNGzYkCFDhhxRstdvv/1Wa4lTAF999VWFyYft2rVj+PDhPPnkk8e/UEIIIWqUsbYLIIQQ4tTyzDPPkJKSEjasQ4cOx2Rdf//9N+PGjWP06NHExcUdk3XUpMLCQoYMGcKaNWuYNm3aYYMSVquVr776inPPPTds+Lx589izZw8Wi+VYFve42bFjB4MGDWLXrl2MGDGC2267DbPZzJo1a/j444+ZNm0aW7ZsOS5lee+990hISDiurQVV1Zw5c9B1nY8//hiz2VzbxTlmSu8hXq+X9PR0/vzzT+69915ef/11fvrpJzp16lTtZR7re8X7779PVFQUxcXFzJw5k+eff545c+awcOHCatXM3bx5c1iAqTqqe+5mZWXx2Wef8dlnn5UbZ7FY+OijjwDIz89nypQpPPDAAyxdupRvvvmmymXat28f48aNIzk5uVzLah9++OExr6G4YcMGxo0bR//+/cu1inXJJZ
cQExPDe++9xzPPPHNMyyGEEEIIIYQ4cUkc64CpU6dy9dVXU6dOHW6++WZSUlJIS0vj448/5vvvv+ebb77hsssuq9KyHn/88SNOTrv++usZOXLkKRP3OtnMmTOHSy65hAceeKC2i3LMNG7cmBdffBEAu93Otm3bmDp1Kl988QVXXXUVX3zxBSaTqdrLPZZxxS5dunD//fcDwXjLhAkTuPzyy3n//fe5/fbbq7ycvn374nQ6jyi2eKg4T2U++eQTfD4fV199dblxgwcP5oYbbkApRWpqKu+99x7nnXcev/76K8OGDatyuX777TfefffdChOrnE4nRuOx/Sn8q6++Yt26ddx7773lxt1+++1ccMEFbN++nRYtWhzTcgghhDh2JKlKCCFEjRo2bBjdu3ev7WIcFbvdTmRkZI0us6ioiKFDh7Jq1SqmTp1apQfDCy64gMmTJ/PWW2+FPfx99dVXdOvWjezs7BotY23w+XxcfvnlZGRk8Oeff5ZLIHv++ed56aWXaql0NcPn8xEIBI46ESozM5OIiIhTOqEKyt9DHnnkEebMmcOFF17IxRdfzMaNG4mIiKjFEpZ35ZVXkpCQAASDJVdccQVTp07ln3/+4eyzz67yco5nwPiLL77AaDRy0UUXlRtnNBq57rrrQp/vvPNOevbsybfffsvrr79Ow4YNj3r9RxKcrEm6rnPllVfy+eefM27cuCPulkIIIYQQQghxcpM4VtD27du5/vrrad68OfPnzycxMTE07p577qFPnz5cf/31rFmzhubNmx+2LEaj8YgTGQwGAwaD4YjmFUcvMzPzhEz6q0mxsbFhcQ+A//73v4wdO5b33nuP5OTkEy4e2ahRo7Ay33DDDbRs2ZI33nijWklVuq5jtVqPRRErNHHiRC6++OIK13nGGWeEbdNll11Gp06dGD9+fLWSqg7leG5rRQYNGkR8fDyfffaZVOoTQoiTmHT/J4QQ4riaPn06ffr0ITIykujoaIYPH16uCfE1a9YwevRomjdvjtVqJSkpiZtuuomcnJzQNE8//TQPPvggACkpKaHmgtPS0khLS0PTtAqbWz64H/XS7rs2bNjANddcQ3x8fFhizxdffEG3bt2IiIigTp06jBw5kt27d1drm4uLizn//PNZsWIFU6ZMYfjw4VWa7+qrryYnJyes6WmPx8P333/PNddcU+E8gUCA8ePH0759e6xWK/Xr12fMmDHk5eWFTffjjz8yfPhwGjZsiMVioUWLFjz77LP4/f6w6fr370+HDh3YsGEDAwYMwGaz0ahRI15++eVy63777bdp3749NpuN+Ph4unfvzldffXXIbZwyZQqrV6/mscceK5dQBRATE8Pzzz9f6fylzbAf3DR0RedAeno6N954I40bN8ZisdCgQQMuueSSUNdgycnJrF+/nnnz5oXOp/79+4fmz8/P595776VJkyZYLBZatmzJSy+9FNbaTul6X331VcaPH0+LFi2wWCxs2LCh0m3w+Xw8++yzoWmTk5N59NFHw7pa0zSNiRMnYrfbQ2U7XHPiixcv5vzzzyc2NhabzUa/fv1YuHBh2DQ7d+7kzjvvpHXr1kRERFC3bl1GjBhRYXdp+fn53HfffSQnJ2OxWGjcuDE33HBDueS+QCDA888/T+PGjbFarQwcOJBt27YdsqyHc9555/HEE0+wc+dOvvjii9Dwo71XQDC4c95551GvXj0sFgvt2rXj/fffP+ryAqSmpgLBoPL9998fOndat27Nq6++ilIqbL7k5OSw2oyl3VEsXLiQf//73yQmJhIZGclll11GVlZW2HyHOncr8sMPP9CzZ0+ioqIOuz26roeWl5aWRm5uLg888AAdO3YkKiqKmJgYhg0bxurVq0Pz/Pnnn5x11lkA3HjjjeXO29GjR5drPaqq96/k5GQuvPBC/vrrr1B3qs2bN+fzzz8P23cjRowAYMCAAaH1l71XDB48mJ07d9Za94RCCCGEEEKIE9/pEsd65ZVXcD
gcfPDBB2EJVQAJCQlMmDABu90eFg86VFlKx5XldDoZO3YsCQkJREdHc/HFF7N3795y21j6LFw2NlGV50CgSs+rx8K
"text/plain": [
"<Figure size 2400x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Parallel-coordinates comparison: K-Means clusters (left) vs. original labels (right).\n",
"fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(24, 6))\n",
"# The leading columns of each frame are the features; rename them from positions to names\n",
"n_features = len(feature_names)\n",
"\n",
"# First subplot for clusters\n",
"plot_data_clusters = pd.DataFrame(data_x.copy())\n",
"plot_data_clusters['Cluster'] = clusters\n",
"new_columns_clusters = plot_data_clusters.columns.tolist()\n",
"new_columns_clusters[0:n_features] = feature_names\n",
"plot_data_clusters.columns = new_columns_clusters\n",
"parallel_coordinates(plot_data_clusters, 'Cluster', colormap='viridis', ax=axes[0])\n",
"axes[0].set_title('Feature K-Means Cluster of each Data Point (Patient)')\n",
"axes[0].set_xlabel('Feature')\n",
"axes[0].set_ylabel('Feature Value')\n",
"axes[0].tick_params(axis='x', rotation=90)\n",
"\n",
"# Second subplot for labels (the class column keeps the name 'Cluster' only for symmetry)\n",
"plot_data_labels = pd.DataFrame(data_x.copy())\n",
"label_data = data_y.reset_index(drop=True)\n",
"plot_data_labels['Cluster'] = label_data\n",
"\n",
"new_columns_labels = plot_data_labels.columns.tolist()\n",
"new_columns_labels[0:n_features] = feature_names\n",
"plot_data_labels.columns = new_columns_labels\n",
"parallel_coordinates(plot_data_labels, 'Cluster', colormap='viridis', ax=axes[1])\n",
"axes[1].set_title('Feature Original Label of each Data Point (Patient)')\n",
"axes[1].set_xlabel('Feature')\n",
"axes[1].set_ylabel('Feature Value')\n",
"axes[1].tick_params(axis='x', rotation=90)\n",
"\n",
"\n",
"def class_display_name(class_value):\n",
"    # Translate a legend entry (string form of a class value) into a readable label name.\n",
"    # assumes integer class codes index into label_names -- TODO confirm against how data_y is encoded\n",
"    try:\n",
"        code = float(class_value)\n",
"    except ValueError:\n",
"        # Already a textual label: show it unchanged\n",
"        return class_value\n",
"    if code.is_integer() and 0 <= code < len(label_names):\n",
"        return label_names[int(code)]\n",
"    return class_value\n",
"\n",
"\n",
"# Set the legend from the handles parallel_coordinates labelled (one line per class).\n",
"# Passing only labels= to legend() would pair the names with the first lines drawn,\n",
"# i.e. the first few patients, whatever class they belong to.\n",
"handles, class_values = axes[1].get_legend_handles_labels()\n",
"axes[1].legend(handles, [class_display_name(value) for value in class_values], loc='upper right')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
2024-06-26 18:44:50 +02:00
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hURReH37t9N72ShAAJCb23gIKgohQRxY4VFBUrNtTP3nvDBsonIp+9IBaQqvTeewskkN7b9nLn+2OThZAAoavM+zz7JHfu3Jlz2+69vznnjCKEEEgkEolEIpFIJBKJRCKRSCSnEc2ZNkAikUgkEolEIpFIJBKJRHL2IUUpiUQikUgkEolEIpFIJBLJaUeKUhKJRCKRSCQSiUQikUgkktOOFKUkEolEIpFIJBKJRCKRSCSnHSlKSSQSiUQikUgkEolEIpFITjtSlJJIJBKJRCKRSCQSiUQikZx2pCglkUgkEolEIpFIJBKJRCI57UhRSiKRSCQSiUQikUgkEolEctqRopREIpFIJBKJRCKRSCQSieS0I0UpiUQikUj+wTz//PMoinJc237xxRcoikJmZubJNeogMjMzURSFL7744pT1UR+KonDfffedtPZOxX4UFBRw9dVXExUVhaIojBs37qS1fTIYOXIkwcHBZ9qMfywLFixAURQWLFhw2vp88803ad26NaqqnrY+a1AUheeff/609wuwbds2dDodW7ZsOSP9SyQSieT4kaKURCKRSE4pNcJHfZ///Oc/p6TPZcuW8fzzz1NeXn5K2j8ZbN26lZtuuonGjRtjNBpJSEjgxhtvZOvWrWfatDNCzQv8Tz/9dKZNOW089NBDzJ49myeeeIIvv/ySQYMGnWmTTirnn39+4F7XaDSEhobSqlUrbr75ZubOnXtCbY8fP/6UCJ1JSUm1vqNiY2M577zzmDZt2knv60jk5uby/PPPs2HDhgZvU1lZyRtvvMHjjz+ORnPgEf/g/dFoNCQkJDBgwIDjEsv++OOPMyY8AXzzzTf1irdt27ZlyJAhPPvss6ffKIlEIpGcELozbYBEIpFIzg5efPFFkpOTa5W1b9/+lPS1bNkyXnjhBUaOHEl4ePgp6eNE+Pnnn7n++uuJjIxk1KhRJCcnk5mZyaRJk/jpp5/47rvvuOKKKxrU1tNPP33c4t7NN9/M8OHDMRqNx7W95MT466+/uPzyyxk7duyZNuWUkZiYyGuvvQaAzWYjPT2dn3/+ma+++oprr72Wr776Cr1ef8ztjh8/nujoaEaOHHmSLYbOnTvzyCOPAH5x6NNPP+XKK69kwoQJ3HXXXQ1up2/fvjgcDgwGwzHbkJubywsvvEBSUhKdO3du0Daff/45Xq+X66+/vs66iy++mFtuuQUhBBkZGYwfP54LL7yQGTNmMHjw4Abb9ccff/Dxxx/XK0w5HA50ulP7avHNN9+wZcsWHnzwwTrr7rrrLi655BL27NlDSkrKKbVDIpFIJCcPKUpJJBKJ5LQwePBgunfvfqbNOCFsNhtBQUEn1MaePXu4+eabad68OYsWLSImJiaw7oEHHuC8887j5ptvZtOmTTRv3vyotuh0uuN+EdRqtWi12uPaVnLiFBYW/i1F05NJWFgYN910U62y119/nTFjxjB+/HiSkpJ44403zpB19dO4ceNaNt9yyy2kpqby3nvvHZMopdFoMJlMp8LEepk8eTKXXXZZvX22bNmy1j5dccUVdOzYkXHjxh2TKHUkTue+1sdFF11EREQEU6ZM4cUXXzyjtkgkEomk4cjwPYlEIpH8LZg5cybnnXceQUFBhISEMGTIkDqhbJs2bWLkyJE0b94ck8lEXFwct912GyUlJYE6zz//PI8++igAycnJgbCVzMzMI+YFOjQfSk2upm3btnHDDTcQERFBnz59Auu/+uorunXrhtlsJjIykuHDh5OVlXXU/Xzrrbew2+1MnDixliAFEB0dzaefforNZuPNN9
9skC315ZRyOByMGTOG6OhoQkJCuOyyy8jJyamzj/XllEpKSuLSSy9lyZIlpKWlYTKZaN68Of/73/9q9VFaWsrYsWPp0KEDwcHBhIaGMnjwYDZu3HjUY3AivP3225x77rlERUVhNpvp1q3bEUP+vv76a1q1aoXJZKJbt24sWrSoTp2cnBxuu+02GjVqhNFopF27dnz++efHbePevXu55ppriIyMxGKx0KtXL2bMmBFYX3PchRB8/PHHgWv0SKiqyrhx42jXrh0mk4lGjRoxevRoysrKatX79ddfGTJkCAkJCRiNRlJSUnjppZfw+Xx12ly5ciWXXHIJERERBAUF0bFjR95///069XJychg2bBjBwcHExMQwduzYettrKFqtlg8++IC2bdvy0UcfUVFREVg3efJkLrzwQmJjYzEajbRt25YJEybU2j4pKYmtW7eycOHCwLE7//zzgVNzXcbFxdGmTRsyMjICZevXr2fw4MGEhoYSHBxM//79WbFiRa3t6sspdf7559O+fXu2bdvGBRdcgMVioXHjxrXu9wULFtCjRw8Abr311sA+HilcMSMjg02bNnHRRRc1aJ86dOhAdHR0YJ8WL17MNddcQ9OmTTEajTRp0oSHHnoIh8MR2GbkyJF8/PHHQO2QwBrqyynVkHur5jj98MMPvPLKKyQmJmIymejfvz/p6em1jt2MGTPYt29foO+kpKTAer1ez/nnn8+vv/7aoGMgkUgkkr8H0lNKIpFIJKeFiooKiouLa5VFR0cD8OWXXzJixAgGDhzIG2+8gd1uZ8KECfTp04f169cHXjzmzp3L3r17ufXWW4mLi2Pr1q1MnDiRrVu3smLFChRF4corr2TXrl18++23vPfee4E+YmJiKCoqOma7r7nmGlq0aMGrr76KEAKAV155hWeeeYZrr72W22+/naKiIj788EP69u3L+vXrj+j98vvvv5OUlMR5551X7/q+ffuSlJRUS8Q4ki31MXLkSH744QduvvlmevXqxcKFCxkyZEiD9zk9PZ2rr76aUaNGMWLECD7//HNGjhxJt27daNeuHeAXXn755ReuueYakpOTKSgo4NNPP6Vfv35s27aNhISEBvd3LLz//vtcdtll3Hjjjbjdbr777juuueYapk+fXmcfFy5cyPfff8+YMWMwGo2MHz+eQYMGsWrVqkDoaEFBAb169QokRo+JiWHmzJmMGjWKysrKesOEjkRBQQHnnnsudrudMWPGEBUVxZQpU7jsssv46aefuOKKK+jbty9ffvklN998cyCs6miMHj2aL774gltvvZUxY8aQkZHBRx99xPr161m6dGkgBO6LL74gODiYhx9+mODgYP766y+effZZKisreeuttwLtzZ07l0svvZT4+HgeeOAB4uLi2L59O9OnT+eBBx4I1PP5fAwcOJCePXvy9ttvM2/ePN555x1SUlK4++67j+nYHIxWq+X666/nmWeeYcmSJYFzN2HCBNq1a8dll12GTqfj999/55577kFVVe69914Axo0bx/33309wcDBPPfUUAI0aNQJOzXXp8XjIysoiKioK8OeDO++88wgNDeWxxx5Dr9fz6aefcv7557Nw4UJ69ux5xPbKysoYNGgQV155Jddeey0//fQTjz/+OB06dGDw4MG0adOGF198kWeffZY777wz8F1x7rnnHrbNZcuWAdC1a9cG7VNZWRllZWWkpqYC8OOPP2K327n77ruJiopi1apVfPjhh2RnZ/Pjjz8C/mswNzeXuXPn8uWXXx61j2O9t15//XU0Gg1jx46loqKCN998kxtvvJGVK1cC8NRTT1FRUUF2djbvvfceQJ1E/N26dePXX3+lsrKS0NDQBh0LiUQikZxhhEQikUgkp5DJkycLoN6PEEJUVVWJ8PBwcccdd9TaLj8/X4SFhdUqt9vtddr/9ttvBSAWLVoUKHvrrbcEIDIyMmrVzcjIEICYPHlynXYA8dxzzwWWn3vuOQGI66+/vla9zM
xModVqxSuvvFKrfPPmzUKn09UpP5jy8nIBiMsvv/ywdYQQ4rLLLhOAqKysPKItB6+rYe3atQIQDz74YK16I0eOrLO
"text/plain": [
"<Figure size 1200x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Parallel-coordinates plot of every patient, coloured by its original label.\n",
"fig, ax = plt.subplots(figsize=(12, 6))\n",
"\n",
"# Prepare data for plotting\n",
"plot_data_labels = pd.DataFrame(data_x.copy())\n",
"label_data = data_y.reset_index(drop=True)\n",
"plot_data_labels['Cluster'] = label_data\n",
"\n",
"# Update column names with feature names (the leading columns are the features)\n",
"new_columns_labels = plot_data_labels.columns.tolist()\n",
"new_columns_labels[0:len(feature_names)] = feature_names\n",
"plot_data_labels.columns = new_columns_labels\n",
"\n",
"# Plotting with parallel coordinates\n",
"parallel_coordinates(plot_data_labels, 'Cluster', colormap='viridis', ax=ax)\n",
"ax.set_title('Feature Original Label of each Data Point (Patient)')\n",
"ax.set_xlabel('Feature')\n",
"ax.set_ylabel('Feature Value')\n",
"ax.tick_params(axis='x', rotation=90)\n",
"\n",
"\n",
"def class_display_name(class_value):\n",
"    # Translate a legend entry (string form of a class value) into a readable label name.\n",
"    # assumes integer class codes index into label_names -- TODO confirm against how data_y is encoded\n",
"    try:\n",
"        code = float(class_value)\n",
"    except ValueError:\n",
"        # Already a textual label: show it unchanged\n",
"        return class_value\n",
"    if code.is_integer() and 0 <= code < len(label_names):\n",
"        return label_names[int(code)]\n",
"    return class_value\n",
"\n",
"\n",
"# Set the legend from the handles parallel_coordinates labelled (one line per class).\n",
"# Passing only labels= to legend() would pair the names with the first lines drawn,\n",
"# i.e. the first few patients, whatever class they belong to.\n",
"handles, class_values = ax.get_legend_handles_labels()\n",
"ax.legend(handles, [class_display_name(value) for value in class_values], loc='upper right')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}