DSA_SoSe_24/Experiments.ipynb

849 lines
72 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
2024-06-11 17:22:38 +02:00
"execution_count": 1,
"id": "initial_id",
"metadata": {
"jupyter": {
"is_executing": true
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
2024-06-07 09:36:01 +02:00
"from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
"from sklearn.model_selection import KFold\n",
"from sklearn import decomposition"
]
},
{
"cell_type": "code",
2024-06-11 17:22:38 +02:00
"execution_count": 2,
"id": "67503952-9074-4cdb-9d7e-d9142f7c319c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>thalach</th>\n",
" <th>oldpeak</th>\n",
" <th>sex_0</th>\n",
" <th>sex_1</th>\n",
" <th>cp_1</th>\n",
" <th>cp_2</th>\n",
" <th>cp_3</th>\n",
" <th>...</th>\n",
" <th>slope_1</th>\n",
" <th>slope_2</th>\n",
" <th>slope_3</th>\n",
" <th>thal_3.0</th>\n",
" <th>thal_6.0</th>\n",
" <th>thal_7.0</th>\n",
" <th>ca_0.0</th>\n",
" <th>ca_1.0</th>\n",
" <th>ca_2.0</th>\n",
" <th>ca_3.0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.708333</td>\n",
" <td>0.481132</td>\n",
" <td>0.244292</td>\n",
" <td>0.603053</td>\n",
" <td>0.370968</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.791667</td>\n",
" <td>0.622642</td>\n",
" <td>0.365297</td>\n",
" <td>0.282443</td>\n",
" <td>0.241935</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.791667</td>\n",
" <td>0.245283</td>\n",
" <td>0.235160</td>\n",
" <td>0.442748</td>\n",
" <td>0.419355</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.166667</td>\n",
" <td>0.339623</td>\n",
" <td>0.283105</td>\n",
" <td>0.885496</td>\n",
" <td>0.564516</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.250000</td>\n",
" <td>0.339623</td>\n",
" <td>0.178082</td>\n",
" <td>0.770992</td>\n",
" <td>0.225806</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" age trestbps chol thalach oldpeak sex_0 sex_1 cp_1 \\\n",
"0 0.708333 0.481132 0.244292 0.603053 0.370968 False True True \n",
"1 0.791667 0.622642 0.365297 0.282443 0.241935 False True False \n",
"2 0.791667 0.245283 0.235160 0.442748 0.419355 False True False \n",
"3 0.166667 0.339623 0.283105 0.885496 0.564516 False True False \n",
"4 0.250000 0.339623 0.178082 0.770992 0.225806 True False False \n",
"\n",
" cp_2 cp_3 ... slope_1 slope_2 slope_3 thal_3.0 thal_6.0 thal_7.0 \\\n",
"0 False False ... False False True False True False \n",
"1 False False ... False True False True False False \n",
"2 False False ... False True False False False True \n",
"3 False True ... False False True True False False \n",
"4 True False ... True False False True False False \n",
"\n",
" ca_0.0 ca_1.0 ca_2.0 ca_3.0 \n",
"0 True False False False \n",
"1 False False False True \n",
"2 False False True False \n",
"3 True False False False \n",
"4 True False False False \n",
"\n",
"[5 rows x 28 columns]"
]
},
2024-06-11 17:22:38 +02:00
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv('./data/dataset_cleaned.csv')\n",
"\n",
"# extract all columns except 'goal' --> X\n",
"X = df.loc[:, df.columns != 'goal']\n",
"# extract only the column 'goal' --> y\n",
"y = df.loc[:, 'goal']\n",
"\n",
"# add new axis to y, new shape: (n, 1)\n",
"y = y.to_numpy()\n",
"y = y.reshape((len(y),1))\n",
"\n",
"# binarize y\n",
"y[y>0] = 1\n",
"\n",
"factor_columns = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal', 'ca']\n",
"numeric_columns = [column for column in X.columns if column not in factor_columns]\n",
"\n",
"# transform factors into onehot vectors\n",
"X = pd.get_dummies(X, columns=factor_columns)\n",
"\n",
"# min max scaling of numeric columns\n",
"scaler = MinMaxScaler()\n",
"X[numeric_columns] = scaler.fit_transform(X[numeric_columns])\n",
"\n",
"X.head()"
]
},
{
"cell_type": "code",
2024-06-07 09:51:06 +02:00
"execution_count": 3,
"id": "2bbee865-c000-43da-84d9-ce7e04874110",
"metadata": {},
"outputs": [],
"source": [
"def get_model(n_features):\n",
"    \"\"\"Build and compile the feed-forward network used for binary classification.\n",
"\n",
"    Args:\n",
"        n_features: number of input features per sample.\n",
"\n",
"    Returns:\n",
"        A compiled Keras Sequential model named 'test' with two hidden ReLU\n",
"        layers of 30 units each and a single sigmoid output unit, using the\n",
"        Adam optimizer and binary cross-entropy loss.\n",
"    \"\"\"\n",
"    keras = tf.keras\n",
"\n",
"    # assemble the layer stack in order: input -> 2 hidden layers -> output\n",
"    stack = [keras.layers.InputLayer(shape=(n_features,))]\n",
"    stack += [keras.layers.Dense(30, activation='relu') for _ in range(2)]\n",
"    stack.append(keras.layers.Dense(1, activation='sigmoid'))\n",
"\n",
"    network = keras.models.Sequential(stack, name='test')\n",
"    network.compile(\n",
"        optimizer=keras.optimizers.Adam(),\n",
"        loss=keras.losses.BinaryCrossentropy(),\n",
"    )\n",
"    return network"
]
},
{
"cell_type": "code",
2024-06-07 09:51:06 +02:00
"execution_count": 4,
"id": "38eb4f87-ca3c-4ecf-a8ca-29422822d933",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"Training 10 folds for 20 epochs\n",
"Fold 0\n",
"\tTrain samples:\t267\tTest samples:\t30\n",
"\tAccuracy: 90.000%\n",
"Fold 1\n",
"\tTrain samples:\t267\tTest samples:\t30\n",
2024-06-11 17:22:38 +02:00
"\tAccuracy: 86.667%\n",
2024-06-07 09:51:06 +02:00
"Fold 2\n",
"\tTrain samples:\t267\tTest samples:\t30\n",
"\tAccuracy: 90.000%\n",
"Fold 3\n",
"\tTrain samples:\t267\tTest samples:\t30\n",
2024-06-11 17:22:38 +02:00
"\tAccuracy: 93.333%\n",
2024-06-07 09:51:06 +02:00
"Fold 4\n",
"\tTrain samples:\t267\tTest samples:\t30\n",
2024-06-11 17:22:38 +02:00
"WARNING:tensorflow:5 out of the last 5 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x0000024C840482C0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n",
"\tAccuracy: 83.333%\n",
2024-06-07 09:51:06 +02:00
"Fold 5\n",
"\tTrain samples:\t267\tTest samples:\t30\n",
2024-06-11 17:22:38 +02:00
"WARNING:tensorflow:6 out of the last 6 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x0000024C867CF920> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n",
"\tAccuracy: 90.000%\n",
2024-06-07 09:51:06 +02:00
"Fold 6\n",
"\tTrain samples:\t267\tTest samples:\t30\n",
2024-06-11 17:22:38 +02:00
"\tAccuracy: 76.667%\n",
2024-06-07 09:51:06 +02:00
"Fold 7\n",
"\tTrain samples:\t268\tTest samples:\t29\n",
2024-06-11 17:22:38 +02:00
"\tAccuracy: 89.655%\n",
2024-06-07 09:51:06 +02:00
"Fold 8\n",
"\tTrain samples:\t268\tTest samples:\t29\n",
"\tAccuracy: 79.310%\n",
"Fold 9\n",
"\tTrain samples:\t268\tTest samples:\t29\n",
2024-06-11 17:22:38 +02:00
"\tAccuracy: 79.310%\n",
"Avg accuracy 85.828%\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"\n",
2024-06-07 09:36:01 +02:00
"use_pca = True\n",
"# number of components extracted from the pca\n",
"n_features = 8\n",
2024-06-07 09:36:01 +02:00
"n_features = n_features if use_pca else len(X.columns)\n",
"\n",
"epochs = 20\n",
"k_folds = 10\n",
"\n",
"# used to split the dataset into k folds\n",
"kf = KFold(n_splits=k_folds)\n",
"\n",
"accuracies = []\n",
2024-06-07 09:51:06 +02:00
"print(f'Training {k_folds} folds for {epochs} epochs')\n",
"for i, (train_idx, test_idx) in enumerate(kf.split(X)):\n",
"\n",
2024-06-07 09:51:06 +02:00
" print(f'Fold {i}')\n",
" \n",
" # extract train and test data from the cleaned dataset\n",
" X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]\n",
" y_train, y_test = y[train_idx], y[test_idx]\n",
"\n",
2024-06-07 09:51:06 +02:00
" print(f'\\tTrain samples:\\t{len(X_train)}\\tTest samples:\\t{len(X_test)}')\n",
"\n",
2024-06-07 09:36:01 +02:00
" if use_pca:\n",
" # do pca based on the train data of the given fold to extract 'n_features'\n",
" pca = decomposition.PCA(n_components=n_features)\n",
" pca.fit(X_train)\n",
" X_train = pca.transform(X_train)\n",
"\n",
" # train the model using the components extracted from pca\n",
" model = get_model(n_features)\n",
" model.fit(X_train, y_train, epochs=epochs, verbose=0)\n",
"\n",
2024-06-07 09:36:01 +02:00
" if use_pca:\n",
" # transform the test data using the pca model fitted on the train data\n",
" X_test = pca.transform(X_test)\n",
" \n",
" y_pred = model.predict(X_test, verbose=0)\n",
2024-06-07 09:36:01 +02:00
" y_pred = y_pred > 0.5 # threshold to binarize\n",
"\n",
" # calculate the accuracy on the test data of the current fold\n",
" accuracy = sum(y_pred == y_test)[0] / len(y_pred)\n",
" accuracies.append(accuracy)\n",
2024-06-07 09:51:06 +02:00
" print(f'\\tAccuracy: {accuracy:.3%}')\n",
"\n",
"# calculate the average accuracy over all folds\n",
"avg_accuracy = sum(accuracies) / len(accuracies)\n",
2024-06-07 09:51:06 +02:00
"print(f'Avg accuracy {avg_accuracy:.3%}')"
]
2024-06-05 13:12:53 +02:00
},
{
"cell_type": "code",
2024-06-07 09:51:06 +02:00
"execution_count": 5,
2024-06-05 13:12:53 +02:00
"id": "95215693-47c9-4202-92f5-efbc65bc32c9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"Training 5 folds\n",
"Fold 0\n",
"\tTrain samples:\t237\tTest samples:\t60\n"
2024-06-05 13:12:53 +02:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\maxwi\\anaconda3\\Lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"\tAccuracy 58.333%\n",
"\n",
"Fold 1\n",
"\tTrain samples:\t237\tTest samples:\t60\n"
2024-06-05 13:12:53 +02:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\maxwi\\anaconda3\\Lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"\tAccuracy 50.000%\n",
"\n",
"Fold 2\n",
"\tTrain samples:\t238\tTest samples:\t59\n"
2024-06-05 13:12:53 +02:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\maxwi\\anaconda3\\Lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"\tAccuracy 55.932%\n",
"\n",
"Fold 3\n",
"\tTrain samples:\t238\tTest samples:\t59\n"
2024-06-05 13:12:53 +02:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\maxwi\\anaconda3\\Lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"\tAccuracy 57.627%\n",
"\n",
"Fold 4\n",
"\tTrain samples:\t238\tTest samples:\t59\n"
2024-06-05 13:12:53 +02:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\maxwi\\anaconda3\\Lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"\tAccuracy 52.542%\n",
"\n",
"Avg accuracy 54.887%\n"
2024-06-05 13:12:53 +02:00
]
}
],
"source": [
"from sklearn.cluster import KMeans\n",
"\n",
"use_pca = True\n",
"# number of components extracted from the pca\n",
"n_features = 10\n",
"\n",
"k_folds = 5\n",
"\n",
"# used to split the dataset into k folds\n",
"kf = KFold(n_splits=k_folds)\n",
"\n",
"accuracies = []\n",
"print(f'Training {k_folds} folds')\n",
"for i, (train_idx, test_idx) in enumerate(kf.split(X[numeric_columns])):\n",
"\n",
"    print(f'Fold {i}')\n",
"\n",
"    # extract train and test data from the cleaned dataset\n",
"    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]\n",
"    y_train, y_test = y[train_idx], y[test_idx]\n",
"    # y has shape (n, 1) while KMeans.predict returns shape (n,); take the\n",
"    # single label column so '==' compares element-wise instead of\n",
"    # broadcasting to an (n, n) matrix\n",
"    y_train, y_test = y_train[:, 0], y_test[:, 0]\n",
"\n",
"    print(f'\\tTrain samples:\\t{len(X_train)}\\tTest samples:\\t{len(X_test)}')\n",
"\n",
"    if use_pca:\n",
"        # do pca based on the train data of the given fold to extract 'n_features'\n",
"        pca = decomposition.PCA(n_components=n_features)\n",
"        pca.fit(X_train)\n",
"        X_train = pca.transform(X_train)\n",
"\n",
"    # fixed seed so that the cluster initialisation is reproducible across runs\n",
"    model = KMeans(n_clusters=2, n_init=10, random_state=0)\n",
"    model.fit(X_train)\n",
"\n",
"    if use_pca:\n",
"        # transform the test data using the pca model fitted on the train data\n",
"        X_test = pca.transform(X_test)\n",
"\n",
"    y_pred = model.predict(X_test)\n",
"\n",
"    # calculate the accuracy on the test data of the current fold;\n",
"    # cluster ids are arbitrary, so score both possible mappings of\n",
"    # cluster id -> class label and keep the better one\n",
"    accuracy1 = sum(y_pred == y_test) / len(y_pred)\n",
"    accuracy2 = sum(y_pred != y_test) / len(y_pred)\n",
"    accuracy = max(accuracy1, accuracy2)\n",
"    accuracies.append(accuracy)\n",
"    print(f'\\tAccuracy {accuracy:.3%}')\n",
"    print()\n",
"\n",
"# calculate the average accuracy over all folds\n",
"avg_accuracy = sum(accuracies) / len(accuracies)\n",
"print(f'Avg accuracy {avg_accuracy:.3%}')"
2024-06-05 13:12:53 +02:00
]
},
{
"cell_type": "code",
2024-06-07 09:51:06 +02:00
"execution_count": 6,
2024-06-05 13:12:53 +02:00
"id": "880302e4-82c1-47b9-9fe3-cb3567511639",
"metadata": {},
2024-06-07 09:36:01 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-06-07 09:51:06 +02:00
"Training 5 folds\n",
"Fold 0\n",
"\tTrain samples:\t237\tTest samples:\t60\n",
"\tAccuracy 85.000%\n",
"\n",
"Fold 1\n",
"\tTrain samples:\t237\tTest samples:\t60\n",
"\tAccuracy 90.000%\n",
"\n",
"Fold 2\n",
"\tTrain samples:\t238\tTest samples:\t59\n",
"\tAccuracy 84.746%\n",
"\n",
"Fold 3\n",
"\tTrain samples:\t238\tTest samples:\t59\n",
"\tAccuracy 76.271%\n",
"\n",
"Fold 4\n",
"\tTrain samples:\t238\tTest samples:\t59\n",
"\tAccuracy 77.966%\n",
"\n",
"Avg accuracy 82.797%\n"
2024-06-07 09:36:01 +02:00
]
}
],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"use_pca = True\n",
"# number of components extracted from the pca\n",
"n_features = 10\n",
"\n",
"k_folds = 5\n",
"\n",
"# used to split the dataset into k folds\n",
"kf = KFold(n_splits=k_folds)\n",
"\n",
"accuracies = []\n",
2024-06-07 09:51:06 +02:00
"print(f'Training {k_folds} folds')\n",
2024-06-07 09:36:01 +02:00
"for i, (train_idx, test_idx) in enumerate(kf.split(X[numeric_columns])):\n",
2024-06-07 09:51:06 +02:00
" print(f'Fold {i}')\n",
2024-06-07 09:36:01 +02:00
"\n",
" # extract train and test data from the cleaned dataset\n",
" X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]\n",
" y_train, y_test = y[train_idx], y[test_idx]\n",
" y_train, y_test = y_train[:, 0], y_test[:, 0]\n",
"\n",
2024-06-07 09:51:06 +02:00
" print(f'\\tTrain samples:\\t{len(X_train)}\\tTest samples:\\t{len(X_test)}')\n",
2024-06-07 09:36:01 +02:00
"\n",
" if use_pca:\n",
" # do pca based on the train data of the given fold to extract 'n_features'\n",
" pca = decomposition.PCA(n_components=n_features)\n",
" pca.fit(X_train)\n",
" X_train = pca.transform(X_train)\n",
"\n",
" model = RandomForestClassifier(max_depth=2, random_state=0)\n",
" model.fit(X_train, y_train)\n",
"\n",
" if use_pca:\n",
" X_test = pca.transform(X_test)\n",
" \n",
" y_pred = model.predict(X_test)\n",
"\n",
" # calculate the accuracy on the test data of the current fold\n",
" accuracy = sum(y_pred == y_test) / len(y_pred)\n",
" accuracies.append(accuracy)\n",
2024-06-07 09:51:06 +02:00
" print(f'\\tAccuracy {accuracy:.3%}')\n",
" print()\n",
2024-06-07 09:36:01 +02:00
"\n",
"# calculate the average accuracy over all folds\n",
"avg_accuracy = sum(accuracies) / len(accuracies)\n",
2024-06-07 09:51:06 +02:00
"print(f'Avg accuracy {avg_accuracy:.3%}')"
2024-06-07 09:36:01 +02:00
]
2024-06-07 15:16:29 +02:00
},
{
"cell_type": "markdown",
"id": "15b73e96-8b24-4087-b491-f9248577a886",
"metadata": {},
"source": [
"### Clustering and PCA\n",
"Um zu analysieren, ob ähnliche Merkmale auch zur gleichen Diagnose führen, wird zuerst ein k-Means Clustering angewandt."
]
},
{
"cell_type": "code",
2024-06-11 17:22:38 +02:00
"execution_count": 7,
2024-06-07 15:16:29 +02:00
"id": "79631688-07cb-450d-9958-8d8341722d7d",
"metadata": {},
"outputs": [
2024-06-11 17:22:38 +02:00
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\maxwi\\anaconda3\\Lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.\n",
" warnings.warn(\n"
]
},
2024-06-07 15:16:29 +02:00
{
"data": {
"text/html": [
2024-06-11 17:22:38 +02:00
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\
2024-06-07 15:16:29 +02:00
],
"text/plain": [
2024-06-11 17:22:38 +02:00
"KMeans(n_clusters=2, n_init='auto', random_state=42)"
2024-06-07 15:16:29 +02:00
]
},
2024-06-11 17:22:38 +02:00
"execution_count": 7,
2024-06-07 15:16:29 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prepare data for clustering\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"# prepare model KMeans\n",
"kmeans = KMeans(n_clusters=2, random_state=42, n_init='auto')\n",
"kmeans.fit(X)"
]
},
{
"cell_type": "code",
2024-06-11 17:22:38 +02:00
"execution_count": 8,
2024-06-07 15:16:29 +02:00
"id": "98eb04bb-e1f2-43e2-a18f-8c4c6c5dc788",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"50.2% der Datensätze wurden mithilfe von KMeans richtig einem Cluster zugeordnet\n"
]
}
],
"source": [
"# calculate the percentage of data points whose cluster membership matches the label\n",
"# y has shape (n, 1) but kmeans.labels_ has shape (n,); flatten y so that '=='\n",
"# compares element-wise instead of broadcasting to an (n, n) matrix\n",
"y_flat = y.ravel()\n",
"\n",
"cluster1 = kmeans.labels_ == 0\n",
"cluster2 = kmeans.labels_ == 1\n",
"\n",
"perc_cluster1 = np.round(np.mean(cluster1 == y_flat) * 100, decimals=2)\n",
"perc_cluster2 = np.round(np.mean(cluster2 == y_flat) * 100, decimals=2)\n",
"\n",
"# choose cluster with higher correspondence\n",
"# NOTE(review): the selected mask is True for the samples matched against y == 1;\n",
"# confirm that this is what 'km_healthy' is meant to denote\n",
"if perc_cluster1 > perc_cluster2:\n",
"    km_healthy = cluster1\n",
"    max_perc = perc_cluster1\n",
"else:\n",
"    km_healthy = cluster2\n",
"    max_perc = perc_cluster2\n",
"\n",
"print(f\"{max_perc}% der Datensätze wurden mithilfe von KMeans richtig einem Cluster zugeordnet\")\n",
"\n",
"# TODO: discuss whether this is a sensible way to evaluate the clustering; there appear to be dedicated metrics for this"
]
},
{
"cell_type": "code",
2024-06-11 17:22:38 +02:00
"execution_count": 9,
2024-06-07 15:16:29 +02:00
"id": "e622bdca-9518-4483-8f76-9b0613b2d055",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Proportion of variance explained by each principal component:\n",
"[2.34198813e-01 1.25628556e-01 1.09931362e-01 8.74811618e-02\n",
" 7.82747684e-02 6.31208837e-02 6.24229494e-02 5.34948492e-02\n",
" 4.17139647e-02 3.17012077e-02 2.52492654e-02 2.21354486e-02\n",
" 1.84895571e-02 1.74748048e-02 8.28895271e-03 5.47222590e-03\n",
" 4.87868838e-03 3.91078109e-03 3.44014667e-03 2.69161359e-03\n",
2024-06-11 17:22:38 +02:00
" 5.88469272e-33 3.26180402e-33 1.59388562e-33 1.39694325e-33\n",
" 1.30446173e-33 1.30446173e-33 1.30446173e-33 1.11776656e-34]\n"
2024-06-07 15:16:29 +02:00
]
},
{
"data": {
2024-06-11 17:22:38 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArwAAAIhCAYAAACsQmneAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwGklEQVR4nO3deZyN9fvH8feZfQwGM2PGGMbYyb5UCNmTopU2S6ivtFhaUDRoQYu0IRUioSKEypIlkcJYo2RJMRMGM/YxM5/fH35zcmY9a6PT6/l4nMdj5jqfc53rc9/nvs8199znPhZjjBEAAADgpXwKuwAAAADAk2h4AQAA4NVoeAEAAODVaHgBAADg1Wh4AQAA4NVoeAEAAODVaHgBAADg1Wh4AQAA4NVoeAEAAODVaHhht+nTp8tisVhvfn5+iomJ0YMPPqjDhw8XdnlO+/nnnzVy5EgdPHgwx329evVShQoV/vGaHHHixAndc889Kl26tCwWi2677bYcY44dO6aAgADdc889eeZJTU1VkSJF1LlzZ7fUNXLkSFksFrfkKkzZX/fZb6tXr/b4c+f22vTkY93BYrFo5MiR+Y5ZvXq1LBaLPv/8c4/W4o59l6f3BVnLwpOvJ8nxeXz55Ze69dZbFRkZqYCAAJUqVUpt2rTRrFmzdOnSJc8V+h/xySefaMKECYVdxn+CX2EXgH+fadOmqXr16jp//rzWrl2rMWPGaM2aNdqxY4dCQkIKuzyH/fzzzxo1apRuvPHGHG8EI0aM0IABAwqnMDu98MIL+uKLLzR16lRVqlRJpUqVyjEmIiJCnTt31oIFC3Ty5EmVLFkyx5g5c+bo/Pnz6tOnj1vq6tu3r2666Sa35LoaZL3us6tZs2YhVFOwTp06acOGDSpTpkxhl3LVcGXf5el9QYMGDbRhw4ar5vVkjFHv3r01ffp03XzzzRo/frzKlSunlJQUrVq1Sv3799fx48ev+v3j1e6TTz7Rzp07NXDgwMIuxevR8MJhtWrVUqNGjSRJrVq1UkZGhl544QUtWLBA999/f66POXfunIoUKfJPllmgS5cuFXgEslKlSv9QNc7buXOnKlWqlOeyz9KnTx/NmzdPs2bN0mOPPZbj/qlTpyoyMlKdOnVyqZ6sdR0TE6OYmBiXcl1Nrnzd/xtEREQoIiKisMu4qriy7/L0vqB48eK6/vrrPfocjnj11Vc1ffp0jRo1Ss8//7zNfbfeequeeeYZ/fbbb4VUHeA4TmmAy7J20r///ruky/8yK1q0qHbs2KH27durWLFiatOmjaTL/37v37+/ypYtq4CAAFWsWFHPPfecLl68aJPTYrHoscce03vvvaeqVasqMDBQNWvW1Jw5c3I8/86dO9WlSxeVLFlSQUFBqlevnj766CObMVn/Lpw5c6aefPJJlS1bVoGBgfrggw909913S7r8Bpj1L8/p06db55L9qO+FCxc0bNgwxcXFKSAgQGXLltWjjz6qU6dO2YyrUKGCbrnlFn399ddq0KCBgoODVb16dU2dOtWu5VrQsjp48KAsFotWrFih3bt3F/gv9g4dOigmJkbTpk3Lcd/u3bu1ceNG9ejRQ35+flq+fLm6dOmimJgYBQUFqXLlyvrf//6n48eP2zwu67SFLVu26K677lLJkiWtjUFupzTMnTtX7du3V5kyZRQcHKwaNWpo6NChOnv2rM24rNfQb7/9pptvvllFixZVuXLl9OSTT+Z4rVy8eFGjR49WjRo1FBQUpLCwMLVq1Urr16+3jjHGaOLEiapXr56Cg4NVsmRJ3XXXXdq/f79d68Iec+bMkcVi0TvvvGMTj4+Pl6+vr5YvXy7p7/X2yiuv6KWXXlL58uUVFBSkRo0aaeXKlQU+j73rJrdTGm688UbVqlVLP/30k5o3b64iRYqoYsWKGjt2rDIzM20en5qaqqeeesrmdT5w4MAc6yo1NVUPPfSQwsLCVLRoUd1000369ddfHVl0un
DhggYPHqyoqCgFBwerZcuWSkhIsN4/c+ZMWSwWbdiwIcdjR48eLX9/fx05csSh55Qc23flti/I2k/NnDlTNWrUUJEiRVS3bl0tXrw4x3Pt2bNH9957ryIjIxUYGKjy5curR48e1tdzbqc0ZNWza9cutWnTRiEhIYqIiNBjjz2mc+fO2eR/99131aJFC5UuXVohISGqXbu2XnnlFadOO7h06ZLGjRun6tWra8SIEbmOiYqK0g033GD93dF9+7Rp01StWjUFBwerUaNG+uGHH2SM0auvvqq4uDgVLVpUrVu3ztFUZ72Gv/vuO11//fUKDg5W2bJlNWLECGVkZNiMdbQme9bj3r17dd9996l06dIKDAxUjRo19O6779qMyVqXs2fP1nPPPafo6GgVL15cbdu21S+//GIzlyVLluj333+3OeUmy6RJk1S3bl0VLVpUxYoVU/Xq1fXss8/muj5gBwPYadq0aUaS+emnn2zib775ppFkpkyZYowxpmfPnsbf399UqFDBjBkzxqxcudJ888035vz586ZOnTomJCTEvPbaa2bZsmVmxIgRxs/Pz9x88802OSWZcuXKmZo1a5rZs2ebRYsWmZtuuslIMp999pl13J49e0yxYsVMpUqVzIwZM8ySJUvMvffeaySZcePGWcetWrXKSDJly5Y1d911l1m0aJFZvHixSUpKMi+//LKRZN59912zYcMGs2HDBnP06FHrXGJjY615MjMzTYcOHYyfn58ZMWKEWbZsmXnttddMSEiIqV+/vrlw4YJ1bGxsrImJiTE1a9Y0M2bMMN988425++67jSSzZs2afJe1PcvqwoULZsOGDaZ+/fqmYsWK1tpTUlLyzDt8+HAjyWzdutUm/vTTTxtJZvfu3cYYYyZNmmTGjBljFi1aZNasWWM++ugjU7duXVOtWjWTlpZmfVx8fLyRZGJjY82QIUPM8uXLzYIFC2zuu9ILL7xg3njjDbNkyRKzevVqM3nyZBMXF2datWplM65nz54mICDA1KhRw7z22mtmxYoV5vnnnzcWi8WMGjXKOu7SpUumVatWxs/Pzzz11FNm6dKlZtGiRebZZ581s2fPto576KGHjL+/v3nyySfN119/bT755BNTvXp1ExkZaZKSkvJdF1mv+x9++MFcunTJ5paenm4ztl+/fiYgIMC6jaxcudL4+PiY4cOHW8ccOHDA+vq+4YYbzLx588xnn31mGjdubPz9/c369etzPPeBAwesMXvXTW6PbdmypQkLCzNVqlQxkydPNsuXLzf9+/c3ksxHH31kHXf27FlTr149Ex4ebsaPH29WrFhh3nzzTRMaGmpat25tMjMzjTGXt4dWrVqZwMBA89JLL5lly5aZ+Ph4U7FiRSPJxMfH57tss7bLcuXKmS5dupgvv/zSfPzxx6Zy5cqmePHiZt++fcYYYy5evGiioqLM/fffb/P4S5cumejoaHP33Xfn+zyu7ruy7rtyX2DM5f1UhQoVzLXXXms+/fRTs3TpUnPjjTcaPz8/a+3GGLN161ZTtGhRU6FCBTN58mSzcuVK8/HHH5uuXbua1NRUm2WxatUq6+OytoPy5ctbl+/IkSONn5+fueWWW2xqGTRokJk0aZL5+uuvzbfffmveeOMNEx4ebh588EGbcbnNI7v169cbSWbIkCH5jsvi6L49NjbWNG3a1MyfP9988cUXpmrVqqZUqVJm0KBBpkuXLmbx4sVm1qxZJjIy0tSpU8f6ejPm79dwdHS0eeutt8w333xjnnjiCSPJPProo07XZM963LVrlwkNDTW1a9c2M2bMMMuWLTNPPvmk8fHxMSNHjrSOy1qXFSpUMPfff79ZsmSJmT17tilfvrypUqWKdb+xa9cu06xZMxMVFWXdf2/YsMEYY8zs2bONJPP444+bZcuWmRUrVpjJkyebJ554wq51gpxoeGG37G/8p0+fNosXLzYRERGmWLFi1sahZ8+eRpKZOnWqzeMnT55sJJlPP/3UJj
5u3DgjySxbtswak2SCg4NtmpH09HRTvXp1U7lyZWvsnnvuMYGBgebQoUM2OTt27GiKFCliTp06ZYz5ewfUokWLHPP
2024-06-07 15:16:29 +02:00
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# apply PCA\n",
"pca = PCA()\n",
"pca.fit(X)\n",
"\n",
"print(f\"Proportion of variance explained by each principal component:\\n{pca.explained_variance_ratio_}\")\n",
"\n",
"# Plot the proportion of variance explained\n",
"plt.figure(figsize=(8, 6))\n",
"plt.bar(range(1, len(pca.explained_variance_ratio_) + 1), pca.explained_variance_ratio_, color='skyblue')\n",
"plt.xlabel('Principal Component')\n",
"plt.ylabel('Proportion of Variance Explained')\n",
"plt.title('Proportion of Variance Explained by Principal Components')\n",
"plt.xticks(range(1, len(pca.explained_variance_ratio_) + 1))\n",
"plt.grid(axis='y', linestyle='--', alpha=0.7)\n",
"plt.show()"
]
},
2024-06-07 20:52:07 +02:00
{
"cell_type": "markdown",
"id": "088df814-6d04-450a-9091-a1e4acc6805e",
"metadata": {},
"source": [
"#### Interpretation\n",
"Eine Hauptkomponente mit einem größeren Varianzanteil erklärt mehr Variation in den Daten und ist daher wichtiger für die Reduktion der Dimensionalität. Die Varianzanteile aller Hauptkomponenten summieren sich zu 1, also zu 100 % der Gesamtvarianz der Daten. \n",
"In diesem spezifischen Fall erklärt die erste Hauptkomponente (PC1) etwa 23.4% der Gesamtvarianz, die zweite Hauptkomponente (PC2) etwa 12.6% usw. Basierend auf diesen Daten kann beurteilt werden, wie viel Varianz jede Hauptkomponente in den Daten erklärt und wie wichtig jede Hauptkomponente für die Repräsentation der Daten ist."
]
},
2024-06-07 15:16:29 +02:00
{
"cell_type": "code",
2024-06-11 17:22:38 +02:00
"execution_count": 10,
2024-06-07 15:16:29 +02:00
"id": "6e850f89-f6ba-4cce-8203-1e307e172505",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Contributions of features to the first principal component:\n",
" Feature Contribution\n",
"17 exang_1 0.332492\n",
"23 thal_7.0 0.331968\n",
"10 cp_4 0.328138\n",
"19 slope_2 0.272838\n",
"6 sex_1 0.196119\n",
"15 restecg_2 0.131840\n",
"25 ca_1.0 0.107372\n",
"4 oldpeak 0.094386\n",
"26 ca_2.0 0.078705\n",
"0 age 0.046441\n",
"27 ca_3.0 0.046405\n",
"22 thal_6.0 0.041738\n",
"1 trestbps 0.020995\n",
"20 slope_3 0.020165\n",
"12 fbs_1 0.013534\n",
"2 chol 0.005950\n",
"14 restecg_1 0.004777\n",
"7 cp_1 -0.009925\n",
"11 fbs_0 -0.013534\n",
"3 thalach -0.092913\n",
"13 restecg_0 -0.136618\n",
"8 cp_2 -0.139917\n",
"9 cp_3 -0.178297\n",
"5 sex_0 -0.196119\n",
"24 ca_0.0 -0.232482\n",
"18 slope_1 -0.293003\n",
"16 exang_0 -0.332492\n",
"21 thal_3.0 -0.373706\n"
]
}
],
"source": [
"# get the loadings or weights of features in the first principal component\n",
"first_pc_loadings = pca.components_[0]\n",
"\n",
"# create a DataFrame to display the contributions of features to the first principal component\n",
"pc_loadings_df = pd.DataFrame({\"Feature\": X.columns, \"Contribution\": first_pc_loadings})\n",
"pc_loadings_df = pc_loadings_df.sort_values(by=\"Contribution\", ascending=False)\n",
"\n",
"print(\"Contributions of features to the first principal component:\")\n",
"print(pc_loadings_df)"
]
2024-06-07 20:52:07 +02:00
},
{
"cell_type": "markdown",
"id": "7798ac52-c736-4598-951a-0901918b3a21",
"metadata": {},
"source": [
"#### Interpretation\n",
"Die Werte der Spalte \"Contribution\" zeigen die Stärke des Beitrags jedes Merkmals zur ersten Hauptkomponente. Merkmale mit größeren Beträgen haben eine größere Bedeutung für die erste Hauptkomponente und tragen mehr zur Variation der Daten bei. \n",
"In diesem spezifischen Fall haben beispielsweise die Merkmale \"exang_1\", \"thal_7.0\" und \"cp_4\" die stärksten positiven Beiträge zur ersten Hauptkomponente, während \"thal_3.0\", \"exang_0\" und \"slope_1\" die stärksten negativen Beiträge haben; betragsmäßig ist \"thal_3.0\" (-0.374) das Merkmal mit dem insgesamt größten Beitrag.\n",
"Darauf basierend kann analysiert werden, welche Merkmale die größte Bedeutung für die erste (wichtigste) Hauptkomponente haben und somit die größte Variation in den Daten erklären. Das kann helfen, die wichtigsten Merkmale zu identifizieren, die die gegebenen Datenstrukturen beeinflussen.\n",
"\n",
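"Hier würde das bedeuten, dass 'exang_1' (exercise-induced angina present), 'thal_7.0' (reversible defect in the thallium stress test) und 'cp_4' (asymptomatic type of chest pain) einen potenziell größeren Einfluss auf die Zielvariable haben als andere Merkmale. Da die PCA die Zielvariable nicht berücksichtigt, ist dies nur ein Hinweis, der mit einem überwachten Verfahren überprüft werden sollte."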
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-06-11 17:22:38 +02:00
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}