added Gradient Boosting Tree Classifier
parent
9924e1675d
commit
642431e484
|
@ -152,11 +152,16 @@ The exact procedure for creating the matrix can be found in the notebook [demogr
|
||||||
|
|
||||||
The following two hypotheses were applied in this project:
|
The following two hypotheses were applied in this project:
|
||||||
|
|
||||||
1. Using ECG data, a classifier can classify the four disease groupings with an accuracy of 80%.
|
**Hypothesis 1**:
|
||||||
|
1. Using ECG data, a classifier can classify the four diagnostic groupings with an accuracy of at least 80%.
|
||||||
|
|
||||||
Result:
|
Result:
|
||||||
- For the first hypothesis, an accuracy of 83 % was achieved with the XGBoost classifier. The detailed procedure can be found in the following notebook: [ml_xgboost.ipynb](notebooks/ml_xgboost.ipynb)
|
- For the first hypothesis, an accuracy of 83 % was achieved with the XGBoost classifier. The detailed procedure can be found in the following notebook: [ml_xgboost.ipynb](notebooks/ml_xgboost.ipynb)
|
||||||
|
- An accuracy of 82 % was also achieved with a Gradient Boosting Tree Classifier. The detailed procedure can be found in the following notebook: [ml_grad_boost_tree.ipynb](notebooks/ml_grad_boost_tree.ipynb)
|
||||||
|
|
||||||
|
With these classifiers, the hypothesis is confirmed: a classifier is able to classify the diagnostic groups with an accuracy of at least 80%.
|
||||||
|
|
||||||
|
**Hypothesis 2**:
|
||||||
|
|
||||||
2. Sinus bradycardia occurs significantly more frequently in the 60 to 70 age group than in other age groups.
|
2. Sinus bradycardia occurs significantly more frequently in the 60 to 70 age group than in other age groups.
|
||||||
|
|
||||||
|
|
Binary file not shown.
File diff suppressed because one or more lines are too long
|
@ -1,8 +1,15 @@
|
||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Extreme Gradient Boosting (XGBoost) Training and Analysis"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 36,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -14,6 +21,8 @@
|
||||||
"import xgboost as xgb\n",
|
"import xgboost as xgb\n",
|
||||||
"from sklearn.model_selection import GridSearchCV\n",
|
"from sklearn.model_selection import GridSearchCV\n",
|
||||||
"from sklearn.metrics import confusion_matrix\n",
|
"from sklearn.metrics import confusion_matrix\n",
|
||||||
|
"from sklearn.impute import SimpleImputer\n",
|
||||||
|
"from sklearn.preprocessing import MinMaxScaler\n",
|
||||||
"import seaborn as sns"
|
"import seaborn as sns"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -26,7 +35,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -50,7 +59,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": 42,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -323,12 +332,12 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"load the best model"
|
"load the best model to get the best hyperparameters from it"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 43,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -353,14 +362,14 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 44,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"[23:05:40] WARNING: C:/Users/administrator/workspace/xgboost-win64_release_1.6.0/src/learner.cc:627: \n",
|
"[20:16:51] WARNING: C:/Users/administrator/workspace/xgboost-win64_release_1.6.0/src/learner.cc:627: \n",
|
||||||
"Parameters: { \"best_iteration\", \"best_ntree_limit\", \"scikit_learn\" } might not be used.\n",
|
"Parameters: { \"best_iteration\", \"best_ntree_limit\", \"scikit_learn\" } might not be used.\n",
|
||||||
"\n",
|
"\n",
|
||||||
" This could be a false alarm, with some parameters getting used by language bindings but\n",
|
" This could be a false alarm, with some parameters getting used by language bindings but\n",
|
||||||
|
@ -474,8 +483,8 @@
|
||||||
"[97]\ttrain-merror:0.00029\teval-merror:0.18265\n",
|
"[97]\ttrain-merror:0.00029\teval-merror:0.18265\n",
|
||||||
"[98]\ttrain-merror:0.00029\teval-merror:0.18265\n",
|
"[98]\ttrain-merror:0.00029\teval-merror:0.18265\n",
|
||||||
"[99]\ttrain-merror:0.00029\teval-merror:0.18265\n",
|
"[99]\ttrain-merror:0.00029\teval-merror:0.18265\n",
|
||||||
"CPU times: total: 15.5 s\n",
|
"CPU times: total: 17.6 s\n",
|
||||||
"Wall time: 1.2 s\n"
|
"Wall time: 1.36 s\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -497,7 +506,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 45,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -537,7 +546,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 46,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue