create file
parent
34dd8ca284
commit
a936b47f2b
|
@ -0,0 +1,105 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Decision Tree"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import Data from Database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# connect to the database\n",
|
||||
"conn = sqlite3.connect('../features.db')\n",
|
||||
"c = conn.cursor()\n",
|
||||
"# get training, validation and test data\n",
|
||||
"train = pd.read_sql_query(\"SELECT * FROM train\", conn)\n",
|
||||
"valid = pd.read_sql_query(\"SELECT * FROM validation\", conn)\n",
|
||||
"test = pd.read_sql_query(\"SELECT * FROM test\", conn)\n",
|
||||
"# close the connection\n",
|
||||
"conn.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Format Data for Machine Learning"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get the target and features\n",
|
||||
"train_y = train['y']\n",
|
||||
"train_y = train_y.map({'GSVT': 0, 'AFIB': 1, 'SR': 2, 'SB': 3})\n",
|
||||
"train_x = train.drop(columns=['y'])\n",
|
||||
"\n",
|
||||
"valid_y = valid['y']\n",
|
||||
"valid_y = valid_y.map({'GSVT': 0, 'AFIB': 1, 'SR': 2, 'SB': 3})\n",
|
||||
"valid_x = valid.drop(columns=['y'])\n",
|
||||
"\n",
|
||||
"test_y = test['y']\n",
|
||||
"test_y = test_y.map({'GSVT': 0, 'AFIB': 1, 'SR': 2, 'SB': 3})\n",
|
||||
"test_x = test.drop(columns=['y'])\n",
|
||||
"\n",
|
||||
"# drop id column\n",
|
||||
"train_x = train_x.drop(columns=['id'])\n",
|
||||
"valid_x = valid_x.drop(columns=['id'])\n",
|
||||
"test_x = test_x.drop(columns=['id'])\n",
|
||||
"\n",
|
||||
"print('train_x shape:', train_x.shape)\n",
|
||||
"print('test_x shape:', test_x.shape)\n",
|
||||
"print('valid_x shape:', valid_x.shape)\n",
|
||||
"# print column names\n",
|
||||
"print('features:', train_x.columns.to_list())\n",
|
||||
"feature_names = train_x.columns.to_list()\n",
|
||||
"\n",
|
||||
"# Create an imputer object with a mean filling strategy\n",
|
||||
"imputer = SimpleImputer(strategy='mean')\n",
|
||||
"\n",
|
||||
"train_x = imputer.fit_transform(train_x)\n",
|
||||
"valid_x = imputer.transform(valid_x)\n",
|
||||
"test_x = imputer.transform(test_x)\n",
|
||||
"\n",
|
||||
"# Scale Data between 0 and 1\n",
|
||||
"scaler = MinMaxScaler()\n",
|
||||
"# Fit the scaler to your data and then transform it\n",
|
||||
"train_x = scaler.fit_transform(train_x)\n",
|
||||
"valid_x = scaler.transform(valid_x)\n",
|
||||
"test_x = scaler.transform(test_x)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# use xgboost\n",
|
||||
"dtrain = xgb.DMatrix(train_x, label=train_y)\n",
|
||||
"dvalid = xgb.DMatrix(valid_x, label=valid_y)\n",
|
||||
"dtest = xgb.DMatrix(test_x, label=test_y)\n",
|
||||
"\n",
|
||||
"num_classes= len(set(valid_y.to_list()))\n",
|
||||
"print('number of classes:', num_classes)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue