# Decision Tree

In [6]:
import os
import sqlite3
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import xgboost as xgb
from joblib import dump, load
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier

## Import Data from Database

In [7]:
# Load the pre-split train / validation / test tables from the SQLite
# feature database into pandas DataFrames.
# connect to the database
conn = sqlite3.connect('../features.db')
try:
    # get training, validation and test data
    train = pd.read_sql_query("SELECT * FROM train", conn)
    valid = pd.read_sql_query("SELECT * FROM validation", conn)
    test = pd.read_sql_query("SELECT * FROM test", conn)
finally:
    # close the connection even when one of the queries above raises
    # (e.g. a missing table), so a failed run does not leave the handle open
    conn.close()

## Format Data for Machine Learning

In [8]:
# Integer class id assigned to each label string stored in column 'y'.
LABEL_MAP = {'GSVT': 0, 'AFIB': 1, 'SR': 2, 'SB': 3}


def _split_features_target(frame, split_name):
    """Split one table into its feature columns and integer-encoded target.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding the label column 'y', the column 'id' and the
        feature columns.
    split_name : str
        Name of the split; only used to make the error message readable.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The frame without 'y' and 'id', and the labels mapped through
        LABEL_MAP.

    Raises
    ------
    ValueError
        If 'y' contains a value that is not a key of LABEL_MAP. Series.map
        would turn such values into NaN, which only surfaces later as a
        hard-to-trace error during training.
    """
    target = frame['y'].map(LABEL_MAP)
    unmapped = target.isna()
    if unmapped.any():
        unknown = sorted(set(frame.loc[unmapped, 'y'].astype(str)))
        raise ValueError(f"{split_name}: labels without a class id: {unknown}")
    # drop the label and the id column so only features remain
    features = frame.drop(columns=['y', 'id'])
    return features, target


# get the target and features of every split
train_x, train_y = _split_features_target(train, 'train')
valid_x, valid_y = _split_features_target(valid, 'validation')
test_x, test_y = _split_features_target(test, 'test')

print('train_x shape:', train_x.shape)
print('test_x shape:', test_x.shape)
print('valid_x shape:', valid_x.shape)
# print column names (captured before the transforms below replace the
# DataFrames with plain arrays that no longer carry column labels)
feature_names = train_x.columns.to_list()
print('features:', feature_names)

# Create an imputer object with a mean filling strategy.
# It is fitted on the training split only and then applied to all splits.
imputer = SimpleImputer(strategy='mean')

train_x = imputer.fit_transform(train_x)
valid_x = imputer.transform(valid_x)
test_x = imputer.transform(test_x)

# Scale data with MinMaxScaler; the scaler is fitted on the training split
# only, so validation/test values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
train_x = scaler.fit_transform(train_x)
valid_x = scaler.transform(valid_x)
test_x = scaler.transform(test_x)

# use xgboost: wrap every split in a DMatrix
dtrain = xgb.DMatrix(train_x, label=train_y)
dvalid = xgb.DMatrix(valid_x, label=valid_y)
dtest = xgb.DMatrix(test_x, label=test_y)

# Count classes on the training labels: the model learns from this split,
# and a smaller split could be missing a class and under-count.
num_classes = train_y.nunique()
print('number of classes:', num_classes)

train_x shape: (3502, 10)
test_x shape: (438, 10)
valid_x shape: (438, 10)
features: ['age', 'gender', 'artial_rate', 'ventricular_rate', 'qrs_duration', 'qt_length', 'qrs_count', 'q_peak', 'r_axis', 't_axis']
number of classes: 4


In [9]:

# Baseline decision tree: fit on the training split, report accuracy on the
# validation split and on the test split.

# Hyperparameters of the baseline model.
MAX_DEPTH = 5       # example: limit the depth of the tree
RANDOM_STATE = 42   # fixed seed so a fresh Restart & Run All gives the same tree

dt_classifier = DecisionTreeClassifier(max_depth=MAX_DEPTH, random_state=RANDOM_STATE)

# Step 3: train the model on the training data
dt_classifier.fit(train_x, train_y)

# Step 4: evaluate the model on the validation data
valid_pred = dt_classifier.predict(valid_x)
valid_accuracy = accuracy_score(valid_y, valid_pred)
print(f'Validierungsgenauigkeit: {valid_accuracy}')

# Step 5: hyperparameter optimisation
# TODO: not implemented yet; the test score below is for the untuned baseline.

# Step 6: final evaluation on the test data
test_pred = dt_classifier.predict(test_x)
test_accuracy = accuracy_score(test_y, test_pred)
print(f'Testgenauigkeit: {test_accuracy}')


Validierungsgenauigkeit: 0.7557077625570776
Testgenauigkeit: 0.7922374429223744
