From c9732002272bb41d371575e43e78418f62a0317e Mon Sep 17 00:00:00 2001
From: Nils <1826514@stud.hs-mannheim.de>
Date: Fri, 7 Jun 2024 09:44:17 +0200
Subject: [PATCH] filter and avg qrs, ~90% finished

---
 notebooks/qrs_filter.ipynb | 359 +++++++++++++++++++++++++++++++++++++
 1 file changed, 359 insertions(+)
 create mode 100644 notebooks/qrs_filter.ipynb

diff --git a/notebooks/qrs_filter.ipynb b/notebooks/qrs_filter.ipynb
new file mode 100644
index 0000000..30cb3e9
--- /dev/null
+++ b/notebooks/qrs_filter.ipynb
@@ -0,0 +1,359 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# QRS Complex Detection\n",
+    "This notebook holds all the functions to detect a QRS complex in an ECG signal, so that we can differentiate whether a signal comes from a \"healthy\" person or from a person with a heart condition.\n",
+    "First, we define thresholds for what a \"healthy\" QRS complex looks like. To do so, we normalize the data and extract the typical peaks and lengths of the ECG signal in the QRS region."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import necessary packages\n",
+    "\n",
+    "import neurokit2 as nk\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import json\n",
+    "import sys\n",
+    "import wfdb\n",
+    "\n",
+    "sys.path.append('../scripts')\n",
+    "import data_helper"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Reading SB\n",
+      "Reading AFIB\n",
+      "Reading GSVT\n",
+      "Reading SR\n",
+      "Number of patients per category:\n",
+      "SB: 16559\n",
+      "AFIB: 9839\n",
+      "GSVT: 948\n",
+      "SR: 9720\n"
+     ]
+    }
+   ],
+   "source": [
+    "data_org = data_helper.load_data(only_demographic=False)\n",
+    "\n",
+    "print(\"Number of patients per category:\")\n",
+    "for cat_name in data_org.keys():\n",
+    "    print(f\"{cat_name}: {len(data_org[cat_name])}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cleaning Data from Noise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# separate the records by diagnosis\n",
+    "sb_data = data_org['SB']\n",
+    "afib_data = data_org['AFIB']\n",
+    "gsvt_data = data_org['GSVT']\n",
+    "sr_data = data_org['SR']\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: total: 562 ms\n",
+      "Wall time: 1.46 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "with open('../filter_params.json','r') as f:\n",
+    "    filter_params = json.load(f)\n",
+    "sig_channel = 0\n",
+    "np.random.seed(501)\n",
+    "sampling_rate = 500\n",
+    "### Pick Random Samples from Set\n",
+    "amount_sb_records = len(sb_data)\n",
+    "sample_indizies = np.random.choice(amount_sb_records,1000,replace=False)\n",
+    "sb_data_sample = [sb_data[x].p_signal[:,sig_channel] for x in sample_indizies]\n",
+    "clean_sb = [nk.ecg_clean(elem,sampling_rate=sampling_rate) for elem in sb_data_sample]"
+   ]
+  },
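+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check we can overlay one raw SB record with its cleaned version. This is a minimal sketch, not part of the detection pipeline; it assumes `sb_data_sample`, `clean_sb` and `sampling_rate` from the cell above, and the record index is arbitrary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sanity-check sketch: compare one raw record with its nk.ecg_clean output.\n",
+    "# Assumes sb_data_sample, clean_sb and sampling_rate from the cell above.\n",
+    "record_idx = 0  # arbitrary example record\n",
+    "t = np.arange(len(clean_sb[record_idx])) / sampling_rate  # time axis in seconds\n",
+    "\n",
+    "plt.figure(figsize=(12, 3))\n",
+    "plt.plot(t, sb_data_sample[record_idx], label='raw', alpha=0.5)\n",
+    "plt.plot(t, clean_sb[record_idx], label='cleaned')\n",
+    "plt.xlabel('time [s]')\n",
+    "plt.ylabel('amplitude')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },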
"sample_indizies = np.random.choice(amount_afib_records,1000,replace=False)\n", + "afib_data_sample = [afib_data[x].p_signal[:,sig_channel] for x in sample_indizies]\n", + "clean_afib = [nk.ecg_clean(elem,sampling_rate=sampling_rate) for elem in afib_data_sample]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: total: 547 ms\n", + "Wall time: 1.8 s\n" + ] + } + ], + "source": [ + "%%time\n", + "gsvt_data_sample= [gsvt_data[x].p_signal[:,sig_channel] for x in range(len(gsvt_data))]\n", + "clean_gsvt = [nk.ecg_clean(elem,sampling_rate=sampling_rate) for elem in gsvt_data_sample]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: total: 391 ms\n", + "Wall time: 1.72 s\n" + ] + } + ], + "source": [ + "%%time\n", + "amount_sr_records = len(sr_data)\n", + "sample_indizies = np.random.choice(amount_sr_records,1000,replace=False)\n", + "sr_data_sample = [sr_data[x].p_signal[:,sig_channel] for x in sample_indizies] \n", + "clean_sr = [nk.ecg_clean(elem,sampling_rate=sampling_rate) for elem in sr_data_sample]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lokalisierung der QRS-Komplexe" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "avg_sb = np.mean(clean_sb,axis=1)\n", + "\n", + "avg_afib = np.mean(clean_afib,axis=1)\n", + "\n", + "avg_gsvt = np.mean(clean_gsvt,axis=1)\n", + "\n", + "avg_sr = np.mean(clean_sr,axis=1)\n", + "\n", + "def get_r_peaks(clean_ecg_data,sampling_rate)->list:\n", + " return [nk.ecg_peaks(record,sampling_rate=sampling_rate)[1] for record in clean_ecg_data]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "sb_r_peaks = get_r_peaks(clean_sb,sampling_rate)\n", + "afib_r_peaks = get_r_peaks(clean_afib,sampling_rate)\n", + "# gsvt_r_peaks = get_r_peaks(clean_gsvt,sampling_rate)\n", + "sr_r_peaks = get_r_peaks(clean_sr,sampling_rate)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# sb_r_peaks = [x for x in sb_r_peaks if len(x['ECG_R_Peaks'])>3]\n", + "def remove_with_to_few(signals,clean):\n", + " index_for_removal = []\n", + " for index, element in enumerate(signals):\n", + " if len(element['ECG_R_Peaks']) < 3:\n", + " index_for_removal.append(index)\n", + " for i in index_for_removal:\n", + " del clean[i]\n", + " del signals[i]\n", + " return signals,clean\n", + "\n", + "sb_r_peaks,clean_sb = remove_with_to_few(sb_r_peaks,clean_sb)\n", + "afib_r_peaks,clean_afib = remove_with_to_few(afib_r_peaks,clean_afib)\n", + "sr_r_peaks,clean_sr = remove_with_to_few(sr_r_peaks,clean_sr)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sb done\n", + "afib done\n", + "sr done\n" + ] + } + ], + "source": [ + "def get_infos(ecg_data,r_peaks,sampling_rate:int)->list:\n", + " return [nk.ecg_delineate(ecg_data[x],r_peaks[x]['ECG_R_Peaks'],sampling_rate=sampling_rate,method=\"dwt\")[1]for x in range(len(ecg_data))]\n", + "\n", + "sb_info = get_infos(clean_sb,sb_r_peaks,sampling_rate=sampling_rate)\n", + "print('sb done')\n", + "afib_info = get_infos(clean_afib,afib_r_peaks,sampling_rate=sampling_rate)\n", + 
"print('afib done')\n", + "sr_info = get_infos(clean_sr,sr_r_peaks,sampling_rate=sampling_rate)\n", + "print('sr done')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "# Removes nan values from P and T peaks and removed corosponding 'base' signals\n", + "def remove_nan(signal,clean_signal):\n", + " nan_indizies = []\n", + " for i in range(len(signal)):\n", + " if math.isnan(signal[i]['ECG_P_Peaks'][0]):\n", + " nan_indizies.append(i)\n", + " for i in range(len(signal)):\n", + " if math.isnan(signal[i]['ECG_T_Peaks'][0]):\n", + " nan_indizies.append(i)\n", + " nth_removed_index = 0\n", + " for i in nan_indizies:\n", + " del signal[i-nth_removed_index]\n", + " del clean_signal[i-nth_removed_index]\n", + " nth_removed_index+=1\n", + " return signal,clean_signal\n", + "\n", + "sb_info,clean_sb = remove_nan(sb_info,clean_sb)\n", + "afib_info,clean_afib = remove_nan(afib_info,clean_afib)\n", + "sr_info,clean_sr = remove_nan(sr_info,clean_sr)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sb_p 854 sb_t 854 sb_clean 854\n" + ] + } + ], + "source": [ + "# get first P Peaks in Signals\n", + "first_sb_p_peak = [elem['ECG_P_Peaks'][0] for elem in sb_info]\n", + "first_afib_p_peak = [elem['ECG_P_Peaks'][0] for elem in afib_info]\n", + "first_sr_p_peak = [elem['ECG_P_Peaks'][0] for elem in sr_info]\n", + "\n", + "# get first T Peaks in Signals\n", + "first_sb_t_peak = [elem['ECG_T_Peaks'][0] for elem in sb_info]\n", + "first_afib_t_peak = [elem['ECG_T_Peaks'][0] for elem in afib_info]\n", + "first_sr_t_peak = [elem['ECG_T_Peaks'][0] for elem in sr_info]\n", + "\n", + "print(\"sb_p\",len(first_sb_p_peak),'sb_t',len(first_sb_t_peak),'sb_clean',len(clean_sb))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def cut_signals(signals,first_p,first_t):\n", + " return [signals[i][first_p[i]:first_t[i]] for i in range(len(signals))]\n", + " \n", + "\n", + "sliced_sb = cut_signals(clean_sb,first_sb_p_peak,first_sb_t_peak)\n", + "sliced_afib = cut_signals(clean_afib,first_afib_p_peak,first_afib_t_peak)\n", + "sliced_sr = cut_signals(clean_sr,first_sr_p_peak,first_sr_t_peak)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}