{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading SB\n", "Length of SB: 16559\n", "Reading AFIB\n", "Length of AFIB: 9839\n", "Reading GSVT\n", "Length of GSVT: 948\n", "Reading SR\n", "Length of SR: 9720\n" ] } ], "source": [ "import pickle\n", "from matplotlib import pyplot as plt\n", "import wfdb\n", "# read pickle files and check len and print first record and first record keys\n", "\n", "path = \"C:/Studium/dsa/data\"\n", "#path = \"C:/Users/Nils/Documents/HS-Mannheim/0000_MASTER/DSA/EKG_Prog/data\"\n", "\n", "categories_dict = {\n", "'SB': [426177001],\n", "'AFIB': [164889003, 164890007],\n", "'GSVT': [426761007, 713422000, 233896004, 233897008, 713422000],\n", "'SR': [426783006, 427393009]\n", "}\n", "\n", "\n", "data = {}\n", "for cat_name in categories_dict.keys():\n", " print(f\"Reading {cat_name}\")\n", " with open(f'{path}/{cat_name}.pkl', 'rb') as f:\n", " records = pickle.load(f)\n", " data[cat_name] = records\n", " print(f\"Length of {cat_name}: {len(records)}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Check for missing data in timeseries" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "First record in SB: ['Age: 59', 'Sex: Female', 'Dx: 426177001,164934002', 'Rx: Unknown', 'Hx: Unknown', 'Sx: Unknown']\n", "Missing sex in JS34080 and comments: Sex: Unknown\n", "Missing age in JS12543 and comments: Age: NaN\n", "Missing age in JS12571 and comments: Age: NaN\n", "Missing age in JS12576 and comments: Age: NaN\n", "Missing sex in JS12576 and comments: Sex: Unknown\n", "Missing age in JS12609 and comments: Age: NaN\n", "Missing sex in JS12609 and comments: Sex: Unknown\n", "Missing age in JS13024 and comments: Age: NaN\n", "Missing sex in JS13024 and comments: Sex: Unknown\n", "Missing age in JS13504 and comments: Age: NaN\n", "Missing age in JS13505 and comments: Age: NaN\n", "Missing age in JS13575 and comments: Age: NaN\n", "Missing age in JS13583 and comments: Age: NaN\n", "Missing age in JS13645 and comments: Age: NaN\n", "Missing age in JS13646 and comments: Age: NaN\n", "Missing age in JS13647 and comments: Age: NaN\n", "Missing age in JS14027 and comments: Age: NaN\n", "Missing age in JS14050 and comments: Age: NaN\n", "Missing age in JS14498 and comments: Age: NaN\n", "Missing age in JS14555 and comments: Age: NaN\n", "Missing age in JS14995 and comments: Age: NaN\n", "Missing sex in JS14995 and comments: Sex: Unknown\n", "Missing age in JS18505 and comments: Age: NaN\n", "Missing age in JS18506 and comments: Age: NaN\n", "Missing age in JS18507 and comments: Age: NaN\n", "Missing age in JS18508 and comments: Age: NaN\n", "Missing age in JS18509 and comments: Age: NaN\n", "Missing age in JS18510 and comments: Age: NaN\n", "Missing age in JS18511 and comments: Age: NaN\n", "Missing age in JS18512 and comments: Age: NaN\n", "Missing age in JS18513 and comments: Age: NaN\n", "Missing age in JS18514 and comments: Age: NaN\n", "Missing age in JS18515 and comments: Age: NaN\n", "Missing sex in JS18515 and comments: Sex: Unknown\n", "Missing age in JS18574 and comments: Age: NaN\n", "Missing age in JS19386 and comments: Age: NaN\n", "Missing age in JS19447 and comments: Age: NaN\n", "Missing age in JS10867 and comments: Age: NaN\n", "Missing sex in JS10867 and comments: Sex: Unknown\n", "Missing age in JS11507 and comments: Age: NaN\n", "Missing sex in JS11507 and comments: Sex: Unknown\n", "Missing age in JS22918 and comments: Age: NaN\n", "Missing sex in JS22918 and comments: Sex: Unknown\n", "Missing age in JS23063 and comments: Age: NaN\n", "Missing sex in JS23063 and comments: Sex: Unknown\n", "Missing age in JS23064 and comments: Age: NaN\n", "Missing age in JS23787 and comments: Age: NaN\n", "Missing sex in JS23787 and comments: Sex: Unknown\n", "Missing age in JS24143 and comments: Age: NaN\n", "Missing sex in JS24143 and comments: Sex: Unknown\n", "Missing age in JS24144 and comments: Age: NaN\n", "Missing sex in JS24144 and comments: Sex: Unknown\n", "Missing age in JS24145 and comments: Age: NaN\n", "Missing age in JS45355 and comments: Age: NaN\n", "Missing age in JS45356 and comments: Age: NaN\n", "Missing age in JS45357 and comments: Age: NaN\n", "Missing age in JS45358 and comments: Age: NaN\n", "Missing age in JS45359 and comments: Age: NaN\n", "Missing age in JS45360 and comments: Age: NaN\n", "Missing sex in JS45360 and comments: Sex: Unknown\n", "Missing age in JS45361 and comments: Age: NaN\n", "Missing sex in JS45361 and comments: Sex: Unknown\n", "Missing age in JS45364 and comments: Age: NaN\n", "Missing age in JS45367 and comments: Age: NaN\n", "Missing sex in JS45367 and comments: Sex: Unknown\n", "Missing age in JS45369 and comments: Age: NaN\n", "Missing age in JS45370 and comments: Age: NaN\n", "Missing sex in JS45370 and comments: Sex: Unknown\n", "Missing age in JS45382 and comments: Age: NaN\n", "Missing sex in JS45382 and comments: Sex: Unknown\n", "Missing age in JS45383 and comments: Age: NaN\n", "Missing sex in JS45383 and comments: Sex: Unknown\n", "Missing age in JS45384 and comments: Age: NaN\n", "Missing sex in JS45384 and comments: Sex: Unknown\n", "Missing age in JS45385 and comments: Age: NaN\n", "Missing sex in JS45385 and comments: Sex: Unknown\n", "Missing timeseries in 0 records\n", "Missing age in 55 records\n", "Missing sex in 21 records\n" ] } ], "source": [ "# print first record and first record keys\n", "print(f\"First record in SB: {data['SB'][0].comments}\")\n", "\n", "missing_timeseries = []\n", "missing_age = []\n", "missing_sex = []\n", "for cat_name, records in data.items():\n", " for record in records:\n", " if len(record.p_signal) != 5000:\n", " missing_timeseries.append(record)\n", " print(f\"Missing timeseries in {record.record_name}\")\n", " #if record.comments[2]== '':\n", " if 'Age: ' not in record.comments[0] or record.comments[0] == 'Age: NaN':\n", " missing_age.append(record)\n", " print(f\"Missing age in {record.record_name} and comments: {record.comments[0]}\")\n", " if record.comments[1] == 'Sex: Unknown' or record.comments[1] == '':\n", " missing_sex.append(record)\n", " print(f\"Missing sex in {record.record_name} and comments: {record.comments[1]}\")\n", " \n", "print(f\"Missing timeseries in {len(missing_timeseries)} records\")\n", "print(f\"Missing age in {len(missing_age)} records\")\n", "print(f\"Missing sex in {len(missing_sex)} records\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 2 }