DSA_SS24/notebooks/data_quality.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading SB\n",
      "Length of SB: 50\n",
      "Reading AFIB\n",
      "Length of AFIB: 27\n",
      "Reading GSVT\n",
      "Length of GSVT: 0\n",
      "Reading SR\n",
      "Length of SR: 13\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pickle\n",
    "\n",
    "import wfdb\n",
    "from matplotlib import pyplot as plt\n",
    "# Load the pickled records of every category and report how many were read.\n",
    "\n",
    "# Directory holding one `<category>.pkl` file per category. Set the DSA_DATA_DIR\n",
    "# environment variable to point at your own copy instead of editing this cell.\n",
    "path = os.environ.get(\"DSA_DATA_DIR\", \"C:/Users/klara/projects/DSA/data\")\n",
    "\n",
    "# Category name -> codes grouped under it (presumably SNOMED CT codes - TODO confirm).\n",
    "# Only the keys are used in this cell, to decide which pickle files to read.\n",
    "categories_dict = {\n",
    "    'SB':    [426177001],\n",
    "    'AFIB':  [164889003, 164890007],\n",
    "    'GSVT':  [426761007, 713422000, 233896004, 233897008],\n",
    "    'SR':    [426783006, 427393009]\n",
    "}\n",
    "\n",
    "# Category name -> object unpickled from that category's file.\n",
    "data = {}\n",
    "for cat_name in categories_dict:\n",
    "    print(f\"Reading {cat_name}\")\n",
    "    # NOTE(review): pickle.load can run arbitrary code - only load files you trust.\n",
    "    with open(f'{path}/{cat_name}.pkl', 'rb') as f:\n",
    "        records = pickle.load(f)\n",
    "    # The file is closed at this point; only bookkeeping remains.\n",
    "    data[cat_name] = records\n",
    "    print(f\"Length of {cat_name}: {len(records)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check for missing data in timeseries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Missing timeseries in 0 records\n"
     ]
    }
   ],
   "source": [
    "# Collect every record whose signal length differs from the expected sample count.\n",
    "expected_samples = 5000\n",
    "missing_timeseries = []\n",
    "for records in data.values():\n",
    "    for record in records:\n",
    "        # Records with the expected length need no further handling.\n",
    "        if len(record.p_signal) == expected_samples:\n",
    "            continue\n",
    "        print(f\"Missing timeseries in {record.record_name}\")\n",
    "        missing_timeseries.append(record)\n",
    "print(f\"Missing timeseries in {len(missing_timeseries)} records\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"execution_count": 1,`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Reading SB\n",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"Length of SB: 50\n",`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`"Reading AFIB\n",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"Length of AFIB: 27\n",`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`"Reading GSVT\n",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"Length of GSVT: 0\n",`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`"Reading SR\n",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"Length of SR: 13\n"`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`]`
			`}`
			`],`
			`"source": [`
			`"import pickle\n",`
			`"from matplotlib import pyplot as plt\n",`
			`"import wfdb\n",`
			`"# read pickle files and check len and print first record and first record keys\n",`
			`"\n",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"#path = \"C:/Studium/dsa/data\"\n",`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`"#path = \"C:/Users/Nils/Documents/HS-Mannheim/0000_MASTER/DSA/EKG_Prog/data\"\n",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"path = \"C:/Users/klara/projects/DSA/data\"\n",`
			`"\n",`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`"\n",`
			`"categories_dict = {\n",`
			`"'SB': [426177001],\n",`
			`"'AFIB': [164889003, 164890007],\n",`
			`"'GSVT': [426761007, 713422000, 233896004, 233897008, 713422000],\n",`
			`"'SR': [426783006, 427393009]\n",`
			`"}\n",`
			`"\n",`
			`"\n",`
			`"data = {}\n",`
			`"for cat_name in categories_dict.keys():\n",`
			`" print(f\"Reading {cat_name}\")\n",`
			`" with open(f'{path}/{cat_name}.pkl', 'rb') as f:\n",`
			`" records = pickle.load(f)\n",`
			`" data[cat_name] = records\n",`
			`" print(f\"Length of {cat_name}: {len(records)}\")"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"## Check for missing data in timeseries"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"execution_count": 3,`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Missing timeseries in 0 records\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"missing_timeseries = []\n",`
			`"for cat_name, records in data.items():\n",`
			`" for record in records:\n",`
			`" if len(record.p_signal) != 5000:\n",`
			`" missing_timeseries.append(record)\n",`
			`" print(f\"Missing timeseries in {record.record_name}\")\n",`
			`" #if record.comments[2]== '':\n",`
			`"print(f\"Missing timeseries in {len(missing_timeseries)} records\")"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
Pfad angepasst 2024-05-29 09:25:12 +02:00			`"version": "3.11.9"`
quality checks and demographic plots 2024-05-08 17:45:29 +02:00			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`