Annpassung

main
klara 2024-06-08 18:06:08 +02:00
parent c973200227
commit babb31648c
1 changed files with 23 additions and 33 deletions

View File

@ -11,7 +11,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -29,27 +29,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Reading GSVT\n", "Reading GSVT\n"
"Reading AFIB\n", ]
"Reading SR\n", },
"Reading SB\n", {
"Number of patients per category:\n", "ename": "FileNotFoundError",
"GSVT: 0\n", "evalue": "[Errno 2] No such file or directory: 'C:/Studium/dsa/data/GSVT.pkl'",
"AFIB: 27\n", "output_type": "error",
"SR: 13\n", "traceback": [
"SB: 50\n" "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mdata_helper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43monly_demographic\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNumber of patients per category:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m cat_name \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mkeys():\n",
"File \u001b[1;32mc:\\Users\\klara\\projects\\DSA\\DSA_SS24\\notebooks\\../scripts\\data_helper.py:37\u001b[0m, in \u001b[0;36mload_data\u001b[1;34m(only_demographic, path_settings)\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m cat_name \u001b[38;5;129;01min\u001b[39;00m labels\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m 36\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReading \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcat_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m---> 37\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mpath_data\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcat_name\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.pkl\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 38\u001b[0m records \u001b[38;5;241m=\u001b[39m pickle\u001b[38;5;241m.\u001b[39mload(f)\n\u001b[0;32m 39\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m only_demographic:\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'C:/Studium/dsa/data/GSVT.pkl'"
] ]
} }
], ],
"source": [ "source": [
"data = data_helper.load_data(only_demographic=False)\n", "data = data_helper.load_data(only_demographic=True)\n",
"\n", "\n",
"print(\"Number of patients per category:\")\n", "print(\"Number of patients per category:\")\n",
"for cat_name in data.keys():\n", "for cat_name in data.keys():\n",
@ -58,37 +62,23 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "ename": "NameError",
"output_type": "stream", "evalue": "name 'data_helper' is not defined",
"text": [
"Reading GSVT\n",
"Reading AFIB\n",
"Reading SR\n",
"Reading SB\n"
]
},
{
"ename": "ValueError",
"evalue": "All arrays must be of the same length",
"output_type": "error", "output_type": "error",
"traceback": [ "traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[27], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m data_org \u001b[38;5;241m=\u001b[39m data_helper\u001b[38;5;241m.\u001b[39mload_data(only_demographic\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m----> 3\u001b[0m df_dgc \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_org\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m data_org \u001b[38;5;241m=\u001b[39m \u001b[43mdata_helper\u001b[49m\u001b[38;5;241m.\u001b[39mload_data(only_demographic\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 3\u001b[0m df_dgc \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(data_org)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\core\\frame.py:767\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[1;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[0;32m 761\u001b[0m mgr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_mgr(\n\u001b[0;32m 762\u001b[0m data, axes\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mindex\u001b[39m\u001b[38;5;124m\"\u001b[39m: index, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m: columns}, dtype\u001b[38;5;241m=\u001b[39mdtype, copy\u001b[38;5;241m=\u001b[39mcopy\n\u001b[0;32m 763\u001b[0m )\n\u001b[0;32m 765\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m 766\u001b[0m \u001b[38;5;66;03m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n\u001b[1;32m--> 767\u001b[0m mgr \u001b[38;5;241m=\u001b[39m \u001b[43mdict_to_mgr\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtyp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmanager\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 768\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, ma\u001b[38;5;241m.\u001b[39mMaskedArray):\n\u001b[0;32m 769\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mma\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m mrecords\n", "\u001b[1;31mNameError\u001b[0m: name 'data_helper' is not defined"
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\core\\internals\\construction.py:503\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[1;34m(data, index, columns, dtype, typ, copy)\u001b[0m\n\u001b[0;32m 499\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 500\u001b[0m \u001b[38;5;66;03m# dtype check to exclude e.g. range objects, scalars\u001b[39;00m\n\u001b[0;32m 501\u001b[0m arrays \u001b[38;5;241m=\u001b[39m [x\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m x \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m arrays]\n\u001b[1;32m--> 503\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marrays_to_mgr\u001b[49m\u001b[43m(\u001b[49m\u001b[43marrays\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtyp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtyp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconsolidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\core\\internals\\construction.py:114\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[1;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m verify_integrity:\n\u001b[0;32m 112\u001b[0m \u001b[38;5;66;03m# figure out the index, if necessary\u001b[39;00m\n\u001b[0;32m 113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m index \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 114\u001b[0m index \u001b[38;5;241m=\u001b[39m \u001b[43m_extract_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43marrays\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 116\u001b[0m index \u001b[38;5;241m=\u001b[39m ensure_index(index)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\pandas\\core\\internals\\construction.py:677\u001b[0m, in \u001b[0;36m_extract_index\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 675\u001b[0m lengths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mset\u001b[39m(raw_lengths))\n\u001b[0;32m 676\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(lengths) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m--> 677\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll arrays must be of the same length\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 679\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m have_dicts:\n\u001b[0;32m 680\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 681\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 682\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: All arrays must be of the same length"
] ]
} }
], ],
"source": [ "source": [
"data_org = data_helper.load_data(only_demographic=False)\n", "data_org = data_helper.load_data(only_demographic=True)\n",
"\n", "\n",
"df_dgc = pd.DataFrame(data_org)" "df_dgc = pd.DataFrame(data_org)"
] ]