"""Group WFDB ECG records by diagnosis category and pickle each group.

The script walks a two-level directory tree below ``data_dir``, reads every
record that has a ``.hea`` header file, assigns it to the first category in
``categories`` whose code list shares a code with the record's ``Dx`` comment,
and writes one bz2-compressed pickle per category into ``./data``.
"""
import bz2
import os
import pickle

import numpy as np
import pandas as pd

# Directories and file paths
project_dir = 'C:/Users/arman/PycharmProjects/pythonProject/DSA/a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0'
data_dir = project_dir + '/WFDBRecords'
path_diag_lookup = project_dir + '/ConditionNames_SNOMED-CT.csv'

# Category label -> diagnosis codes that place a record in that category.
# Dict order matters: a record goes to the FIRST category that matches.
# (713422000 is listed twice under 'GSVT'; harmless for membership tests.)
categories = {
    'SB': [426177001],
    'AFIB': [164889003, 164890007],
    'GSVT': [426761007, 713422000, 233896004, 233897008, 713422000],
    'SR': [426783006, 427393009],
}

# Upper bound on the number of successfully processed records.
max_counter = 100_000

# Lookup table read from ``path_diag_lookup``; loaded on first use so that
# importing this module performs no file I/O.
diagnosis_lookup = None


def _get_diagnosis_lookup():
    """Return the diagnosis lookup table, reading the CSV on first call."""
    global diagnosis_lookup
    if diagnosis_lookup is None:
        diagnosis_lookup = pd.read_csv(path_diag_lookup)
    return diagnosis_lookup


# Helper functions for working with the data

def get_diagnosis_ids(record):
    """Return the integer diagnosis codes stored in a record's comments.

    Args:
        record: Object with a ``comments`` sequence of strings, one of which
            has the form ``'Dx: <code>,<code>,...'``.

    Returns:
        List of the codes as ``int``, in the order they appear.

    Raises:
        IndexError: If no comment starts with ``'Dx:'`` and there are fewer
            than three comments.
        ValueError: If a code cannot be parsed as an integer.
    """
    # Prefer the comment that is actually labelled 'Dx:'; fall back to the
    # third comment, which is where the original code expected it.
    dx_comment = next(
        (comment for comment in record.comments if comment.startswith('Dx:')),
        None,
    )
    if dx_comment is None:
        dx_comment = record.comments[2]
    codes = dx_comment.replace('Dx: ', '')
    return [int(code.strip()) for code in codes.split(',')]


def get_diagnosis_name(diagnosis):
    """Return the 'Full Name' entry of the lookup table for each code.

    Args:
        diagnosis: Iterable of codes to look up in the 'Snomed_CT' column.

    Returns:
        List of strings, one per code, produced by ``Series.to_string`` on
        the matching 'Full Name' rows.
    """
    lookup = _get_diagnosis_lookup()
    return [
        lookup[lookup['Snomed_CT'] == code]['Full Name'].to_string(index=False)
        for code in diagnosis
    ]


def filter_signal_df_on_diag(df_dict, diagnosis_dict, filter_codes_df):
    """Keep only rows whose diagnoses all belong to the allowed codes.

    Args:
        df_dict: Mapping of arbitrary keys to DataFrames; each DataFrame's
            index is matched against the keys of ``diagnosis_dict``.
        diagnosis_dict: Mapping of index label -> iterable of codes.
        filter_codes_df: DataFrame with a 'Snomed_CT' column of allowed codes.
            The code ``0`` is always allowed in addition.

    Returns:
        Dict with the same keys as ``df_dict`` and each DataFrame restricted
        to rows whose label has only allowed codes.
    """
    allowed_codes = set(filter_codes_df['Snomed_CT']) | {0}
    kept_labels = [
        label
        for label, codes in diagnosis_dict.items()
        if all(code in allowed_codes for code in codes)
    ]
    return {
        key: df.loc[df.index.isin(kept_labels)]
        for key, df in df_dict.items()
    }


def _find_category(diagnosis_ids, category_map):
    """Return the first category whose codes overlap ``diagnosis_ids``, else None."""
    for category_name, category_codes in category_map.items():
        if any(code in category_codes for code in diagnosis_ids):
            return category_name
    return None


def _iter_record_paths(root):
    """Yield ``(record_name, record_path)`` for each '.hea' file two levels below ``root``."""
    suffix = '.hea'
    for dir_th in os.listdir(root):
        path_to_1000_records = root + '/' + dir_th
        # Skip stray files; os.listdir() on them would raise.
        if not os.path.isdir(path_to_1000_records):
            continue
        for dir_hd in os.listdir(path_to_1000_records):
            path_to_100_records = path_to_1000_records + '/' + dir_hd
            if not os.path.isdir(path_to_100_records):
                continue
            for file_name in os.listdir(path_to_100_records):
                if not file_name.endswith(suffix):
                    continue
                record_name = file_name[:-len(suffix)]
                yield record_name, path_to_100_records + '/' + record_name


def _collect_records(root, category_map, max_records):
    """Read records below ``root`` and group them by category.

    Records that fail to load or parse are reported and skipped; they do not
    count towards ``max_records``. Records matching no category are counted
    but not stored.
    """
    # Imported here so the pure helpers above stay importable without wfdb.
    import wfdb

    grouped = {name: [] for name in category_map}
    counter = 0
    for record_name, record_path in _iter_record_paths(root):
        try:
            record = wfdb.rdrecord(record_path)
            diagnosis = get_diagnosis_ids(record)
        except Exception as e:
            # Best effort: one unreadable record must not abort the scan.
            print(f"Fehler beim Lesen des Datensatzes {record_name}: {e}")
            continue

        category_name = _find_category(diagnosis, category_map)
        if category_name is not None:
            grouped[category_name].append(record)

        counter += 1
        if counter % 100 == 0:
            print(f"Gelesen {counter} Datensätze")
        if counter >= max_records:
            break
    return grouped


def _write_groups(grouped, out_dir='./data'):
    """Pickle each category's record list into ``<out_dir>/<name>.pkl.bz2``."""
    os.makedirs(out_dir, exist_ok=True)
    for cat_name, records in grouped.items():
        print(f"Schreibe {cat_name} in eine komprimierte Datei mit {len(records)} Datensätzen")
        compressed_filename = f'{out_dir}/{cat_name}.pkl.bz2'
        with bz2.open(compressed_filename, 'wb') as f:
            pickle.dump(records, f)


def main():
    """Load the lookup table, scan the data directory and write the groups."""
    # Load first so a missing lookup CSV fails before the long scan starts.
    _get_diagnosis_lookup()
    grouped = _collect_records(data_dir, categories, max_counter)
    _write_groups(grouped)


if __name__ == '__main__':
    main()