# DSA_SS24/scripts/feature_db_checks.py
#
# 37 lines
# 1.1 KiB
# Python

import sqlite3
import pandas as pd
# Sanity checks for the SQLite database 'features.db': prints the table list,
# row counts, the schema and label distribution of the `features` table, and a
# preview of its first rows.
# NOTE: sqlite3.connect() creates an empty database file when 'features.db'
# does not exist in the working directory; the queries below then fail with
# sqlite3.OperationalError.
conn = sqlite3.connect('features.db')
try:
    c = conn.cursor()

    # print names of available tables
    c.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print("Table names: ", c.fetchall())

    # print the number of rows in the train, test and validation tables
    # (the table names are fixed literals, so building the SQL with an
    # f-string is safe here; identifiers cannot be bound as parameters)
    for table in ['train', 'test', 'validation']:
        c.execute(f'SELECT COUNT(*) FROM {table}')
        # COUNT(*) always yields exactly one row with one column
        print(f"Number of rows in the {table} table: ", c.fetchone()[0])

    # print the number of rows in features table
    c.execute('SELECT COUNT(*) FROM features')
    print("Number of rows in the features table: ", c.fetchone()[0])

    # print column info; PRAGMA table_info returns one row per column
    # (cid, name, type, notnull, dflt_value, pk), so the full column
    # metadata is printed, not only the names
    c.execute('PRAGMA table_info(features)')
    print("Column names in the features table: ", c.fetchall())

    # count for each label (column `y`) how many rows there are
    c.execute('SELECT y, COUNT(*) FROM features GROUP BY y')
    print("Number of rows for each label: ", c.fetchall())

    # Load data from the features table into a DataFrame
    df = pd.read_sql_query("SELECT * FROM features", conn)

    # Preview the first 15 rows
    print(df.head(15))
finally:
    # close the connection even when one of the queries above raises
    conn.close()