# Source listing metadata: 37 lines, 1.1 KiB, Python
"""Print a quick summary of the SQLite database ``features.db``.

The script lists the tables in the database, prints the row counts of the
``train``, ``test``, ``validation`` and ``features`` tables, shows the
``features`` table schema and its per-label (column ``y``) row counts, and
finally loads ``features`` into a pandas DataFrame and prints its first
15 rows.
"""
import sqlite3

import pandas as pd

# NOTE: sqlite3.connect() creates an empty database file when the path does
# not exist yet; the queries below would then fail with OperationalError.
conn = sqlite3.connect('features.db')
try:
    c = conn.cursor()

    # print names of available tables
    c.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print("Table names: ", c.fetchall())

    # print the number of rows of each table in this fixed list (these names
    # are hard-coded, they are not taken from the query above)
    for table in ['train', 'test', 'validation']:
        # Table names cannot be bound as SQL parameters; interpolation is
        # acceptable here only because the names come from the literal list.
        c.execute(f'SELECT COUNT(*) FROM {table}')
        print(f"Number of rows in the {table} table: ", c.fetchone()[0])

    # print the number of rows in features table
    c.execute('SELECT COUNT(*) FROM features')
    print("Number of rows in the features table: ", c.fetchone()[0])

    # print column names; this prints the full table_info rows, i.e. one
    # (cid, name, type, notnull, dflt_value, pk) tuple per column
    c.execute('PRAGMA table_info(features)')
    print("Column names in the features table: ", c.fetchall())

    # count for each label how many rows there are
    c.execute('SELECT y, COUNT(*) FROM features GROUP BY y')
    print("Number of rows for each label: ", c.fetchall())

    # Load data from the features table into a DataFrame
    df = pd.read_sql_query("SELECT * FROM features", conn)

    # Now you can work with the data in the df DataFrame
    print(df.head(15))
finally:
    # close the connection even when one of the queries above raised
    conn.close()
|