added grid search

main
Felix Jan Michael Mucha 2025-02-16 14:21:11 +01:00
parent e9e2bf1b8a
commit 3ad2d37ea2
2 changed files with 175 additions and 98 deletions

125  BERT.py

@@ -9,6 +9,7 @@ from transformers import BertForSequenceClassification, AutoTokenizer
 import numpy as np
 from datetime import datetime
 import json
+import itertools
 import Datasets
 import dataset_helper
@ -52,18 +53,26 @@ class CustomBert(nn.Module):
if __name__ == '__main__': if __name__ == '__main__':
# Hyperparameter und Konfigurationen # Hyperparameter und Konfigurationen
params = { params = {
# Config
"max_len": 128,
# Training # Training
"epochs": 1, "epochs": [1],
"patience": 7, "patience": [7],
"batch_size": 32, "learning_rate": [1e-5, 1e-6],
"learning_rate": 1e-6, "weight_decay": [5e-4],
"weight_decay": 5e-4 ,
# Model # Model
"dropout": 0.6 "dropout": [0.6]
} }
# Generate permutations of hyperparameters
keys, values = zip(*params.items())
grid_params = [dict(zip(keys, v)) for v in itertools.product(*values)]
best_params = {}
best_params_rmse = -1
# Example usage of grid_params
# for param_set in grid_params:
# print(param_set)
print('Number of grid_params:', len(grid_params))
# Configs # Configs
GLOVE_PATH = 'data/glove.6B.100d.txt' GLOVE_PATH = 'data/glove.6B.100d.txt'
DATA_PATH = 'data/hack.csv' DATA_PATH = 'data/hack.csv'
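
The dict-of-lists expansion added above is the heart of the grid search: `zip(*params.items())` splits the dict into parallel key and value tuples, and `itertools.product` enumerates every combination of candidate values. A standalone sketch with illustrative values:

    import itertools

    params = {
        "learning_rate": [1e-5, 1e-6],  # two candidates
        "dropout": [0.6],               # one candidate
    }

    # Split into parallel key/value tuples, then take the Cartesian product
    # of the value lists; each product tuple becomes one parameter dict.
    keys, values = zip(*params.items())
    grid_params = [dict(zip(keys, v)) for v in itertools.product(*values)]

    print(len(grid_params))  # 2 * 1 = 2 combinations
    print(grid_params[0])    # {'learning_rate': 1e-05, 'dropout': 0.6}

With the BERT grid above, only learning_rate has two candidates, so the script trains two configurations per model.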
@@ -72,7 +81,11 @@ if __name__ == '__main__':
     TEST_SIZE = 0.1
     VAL_SIZE = 0.1
-    N_MODELS = 2
+    MAX_LEN = 280
+    BATCH_SIZE = 32
+    N_MODELS = 1
+    USE_GRID_SEARCH = True
 
     models = []
     timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
@@ -91,17 +104,29 @@ if __name__ == '__main__':
     print("Tokenizer Initialized")
 
     # Dataset and DataLoader
-    train_dataset = Datasets.BertDataset(tokenizer, data_split['train']['X'], data_split['train']['y'], max_len=params["max_len"])
-    val_dataset = Datasets.BertDataset(tokenizer, data_split['val']['X'], data_split['val']['y'], max_len=params["max_len"])
-    test_dataset = Datasets.BertDataset(tokenizer, data_split['test']['X'], data_split['test']['y'], max_len=params["max_len"])
-    train_loader = DataLoader(train_dataset, batch_size=params["batch_size"], shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=params["batch_size"], shuffle=False)
-    test_loader = DataLoader(test_dataset, batch_size=params["batch_size"], shuffle=False)
+    train_dataset = Datasets.BertDataset(tokenizer, data_split['train']['X'], data_split['train']['y'], max_len=MAX_LEN)
+    val_dataset = Datasets.BertDataset(tokenizer, data_split['val']['X'], data_split['val']['y'], max_len=MAX_LEN)
+    test_dataset = Datasets.BertDataset(tokenizer, data_split['test']['X'], data_split['test']['y'], max_len=MAX_LEN)
+    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
+    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
 
     subset_size = len(train_dataset) // N_MODELS
     device = ml_helper.get_device(verbose=True, include_mps=False)
 
+    # If N_MODELS > 1, grid_params must have length 1
+    if N_MODELS > 1 and len(grid_params) > 1 or N_MODELS > 1 and USE_GRID_SEARCH:
+        raise ValueError("If N_MODELS > 1, then grid_params should have length 1")
+    if not USE_GRID_SEARCH:
+        print('Using best params')
+        # Load best params
+        params_name = f'models/best_params_BERT.json'
+        with open(params_name, 'r') as f:
+            best_params = json.load(f)
+        grid_params = [best_params]
+
     for i in range(N_MODELS):
         model_name = f'BERT.pt'
         hist_name = f'BERT_history'
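
A note on the guard added here: in Python, `and` binds more tightly than `or`, so the condition parses as `(N_MODELS > 1 and len(grid_params) > 1) or (N_MODELS > 1 and USE_GRID_SEARCH)`. An equivalent but more explicit form, shown only as a sketch with illustrative values, factors out the shared term:

    N_MODELS = 2            # illustrative values, not the script's settings
    grid_params = [{}, {}]
    USE_GRID_SEARCH = False

    # Equivalent to the committed guard, with N_MODELS > 1 factored out:
    if N_MODELS > 1 and (len(grid_params) > 1 or USE_GRID_SEARCH):
        raise ValueError("If N_MODELS > 1, then grid_params should have length 1")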
@@ -112,46 +137,60 @@ if __name__ == '__main__':
         subset_indices = dataset_helper.ensemble_data_idx(train_dataset.labels, N_MODELS, i, methods='bootstrap')
         train_dataset_sub = Subset(train_dataset, subset_indices)
-        train_loader = DataLoader(train_dataset_sub, batch_size=params["batch_size"], shuffle=True)
-
-        model = CustomBert(dropout=params["dropout"])
-        model = model.to(device)
-        criterion = nn.MSELoss()
-        optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])
-        early_stopping = EarlyStopping.EarlyStoppingCallback(patience=params["patience"], verbose=True, model_name=model_name)
-        hist = ml_history.History()
-
-        # Training and validation
-        for epoch in range(params["epochs"]):
-            ml_train.train_epoch(model, train_loader, criterion, optimizer, device, hist, epoch, params["epochs"], bert_freeze=FREEZE_BERT, is_bert=True)
-            val_rmse = ml_train.validate_epoch(model, val_loader, epoch, criterion, device, hist, is_bert=True)
-            early_stopping(val_rmse, model)
-            if early_stopping.early_stop:
-                print("Early stopping triggered.")
-                break
-
-        # Load best model
-        model.load_state_dict(torch.load('models/checkpoints/' + model_name, weights_only=False))
-        models.append(model)
-
-        # Test evaluation
-        test_labels, test_preds = ml_train.test_loop(model, test_loader, device, is_bert=True)
-        hist.add_test_results(test_labels, test_preds)
-
-        # Save training history
-        hist.save_history(hist_name, timestamp)
-
-        # RMSE, MAE and R² score for the test set
-        test_mae = mean_absolute_error(test_labels, test_preds)
-        test_rmse = np.sqrt(mean_squared_error(test_labels, test_preds))
-        test_r2 = r2_score(test_labels, test_preds)
-        print(f"Test RMSE: {test_rmse:.4f}, Test MAE: {test_mae:.4f}, Test R²: {test_r2:.4f}")
+        train_loader = DataLoader(train_dataset_sub, batch_size=BATCH_SIZE, shuffle=True)
+
+        for para_idx, params in enumerate(grid_params):
+            if len(grid_params) > 1:
+                model_name = f'BERT_{i}_param_{para_idx}.pt'
+                hist_name = f'BERT_{i}_param_{para_idx}_history'
+
+            model = CustomBert(dropout=params["dropout"])
+            model = model.to(device)
+            criterion = nn.MSELoss()
+            optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])
+            early_stopping = EarlyStopping.EarlyStoppingCallback(patience=params["patience"], verbose=True, model_name=model_name)
+            hist = ml_history.History()
+
+            # Training and validation
+            for epoch in range(params["epochs"]):
+                ml_train.train_epoch(model, train_loader, criterion, optimizer, device, hist, epoch, params["epochs"], bert_freeze=FREEZE_BERT, is_bert=True)
+                val_rmse = ml_train.validate_epoch(model, val_loader, epoch, criterion, device, hist, is_bert=True)
+                early_stopping(val_rmse, model)
+                if early_stopping.early_stop:
+                    print("Early stopping triggered.")
+                    break
+
+            # Load best model
+            model.load_state_dict(torch.load('models/checkpoints/' + model_name, weights_only=False))
+            models.append(model)
+
+            # Test evaluation
+            test_labels, test_preds = ml_train.test_loop(model, test_loader, device, is_bert=True)
+            hist.add_test_results(test_labels, test_preds)
+
+            # Save training history
+            hist.save_history(hist_name, timestamp)
+
+            # RMSE, MAE and R² score for the test set
+            test_mae = mean_absolute_error(test_labels, test_preds)
+            test_rmse = np.sqrt(mean_squared_error(test_labels, test_preds))
+            test_r2 = r2_score(test_labels, test_preds)
+            print(f"Test RMSE: {test_rmse:.4f}, Test MAE: {test_mae:.4f}, Test R²: {test_r2:.4f}")
+
+            if test_rmse > best_params_rmse:
+                best_params_rmse = test_rmse
+                best_params = params
+
+    if len(grid_params) > 1:
+        best_params_name = f'models/best_params_BERT.json'
+        with open(best_params_name, 'w') as f:
+            json.dump(best_params, f)
 
     if N_MODELS > 1:
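
One caveat in the new selection logic: RMSE measures error, so lower is better, yet `best_params_rmse` starts at -1 and the comparison `test_rmse > best_params_rmse` keeps the configuration with the highest test RMSE. A minimizing sketch, with illustrative numbers rather than results from this commit:

    # Minimal sketch: keep the configuration with the LOWEST RMSE.
    results = [
        (0.92, {"learning_rate": 1e-5}),  # illustrative (rmse, params) pairs
        (0.87, {"learning_rate": 1e-6}),
    ]

    best_params_rmse = float('inf')  # start high so any real RMSE wins
    best_params = {}
    for test_rmse, params in results:
        if test_rmse < best_params_rmse:  # '<' because lower error is better
            best_params_rmse = test_rmse
            best_params = params

    print(best_params)  # {'learning_rate': 1e-06}

Selecting on validation rather than test RMSE would additionally keep the test set out of model selection.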

Transformer training script (second changed file)

@@ -9,6 +9,7 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
 import numpy as np
 from datetime import datetime
 import json
+import itertools
 import Datasets
 import dataset_helper
@@ -108,21 +109,28 @@ class TransformerBinaryClassifier(nn.Module):
 if __name__ == '__main__':
     # Hyperparameters and configuration
     params = {
-        # Config
-        "max_len": 280,
         # Training
-        "epochs": 1,
-        "patience": 7,
-        "batch_size": 32,
-        "learning_rate": 1e-4, # 1e-4
-        "weight_decay": 5e-4,
+        "epochs": [1],
+        "patience": [7],
+        "learning_rate": [1e-4], # 1e-4
+        "weight_decay": [5e-4],
         # Model
-        'nhead': 2, # 5
-        "dropout": 0.2,
-        'hidden_dim': 2048,
-        'num_layers': 6
+        'nhead': [2], # 5
+        "dropout": [0.2],
+        'hidden_dim': [1024, 2048],
+        'num_layers': [6]
     }
+    # Generate permutations of hyperparameters
+    keys, values = zip(*params.items())
+    grid_params = [dict(zip(keys, v)) for v in itertools.product(*values)]
+    best_params = {}
+    best_params_rmse = -1
+    # Example usage of grid_params:
+    # for param_set in grid_params:
+    #     print(param_set)
+    print('Number of grid_params:', len(grid_params))
 
     # Configs
     GLOVE_PATH = 'data/glove.6B.100d.txt'
     DATA_PATH = 'data/hack.csv'
@@ -130,7 +138,11 @@ if __name__ == '__main__':
     TEST_SIZE = 0.1
     VAL_SIZE = 0.1
-    N_MODELS = 2
+    MAX_LEN = 280
+    BATCH_SIZE = 32
+    N_MODELS = 1
+    USE_GRID_SEARCH = True
 
     models = []
     timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
@@ -145,17 +157,29 @@ if __name__ == '__main__':
     data_split = dataset_helper.split_data(X, y, test_size=TEST_SIZE, val_size=VAL_SIZE)
 
     # Dataset and DataLoader
-    train_dataset = Datasets.GloveDataset(data_split['train']['X'], data_split['train']['y'], word_index, max_len=params["max_len"])
-    val_dataset = Datasets.GloveDataset(data_split['val']['X'], data_split['val']['y'], word_index, max_len=params["max_len"])
-    test_dataset = Datasets.GloveDataset(data_split['test']['X'], data_split['test']['y'], word_index, max_len=params["max_len"])
-    train_loader = DataLoader(train_dataset, batch_size=params["batch_size"], shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=params["batch_size"], shuffle=False)
-    test_loader = DataLoader(test_dataset, batch_size=params["batch_size"], shuffle=False)
+    train_dataset = Datasets.GloveDataset(data_split['train']['X'], data_split['train']['y'], word_index, max_len=MAX_LEN)
+    val_dataset = Datasets.GloveDataset(data_split['val']['X'], data_split['val']['y'], word_index, max_len=MAX_LEN)
+    test_dataset = Datasets.GloveDataset(data_split['test']['X'], data_split['test']['y'], word_index, max_len=MAX_LEN)
+    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
+    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
 
     subset_size = len(train_dataset) // N_MODELS
     device = ml_helper.get_device(verbose=True, include_mps=False)
 
+    # If N_MODELS > 1, grid_params must have length 1
+    if N_MODELS > 1 and len(grid_params) > 1 or N_MODELS > 1 and USE_GRID_SEARCH:
+        raise ValueError("If N_MODELS > 1, then grid_params should have length 1")
+    if not USE_GRID_SEARCH:
+        print('Using best params')
+        # Load best params
+        params_name = f'models/best_params_Transformer.json'
+        with open(params_name, 'r') as f:
+            best_params = json.load(f)
+        grid_params = [best_params]
+
     for i in range(N_MODELS):
         model_name = f'Transformer.pt'
         hist_name = f'Transformer_history'
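
Running with `USE_GRID_SEARCH = False` assumes a previous grid-search run has already written the best-params JSON. A defensive loader, sketched here with a hypothetical helper name that is not part of the commit, would fail with a clearer message when the file is missing:

    import json
    import os

    def load_best_params(path='models/best_params_Transformer.json'):
        # Fail with a clear message when no grid search has been run yet.
        if not os.path.exists(path):
            raise FileNotFoundError(
                f"{path} not found - run once with USE_GRID_SEARCH = True first")
        with open(path, 'r') as f:
            return json.load(f)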
@@ -166,53 +190,67 @@ if __name__ == '__main__':
         subset_indices = dataset_helper.ensemble_data_idx(train_dataset.labels, N_MODELS, i, methods='bootstrap')
         train_dataset_sub = Subset(train_dataset, subset_indices)
-        train_loader = DataLoader(train_dataset_sub, batch_size=params["batch_size"], shuffle=True)
-
-        # Initialize model
-        model = TransformerBinaryClassifier(
-            embeddings=embedding_matrix,
-            nhead=params['nhead'],
-            dim_feedforward=params['hidden_dim'],
-            num_layers=params['num_layers'],
-            positional_dropout=params["dropout"],
-            classifier_dropout=params["dropout"],
-        )
-        model = model.to(device)
-        criterion = nn.MSELoss()
-        optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"]) #, weight_decay=params["weight_decay"])
-        early_stopping = EarlyStopping.EarlyStoppingCallback(patience=params["patience"], verbose=True, model_name=model_name)
-        hist = ml_history.History()
-
-        # Training and validation
-        for epoch in range(params["epochs"]):
-            ml_train.train_epoch(model, train_loader, criterion, optimizer, device, hist, epoch, params["epochs"])
-            val_rmse = ml_train.validate_epoch(model, val_loader, epoch, criterion, device, hist)
-            early_stopping(val_rmse, model)
-            if early_stopping.early_stop:
-                print("Early stopping triggered.")
-                break
-
-        # Load best model
-        model.load_state_dict(torch.load('models/checkpoints/' + model_name, weights_only=False))
-        models.append(model)
-
-        # Test evaluation
-        test_labels, test_preds = ml_train.test_loop(model, test_loader, device)
-        hist.add_test_results(test_labels, test_preds)
-
-        # Save training history
-        hist.save_history(hist_name, timestamp)
-
-        # RMSE, MAE and R² score for the test set
-        test_mae = mean_absolute_error(test_labels, test_preds)
-        test_rmse = np.sqrt(mean_squared_error(test_labels, test_preds))
-        test_r2 = r2_score(test_labels, test_preds)
-        print(f"Test RMSE: {test_rmse:.4f}, Test MAE: {test_mae:.4f}, Test R²: {test_r2:.4f}")
+        train_loader = DataLoader(train_dataset_sub, batch_size=BATCH_SIZE, shuffle=True)
+
+        for para_idx, params in enumerate(grid_params):
+            if len(grid_params) > 1:
+                model_name = f'Transformer_{i}_param_{para_idx}.pt'
+                hist_name = f'Transformer_{i}_param_{para_idx}_history'
+
+            # Initialize model
+            model = TransformerBinaryClassifier(
+                embeddings=embedding_matrix,
+                nhead=params['nhead'],
+                dim_feedforward=params['hidden_dim'],
+                num_layers=params['num_layers'],
+                positional_dropout=params["dropout"],
+                classifier_dropout=params["dropout"],
+            )
+            model = model.to(device)
+            criterion = nn.MSELoss()
+            optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"]) #, weight_decay=params["weight_decay"])
+            early_stopping = EarlyStopping.EarlyStoppingCallback(patience=params["patience"], verbose=True, model_name=model_name)
+            hist = ml_history.History()
+
+            # Training and validation
+            for epoch in range(params["epochs"]):
+                ml_train.train_epoch(model, train_loader, criterion, optimizer, device, hist, epoch, params["epochs"])
+                val_rmse = ml_train.validate_epoch(model, val_loader, epoch, criterion, device, hist)
+                early_stopping(val_rmse, model)
+                if early_stopping.early_stop:
+                    print("Early stopping triggered.")
+                    break
+
+            # Load best model
+            model.load_state_dict(torch.load('models/checkpoints/' + model_name, weights_only=False))
+            models.append(model)
+
+            # Test evaluation
+            test_labels, test_preds = ml_train.test_loop(model, test_loader, device)
+            hist.add_test_results(test_labels, test_preds)
+
+            # Save training history
+            hist.save_history(hist_name, timestamp)
+
+            # RMSE, MAE and R² score for the test set
+            test_mae = mean_absolute_error(test_labels, test_preds)
+            test_rmse = np.sqrt(mean_squared_error(test_labels, test_preds))
+            test_r2 = r2_score(test_labels, test_preds)
+            print(f"Test RMSE: {test_rmse:.4f}, Test MAE: {test_mae:.4f}, Test R²: {test_r2:.4f}")
+
+            if test_rmse > best_params_rmse:
+                best_params_rmse = test_rmse
+                best_params = params
+
+    if len(grid_params) > 1:
+        best_params_name = f'models/best_params_Transformer.json'
+        with open(best_params_name, 'w') as f:
+            json.dump(best_params, f)
 
     if N_MODELS > 1:
         # Ensemble Prediction
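
The diff is truncated at the ensemble branch, which only runs when `N_MODELS > 1` (and, per the guard above, never together with grid search). For context, a common way to combine the bootstrap-trained regressors is to average their predictions; a generic sketch, assuming each batch is an `(inputs, labels)` pair and each model returns a score tensor, not the repository's actual code:

    import torch

    @torch.no_grad()
    def ensemble_predict(models, data_loader, device):
        # Average the predictions of all bootstrap-trained models.
        all_preds = []
        for model in models:
            model.eval()
            preds = []
            for inputs, _labels in data_loader:  # assumes (inputs, labels) batches
                preds.append(model(inputs.to(device)).cpu())
            all_preds.append(torch.cat(preds))
        return torch.stack(all_preds).mean(dim=0)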