From 35251933d78ad37079d31b692a146562b4b02c2f Mon Sep 17 00:00:00 2001
From: Michelle Goeppinger
Date: Sun, 16 Feb 2025 13:02:33 +0100
Subject: [PATCH] Hyperparameter training

---
 CNN_HYPER.py | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 200 insertions(+)
 create mode 100644 CNN_HYPER.py

diff --git a/CNN_HYPER.py b/CNN_HYPER.py
new file mode 100644
index 0000000..2cb2b65
--- /dev/null
+++ b/CNN_HYPER.py
@@ -0,0 +1,200 @@
+import random
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, random_split
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import GridSearchCV
+from sklearn.base import BaseEstimator, RegressorMixin
+from tqdm import tqdm
+
+# Local imports
+import Datasets
+import dataset_helper
+import EarlyStopping
+import ml_helper
+import ml_history
+import ml_train
+
+# Fix randomness for reproducibility
+SEED = 501
+random.seed(SEED)
+np.random.seed(SEED)
+torch.manual_seed(SEED)
+torch.cuda.manual_seed_all(SEED)
+torch.backends.cudnn.deterministic = True
+
+
+class EnhancedCNNRegressor(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, filter_sizes, num_filters, embedding_matrix, dropout, max_len):
+        super().__init__()
+        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
+
+        # Convolutional branches: one per filter size, each with batch norm,
+        # ReLU, max pooling over the full time axis, and dropout.
+        # max_len is now a constructor argument instead of a global lookup.
+        self.convs = nn.ModuleList([
+            nn.Sequential(
+                nn.Conv2d(1, num_filters, (fs, embedding_dim)),
+                nn.BatchNorm2d(num_filters),
+                nn.ReLU(),
+                nn.MaxPool2d((max_len - fs + 1, 1)),
+                nn.Dropout(dropout)
+            )
+            for fs in filter_sizes
+        ])
+
+        # Fully connected head
+        self.fc1 = nn.Linear(len(filter_sizes) * num_filters, 128)
+        self.fc2 = nn.Linear(128, 1)
+        self.dropout = nn.Dropout(dropout)
+
+    def forward(self, x):
+        # (batch, max_len) -> (batch, 1, max_len, embedding_dim)
+        x = self.embedding(x).unsqueeze(1)
+        # Each branch pools down to (batch, num_filters, 1, 1); squeeze to (batch, num_filters)
+        conv_outputs = [conv(x).squeeze(3).squeeze(2) for conv in self.convs]
+        x = torch.cat(conv_outputs, 1)
+        x = torch.relu(self.fc1(x))
+        x = self.dropout(x)
+        return self.fc2(x).squeeze(1)
+
+
+class SklearnCNNWrapper(BaseEstimator, RegressorMixin):
+    def __init__(self, vocab_size, embedding_dim, filter_sizes, num_filters, dropout, lr,
+                 weight_decay, embedding_matrix, word_index, max_len, epochs=5,
+                 early_stopping_enabled=True):
+        # Per scikit-learn convention, __init__ only stores parameters under
+        # their own names; the network itself is built in fit().
+        self.vocab_size = vocab_size
+        self.embedding_dim = embedding_dim
+        self.filter_sizes = filter_sizes
+        self.num_filters = num_filters
+        self.dropout = dropout
+        self.lr = lr
+        self.weight_decay = weight_decay
+        self.embedding_matrix = embedding_matrix
+        self.word_index = word_index
+        self.max_len = max_len
+        self.epochs = epochs
+        self.early_stopping_enabled = early_stopping_enabled
+
+    def fit(self, X, y):
+        # Device detection
+        self.device = (
+            torch.device("cuda") if torch.cuda.is_available() else
+            torch.device("mps") if torch.backends.mps.is_available() else
+            torch.device("cpu")
+        )
+        print(f"Device detected and set: {self.device}")
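+
+        # NOTE (added explanation): GridSearchCV clones this estimator and
+        # applies each candidate configuration via set_params() *after*
+        # __init__ has run. Building the network here in fit() therefore
+        # guarantees that the current attribute values (num_filters, dropout,
+        # ...) are the ones the model is actually constructed with; a model
+        # built in __init__ would silently keep the initial hyperparameters.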
{next(self.model.parameters()).device}") + + # Datenaufbereitung + train_dataset = Datasets.GloveDataset(X, y, word_index, max_len=params["max_len"]) + train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) + val_loader = DataLoader(train_dataset, batch_size=32, shuffle=False) + + # Optimierer + optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay) + self.model.train() + + # Training über mehrere Epochen + for epoch in tqdm(range(5), desc="Training Epochs"): + print(f"Start Training Epoch {epoch+1}") + ml_train.train_epoch(self.model, train_loader, self.criterion, optimizer, self.device, self.history, epoch, 5) + val_rmse = ml_train.validate_epoch(self.model, val_loader, epoch, self.criterion, self.device, self.history) + + # Validierungsverlust ausgeben + print(f"Epoch {epoch+1}: Validation RMSE = {val_rmse}") + + # Early Stopping (falls aktiviert) + if self.early_stopping_enabled: + self.early_stopping(val_rmse, self.model) + if self.early_stopping.early_stop: + print(f"Early stopping triggered in epoch {epoch+1}.") + break + + # Trainingsergebnisse speichern + self.history.save_history("training_history.json") + return self + + def predict(self, X): + print(f"Gerät in predict(): {self.device}") + print(f"Modellgerät in predict(): {next(self.model.parameters()).device}") + + # Datenaufbereitung + test_dataset = Datasets.GloveDataset(X, np.zeros(len(X)), word_index, max_len=params["max_len"]) + test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False) + + self.model.eval() + predictions = [] + with torch.no_grad(): + for batch_X, _ in tqdm(test_loader, desc="Predicting"): + batch_X = batch_X.to(self.device) + outputs = self.model(batch_X).cpu().numpy() + predictions.extend(outputs) + return np.array(predictions) + + def score(self, X, y): + predictions = self.predict(X) + return -mean_squared_error(y, predictions) + + +if __name__ == '__main__': + # Konfigurationen + params = { + "max_len": 280, + "epochs": 5, # Für Debugging auf 5 reduziert + "batch_size": 32, + "learning_rate": 0.001, + "weight_decay": 5e-4, + "filter_sizes": [2, 3, 4, 5], + "num_filters": 150, + "dropout": 0.6 + } + + # Daten und Embedding laden + GLOVE_PATH = 'data/glove.6B.100d.txt' + DATA_PATH = 'data/hack.csv' + EMBEDDING_DIM = 100 + + embedding_matrix, word_index, vocab_size, d_model = dataset_helper.get_embedding_matrix( + gloVe_path=GLOVE_PATH, emb_len=EMBEDDING_DIM) + + X, y = dataset_helper.load_preprocess_data(path_data=DATA_PATH, verbose=True) + + # Hyperparameter Grid + param_grid = { + 'filter_sizes': [[3, 4, 5]], + 'num_filters': [100, 150], + 'dropout': [0.3, 0.5], + 'lr': [0.001], + 'weight_decay': [5e-4] + } + + # GridSearchCV ausführen + wrapper = SklearnCNNWrapper( + vocab_size=vocab_size, + embedding_dim=EMBEDDING_DIM, + filter_sizes=params["filter_sizes"], + num_filters=params["num_filters"], + dropout=params["dropout"], + lr=params["learning_rate"], + weight_decay=params["weight_decay"], + embedding_matrix=embedding_matrix + ) + + grid_search = GridSearchCV(wrapper, param_grid, scoring='neg_mean_squared_error', cv=3, verbose=2) + grid_search.fit(X, y) + + # Ergebnisse ausgeben + print("Beste Parameter:", grid_search.best_params_) + print("Bestes Ergebnis (Negative MSE):", -grid_search.best_score_)
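+
+    # Optional sketch (not part of the original pipeline): GridSearchCV with
+    # refit=True (the default) retrains the best configuration on the full
+    # data, so its weights can be persisted for later use. The file name is an
+    # arbitrary choice.
+    best_model = grid_search.best_estimator_.model
+    torch.save(best_model.state_dict(), "best_cnn_regressor.pt")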