arman 2025-02-16 20:27:11 +01:00
commit c9109e1430
6 changed files with 381 additions and 759 deletions

View File

@@ -1,200 +0,0 @@
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, RegressorMixin
import numpy as np
from tqdm import tqdm
# Local imports
import Datasets
import dataset_helper
import EarlyStopping
import ml_helper
import ml_history
import ml_train
# Fix random seeds for reproducibility
SEED = 501
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
class EnhancedCNNRegressor(nn.Module):
def __init__(self, vocab_size, embedding_dim, filter_sizes, num_filters, embedding_matrix, dropout):
super(EnhancedCNNRegressor, self).__init__()
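# initialize the embedding layer from the pretrained GloVe matrix and fine-tune it during training (freeze=False)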
self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
# Convolutional Layers
self.convs = nn.ModuleList([
nn.Sequential(
nn.Conv2d(1, num_filters, (fs, embedding_dim)),
nn.BatchNorm2d(num_filters),
nn.ReLU(),
nn.MaxPool2d((params["max_len"] - fs + 1, 1)),  # max-pool over the whole remaining sequence (params is a module-level global)
nn.Dropout(dropout)
)
for fs in filter_sizes
])
# Fully Connected Layers
self.fc1 = nn.Linear(len(filter_sizes) * num_filters, 128)
self.fc2 = nn.Linear(128, 1)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
x = self.embedding(x).unsqueeze(1)
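# each conv block yields [batch, num_filters, 1, 1] after pooling; squeeze to [batch, num_filters]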
conv_outputs = [conv(x).squeeze(3).squeeze(2) for conv in self.convs]
x = torch.cat(conv_outputs, 1)
x = torch.relu(self.fc1(x))
x = self.dropout(x)
return self.fc2(x).squeeze(1)
class SklearnCNNWrapper(BaseEstimator, RegressorMixin):
def __init__(self, vocab_size, embedding_dim, filter_sizes, num_filters, dropout, lr, weight_decay, embedding_matrix, early_stopping_enabled=True):
self.vocab_size = vocab_size
self.embedding_dim = embedding_dim
self.filter_sizes = filter_sizes
self.num_filters = num_filters
self.dropout = dropout
self.lr = lr
self.weight_decay = weight_decay
self.embedding_matrix = embedding_matrix
self.early_stopping_enabled = early_stopping_enabled
# Device detection
self.device = (
torch.device("cuda") if torch.cuda.is_available() else
torch.device("mps") if torch.backends.mps.is_available() else
torch.device("cpu")
)
print(f"Gerät erkannt und gesetzt: {self.device}")
# Model initialization
self.model = EnhancedCNNRegressor(
vocab_size=self.vocab_size,
embedding_dim=self.embedding_dim,
filter_sizes=self.filter_sizes,
num_filters=self.num_filters,
embedding_matrix=self.embedding_matrix,
dropout=self.dropout
).to(self.device)
print(f"Modellgerät nach Initialisierung: {next(self.model.parameters()).device}")
# Loss criterion, early stopping and training history
self.criterion = nn.MSELoss()
self.early_stopping = EarlyStopping.EarlyStoppingCallback(patience=5, verbose=True, model_name="temp_model.pt")
self.history = ml_history.History()
def fit(self, X, y):
print(f"Gerät in fit() vor Training: {self.device}")
print(f"Modellgerät zu Beginn des Trainings: {next(self.model.parameters()).device}")
# Datenaufbereitung
train_dataset = Datasets.GloveDataset(X, y, word_index, max_len=params["max_len"])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)  # NOTE: early-stopping validation reuses the training data; held-out scoring is done by GridSearchCV
# Optimizer
optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
self.model.train()
# Train for several epochs
for epoch in tqdm(range(5), desc="Training Epochs"):
print(f"Start Training Epoch {epoch+1}")
ml_train.train_epoch(self.model, train_loader, self.criterion, optimizer, self.device, self.history, epoch, 5)
val_rmse = ml_train.validate_epoch(self.model, val_loader, epoch, self.criterion, self.device, self.history)
# Report validation loss
print(f"Epoch {epoch+1}: Validation RMSE = {val_rmse}")
# Early stopping (if enabled)
if self.early_stopping_enabled:
self.early_stopping(val_rmse, self.model)
if self.early_stopping.early_stop:
print(f"Early stopping triggered in epoch {epoch+1}.")
break
# Save training history
self.history.save_history("training_history.json")
return self
def predict(self, X):
print(f"Gerät in predict(): {self.device}")
print(f"Modellgerät in predict(): {next(self.model.parameters()).device}")
# Datenaufbereitung
test_dataset = Datasets.GloveDataset(X, np.zeros(len(X)), word_index, max_len=params["max_len"])
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
self.model.eval()
predictions = []
with torch.no_grad():
for batch_X, _ in tqdm(test_loader, desc="Predicting"):
batch_X = batch_X.to(self.device)
outputs = self.model(batch_X).cpu().numpy()
predictions.extend(outputs)
return np.array(predictions)
def score(self, X, y):
predictions = self.predict(X)
return -mean_squared_error(y, predictions)
if __name__ == '__main__':
# Configuration
params = {
"max_len": 280,
"epochs": 5, # Für Debugging auf 5 reduziert
"batch_size": 32,
"learning_rate": 0.001,
"weight_decay": 5e-4,
"filter_sizes": [2, 3, 4, 5],
"num_filters": 150,
"dropout": 0.6
}
# Load data and embeddings
GLOVE_PATH = 'data/glove.6B.100d.txt'
DATA_PATH = 'data/hack.csv'
EMBEDDING_DIM = 100
embedding_matrix, word_index, vocab_size, d_model = dataset_helper.get_embedding_matrix(
gloVe_path=GLOVE_PATH, emb_len=EMBEDDING_DIM)
X, y = dataset_helper.load_preprocess_data(path_data=DATA_PATH, verbose=True)
# Hyperparameter Grid
param_grid = {
'filter_sizes': [[3, 4, 5]],
'num_filters': [100, 150],
'dropout': [0.3, 0.5],
'lr': [0.001],
'weight_decay': [5e-4]
}
# Run GridSearchCV
wrapper = SklearnCNNWrapper(
vocab_size=vocab_size,
embedding_dim=EMBEDDING_DIM,
filter_sizes=params["filter_sizes"],
num_filters=params["num_filters"],
dropout=params["dropout"],
lr=params["learning_rate"],
weight_decay=params["weight_decay"],
embedding_matrix=embedding_matrix
)
grid_search = GridSearchCV(wrapper, param_grid, scoring='neg_mean_squared_error', cv=3, verbose=2)
grid_search.fit(X, y)
# Print results
print("Best parameters:", grid_search.best_params_)
print("Best score (MSE):", -grid_search.best_score_)

View File

@@ -1,225 +0,0 @@
import random
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import Datasets
import dataset_helper
import EarlyStopping
import ml_helper
import ml_history
import ml_train
SEED = 501
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
class EnhancedCNNRegressor(nn.Module):
def __init__(self, vocab_size, embedding_dim, filter_sizes, num_filters, embedding_matrix, dropout):
super(EnhancedCNNRegressor, self).__init__()
self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
# Convolutional layers with batch normalization
self.convs = nn.ModuleList([
nn.Sequential(
nn.Conv2d(1, num_filters, (fs, embedding_dim)),
nn.BatchNorm2d(num_filters), # batch normalization
nn.ReLU(),
nn.MaxPool2d((params["max_len"] - fs + 1, 1)),
nn.Dropout(dropout) # dropout after each conv block
)
for fs in filter_sizes
])
# Fully connected layers
self.fc1 = nn.Linear(len(filter_sizes) * num_filters, 128) # additional dense layer
self.fc2 = nn.Linear(128, 1) # output layer (regression)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
x = self.embedding(x).unsqueeze(1) # [Batch, 1, Seq, Embedding]
conv_outputs = [conv(x).squeeze(3).squeeze(2) for conv in self.convs] # pooling collapses the sequence dimension
x = torch.cat(conv_outputs, 1) # concatenate features from all filter sizes
x = torch.relu(self.fc1(x)) # additional dense layer
x = self.dropout(x)
return self.fc2(x).squeeze(1)
def train_model(model, train_dataset, test_dataset, criterion, optimizer, epochs, batch_size):
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
test_losses, train_losses = [], []
train_r2_scores, test_r2_scores = [], []
for epoch in range(epochs):
model.train()
running_loss = 0.0
running_r2 = 0.0
# Training
for inputs, labels in train_loader:
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
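# per-batch R² averaged over batches (an approximation; exact R² would need all predictions at once)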
running_r2 += r2_score(labels.cpu().numpy(), outputs.cpu().detach().numpy())
train_losses.append(running_loss / len(train_loader))
train_r2_scores.append(running_r2 / len(train_loader))
# Test
model.eval() # Set model to evaluation mode
test_loss = 0.0
test_r2 = 0.0
with torch.no_grad(): # No gradient calculation for testing
for inputs, labels in test_loader:
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
test_loss += loss.item()
test_r2 += r2_score(labels.cpu().numpy(), outputs.cpu().detach().numpy())
test_losses.append(test_loss / len(test_loader))
test_r2_scores.append(test_r2 / len(test_loader))
print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_losses[-1]:.4f}, Train R²: {train_r2_scores[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}, Test R²: {test_r2_scores[-1]:.4f}')
return train_losses, test_losses, train_r2_scores, test_r2_scores
# Bootstrap Aggregation (Bagging) Update
def bootstrap_aggregation(ModelClass, train_dataset, test_dataset, num_models=5, epochs=10, batch_size=32, learning_rate=0.001):
models = []
all_train_losses, all_test_losses = [], []
all_train_r2_scores, all_test_r2_scores = [], []
subset_size = len(train_dataset) // num_models
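# NOTE: each model trains on the full training set minus one contiguous fold (leave-one-fold-out), not a bootstrap resample with replacement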
for i in range(num_models):
print(f"Training Model {i + 1}/{num_models}...")
start_idx = i * subset_size
end_idx = start_idx + subset_size
subset_indices = list(range(0, start_idx)) + list(range(end_idx, len(train_dataset)))
subset = Subset(train_dataset, subset_indices)
model = ModelClass(vocab_size, EMBEDDING_DIM, params["filter_sizes"], params["num_filters"], embedding_matrix, params["dropout"])
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_losses, test_losses, train_r2_scores, test_r2_scores = train_model(model, subset, test_dataset, criterion, optimizer, epochs, batch_size)
models.append(model)
all_train_losses.append(train_losses)
all_test_losses.append(test_losses)
all_train_r2_scores.append(train_r2_scores)
all_test_r2_scores.append(test_r2_scores)
# Plot results for all models
plt.figure(figsize=(12, 6))
for i in range(num_models):
plt.plot(all_train_losses[i], label=f'Model {i + 1} Train Loss')
plt.plot(all_test_losses[i], label=f'Model {i + 1} Test Loss', linestyle = 'dashed')
plt.title("Training and Test Loss for all Models")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
plt.figure(figsize=(12, 6))
for i in range(num_models):
plt.plot(all_train_r2_scores[i], label=f'Model {i + 1} Train R²')
plt.plot(all_test_r2_scores[i], label=f'Model {i + 1} Test R²', linestyle = 'dashed')
plt.title("Training and Test R² for all Models")
plt.xlabel('Epochs')
plt.ylabel('R²')
plt.legend()
plt.show()
return models, all_train_losses, all_test_losses, all_train_r2_scores, all_test_r2_scores
# Ensemble Prediction
def ensemble_predict(models, test_dataset):
dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
for model in models:
model.eval()  # make sure every ensemble member is in evaluation mode
all_predictions = []
with torch.no_grad():
for inputs, _ in dataloader:
inputs = inputs.to(device)
predictions = torch.stack([model(inputs).squeeze() for model in models])
avg_predictions = predictions.mean(dim=0)
all_predictions.extend(avg_predictions.cpu().numpy())
return np.array(all_predictions)
if __name__ == '__main__':
# Hyperparameters and configuration
params = {
# Config
"max_len": 280,
# Training
"epochs": 2,
"patience": 7,
"batch_size": 16,
"learning_rate": 0.001,
"weight_decay": 5e-4 ,
# Model
"filter_sizes": [2, 3, 4, 5],
"num_filters": 150,
"dropout": 0.6
}
# Configs
MODEL_NAME = 'CNN.pt'
HIST_NAME = 'CNN_history'
GLOVE_PATH = 'data/glove.6B.100d.txt'
DATA_PATH = 'data/hack.csv'
EMBEDDING_DIM = 100
TEST_SIZE = 0.1
VAL_SIZE = 0.1
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load and prepare data
embedding_matrix, word_index, vocab_size, d_model = dataset_helper.get_embedding_matrix(
gloVe_path=GLOVE_PATH, emb_len=EMBEDDING_DIM)
X, y = dataset_helper.load_preprocess_data(path_data=DATA_PATH, verbose=True)
# Split the data
data_split = dataset_helper.split_data(X, y, test_size=TEST_SIZE, val_size=VAL_SIZE)
# Datasets and DataLoaders
train_dataset = Datasets.GloveDataset(data_split['train']['X'], data_split['train']['y'], word_index, max_len=params["max_len"])
val_dataset = Datasets.GloveDataset(data_split['val']['X'], data_split['val']['y'], word_index, max_len=params["max_len"])
test_dataset = Datasets.GloveDataset(data_split['test']['X'], data_split['test']['y'], word_index, max_len=params["max_len"])
# Bootstrap Aggregation (Bagging) Training
models, all_train_losses, all_test_losses, all_train_r2_scores, all_test_r2_scores = bootstrap_aggregation(
EnhancedCNNRegressor, train_dataset, test_dataset, num_models=2, epochs=params["epochs"], batch_size=params["batch_size"], learning_rate=params["learning_rate"])
# Ensemble Prediction
test_predictions = ensemble_predict(models, test_dataset)
# Test Evaluation
# test_labels = np.array([y for _, y in test_dataset])
test_mse = mean_squared_error(test_dataset.labels.to_numpy(), test_predictions)
test_mae = mean_absolute_error(test_dataset.labels.to_numpy(), test_predictions)
test_r2 = r2_score(test_dataset.labels.to_numpy(), test_predictions)
print(f"Test RMSE: {test_mse:.4f}, Test MAE: {test_mae:.4f}, Test R²: {test_r2:.4f}")

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,280 +0,0 @@
import math
import random
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import Datasets
import dataset_helper
import EarlyStopping
import ml_helper
import ml_history
import ml_train
SEED = 501
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
class PositionalEncoding(nn.Module):
"""
https://pytorch.org/tutorials/beginner/transformer_tutorial.html
"""
def __init__(self, d_model, vocab_size=5000, dropout=0.1):
super().__init__()
self.dropout = nn.Dropout(p=dropout)
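# precompute sinusoidal position encodings; the vocab_size argument is used here as the maximum sequence length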
pe = torch.zeros(vocab_size, d_model)
position = torch.arange(0, vocab_size, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(
torch.arange(0, d_model, 2).float()
* (-math.log(10000.0) / d_model)
)
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0)
self.register_buffer("pe", pe)
def forward(self, x):
x = x + self.pe[:, : x.size(1), :]
return self.dropout(x)
class TransformerBinaryClassifier(nn.Module):
"""
Text classifier based on a pytorch TransformerEncoder, used here as a regressor (single linear output, no activation).
"""
def __init__(
self,
embeddings,
nhead=8,
dim_feedforward=2048,
num_layers=6,
positional_dropout=0.1,
classifier_dropout=0.1,
):
super().__init__()
vocab_size, d_model = embeddings.size()
assert d_model % nhead == 0, "nheads must divide evenly into d_model"
self.emb = nn.Embedding.from_pretrained(embeddings, freeze=False)
self.pos_encoder = PositionalEncoding(
d_model=d_model,
dropout=positional_dropout,
vocab_size=vocab_size,
)
encoder_layer = nn.TransformerEncoderLayer(
d_model=d_model,
nhead=nhead,
dim_feedforward=dim_feedforward,
dropout=classifier_dropout,
batch_first=True,  # inputs are [batch, seq, d_model], matching the positional encoding above
)
self.transformer_encoder = nn.TransformerEncoder(
encoder_layer,
num_layers=num_layers,
)
# normalize to stabilize and stop overfitting
self.batch_norm = nn.BatchNorm1d(d_model)
self.classifier = nn.Linear(d_model, 1)
self.d_model = d_model
def forward(self, x):
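# scale embeddings by sqrt(d_model), as in the original Transformer formulation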
x = self.emb(x) * math.sqrt(self.d_model)
x = self.pos_encoder(x)
x = self.transformer_encoder(x)
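# mean-pool over the sequence dimension to get one vector per example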
x = x.mean(dim=1)
# normalize to stabilize and stop overfitting
#x = self.batch_norm(x)
#NOTE: no activation function for regression
x = self.classifier(x)
x = x.squeeze(1)
return x
def train_model(model, train_dataset, test_dataset, criterion, optimizer, epochs, batch_size):
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
test_losses, train_losses = [], []
train_r2_scores, test_r2_scores = [], []
for epoch in range(epochs):
model.train()
running_loss = 0.0
running_r2 = 0.0
# Training
for inputs, labels in train_loader:
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
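# per-batch R² averaged over batches (an approximation; exact R² would need all predictions at once)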
running_r2 += r2_score(labels.cpu().numpy(), outputs.cpu().detach().numpy())
train_losses.append(running_loss / len(train_loader))
train_r2_scores.append(running_r2 / len(train_loader))
# Test
model.eval() # Set model to evaluation mode
test_loss = 0.0
test_r2 = 0.0
with torch.no_grad(): # No gradient calculation for testing
for inputs, labels in test_loader:
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
test_loss += loss.item()
test_r2 += r2_score(labels.cpu().numpy(), outputs.cpu().detach().numpy())
test_losses.append(test_loss / len(test_loader))
test_r2_scores.append(test_r2 / len(test_loader))
print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_losses[-1]:.4f}, Train R²: {train_r2_scores[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}, Test R²: {test_r2_scores[-1]:.4f}')
return train_losses, test_losses, train_r2_scores, test_r2_scores
# Bootstrap Aggregation (Bagging) Update
def bootstrap_aggregation(ModelClass, train_dataset, test_dataset, num_models=5, epochs=10, batch_size=32, learning_rate=0.001):
models = []
all_train_losses, all_test_losses = [], []
all_train_r2_scores, all_test_r2_scores = [], []
subset_size = len(train_dataset) // num_models
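# NOTE: each model trains on the full training set minus one contiguous fold (leave-one-fold-out), not a bootstrap resample with replacement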
for i in range(num_models):
print(f"Training Model {i + 1}/{num_models}...")
start_idx = i * subset_size
end_idx = start_idx + subset_size
subset_indices = list(range(0, start_idx)) + list(range(end_idx, len(train_dataset)))
subset = Subset(train_dataset, subset_indices)
model = ModelClass(embedding_matrix, nhead=params["nhead"], dim_feedforward=params["hidden_dim"], num_layers=params["num_layers"], classifier_dropout=params["dropout"])
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_losses, test_losses, train_r2_scores, test_r2_scores = train_model(model, subset, test_dataset, criterion, optimizer, epochs, batch_size)
models.append(model)
all_train_losses.append(train_losses)
all_test_losses.append(test_losses)
all_train_r2_scores.append(train_r2_scores)
all_test_r2_scores.append(test_r2_scores)
# Plot results for all models
plt.figure(figsize=(12, 6))
for i in range(num_models):
plt.plot(all_train_losses[i], label=f'Model {i + 1} Train Loss')
plt.plot(all_test_losses[i], label=f'Model {i + 1} Test Loss', linestyle = 'dashed')
plt.title("Training and Test Loss for all Models")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
plt.figure(figsize=(12, 6))
for i in range(num_models):
plt.plot(all_train_r2_scores[i], label=f'Model {i + 1} Train R²')
plt.plot(all_test_r2_scores[i], label=f'Model {i + 1} Test R²', linestyle = 'dashed')
plt.title("Training and Test R² for all Models")
plt.xlabel('Epochs')
plt.ylabel('R²')
plt.legend()
plt.show()
return models, all_train_losses, all_test_losses, all_train_r2_scores, all_test_r2_scores
# Ensemble Prediction
def ensemble_predict(models, test_dataset):
dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
for model in models:
model.eval()  # make sure every ensemble member is in evaluation mode
all_predictions = []
with torch.no_grad():
for inputs, _ in dataloader:
inputs = inputs.to(device)
predictions = torch.stack([model(inputs).squeeze() for model in models])
avg_predictions = predictions.mean(dim=0)
all_predictions.extend(avg_predictions.cpu().numpy())
return np.array(all_predictions)
if __name__ == '__main__':
# Hyperparameters and configuration
params = {
# Config
"max_len": 280,
# Training
"epochs": 25,
"patience": 7,
"batch_size": 32,
"learning_rate": 1e-4, # 1e-4
"weight_decay": 5e-4 ,
# Model
'nhead': 2, # 5
"dropout": 0.2,
'hidden_dim': 2048,
'num_layers': 6
}
# Configs
MODEL_NAME = 'transformer.pt'
HIST_NAME = 'transformer_history'
GLOVE_PATH = 'data/glove.6B.100d.txt'
DATA_PATH = 'data/hack.csv'
EMBEDDING_DIM = 100
TEST_SIZE = 0.1
VAL_SIZE = 0.1
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load and prepare data
embedding_matrix, word_index, vocab_size, d_model = dataset_helper.get_embedding_matrix(
gloVe_path=GLOVE_PATH, emb_len=EMBEDDING_DIM)
X, y = dataset_helper.load_preprocess_data(path_data=DATA_PATH, verbose=True)
# Split the data
data_split = dataset_helper.split_data(X, y, test_size=TEST_SIZE, val_size=VAL_SIZE)
# Datasets and DataLoaders
train_dataset = Datasets.GloveDataset(data_split['train']['X'], data_split['train']['y'], word_index, max_len=params["max_len"])
val_dataset = Datasets.GloveDataset(data_split['val']['X'], data_split['val']['y'], word_index, max_len=params["max_len"])
test_dataset = Datasets.GloveDataset(data_split['test']['X'], data_split['test']['y'], word_index, max_len=params["max_len"])
# Bootstrap Aggregation (Bagging) Training
models, all_train_losses, all_test_losses, all_train_r2_scores, all_test_r2_scores = bootstrap_aggregation(
TransformerBinaryClassifier, train_dataset, test_dataset, num_models=2, epochs=params["epochs"], batch_size=params["batch_size"], learning_rate=params["learning_rate"])
# Ensemble Prediction
test_predictions = ensemble_predict(models, test_dataset)
# Test Evaluation
# test_labels = np.array([y for _, y in test_dataset])
test_mse = mean_squared_error(test_dataset.labels.to_numpy(), test_predictions)
test_mae = mean_absolute_error(test_dataset.labels.to_numpy(), test_predictions)
test_r2 = r2_score(test_dataset.labels.to_numpy(), test_predictions)
print(f"Test RMSE: {test_mse:.4f}, Test MAE: {test_mae:.4f}, Test R²: {test_r2:.4f}")