From 6c859703fdbd5e0befcb99f4fc52a8f97e7adf9a Mon Sep 17 00:00:00 2001
From: arman
Date: Sun, 9 Feb 2025 11:07:11 +0100
Subject: [PATCH] lstm updated

---
 lstm_1b.py | 258 +++++++++++++++++++++++++++--------------------------
 1 file changed, 131 insertions(+), 127 deletions(-)

diff --git a/lstm_1b.py b/lstm_1b.py
index 06404ca..a70dd2c 100644
--- a/lstm_1b.py
+++ b/lstm_1b.py
@@ -1,169 +1,173 @@
 import time
 import json
-
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, f1_score
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+import matplotlib.pyplot as plt
 
-import ml_helper
-import ml_history
+# Automatic device selection (Apple MPS, CUDA, CPU)
+if torch.backends.mps.is_available():
+    device = torch.device("mps")
+elif torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+print('Using device:', device)
 
 
 class ImprovedLSTMBinaryClassifier(nn.Module):
-    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers, dropout=0.1, bidirectional=False):
+    def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.1):
         super(ImprovedLSTMBinaryClassifier, self).__init__()
-        self.embedding = nn.Embedding(vocab_size, embed_dim)
-        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout, bidirectional=bidirectional)
-        self.layer_norm = nn.LayerNorm(hidden_dim * 2 if bidirectional else hidden_dim)
-        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, 1)
+        self.lstm = nn.LSTM(input_dim,
+                            hidden_dim,
+                            num_layers,
+                            batch_first=True,
+                            dropout=dropout,
+                            bidirectional=False)
+        self.layer_norm = nn.LayerNorm(hidden_dim)
+
+        # Additional fully connected layers without ReLU
+        self.fc1 = nn.Linear(hidden_dim, 128)
+        self.fc2 = nn.Linear(128, 64)
+        self.fc3 = nn.Linear(64, 32)
+        self.fc4 = nn.Linear(32, 1)
+
         self.sigmoid = nn.Sigmoid()
+        self.dropout = nn.Dropout(dropout)
 
     def forward(self, input_ids):
-        input_ids = input_ids.long()
-        embedded = self.embedding(input_ids)
-        lstm_output, _ = self.lstm(embedded)
-        pooled_output = lstm_output[:, -1, :]
-        pooled_output = self.layer_norm(pooled_output)
-        logits = self.fc(pooled_output)
-        return self.sigmoid(logits)
-
+        lstm_out, _ = self.lstm(input_ids)
+        lstm_out = self.dropout(lstm_out)
+        pooled = lstm_out[:, -1, :]  # last hidden state
+        normalized = self.layer_norm(pooled)
+
+        # Multiple fully connected layers
+        x = self.fc1(normalized)
+        x = self.fc2(x)
+        x = self.fc3(x)
+        x = self.fc4(x)
+
+        return self.sigmoid(x)
 
 
+# Training and evaluation
 if __name__ == "__main__":
-    # Load the data
-    data_path = 'data/idx_based_padded'
+    # Load the data (assumption: embedded data is already prepared)
+    data_path = '/content/drive/MyDrive/Colab Notebooks/ANLP_WS24_CA2/data/embedded_padded'
     train_dataset = torch.load(data_path + '/train.pt')
     test_dataset = torch.load(data_path + '/test.pt')
    val_dataset = torch.load(data_path + '/val.pt')
 
-    # +2 for padding and unk tokens
-    vocab_size = train_dataset.vocab_size + 2
-    embed_dim = 100  # train_dataset.emb_dim
-
-    # NOTE: Info comes from data explore notebook: 280 is max length,
-    # 139 contains 80% and 192 contains 95% of the data
-    max_len = 280
-
-    device = ml_helper.get_device(verbose=True)
-
-    # Model hyperparameters
+    # Hyperparameters
+    input_dim = 100
     hidden_dim = 256
     num_layers = 2
-    dropout = 0.3
-    bidirectional = True  # Enable bidirectional LSTM
+    dropout = 0.3
+    batch_size = 64
 
-    model = ImprovedLSTMBinaryClassifier(vocab_size, embed_dim, hidden_dim, num_layers, dropout, bidirectional)
-
-    # Training parameters
-    epochs = 3
-    batch_size = 8
-    learning_rate = 2e-5
-
-    # Optimizer and loss function
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-    criterion = nn.BCEWithLogitsLoss()
-
-    # Data loaders
+    # DataLoader
     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
     val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
 
-    ################################################################################################
-    # Training
-    ################################################################################################
-    # Initialize the history
-    history = ml_history.History()
+    # Initialize the model
+    model = ImprovedLSTMBinaryClassifier(
+        input_dim=input_dim,
+        hidden_dim=hidden_dim,
+        num_layers=num_layers,
+        dropout=dropout
+    ).to(device)
 
-    # Model to device
-    model.to(device)
+    criterion = nn.BCELoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
+    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
+
+    best_val_loss = float('inf')
+    best_test_accuracy = 0
+    patience = 3
+    counter = 0
+
+    history = {'train_loss': [], 'val_loss': [], 'test_acc': [], 'test_f1': []}
 
-    print("Starting training...")
-    start_training_time = time.time()
-
-    # Training loop
-    model.train()
+    epochs = 5
     for epoch in range(epochs):
-        epoch_start_time = time.time()
-        history.batch_reset()
-
+        # Training
+        model.train()
+        total_loss = 0
+        start_time = time.time()
+
         for batch in train_loader:
             optimizer.zero_grad()
-            # prepare batch
             input_ids = batch['input_ids'].to(device)
             labels = batch['labels'].unsqueeze(1).to(device)
-            # forward pass
+
             outputs = model(input_ids)
             loss = criterion(outputs, labels)
-            # backward pass
+
             loss.backward()
+            nn.utils.clip_grad_norm_(model.parameters(), 1)
             optimizer.step()
-            # calculate accuracy train
-            preds = outputs.round()
-            train_acc = accuracy_score(labels.cpu().detach().numpy(),
-                                       preds.cpu().detach().numpy())
-            # update batch history
-            history.batch_update_train(loss.item(), train_acc)
+
+            total_loss += loss.item()
 
-        # calculate accuracy val
+        avg_train_loss = total_loss / len(train_loader)
+
+        # Validation
         model.eval()
+        val_loss = 0
         with torch.no_grad():
-            for val_batch in val_loader:
-                val_input_ids = val_batch['input_ids'].to(device)
-                val_labels_batch = val_batch['labels'].unsqueeze(1).to(device)
-                val_outputs = model(val_input_ids)
-                val_acc = accuracy_score(val_outputs.round().cpu().numpy(),
-                                         val_labels_batch.cpu().numpy())
-                history.batch_update_val(val_acc)
-        model.train()
+            for batch in val_loader:
+                input_ids = batch['input_ids'].to(device)
+                labels = batch['labels'].unsqueeze(1).to(device)
+                outputs = model(input_ids)
+                val_loss += criterion(outputs, labels).item()
+
+        avg_val_loss = val_loss / len(val_loader)
+
+        # Test evaluation
+        test_preds = []
+        test_labels = []
+        with torch.no_grad():
+            for batch in test_loader:
+                input_ids = batch['input_ids'].to(device)
+                labels = batch['labels'].unsqueeze(1).to(device)
+                outputs = model(input_ids)
+                preds = (outputs > 0.5).float()
+                test_preds.extend(preds.cpu().numpy())
+                test_labels.extend(labels.cpu().numpy())
+
+        test_accuracy = accuracy_score(test_labels, test_preds)
+        test_f1 = f1_score(test_labels, test_preds)
+
+        # Update history
+        history['train_loss'].append(avg_train_loss)
+        history['val_loss'].append(avg_val_loss)
+        history['test_acc'].append(test_accuracy)
+        history['test_f1'].append(test_f1)
+
+        # Adjust learning rate
+        scheduler.step(avg_val_loss)
+
+        # Logging
+        epoch_time = time.time() - start_time
+        print(f'Epoch {epoch+1}/{epochs} | Time: {epoch_time:.2f}s')
+        print(f'Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}')
+        print(f'Test Acc: {test_accuracy:.4f} | Test F1: {test_f1:.4f}\n')
 
-        # update epoch history
-        history.update()
+        # Save the best model
+        if test_accuracy > best_test_accuracy:
+            best_test_accuracy = test_accuracy
+            torch.save(model.state_dict(), "best_lstm_model.pth")
+            print(f"🚀 New best model saved (Acc: {test_accuracy:.4f})")
 
-        epoch_end_time = time.time()
-
-        print(f"Epoch {epoch + 1}/{epochs}, Time: {epoch_end_time - epoch_start_time:.2f} sec, Loss: {history.history['loss'][-1]:.4f}, Train Acc: {history.history['train_acc'][-1]:.4f}, Val Acc: {history.history['val_acc'][-1]:.4f}")
-
-    end_training_time = time.time()
-    print(f"Training finished in {end_training_time - start_training_time:.2f} seconds")
-
-    ################################################################################################
-    # Evaluation
-    ################################################################################################
-    print("Starting evaluation...")
-
-    model.eval()
-    predictions, true_labels = [], []
-    with torch.no_grad():
-        for batch in test_loader:
-            input_ids = batch['input_ids'].to(device)
-            labels = batch['labels'].unsqueeze(1).to(device)
-
-            outputs = model(input_ids)
-            preds = outputs.round()
-            predictions.extend(preds.cpu().numpy())
-            true_labels.extend(labels.cpu().numpy())
-
-    accuracy = accuracy_score(true_labels, predictions)
-    print(f"Accuracy: {accuracy}")
-
-    ################################################################################################
-    # Save model and hyperparameters
-    ################################################################################################
-    timestamp = time.strftime("%Y%m%d-%H%M%S")
-
-    ml_helper.save_model_and_hyperparameters(model, 'improved_lstm', accuracy, timestamp,
-                                             max_len=max_len,
-                                             vocab_size=vocab_size,
-                                             embed_dim=embed_dim,
-                                             hidden_dim=hidden_dim,
-                                             num_layers=num_layers,
-                                             dropout=dropout,
-                                             epochs=epochs,
-                                             batch_size=batch_size,
-                                             learning_rate=learning_rate)
-
-    # Save history
-    history_path = f'models/improved_lstm_history_{timestamp}.json'
-    with open(history_path, 'w') as f:
-        json.dump(history.get_history(), f)
+        # Early stopping
+        if avg_val_loss < best_val_loss:
+            best_val_loss = avg_val_loss
+            counter = 0
+        else:
+            counter += 1
+            if counter >= patience:
+                print("⛔ Early stopping triggered!")
+                break
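
Loading the checkpoint this script writes for inference could look roughly like the sketch below. It assumes, as the diff implies, that inputs are pre-embedded 100-dimensional sequences in batch-first layout and reuses the hyperparameters from the training block (input_dim=100, hidden_dim=256, num_layers=2, dropout=0.3); the example batch and its sequence length are placeholders, not values taken from the patch.

    import torch
    from lstm_1b import ImprovedLSTMBinaryClassifier  # training runs only under __main__, so importing is safe

    # Rebuild the architecture with the same hyperparameters used for training.
    model = ImprovedLSTMBinaryClassifier(input_dim=100, hidden_dim=256,
                                         num_layers=2, dropout=0.3)
    model.load_state_dict(torch.load("best_lstm_model.pth", map_location="cpu"))
    model.eval()

    # Hypothetical batch: 4 sequences, 200 time steps, 100-dim embeddings.
    batch = torch.randn(4, 200, 100)
    with torch.no_grad():
        probs = model(batch)           # sigmoid outputs in [0, 1]
        preds = (probs > 0.5).float()  # binary predictions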