lstm updated

main
arman 2025-02-09 11:07:11 +01:00
parent 75766ad784
commit 6c859703fd
1 changed file with 131 additions and 127 deletions


@@ -1,169 +1,173 @@
import time
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt

# Automatic device selection (Apple MPS, CUDA, CPU)
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Using device:', device)


class ImprovedLSTMBinaryClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.1):
        super(ImprovedLSTMBinaryClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim,
                            hidden_dim,
                            num_layers,
                            batch_first=True,
                            dropout=dropout,
                            bidirectional=False)
        self.layer_norm = nn.LayerNorm(hidden_dim)

        # Additional fully connected layers without ReLU
        self.fc1 = nn.Linear(hidden_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids):
        lstm_out, _ = self.lstm(input_ids)
        lstm_out = self.dropout(lstm_out)
        pooled = lstm_out[:, -1, :]  # hidden state of the last time step
        normalized = self.layer_norm(pooled)

        # Several fully connected layers
        x = self.fc1(normalized)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        return self.sigmoid(x)
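
# Quick shape check (illustrative sketch; the numbers below are placeholders):
# the model now expects pre-embedded sequences of shape (batch, seq_len, input_dim)
# instead of token ids, and returns one sigmoid probability per sequence.
#
#   model = ImprovedLSTMBinaryClassifier(input_dim=100, hidden_dim=256, num_layers=2)
#   dummy = torch.randn(4, 32, 100)   # 4 sequences, 32 time steps, 100-dim embeddings
#   model(dummy).shape                # -> torch.Size([4, 1])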

# Training and evaluation
if __name__ == "__main__":
    # Load the data (assumption: the embedded, padded data has already been prepared)
    data_path = '/content/drive/MyDrive/Colab Notebooks/ANLP_WS24_CA2/data/embedded_padded'
    train_dataset = torch.load(data_path + '/train.pt')
    test_dataset = torch.load(data_path + '/test.pt')
    val_dataset = torch.load(data_path + '/val.pt')

    # Hyperparameters
    input_dim = 100  # train_dataset.emb_dim
    hidden_dim = 256
    num_layers = 2
    dropout = 0.3
    batch_size = 64

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
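
    # Assumed batch format (inferred from how batches are indexed in the loop below):
    # each DataLoader yields dicts with
    #   'input_ids' -- float tensor of shape (batch, seq_len, input_dim) with pre-embedded tokens
    #   'labels'    -- float tensor of shape (batch,) holding binary targets (0.0 / 1.0)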

    # Initialize the model
    model = ImprovedLSTMBinaryClassifier(
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        num_layers=num_layers,
        dropout=dropout
    ).to(device)

    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
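
    # The model applies a sigmoid in forward(), so plain BCELoss is the matching
    # criterion; BCEWithLogitsLoss (as in the previous revision) expects raw logits instead.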
print("Starting training...") best_val_loss = float('inf')
start_training_time = time.time() best_test_accuracy = 0
patience = 3
counter = 0
# Training loop history = {'train_loss': [], 'val_loss': [], 'test_acc': [], 'test_f1': []}
model.train()
epochs = 5
    for epoch in range(epochs):
        # Training
        model.train()
        total_loss = 0
        start_time = time.time()

        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            labels = batch['labels'].unsqueeze(1).to(device)
            outputs = model(input_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                labels = batch['labels'].unsqueeze(1).to(device)
                outputs = model(input_ids)
                val_loss += criterion(outputs, labels).item()

        avg_val_loss = val_loss / len(val_loader)

        # Test evaluation
        test_preds = []
        test_labels = []
        with torch.no_grad():
            for batch in test_loader:
                input_ids = batch['input_ids'].to(device)
                labels = batch['labels'].unsqueeze(1).to(device)
                outputs = model(input_ids)
                preds = (outputs > 0.5).float()
                test_preds.extend(preds.cpu().numpy())
                test_labels.extend(labels.cpu().numpy())

        test_accuracy = accuracy_score(test_labels, test_preds)
        test_f1 = f1_score(test_labels, test_preds)

        # Update the history
        history['train_loss'].append(avg_train_loss)
        history['val_loss'].append(avg_val_loss)
        history['test_acc'].append(test_accuracy)
        history['test_f1'].append(test_f1)

        # Adjust the learning rate
        scheduler.step(avg_val_loss)

        # Logging
        epoch_time = time.time() - start_time
        print(f'Epoch {epoch+1}/{epochs} | Time: {epoch_time:.2f}s')
        print(f'Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}')
        print(f'Test Acc: {test_accuracy:.4f} | Test F1: {test_f1:.4f}\n')

        # Save the best model
        if test_accuracy > best_test_accuracy:
            best_test_accuracy = test_accuracy
            torch.save(model.state_dict(), "best_lstm_model.pth")
            print(f"🚀 New best model saved (Acc: {test_accuracy:.4f})")

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("⛔ Early stopping triggered!")
                break
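
    # Optional: a minimal plotting sketch for the recorded history (matplotlib is
    # imported above but otherwise unused here; the output file name is a placeholder).
    plt.figure()
    plt.plot(history['train_loss'], label='train loss')
    plt.plot(history['val_loss'], label='val loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig('lstm_training_history.png')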