lstm updated

main
arman 2025-02-09 11:07:11 +01:00
parent 75766ad784
commit 6c859703fd
1 changed file with 131 additions and 127 deletions

import time
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt
# Automatic device selection (Apple MPS, CUDA, CPU)
if torch.backends.mps.is_available():
device = torch.device("mps")
elif torch.cuda.is_available():
device = torch.device("cuda")
else:
device = torch.device("cpu")
print('Using device:', device)
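# Binary classifier: a stacked LSTM over pre-embedded sequences, last-step
# pooling, layer normalization, and a four-layer MLP head with sigmoid output.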
class ImprovedLSTMBinaryClassifier(nn.Module):
def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.1):
super(ImprovedLSTMBinaryClassifier, self).__init__()
self.lstm = nn.LSTM(input_dim,
hidden_dim,
num_layers,
batch_first=True,
dropout=dropout,
bidirectional=False)
self.layer_norm = nn.LayerNorm(hidden_dim)
        # Additional fully connected layers (no ReLU between them)
self.fc1 = nn.Linear(hidden_dim, 128)
self.fc2 = nn.Linear(128, 64)
self.fc3 = nn.Linear(64, 32)
self.fc4 = nn.Linear(32, 1)
self.sigmoid = nn.Sigmoid()
self.dropout = nn.Dropout(dropout)
def forward(self, input_ids):
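        # input_ids holds pre-embedded vectors of shape (batch, seq_len, input_dim)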
lstm_out, _ = self.lstm(input_ids)
lstm_out = self.dropout(lstm_out)
        pooled = lstm_out[:, -1, :]  # last hidden state (final time step)
normalized = self.layer_norm(pooled)
        # several fully connected layers
x = self.fc1(normalized)
x = self.fc2(x)
x = self.fc3(x)
x = self.fc4(x)
return self.sigmoid(x)
# Training and evaluation
if __name__ == "__main__":
    # Load the data (assumes the embedded, padded tensors are already prepared)
data_path = '/content/drive/MyDrive/Colab Notebooks/ANLP_WS24_CA2/data/embedded_padded'
train_dataset = torch.load(data_path + '/train.pt')
test_dataset = torch.load(data_path + '/test.pt')
val_dataset = torch.load(data_path + '/val.pt')
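    # Each dataset yields dict-style batches with 'input_ids' (embedded, padded
    # sequences) and 'labels' (binary targets), as consumed in the loops below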
    # Hyperparameters
input_dim = 100
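    # (input_dim matches the 100-dimensional precomputed embeddings)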
hidden_dim = 256
num_layers = 2
    dropout = 0.3
batch_size = 64
    # Data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    # Initialize the model
model = ImprovedLSTMBinaryClassifier(
input_dim=input_dim,
hidden_dim=hidden_dim,
num_layers=num_layers,
dropout=dropout
).to(device)
criterion = nn.BCELoss()
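    # NOTE: the model applies sigmoid itself, so plain BCELoss is the matching
    # criterion (BCEWithLogitsLoss would expect raw logits instead)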
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
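    # halves the learning rate once validation loss stalls for 2 epochs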
best_val_loss = float('inf')
best_test_accuracy = 0
patience = 3
counter = 0
history = {'train_loss': [], 'val_loss': [], 'test_acc': [], 'test_f1': []}
print("Starting training...")
start_training_time = time.time()
epochs = 5
for epoch in range(epochs):
# Training
model.train()
total_loss = 0
start_time = time.time()
for batch in train_loader:
optimizer.zero_grad()
# prepare batch
input_ids = batch['input_ids'].to(device)
labels = batch['labels'].unsqueeze(1).to(device)
# forward pass
outputs = model(input_ids)
loss = criterion(outputs, labels)
# backward pass
loss.backward()
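            # clip the global gradient norm to 1.0 to curb exploding gradients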
nn.utils.clip_grad_norm_(model.parameters(), 1)
optimizer.step()
total_loss += loss.item()
avg_train_loss = total_loss / len(train_loader)
        # Validation
model.eval()
val_loss = 0
with torch.no_grad():
for batch in val_loader:
input_ids = batch['input_ids'].to(device)
labels = batch['labels'].unsqueeze(1).to(device)
outputs = model(input_ids)
val_loss += criterion(outputs, labels).item()
avg_val_loss = val_loss / len(val_loader)
# Test Evaluation
test_preds = []
test_labels = []
with torch.no_grad():
for batch in test_loader:
input_ids = batch['input_ids'].to(device)
labels = batch['labels'].unsqueeze(1).to(device)
outputs = model(input_ids)
preds = (outputs > 0.5).float()
test_preds.extend(preds.cpu().numpy())
test_labels.extend(labels.cpu().numpy())
test_accuracy = accuracy_score(test_labels, test_preds)
test_f1 = f1_score(test_labels, test_preds)
        # Update history
history['train_loss'].append(avg_train_loss)
history['val_loss'].append(avg_val_loss)
history['test_acc'].append(test_accuracy)
history['test_f1'].append(test_f1)
        # Adjust the learning rate
scheduler.step(avg_val_loss)
        # Log epoch results
epoch_time = time.time() - start_time
print(f'Epoch {epoch+1}/{epochs} | Time: {epoch_time:.2f}s')
print(f'Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}')
print(f'Test Acc: {test_accuracy:.4f} | Test F1: {test_f1:.4f}\n')
        # Save the best model
if test_accuracy > best_test_accuracy:
best_test_accuracy = test_accuracy
torch.save(model.state_dict(), "best_lstm_model.pth")
print(f"🚀 Neues bestes Modell gespeichert (Acc: {test_accuracy:.4f})")
        # Early stopping on validation loss
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("⛔ Early stopping triggered!")
                break
end_training_time = time.time()
print(f"Training finished in {end_training_time - start_training_time:.2f} seconds")
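
    # Plot the loss curves with the matplotlib import above (a minimal sketch,
    # not in the original script; the output filename is illustrative)
    plt.figure()
    plt.plot(history['train_loss'], label='train loss')
    plt.plot(history['val_loss'], label='val loss')
    plt.xlabel('epoch')
    plt.ylabel('BCE loss')
    plt.legend()
    plt.savefig('lstm_loss_curves.png')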