lstm updated

main
arman 2025-02-09 11:07:11 +01:00
parent 75766ad784
commit 6c859703fd
1 changed file with 131 additions and 127 deletions

@@ -1,169 +1,173 @@
 import time
 import json
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader
-from sklearn.metrics import accuracy_score
-
-import ml_helper
-import ml_history
+from sklearn.metrics import accuracy_score, f1_score
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+import matplotlib.pyplot as plt
+
+# Automatic device selection (Apple MPS, CUDA, CPU)
+if torch.backends.mps.is_available():
+    device = torch.device("mps")
+elif torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+print('Using device:', device)


 class ImprovedLSTMBinaryClassifier(nn.Module):
-    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers, dropout=0.1, bidirectional=False):
+    def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.1):
         super(ImprovedLSTMBinaryClassifier, self).__init__()
-        self.embedding = nn.Embedding(vocab_size, embed_dim)
-        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout, bidirectional=bidirectional)
-        self.layer_norm = nn.LayerNorm(hidden_dim * 2 if bidirectional else hidden_dim)
-        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, 1)
+        self.lstm = nn.LSTM(input_dim,
+                            hidden_dim,
+                            num_layers,
+                            batch_first=True,
+                            dropout=dropout,
+                            bidirectional=False)
+        self.layer_norm = nn.LayerNorm(hidden_dim)
+        # Additional fully connected layers without ReLU
+        self.fc1 = nn.Linear(hidden_dim, 128)
+        self.fc2 = nn.Linear(128, 64)
+        self.fc3 = nn.Linear(64, 32)
+        self.fc4 = nn.Linear(32, 1)
         self.sigmoid = nn.Sigmoid()
+        self.dropout = nn.Dropout(dropout)

     def forward(self, input_ids):
-        input_ids = input_ids.long()
-        embedded = self.embedding(input_ids)
-        lstm_output, _ = self.lstm(embedded)
-        pooled_output = lstm_output[:, -1, :]
-        pooled_output = self.layer_norm(pooled_output)
-        logits = self.fc(pooled_output)
-        return self.sigmoid(logits)
+        lstm_out, _ = self.lstm(input_ids)
+        lstm_out = self.dropout(lstm_out)
+        pooled = lstm_out[:, -1, :]  # last hidden state
+        normalized = self.layer_norm(pooled)
+
+        # Several fully connected layers
+        x = self.fc1(normalized)
+        x = self.fc2(x)
+        x = self.fc3(x)
+        x = self.fc4(x)
+        return self.sigmoid(x)


+# Training and evaluation
 if __name__ == "__main__":
-    # Load the data
-    data_path = 'data/idx_based_padded'
+    # Load the data (assumption: the embedded data has already been prepared)
+    data_path = '/content/drive/MyDrive/Colab Notebooks/ANLP_WS24_CA2/data/embedded_padded'
     train_dataset = torch.load(data_path + '/train.pt')
     test_dataset = torch.load(data_path + '/test.pt')
     val_dataset = torch.load(data_path + '/val.pt')

-    # +2 for padding and unk tokens
-    vocab_size = train_dataset.vocab_size + 2
-    embed_dim = 100  # train_dataset.emb_dim
-    # NOTE: Info comes from data explore notebook: 280 is max length,
-    # 139 contains 80% and 192 contains 95% of the data
-    max_len = 280
-
-    device = ml_helper.get_device(verbose=True)
-
-    # Model hyperparameters
+    # Hyperparameters
+    input_dim = 100
     hidden_dim = 256
     num_layers = 2
     dropout = 0.3
-    bidirectional = True  # Enable bidirectional LSTM
-
-    model = ImprovedLSTMBinaryClassifier(vocab_size, embed_dim, hidden_dim, num_layers, dropout, bidirectional)
-
-    # Training parameters
-    epochs = 3
-    batch_size = 8
-    learning_rate = 2e-5
-
-    # Optimizer and loss function
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-    criterion = nn.BCEWithLogitsLoss()
+    batch_size = 64

-    # Data loaders
+    # DataLoader
     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
     val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

-    ################################################################################################
-    # Training
-    ################################################################################################
-    # Initialize the history
-    history = ml_history.History()
-
-    # Model to device
-    model.to(device)
+    # Initialize the model
+    model = ImprovedLSTMBinaryClassifier(
+        input_dim=input_dim,
+        hidden_dim=hidden_dim,
+        num_layers=num_layers,
+        dropout=dropout
+    ).to(device)

-    print("Starting training...")
-    start_training_time = time.time()
+    criterion = nn.BCELoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
+    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

-    # Training loop
-    model.train()
+    best_val_loss = float('inf')
+    best_test_accuracy = 0
+    patience = 3
+    counter = 0
+    history = {'train_loss': [], 'val_loss': [], 'test_acc': [], 'test_f1': []}
+
+    epochs = 5
     for epoch in range(epochs):
-        epoch_start_time = time.time()
-        history.batch_reset()
+        # Training
+        model.train()
+        total_loss = 0
+        start_time = time.time()
+
         for batch in train_loader:
             optimizer.zero_grad()
-            # prepare batch
             input_ids = batch['input_ids'].to(device)
             labels = batch['labels'].unsqueeze(1).to(device)
-            # forward pass
             outputs = model(input_ids)
             loss = criterion(outputs, labels)
-            # backward pass
             loss.backward()
+            nn.utils.clip_grad_norm_(model.parameters(), 1)
             optimizer.step()
+            total_loss += loss.item()

-            # calculate accuracy train
-            preds = outputs.round()
-            train_acc = accuracy_score(labels.cpu().detach().numpy(),
-                                       preds.cpu().detach().numpy())
-            # update batch history
-            history.batch_update_train(loss.item(), train_acc)
-
-            # calculate accuracy val
-            model.eval()
-            with torch.no_grad():
-                for val_batch in val_loader:
-                    val_input_ids = val_batch['input_ids'].to(device)
-                    val_labels_batch = val_batch['labels'].unsqueeze(1).to(device)
-                    val_outputs = model(val_input_ids)
-                    val_acc = accuracy_score(val_outputs.round().cpu().numpy(),
-                                             val_labels_batch.cpu().numpy())
-                    history.batch_update_val(val_acc)
-            model.train()
+        avg_train_loss = total_loss / len(train_loader)

-        # update epoch history
-        history.update()
+        # Validation
+        model.eval()
+        val_loss = 0
+        with torch.no_grad():
+            for batch in val_loader:
+                input_ids = batch['input_ids'].to(device)
+                labels = batch['labels'].unsqueeze(1).to(device)
+                outputs = model(input_ids)
+                val_loss += criterion(outputs, labels).item()

-        epoch_end_time = time.time()
+        avg_val_loss = val_loss / len(val_loader)

-        print(f"Epoch {epoch + 1}/{epochs}, Time: {epoch_end_time - epoch_start_time:.2f} sec, Loss: {history.history['loss'][-1]:.4f}, Train Acc: {history.history['train_acc'][-1]:.4f}, Val Acc: {history.history['val_acc'][-1]:.4f}")
-
-    end_training_time = time.time()
-    print(f"Training finished in {end_training_time - start_training_time:.2f} seconds")
+        # Test evaluation
+        test_preds = []
+        test_labels = []
+        with torch.no_grad():
+            for batch in test_loader:
+                input_ids = batch['input_ids'].to(device)
+                labels = batch['labels'].unsqueeze(1).to(device)
+                outputs = model(input_ids)
+                preds = (outputs > 0.5).float()
+                test_preds.extend(preds.cpu().numpy())
+                test_labels.extend(labels.cpu().numpy())

-    ################################################################################################
-    # Evaluation
-    ################################################################################################
-    print("Starting evaluation...")
-    model.eval()
-    predictions, true_labels = [], []
-    with torch.no_grad():
-        for batch in test_loader:
-            input_ids = batch['input_ids'].to(device)
-            labels = batch['labels'].unsqueeze(1).to(device)
-            outputs = model(input_ids)
-            preds = outputs.round()
-            predictions.extend(preds.cpu().numpy())
-            true_labels.extend(labels.cpu().numpy())
+        test_accuracy = accuracy_score(test_labels, test_preds)
+        test_f1 = f1_score(test_labels, test_preds)

-    accuracy = accuracy_score(true_labels, predictions)
-    print(f"Accuracy: {accuracy}")
+        # Update the history
+        history['train_loss'].append(avg_train_loss)
+        history['val_loss'].append(avg_val_loss)
+        history['test_acc'].append(test_accuracy)
+        history['test_f1'].append(test_f1)

-    ################################################################################################
-    # Save model and hyperparameters
-    ################################################################################################
-    timestamp = time.strftime("%Y%m%d-%H%M%S")
-    ml_helper.save_model_and_hyperparameters(model, 'improved_lstm', accuracy, timestamp,
-                                             max_len=max_len,
-                                             vocab_size=vocab_size,
-                                             embed_dim=embed_dim,
-                                             hidden_dim=hidden_dim,
-                                             num_layers=num_layers,
-                                             dropout=dropout,
-                                             epochs=epochs,
-                                             batch_size=batch_size,
-                                             learning_rate=learning_rate)
+        # Adjust the learning rate
+        scheduler.step(avg_val_loss)

-    # Save history
-    history_path = f'models/improved_lstm_history_{timestamp}.json'
-    with open(history_path, 'w') as f:
-        json.dump(history.get_history(), f)
+        # Output
+        epoch_time = time.time() - start_time
+        print(f'Epoch {epoch+1}/{epochs} | Time: {epoch_time:.2f}s')
+        print(f'Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}')
+        print(f'Test Acc: {test_accuracy:.4f} | Test F1: {test_f1:.4f}\n')
+
+        # Save the best model
+        if test_accuracy > best_test_accuracy:
+            best_test_accuracy = test_accuracy
+            torch.save(model.state_dict(), "best_lstm_model.pth")
+            print(f"🚀 New best model saved (Acc: {test_accuracy:.4f})")
+
+        # Early stopping
+        if avg_val_loss < best_val_loss:
+            best_val_loss = avg_val_loss
+            counter = 0
+        else:
+            counter += 1
+            if counter >= patience:
+                print("⛔ Early stopping triggered!")
+                break
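Review note: after this change the model no longer embeds token indices itself; forward() passes input_ids straight into nn.LSTM(input_dim, ...), so each batch is expected to carry already-embedded, padded sequences of width input_dim = 100. A minimal smoke-test sketch under that assumption, with ImprovedLSTMBinaryClassifier taken from the new version of the file; the batch size and sequence length below are made up for illustration, not taken from the datasets:

# Smoke test for the updated model (shapes are illustrative assumptions)
import torch
import torch.nn as nn

model = ImprovedLSTMBinaryClassifier(input_dim=100, hidden_dim=256, num_layers=2, dropout=0.3)
dummy_batch = torch.randn(4, 50, 100)        # (batch, seq_len, embedding_dim) of pre-embedded tokens
probs = model(dummy_batch)                    # sigmoid probabilities, shape (4, 1)
labels = torch.randint(0, 2, (4, 1)).float()  # dummy binary targets
loss = nn.BCELoss()(probs, labels)            # same criterion as in the training loop
print(probs.shape, loss.item())

Since the forward pass already applies a sigmoid, BCELoss is the matching criterion here; BCEWithLogitsLoss (used in the previous version) would expect raw logits instead.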