lstm updated

main
arman 2025-02-09 11:07:11 +01:00
parent 75766ad784
commit 6c859703fd
1 changed file with 131 additions and 127 deletions


@@ -1,169 +1,173 @@
 import time
 import json
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, f1_score
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+import matplotlib.pyplot as plt
-import ml_helper
-import ml_history
+# Automatic device selection (Apple MPS, CUDA, CPU)
+if torch.backends.mps.is_available():
+    device = torch.device("mps")
+elif torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+print('Using device:', device)

 class ImprovedLSTMBinaryClassifier(nn.Module):
-    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers, dropout=0.1, bidirectional=False):
+    def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.1):
         super(ImprovedLSTMBinaryClassifier, self).__init__()
-        self.embedding = nn.Embedding(vocab_size, embed_dim)
-        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout, bidirectional=bidirectional)
-        self.layer_norm = nn.LayerNorm(hidden_dim * 2 if bidirectional else hidden_dim)
-        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, 1)
+        self.lstm = nn.LSTM(input_dim,
+                            hidden_dim,
+                            num_layers,
+                            batch_first=True,
+                            dropout=dropout,
+                            bidirectional=False)
+        self.layer_norm = nn.LayerNorm(hidden_dim)
+        # Additional fully connected layers without ReLU
+        self.fc1 = nn.Linear(hidden_dim, 128)
+        self.fc2 = nn.Linear(128, 64)
+        self.fc3 = nn.Linear(64, 32)
+        self.fc4 = nn.Linear(32, 1)
         self.sigmoid = nn.Sigmoid()
+        self.dropout = nn.Dropout(dropout)

     def forward(self, input_ids):
-        input_ids = input_ids.long()
-        embedded = self.embedding(input_ids)
-        lstm_output, _ = self.lstm(embedded)
-        pooled_output = lstm_output[:, -1, :]
-        pooled_output = self.layer_norm(pooled_output)
-        logits = self.fc(pooled_output)
-        return self.sigmoid(logits)
+        lstm_out, _ = self.lstm(input_ids)
+        lstm_out = self.dropout(lstm_out)
+        pooled = lstm_out[:, -1, :]  # last hidden state
+        normalized = self.layer_norm(pooled)
+        # Several fully connected layers
+        x = self.fc1(normalized)
+        x = self.fc2(x)
+        x = self.fc3(x)
+        x = self.fc4(x)
+        return self.sigmoid(x)

+# Training and evaluation
 if __name__ == "__main__":
-    # Load the data
-    data_path = 'data/idx_based_padded'
+    # Load the data (assumes the embedded data has already been prepared)
+    data_path = '/content/drive/MyDrive/Colab Notebooks/ANLP_WS24_CA2/data/embedded_padded'
     train_dataset = torch.load(data_path + '/train.pt')
     test_dataset = torch.load(data_path + '/test.pt')
     val_dataset = torch.load(data_path + '/val.pt')

-    # +2 for padding and unk tokens
-    vocab_size = train_dataset.vocab_size + 2
-    embed_dim = 100  # train_dataset.emb_dim
-    # NOTE: Info comes from data explore notebook: 280 is max length,
-    # 139 contains 80% and 192 contains 95% of the data
-    max_len = 280
-    device = ml_helper.get_device(verbose=True)
-    # Model hyperparameters
+    # Hyperparameters
+    input_dim = 100
     hidden_dim = 256
     num_layers = 2
     dropout = 0.3
-    bidirectional = True  # Enable bidirectional LSTM
-    model = ImprovedLSTMBinaryClassifier(vocab_size, embed_dim, hidden_dim, num_layers, dropout, bidirectional)
-    # Training parameters
-    epochs = 3
-    batch_size = 8
-    learning_rate = 2e-5
-    # Optimizer and loss function
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-    criterion = nn.BCEWithLogitsLoss()
-    # Data loaders
+    batch_size = 64

+    # DataLoader
     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
     val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

-    ################################################################################################
-    # Training
-    ################################################################################################
-    # Initialize the history
-    history = ml_history.History()
-    # Model to device
-    model.to(device)
-    print("Starting training...")
-    start_training_time = time.time()
-    # Training loop
-    model.train()
+    # Initialize the model
+    model = ImprovedLSTMBinaryClassifier(
+        input_dim=input_dim,
+        hidden_dim=hidden_dim,
+        num_layers=num_layers,
+        dropout=dropout
+    ).to(device)

+    criterion = nn.BCELoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
+    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

+    best_val_loss = float('inf')
+    best_test_accuracy = 0
+    patience = 3
+    counter = 0

+    history = {'train_loss': [], 'val_loss': [], 'test_acc': [], 'test_f1': []}

+    epochs = 5
     for epoch in range(epochs):
-        epoch_start_time = time.time()
-        history.batch_reset()
+        # Training
+        model.train()
+        total_loss = 0
+        start_time = time.time()
         for batch in train_loader:
             optimizer.zero_grad()
-            # prepare batch
             input_ids = batch['input_ids'].to(device)
             labels = batch['labels'].unsqueeze(1).to(device)
-            # forward pass
             outputs = model(input_ids)
             loss = criterion(outputs, labels)
-            # backward pass
             loss.backward()
+            nn.utils.clip_grad_norm_(model.parameters(), 1)
             optimizer.step()
-            # calculate accuracy train
-            preds = outputs.round()
-            train_acc = accuracy_score(labels.cpu().detach().numpy(),
-                                       preds.cpu().detach().numpy())
-            # update batch history
-            history.batch_update_train(loss.item(), train_acc)
+            total_loss += loss.item()
+        avg_train_loss = total_loss / len(train_loader)

-        # calculate accuracy val
+        # Validation
         model.eval()
+        val_loss = 0
         with torch.no_grad():
-            for val_batch in val_loader:
-                val_input_ids = val_batch['input_ids'].to(device)
-                val_labels_batch = val_batch['labels'].unsqueeze(1).to(device)
-                val_outputs = model(val_input_ids)
-                val_acc = accuracy_score(val_outputs.round().cpu().numpy(),
-                                         val_labels_batch.cpu().numpy())
-                history.batch_update_val(val_acc)
-        model.train()
-        # update epoch history
-        history.update()
-        epoch_end_time = time.time()
-        print(f"Epoch {epoch + 1}/{epochs}, Time: {epoch_end_time - epoch_start_time:.2f} sec, Loss: {history.history['loss'][-1]:.4f}, Train Acc: {history.history['train_acc'][-1]:.4f}, Val Acc: {history.history['val_acc'][-1]:.4f}")
+            for batch in val_loader:
+                input_ids = batch['input_ids'].to(device)
+                labels = batch['labels'].unsqueeze(1).to(device)
+                outputs = model(input_ids)
+                val_loss += criterion(outputs, labels).item()
+        avg_val_loss = val_loss / len(val_loader)

-    end_training_time = time.time()
-    print(f"Training finished in {end_training_time - start_training_time:.2f} seconds")
+        # Test evaluation
+        test_preds = []
+        test_labels = []
+        with torch.no_grad():
+            for batch in test_loader:
+                input_ids = batch['input_ids'].to(device)
+                labels = batch['labels'].unsqueeze(1).to(device)
+                outputs = model(input_ids)
+                preds = (outputs > 0.5).float()
+                test_preds.extend(preds.cpu().numpy())
+                test_labels.extend(labels.cpu().numpy())
+        test_accuracy = accuracy_score(test_labels, test_preds)
+        test_f1 = f1_score(test_labels, test_preds)

-    ################################################################################################
-    # Evaluation
-    ################################################################################################
-    print("Starting evaluation...")
-    model.eval()
-    predictions, true_labels = [], []
-    with torch.no_grad():
-        for batch in test_loader:
-            input_ids = batch['input_ids'].to(device)
-            labels = batch['labels'].unsqueeze(1).to(device)
-            outputs = model(input_ids)
-            preds = outputs.round()
-            predictions.extend(preds.cpu().numpy())
-            true_labels.extend(labels.cpu().numpy())
+        # Update the history
+        history['train_loss'].append(avg_train_loss)
+        history['val_loss'].append(avg_val_loss)
+        history['test_acc'].append(test_accuracy)
+        history['test_f1'].append(test_f1)

-    accuracy = accuracy_score(true_labels, predictions)
-    print(f"Accuracy: {accuracy}")
+        # Adjust the learning rate
+        scheduler.step(avg_val_loss)

-    ################################################################################################
-    # Save model and hyperparameters
-    ################################################################################################
-    timestamp = time.strftime("%Y%m%d-%H%M%S")
-    ml_helper.save_model_and_hyperparameters(model, 'improved_lstm', accuracy, timestamp,
-                                             max_len=max_len,
-                                             vocab_size=vocab_size,
-                                             embed_dim=embed_dim,
-                                             hidden_dim=hidden_dim,
-                                             num_layers=num_layers,
-                                             dropout=dropout,
-                                             epochs=epochs,
-                                             batch_size=batch_size,
-                                             learning_rate=learning_rate)
-    # Save history
-    history_path = f'models/improved_lstm_history_{timestamp}.json'
-    with open(history_path, 'w') as f:
-        json.dump(history.get_history(), f)
+        # Console output per epoch
+        epoch_time = time.time() - start_time
+        print(f'Epoch {epoch+1}/{epochs} | Time: {epoch_time:.2f}s')
+        print(f'Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}')
+        print(f'Test Acc: {test_accuracy:.4f} | Test F1: {test_f1:.4f}\n')

+        # Save the best model
+        if test_accuracy > best_test_accuracy:
+            best_test_accuracy = test_accuracy
+            torch.save(model.state_dict(), "best_lstm_model.pth")
+            print(f"🚀 New best model saved (Acc: {test_accuracy:.4f})")

+        # Early stopping
+        if avg_val_loss < best_val_loss:
+            best_val_loss = avg_val_loss
+            counter = 0
+        else:
+            counter += 1
+            if counter >= patience:
+                print("⛔ Early stopping triggered!")
+                break
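
Note: the updated script imports matplotlib.pyplot but never uses it, and the best checkpoint is only written to best_lstm_model.pth. Below is a minimal follow-up sketch, not part of the commit, assuming the training above has just run in the same session (so `history` is populated and the checkpoint file exists) and that the hyperparameters match those in the script; the dummy batch only illustrates that the refactored model now expects pre-embedded sequences of size input_dim rather than token ids.

import torch
import matplotlib.pyplot as plt

# Rebuild the architecture with the same hyperparameters and load the saved weights.
model = ImprovedLSTMBinaryClassifier(input_dim=100, hidden_dim=256, num_layers=2, dropout=0.3)
model.load_state_dict(torch.load("best_lstm_model.pth", map_location="cpu"))
model.eval()

# The refactored forward pass consumes pre-embedded sequences, not token ids.
dummy_batch = torch.randn(4, 50, 100)      # (batch, seq_len, input_dim) -- illustrative shape only
with torch.no_grad():
    probs = model(dummy_batch)             # sigmoid probabilities, shape (4, 1)

# Plot the per-epoch losses collected in the history dict during training.
plt.plot(history['train_loss'], label='train loss')
plt.plot(history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.ylabel('BCE loss')
plt.legend()
plt.savefig('lstm_loss_curves.png')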