# gnn/beispiele/11.3_Transformer.py

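# Character-level Transformer language model: a single attention block is
# trained on a short German question/answer text and then used to generate
# text autoregressively, one character at a time.
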
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
class PosEnc(layers.Layer):
    def __init__(self, **kwargs):
        super(PosEnc, self).__init__(**kwargs)

    def build(self, input_shape):
        _, seq_len, d_model = input_shape
        self.positional_encoding = self.get_pos_enc(seq_len, d_model)
        super(PosEnc, self).build(input_shape)

    def call(self, x):
        return x + self.positional_encoding

    @staticmethod
    def get_pos_enc(seq_len, d_model):
        # standard sinusoidal encoding: pos / 10000^(2*(i//2)/d_model)
        angles = np.arange(seq_len)[:, np.newaxis] / np.power(
            10000, (2 * (np.arange(d_model)[np.newaxis, :] // 2)) / np.float32(d_model))
        angles[:, 0::2] = np.sin(angles[:, 0::2])  # sine on even indices
        angles[:, 1::2] = np.cos(angles[:, 1::2])  # cosine on odd indices
        return tf.cast(angles[np.newaxis, ...], tf.float32)
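# PE(pos, 2i) = sin(pos / 10000^(2i/d_model)), PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model));
# the encodings are added to the embeddings so the attention layer can
# distinguish positions within the sequence.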
# -- generates character sequences x and target characters y
def gen_train_data(text, tokenizer, seq_len):
    encoded = tokenizer.texts_to_sequences([text])[0]
    sequences = []
    for i in range(seq_len, len(encoded)):
        # sliding window: seq_len input characters plus one target character
        sequence = encoded[i-seq_len:i+1]
        sequences.append(sequence)
    sequences = np.array(sequences)
    x, y = sequences[:, :-1], sequences[:, -1]
    return x, y
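# E.g. with seq_len=3 the text "Hallo" yields (as token IDs) the pairs
# x="Hal" -> y="l" and x="all" -> y="o".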
def create_transformer_model(vocab_size, d_model, nhead, max_seq_len, mask):
    inputs = tf.keras.Input(shape=(max_seq_len,))
    embedding = layers.Embedding(input_dim=vocab_size, output_dim=d_model)(inputs)
    pos_encoding = PosEnc()(embedding)
    x = pos_encoding
    # multi-head attention with residual connection
    attention_output = layers.MultiHeadAttention(
        num_heads=nhead, key_dim=d_model // nhead)(x, x, attention_mask=mask)
    x = layers.Add()([x, attention_output])
    x = layers.LayerNormalization()(x)
    x = layers.Dropout(0.1)(x)
    # two dense layers (feed-forward block) with residual connection
    d_1 = layers.Dense(d_model, activation='relu')(x)
    d_2 = layers.Dense(d_model, activation='relu')(d_1)
    x = layers.Add()([x, d_2])
    x = layers.LayerNormalization()(x)
    x = layers.Dropout(0.1)(x)
    # predict the next character from the last position only
    probs = layers.Dense(vocab_size, activation='softmax')(x[:, -1, :])
    model = tf.keras.Model(inputs=inputs, outputs=probs)
    return model
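# Note: this is a single decoder-style block (masked self-attention plus a
# feed-forward sublayer); full Transformers stack several such blocks.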
# --- parameters
train_text = "Welches Tier ist das größte? Der Wal. Welches Tier ist das kleinste? Der Einzeller."
seq_len = 32     # sequence length
batch_size = 32  # batch size
epochs = 100     # training epochs
# --- the tokenizer encodes the following characters
chars = "\n,.;:-/!?$&'ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyzßöäüÖÄÜ0123456789"
tokenizer = Tokenizer(char_level=True, filters='', lower=False)
tokenizer.fit_on_texts(chars)
# --- in Keras the attention mask has dtype bool: True = attend, False = masked
mask = np.ones((seq_len, seq_len), dtype=bool)
mask[np.triu_indices(seq_len, 1)] = False  # mask out the strict upper triangle
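# For seq_len=4 the causal mask would look like this; each position may only
# attend to itself and to earlier positions:
# [[ True, False, False, False],
#  [ True,  True, False, False],
#  [ True,  True,  True, False],
#  [ True,  True,  True,  True]]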
# --- generate training data
x_train, y_train = gen_train_data(train_text, tokenizer, seq_len)
# --- build the Transformer model
vocab_size = len(tokenizer.word_index) + 1  # +1 for the padding index 0
d_model = vocab_size  # model (embedding) dimension, here tied to the vocabulary size
nhead = 4             # 1 x multi-head attention with 4 heads
model = create_transformer_model(vocab_size, d_model, nhead, seq_len, mask)
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
# --- train the model
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)
# --- generate text
text = "Welches Bier ist das kleinste?"
for _ in range(seq_len):
    enc_txt = tokenizer.texts_to_sequences([text])[0]
    padded_txt = pad_sequences([enc_txt], maxlen=seq_len, padding='pre', truncating='pre')
    probs = model.predict(padded_txt)   # forward pass
    next_char = np.argmax(probs[0, :])  # greedy decoding: take the most probable token
    next_char = tokenizer.index_word[next_char]
    text += next_char                   # append the character
    print("Generated text:", text)
    if next_char == '.':                # period = stop!
        break
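# Greedy argmax decoding is deterministic; sampling from probs[0, :] instead
# (e.g. np.random.choice over the vocabulary with p=probs[0, :]) would yield
# varied output at the cost of occasional misspellings.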