From e00f915c73a168bdeaad97b528ae5c26ba207fb1 Mon Sep 17 00:00:00 2001 From: Thomas Martin <2121321@stud.hs-mannheim.de> Date: Sun, 10 Nov 2024 15:33:59 +0100 Subject: [PATCH] Tune Q-learning parameters and ghost start position --- Aufgabe_4.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Aufgabe_4.py b/Aufgabe_4.py index 335cf97..d258721 100644 --- a/Aufgabe_4.py +++ b/Aufgabe_4.py @@ -151,9 +151,9 @@ def calcState(pacman, ghost, labyrinth): clock = pygame.time.Clock() q = np.random.rand(((ROWS * COLS)**2) * 16, 4)*0.1 # q[s][a]=0..0.1, q[pac + ghost][4] -alpha = 0.9 # Lernrate +alpha = 0.5 # Lernrate gamma = 0.9 # Discount Faktor -epsilon = 30 # für Epsilon-Greedy Aktionsauswahl +epsilon = 10 # für Epsilon-Greedy Aktionsauswahl max_iter = 0 iter = 0 @@ -167,7 +167,7 @@ while True: print("Won: ", win, " Lose: ", lose) # Initialize Pacman and Ghost positions pacman = Pacman(1, 1) - ghost = Ghost(4, 1) + ghost = Ghost(COLS - 2, ROWS - 2) labyrinth = deepcopy(labyrinth_origin) # Game loop # reward = 1 @@ -209,7 +209,7 @@ while True: ghost.move_towards_pacman(pacman) # neuer eindimensionaler Zustand - reward = -0.1 + reward = -1 new_s = calcState(pacman, ghost, labyrinth) if pacman.caught(ghost): @@ -232,7 +232,7 @@ while True: q[s][a] += alpha * (reward + gamma * np.max(q[new_s]) - q[s][a]) if(round > 100000): - epsilon = 0 + # epsilon = 0 draw_labyrinth() pacman.draw() ghost.draw()