From e00f915c73a168bdeaad97b528ae5c26ba207fb1 Mon Sep 17 00:00:00 2001
From: Thomas Martin <2121321@stud.hs-mannheim.de>
Date: Sun, 10 Nov 2024 15:33:59 +0100
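Subject: [PATCH] Tune Q-learning hyperparameters and ghost start position

- Lower the learning rate alpha from 0.9 to 0.5 and epsilon from 30 to 10.
- Start the ghost at (COLS - 2, ROWS - 2) instead of (4, 1).
- Change the default reward assigned each step from -0.1 to -1.
- Stop forcing epsilon to 0 once round exceeds 100000.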
---
 Aufgabe_4.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Aufgabe_4.py b/Aufgabe_4.py
index 335cf97..d258721 100644
--- a/Aufgabe_4.py
+++ b/Aufgabe_4.py
@@ -151,9 +151,9 @@ def calcState(pacman, ghost, labyrinth):
 clock = pygame.time.Clock()
 q = np.random.rand(((ROWS * COLS)**2) * 16, 4)*0.1 # q[s][a]=0..0.1, q[pac + ghost][4]
 
-alpha = 0.9  # Lernrate
+alpha = 0.5  # Lernrate
 gamma = 0.9  # Discount Faktor
-epsilon = 30  # für Epsilon-Greedy Aktionsauswahl
+epsilon = 10  # für Epsilon-Greedy Aktionsauswahl
 
 max_iter = 0
 iter = 0
@@ -167,7 +167,7 @@ while True:
         print("Won: ", win, " Lose: ", lose)
     # Initialize Pacman and Ghost positions
     pacman = Pacman(1, 1)
-    ghost = Ghost(4, 1)
+    ghost = Ghost(COLS - 2, ROWS - 2)
     labyrinth = deepcopy(labyrinth_origin)
     # Game loop            # reward = 1
 
@@ -209,7 +209,7 @@ while True:
             ghost.move_towards_pacman(pacman)
 
         # neuer eindimensionaler Zustand
-        reward = -0.1
+        reward = -1
         new_s = calcState(pacman, ghost, labyrinth)
 
         if pacman.caught(ghost):
@@ -232,7 +232,7 @@ while True:
         q[s][a] += alpha * (reward + gamma * np.max(q[new_s]) - q[s][a])
 
         if(round > 100000):
-            epsilon = 0
+            # epsilon = 0
             draw_labyrinth()
             pacman.draw()
             ghost.draw()