commencing with actual reinforcement learning
parent 48a351518d
commit a891d51ca9
@@ -135,77 +135,72 @@ def main():
    q = rl.q_init()

    # Game loop
-   not_won = True
+   running = True
    iter = 0
+   while running:
+       screen.fill(BLACK)
+       iter = iter + 1
-   while not_won:

        labyrinth = [
            "##########",
            "#........#",
            "#.##..##.#",
            "#........#",
            "##########"
        ]

        # Handle events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                not_won = False

-       while running:
-           screen.fill(BLACK)
-           iter = iter + 1

        # Check for collisions (game over if ghost catches pacman)
        if pacman.x == ghost.x and pacman.y == ghost.y:
            print("Game Over! The ghost caught Pacman.")
            running = False

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
            labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]

        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            print("You Win! Pacman ate all the cookies.")
            running = False
-           not_won = False

-       # Start of my code ######################################################################
+       # Start of my code
+       labyrinth_copy = [list(row) for row in labyrinth]  # Create proper deep copy
+       s_not_terminal = True
+       a = None
+       iteration = 0
+       max_iterations = 50  # Prevent infinite loops
+
+       while s_not_terminal and iteration < max_iterations:
+           iteration += 1
+           # print("s: " + str(s))  # debugging
+           # print("q[s] before action: " + str(q[s]))  # debugging
+           labyrinth_copy = [list(row) for row in labyrinth]  # Create proper deep copy
+
+           a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
+           s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy)
+
+           q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a])
+
+           if all("." not in row for row in labyrinth_copy):
+               s_not_terminal = False
+               q[s][a] = 10.0
+               print("There is a parallel universe with victory")
+
+           s = s_new
+           time.sleep(0.025)
+
+       if iteration >= max_iterations:
+           print("Max iterations reached for this loop")
+       # at least indicate where the next cookie is, ignoring the ghost, in state s
+       # After everything was calculated, just move Pacman according to the highest action a in the Q-table q.
+       move_pacman(pacman, a)

        s = (pacman.x, pacman.y, ghost.x, ghost.y)  # as a tuple so the state becomes hashable
        a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
        move_pacman(pacman, a)

        if iter%3==0:
            # Ghost moves towards Pacman
            ghost.move_towards_pacman(pacman)

        # Draw the labyrinth, pacman, and ghost
        draw_labyrinth()
        pacman.draw()
        ghost.draw()

        # Update display
        pygame.display.flip()

        # Cap the frame rate
        clock.tick(5)

    pygame.quit()
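The heart of the added inner loop is the tabular Q-learning update, q[s][a] += ALPHA * (r + GAMMA * max over a' of q[s_new][a'] - q[s][a]), run against throwaway copies of the maze. Below is a minimal sketch of just that update, assuming a defaultdict-backed Q-table keyed by (pacman.x, pacman.y, ghost.x, ghost.y) tuples; the ALPHA and GAMMA values and the q_init/q_update helpers are illustrative stand-ins for the repo's rl module, not its actual API.

    from collections import defaultdict

    ALPHA = 0.1  # learning rate (value assumed; the diff only shows the name)
    GAMMA = 0.9  # discount factor (value assumed)

    def q_init():
        # one Q-value per action (0=Left, 1=Right, 2=Up, 3=Down), defaulting to 0.0
        return defaultdict(lambda: [0.0, 0.0, 0.0, 0.0])

    def q_update(q, s, a, r, s_new):
        # move q[s][a] a fraction ALPHA toward the bootstrapped target
        target = r + GAMMA * max(q[s_new])
        q[s][a] += ALPHA * (target - q[s][a])

    q = q_init()
    s, s_new = (1, 1, 5, 3), (2, 1, 5, 3)    # (pacman.x, pacman.y, ghost.x, ghost.y)
    q_update(q, s, a=1, r=1.0, s_new=s_new)  # moved right onto a cookie
    print(q[s])                              # [0.0, 0.1, 0.0, 0.0]

Note that the commit's q[s][a] = 10.0 on a simulated win pins the terminal transition to a large fixed value instead of letting repeated updates converge to it.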
@@ -108,7 +108,7 @@ def epsilon_greedy(q, s, epsilon=0.025):
    return a

-def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
+def max_q(q, s_new, labyrinth, depth=0, max_depth=1):
    """Calculate Q-values for all possible actions in state s_new and return the maximum"""
    q_max = 0
    for a in range(4):
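Only the head of max_q appears in the diff; the change drops its default lookahead depth from 2 to 1, which makes the bootstrap target cheaper (fewer simulated moves per update) but more myopic. Here is a self-contained sketch of a depth-limited lookahead consistent with that signature; simulate_action is a hypothetical stand-in for rl.take_action (its shape inferred from the first hunk), and GAMMA's value is assumed.

    from collections import defaultdict

    GAMMA = 0.9  # discount factor; value assumed (the diff only shows the name)

    def simulate_action(s, a, labyrinth):
        # Hypothetical stand-in for rl.take_action: apply action a to state s
        # on a throwaway maze and return (s_new, r, maze).
        dx, dy = [(-1, 0), (1, 0), (0, -1), (0, 1)][a]  # 0=Left 1=Right 2=Up 3=Down
        x, y = s[0] + dx, s[1] + dy
        if labyrinth[y][x] == "#":  # bumped a wall: stay in place
            x, y = s[0], s[1]
        r = 1.0 if labyrinth[y][x] == "." else -1.0  # mirrors calc_reward below
        labyrinth[y] = labyrinth[y][:x] + " " + labyrinth[y][x + 1:]
        return (x, y) + s[2:], r, labyrinth

    def max_q_sketch(q, s, labyrinth, depth=0, max_depth=1):
        """Depth-limited estimate of max over a of Q(s, a)."""
        if depth >= max_depth:
            return max(q[s])  # base case: plain one-step maximum
        best = float("-inf")
        for a in range(4):
            s2, r, lab2 = simulate_action(s, a, list(labyrinth))
            best = max(best, r + GAMMA * max_q_sketch(q, s2, lab2, depth + 1, max_depth))
        return best

    q = defaultdict(lambda: [0.0] * 4)
    maze = ["#####", "#...#", "#####"]
    print(max_q_sketch(q, (1, 1, 3, 1), maze))  # 1.0: one simulated step finds a cookie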
@@ -133,7 +133,7 @@ def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
 def calc_reward(s_new, labyrinth):

-    # Reward for cookies
+    # Reward for cookies; punish for not eating cookies
     r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0

     return r
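calc_reward indexes the maze as labyrinth[s_new[1]][s_new[0]], i.e. row y first, then column x. A quick usage check, with the function copied from the hunk above and a made-up three-row maze (not the game's labyrinth):

    def calc_reward(s_new, labyrinth):
        # Reward for cookies; punish for not eating cookies
        return 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0

    maze = [
        "#####",
        "#.. #",
        "#####",
    ]
    print(calc_reward((1, 1, 3, 1), maze))  # 1.0  -> cookie at x=1, y=1
    print(calc_reward((3, 1, 1, 1), maze))  # -1.0 -> empty square costs a step

The -1.0 on empty squares is what pushes the greedy policy toward the nearest remaining cookie rather than idling in place.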