stuff persistent

2025-12-08 15:15:42 +01:00 · 2025-12-08 15:15:42 +01:00 · 3fb0afd80e
parent c88d8d003e
commit 3fb0afd80e
2 changed files with 156 additions and 39 deletions
--- a/04_pacman_rl/pacman.py
+++ b/04_pacman_rl/pacman.py
@ -1,6 +1,8 @@
 import pygame
 import math
 import reinforcement_learning as rl
 import json
 import os
 # Initialize pygame
 pygame.init()
@ -120,19 +122,113 @@ def move_pacman(pacman, a):
    if a == 3: # down
        pacman.move(0, 1)
-# Main game function
def save_q_table(q, filename="q_table.json"):
    """Write the Q-table ``q`` to ``filename`` as JSON.

    JSON object keys have to be strings, so each key of ``q`` is stored as
    its ``str()`` form; the values are written unchanged. A confirmation
    line is printed once the file has been written.
    """
    serializable = {}
    for state, action_values in q.items():
        # Same key conversion as before: str() of the state key.
        serializable[str(state)] = action_values
    with open(filename, 'w') as handle:
        json.dump(serializable, handle)
    print(f"Q-table saved to {filename}")
 def load_q_table(filename="q_table.json"):
    """Load a Q-table from a JSON file.

    The file is expected to contain a JSON object whose keys are the
    string form of the state keys (for example ``"(1, 1, 8, 3)"``), which
    is what ``save_q_table`` writes.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        A dict mapping the parsed keys to the stored values, or ``None``
        when ``filename`` does not exist.

    Raises:
        ValueError, SyntaxError: If a key is not a plain Python literal.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Function-scope import so the block does not depend on the file header.
    import ast

    if not os.path.exists(filename):
        print(f"No saved Q-table found at {filename}. Starting fresh.")
        return None
    with open(filename, 'r') as f:
        q_json = json.load(f)
    # NOTE(security): keys used to be passed to eval(), which would execute
    # arbitrary code placed in the JSON file. ast.literal_eval only accepts
    # Python literals (tuples, numbers, strings, ...) and raises otherwise.
    q = {ast.literal_eval(k): v for k, v in q_json.items()}
    print(f"Q-table loaded from {filename}")
    return q
 # Headless training loop (no pygame drawing)
 def train(q, num_iterations=10000):
    """Run Q-learning episodes without rendering until the step budget is used.

    Every episode rebinds the module-level ``labyrinth`` to a fresh maze,
    puts Pac-Man at (1, 1) and the ghost at (COLS - 2, ROWS - 2), and then
    steps until Pac-Man and the ghost share a cell, no "." is left in the
    maze, or ``num_iterations`` total steps have been taken. The ghost
    moves on every third step of an episode, trying the x axis first.

    ``q`` is updated in place and also returned. A progress line is
    printed after every 100th episode.
    """
    global labyrinth
    outer_iter = 0
    total_iterations = 0
    while total_iterations < num_iterations:
        # Start each episode from a full maze.
        labyrinth = [
            "##########",
            "#........#",
            "#.##..##.#",
            "#........#",
            "##########"
        ]
        step = 0
        # Positions are tracked as plain integers; no sprite objects here.
        pacman_x, pacman_y = 1, 1
        ghost_x, ghost_y = COLS - 2, ROWS - 2
        s = (pacman_x, pacman_y, ghost_x, ghost_y)
        while total_iterations < num_iterations:
            step += 1
            total_iterations += 1

            # Episode over: the ghost is on Pac-Man's cell.
            if pacman_x == ghost_x and pacman_y == ghost_y:
                break

            # Eat the cookie under Pac-Man, if any.
            current_row = labyrinth[pacman_y]
            if current_row[pacman_x] == ".":
                labyrinth[pacman_y] = current_row[:pacman_x] + " " + current_row[pacman_x + 1:]

            # Episode over: no cookie left anywhere.
            if not any("." in maze_row for maze_row in labyrinth):
                break

            # Q-learning step
            a = rl.epsilon_greedy(q, s)
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new

            # Mirror the chosen action on the locally tracked Pac-Man
            # position; a wall ("#") in the target cell blocks the move.
            if a == 0:  # left
                if labyrinth[pacman_y][pacman_x - 1] != "#":
                    pacman_x = max(1, pacman_x - 1)
            elif a == 1:  # right
                if labyrinth[pacman_y][pacman_x + 1] != "#":
                    pacman_x = min(COLS - 2, pacman_x + 1)
            elif a == 2:  # up
                if labyrinth[pacman_y - 1][pacman_x] != "#":
                    pacman_y = max(1, pacman_y - 1)
            elif a == 3:  # down
                if labyrinth[pacman_y + 1][pacman_x] != "#":
                    pacman_y = min(ROWS - 2, pacman_y + 1)

            # The ghost only moves on every third step of the episode.
            if step % 3 == 0:
                if ghost_x < pacman_x and labyrinth[ghost_y][ghost_x + 1] != "#":
                    ghost_x += 1
                elif ghost_x > pacman_x and labyrinth[ghost_y][ghost_x - 1] != "#":
                    ghost_x -= 1
                elif ghost_y < pacman_y and labyrinth[ghost_y + 1][ghost_x] != "#":
                    ghost_y += 1
                elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
                    ghost_y -= 1
                # Rebuild the state from the locally tracked positions.
                s = (pacman_x, pacman_y, ghost_x, ghost_y)

        outer_iter += 1
        if outer_iter % 100 == 0:
            print(f"Training iteration {outer_iter}, Total steps: {total_iterations}")
    return q
 # Visualization function (with pygame)
 def visualize(q, num_games=10):
    """Visualize the trained agent playing the game."""
    global labyrinth
    q = rl.q_init()
    clock = pygame.time.Clock()
-    # Game loop
+    for game_num in range(num_games):
    not_won = True
    outer_iter = 0
    while not_won:
        labyrinth = [
            "##########",
            "#........#",
@ -146,22 +242,17 @@ def main():
        # Initialize Pacman and Ghost positions
        pacman = Pacman(1, 1)
        ghost = Ghost(COLS - 2, ROWS - 2)
-        s = (pacman.x, pacman.y, ghost.x, ghost.y) # as a tuple so the state becomes hashable
+        s = (pacman.x, pacman.y, ghost.x, ghost.y)
-        # Handle events
+        print(f"Game {game_num + 1}/{num_games}")
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                not_won = False
        print(outer_iter)  
        while running or iter < 100:
            screen.fill(BLACK)
            iter = iter + 1
-            # Check for collisions (game over if ghost catches pacman)
+            # Check for collisions
            if pacman.x == ghost.x and pacman.y == ghost.y:
                print("Game Over! The ghost caught Pacman.")
                outer_iter = outer_iter + 1
                running = False
                break
@ -169,49 +260,52 @@ def main():
            if labyrinth[pacman.y][pacman.x] == ".":
                labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
-            # Check if all cookies are eaten (game over)
+            # Check if all cookies are eaten
            if all("." not in row for row in labyrinth):
                print("You Win! Pacman ate all the cookies.")
                running = False
                not_won = False
                break
-            # Q-Learning part ############################################################################       
+            # Q-Learning
-
+            a = rl.epsilon_greedy(q, s)
            a = rl.epsilon_greedy(q, s) # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            # print(s) # debugging
            # print(q[s]) # debugging
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new
            move_pacman(pacman, a)
            if iter % 3 == 0:
                # Ghost moves towards Pacman
                ghost.move_towards_pacman(pacman)
                # Update state
                s = (pacman.x, pacman.y, ghost.x, ghost.y)
-            # End of Q-Learning part ######################################################################
+            # Draw
            # Draw the labyrinth, pacman, and ghost
            draw_labyrinth()
            pacman.draw()
            ghost.draw()
            # Update display
            pygame.display.flip()
-            # Cap the frame rate
+            tick_speed = 20 # if game_num % 20 == 0 else 100
            # tick_speed = 100 
            tick_speed = 5 if outer_iter % 20 == 0 else 50
            clock.tick(tick_speed)
 # Main function
 def main():
    """Load or create the Q-table, train headlessly, visualize, then save.

    The Q-table is read from ``q_table.json`` when that file exists and
    created with ``rl.q_init()`` otherwise. After training, a few games
    are shown with pygame. pygame is shut down and the Q-table is written
    back to ``q_table.json`` even if training or visualization raises, so
    an interrupted run does not throw away what was learned.
    """
    # Single source of truth so the log message cannot drift from the
    # value actually passed to train().
    num_iterations = 20000

    # Load existing Q-table or create new one
    q = load_q_table("q_table.json")
    if q is None:
        q = rl.q_init()

    try:
        print(f"Training for {num_iterations} iterations...")
        q = train(q, num_iterations=num_iterations)

        print("\nTraining complete! Starting visualization...")
        visualize(q, num_games=10)
    finally:
        pygame.quit()
        # Save Q-table when exiting
        save_q_table(q, "q_table.json")
 # Script entry point: call main() only when this file is executed
 # directly, not when it is imported as a module.
 if __name__ == "__main__":
    main()
--- a/04_pacman_rl/reinforcement_learning.py
+++ b/04_pacman_rl/reinforcement_learning.py
@ -136,6 +136,11 @@ def take_action(s, a, labyrinth):
        row_list[s_new[0]] = " "
        labyrinth[s_new[1]] = "".join(row_list)
    # Check if all cookies are eaten
    if all("." not in row for row in labyrinth):
        r = 100.0
        #print("All cookies eaten")
    return tuple(s_new), r, labyrinth
 def max_q(q, s_new, labyrinth):
@ -149,3 +154,21 @@ def max_q(q, s_new, labyrinth):
            q_max = max(q_max, q[s_new][a])
    return q_max
 def get_nearest_cookie(pacman_x, pacman_y, labyrinth):
    """Return the direction signs from Pac-Man to the closest cookie.

    Cookies are the "." cells of ``labyrinth``. Closeness is the Manhattan
    distance to (pacman_x, pacman_y); on a tie the cookie found first when
    scanning rows top to bottom and cells left to right wins.

    Returns:
        A ``(dx, dy)`` tuple where each component is -1, 0 or 1, or
        ``(0, 0)`` when the labyrinth holds no cookie.
    """
    closest = None
    closest_dist = None
    for row_idx, row in enumerate(labyrinth):
        for col_idx, cell in enumerate(row):
            if cell != ".":
                continue
            dist = abs(col_idx - pacman_x) + abs(row_idx - pacman_y)
            # Strict "<" keeps the first cookie seen among equally near ones.
            if closest_dist is None or dist < closest_dist:
                closest = (col_idx, row_idx)
                closest_dist = dist

    if closest is None:
        return 0, 0

    cookie_dx = int(np.sign(closest[0] - pacman_x))
    cookie_dy = int(np.sign(closest[1] - pacman_y))
    return cookie_dx, cookie_dy