Finished assignment

2025-12-01 15:33:28 +01:00 · 2025-12-01 15:33:28 +01:00 · 93076e2426
parent 454ac5092d
commit 93076e2426
3 changed files with 114 additions and 58 deletions
--- a/game.py
+++ b/game.py
@ -1,8 +1,8 @@
 import pygame
-import random
 import math
+import os

-from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value
+from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action

 # Initialize pygame
 pygame.init()
@ -12,6 +12,8 @@ SCREEN_WIDTH = 400
 SCREEN_HEIGHT = 400
 CELL_SIZE = 40

+os.environ['SDL_VIDEODRIVER'] = 'dummy'
+
 # Define colors
 YELLOW = (255, 255, 0)
 RED = (255, 0, 0)
@ -20,7 +22,7 @@ BLUE = (0, 0, 255)
 BLACK = (0, 0, 0)

 # Labyrinth as a string
-labyrinth = [
+labyrinth_init = [
    "##########",
    "#........#",
    "#.##..##.#",
@ -28,13 +30,16 @@ labyrinth = [
    "##########"
 ]

+labyrinth = labyrinth_init.copy()
+
 # Get labyrinth dimensions
 ROWS = len(labyrinth)
 COLS = len(labyrinth[0])

 # Initialize game screen
-screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
-pygame.display.set_caption("Micro-Pacman")
+# screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+# pygame.display.set_caption("Micro-Pacman")
+screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE))

 # Pacman class
 class Pacman:
@ -100,7 +105,7 @@ class Ghost:
                    pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))

 # Draw walls and cookies
-def draw_labyrinth():
+def draw_labyrinth(labyrinth):
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == "#":
@ -110,19 +115,20 @@ def draw_labyrinth():


 # Main game function
-def main(q_values, EPSILON, ALPHA, GAMMA):
+def run_game(q_values, EPSILON, ALPHA, GAMMA):
    clock = pygame.time.Clock()
+    labyrinth = labyrinth_init.copy()

    # Initialize Pacman and Ghost positions
    pacman = Pacman(1, 1)
    ghost = Ghost(COLS - 2, ROWS - 2)

-    # Game loop
    #? -------------------------MY CODE-----------------------------------
    state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
    reward = 0
    #? -------------------------MY CODE-----------------------------------

+    #? GAME LOOP
    running = True
    iter = 0
    while running:
@ -156,7 +162,6 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
            pacman.move(0, 1)
        #? -------------------------MY CODE-----------------------------------

-
        if iter%3==0:
            # Ghost moves towards Pacman
            ghost.move_towards_pacman(pacman)
@ -165,6 +170,7 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
        if pacman.x == ghost.x and pacman.y == ghost.y:
            print("Game Over! The ghost caught Pacman.")
            running = False
+            reward = -10

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
@ -173,29 +179,44 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            print("You Win! Pacman ate all the cookies.")
+            reward = 10
            running = False

        # Draw the labyrinth, pacman, and ghost
        #? -------------------------MY CODE-----------------------------------
-        new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
+        if not running:
+            new_state = state
+        else:
+            new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)

-        best_value_new_state, _ = get_best_q_value(q_values, new_state)
+        best_action_new_state, _ = get_best_q_action(q_values, new_state)
+        best_value_new_state = q_values[(new_state, best_action_new_state)]

-        current_value = q_values.get((state, action), 0)
+        current_value = q_values.get((state, action))
        adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
        q_values[(state, action)] = current_value + adjusted_value
+
+        state = new_state
+
+        if not running:
+            counter = 0
+            for y, row in enumerate(labyrinth):
+                for x, cell in enumerate(row):
+                    if cell == ".":
+                        counter += 1
+            return 20-counter
        #? -------------------------MY CODE-----------------------------------
-        draw_labyrinth()
+        draw_labyrinth(labyrinth)
        pacman.draw()
        ghost.draw()

        # Update display
-        pygame.display.flip()
+        # pygame.display.flip()

        # Cap the frame rate
-        clock.tick(1)
+        clock.tick(10000)

    pygame.quit()

 if __name__ == "__main__":
-    main()
+    run_game()
--- a/main.py
+++ b/main.py
@ -1,10 +1,13 @@
-from util import epsilon_greedy, get_start_state, test
+import matplotlib.pyplot as plt
+
+from game import run_game
+from util import initial_q_fill


-AMOUNT_RUNS = 10
+AMOUNT_RUNS = 5000
 EPSILON = 0.1
 ALPHA = 0.1
-GAMMA = 0.1
+GAMMA = 0.9


 """
@ -13,12 +16,15 @@ action: Direction
 q_value: (state, action)
 """
 q_values = {}
+initial_q_fill(q_values)

+cookies_per_run = []
 # Amount of single runs
 for x in range(AMOUNT_RUNS):
-    state = get_start_state()
+    amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
+    cookies_per_run.append(amount_cookies_ate)
+    print(f"Run {x}: {amount_cookies_ate} cookies ate\n")

-    # Single run, until win or death
-    while(True):
-        action = epsilon_greedy(q_values, state, EPSILON)

+plt.plot(cookies_per_run)
+plt.show()
--- a/util.py
+++ b/util.py
@ -8,6 +8,17 @@ class Direction(Enum):
    LEFT = 3


+def initial_q_fill(q_values):
+    for x in range(8):
+        for y in range(3):
+            for cookie_direction in Direction:
+                for action in Direction:
+                    state = (x, y, cookie_direction)
+                    q_values[(state, action)] = random.random() * 0.2 - 0.1
+            
+
+
+
 def get_start_state():
    first_direction_cookie = random.choice([True, False])
    if first_direction_cookie:
@ -18,68 +29,86 @@ def get_start_state():


 def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
-    # distance pacman - ghost
-    x_dist = abs(pac_x - ghost_x)
-    y_dist = abs(pac_y - ghost_y)
+    x_ghost_dist = abs(pac_x - ghost_x)
+    y_ghost_dist = abs(pac_y - ghost_y)

-    # closest cookie
-    best_distance = 12
-    best_cords = None
+    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)

-    all_cookie_locations = get_all_cookies_locations(labyrinth)
-    for (cookie_x, cookie_y) in all_cookie_locations:
-        dist = abs(pac_x - cookie_x) + abs(pac_y - cookie_y)
-
-        if dist < best_distance:
-            best_distance = dist
-            best_cords = (cookie_x, cookie_y)
-
-    # closest cookie direction
-    cookie_direction = None
-    real_dist_x = pac_x - cookie_x
-    real_dist_y = pac_y - cookie_y
-
-    #TODO
-    if real_dist_x >= 0 & real_dist_y > 0:
-        cookie_direction = Direction
-
-    return x_dist, y_dist, cookie_direction
+    return x_ghost_dist, y_ghost_dist, cookie_direction


-def get_all_cookies_locations(labyrinth):
-    cookie_locations = []
+def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
+    cookie_distances = []
+
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == ".":
-                cookie_locations.append((x, y))
+                x_dist = abs(pac_x - x)
+                y_dist = abs(pac_y - y)
+                dist = x_dist + y_dist
+                cookie_distances.append((dist, (x, y)))

-    return cookie_locations
+    closest_cookie = min(cookie_distances, key=lambda item: item[0])
+    closest_cookie_cords = closest_cookie[1]
+    cookie_x = closest_cookie_cords[0]
+    cookie_y = closest_cookie_cords[1]
+
+
+    dx = cookie_x - pac_x
+    dy = cookie_y - pac_y 
+    
+    if abs(dx) >= abs(dy):
+        #? X distance bigger
+
+        if dy > 0:
+            return Direction.DOWN
+        elif dy < 0:
+            return Direction.UP
+        else:
+            #? Cookie on the same Y level
+            if dx > 0:
+                return Direction.RIGHT
+            else:
+                return Direction.LEFT
+    else:
+        #? Y distance bigger
+
+        if dx > 0:
+            return Direction.RIGHT
+        elif dx < 0:
+            return Direction.LEFT
+        else:
+            #? Cookie on the same X level
+            if dy > 0:
+                return Direction.DOWN
+            else:
+                return Direction.UP



 def epsilon_greedy(q_values, state, epsilon):
-    best_action, states_for_epsilon = get_best_q_value(q_values, state)
+    best_action, actions_for_epsilon = get_best_q_action(q_values, state)

    if random.random() < epsilon:
-        if not states_for_epsilon:
+        if not actions_for_epsilon:
            best_action = get_random_direction()
            return best_action

-        random_action = random.choice(states_for_epsilon)
+        random_action = random.choice(actions_for_epsilon)
        return random_action

    return best_action


-def get_best_q_value(q_values, state):
+def get_best_q_action(q_values, state):
    best_action = None
    best_value = None

-    states_for_epsilon = []
+    actions_for_epsilon = []

    for (q_state, q_action), value in q_values.items():
        if q_state == state:
-            states_for_epsilon.append(q_action)
+            actions_for_epsilon.append(q_action)

            if best_value is None:
                best_value = value
@ -93,7 +122,7 @@ def get_best_q_value(q_values, state):
    if not best_action:
        best_action = get_random_direction()

-    return best_action, states_for_epsilon
+    return best_action, actions_for_epsilon


 def get_random_direction():