Finished assignment

2025-12-01 15:33:28 +01:00 · 2025-12-01 15:33:28 +01:00 · 93076e2426
parent 454ac5092d
commit 93076e2426
3 changed files with 114 additions and 58 deletions
--- a/game.py
+++ b/game.py
@ -1,8 +1,8 @@
 import pygame
 import random
 import math
 import os
-from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value
+from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
 # Initialize pygame
 pygame.init()
@ -12,6 +12,8 @@ SCREEN_WIDTH = 400
 SCREEN_HEIGHT = 400
 CELL_SIZE = 40
 os.environ['SDL_VIDEODRIVER'] = 'dummy'
 # Define colors
 YELLOW = (255, 255, 0)
 RED = (255, 0, 0)
@ -20,7 +22,7 @@ BLUE = (0, 0, 255)
 BLACK = (0, 0, 0)
 # Labyrinth as a string
-labyrinth = [
+labyrinth_init = [
    "##########",
    "#........#",
    "#.##..##.#",
@ -28,13 +30,16 @@ labyrinth = [
    "##########"
 ]
 labyrinth = labyrinth_init.copy()
 # Get labyrinth dimensions
 ROWS = len(labyrinth)
 COLS = len(labyrinth[0])
 # Initialize game screen
-screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+# screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
-pygame.display.set_caption("Micro-Pacman")
+# pygame.display.set_caption("Micro-Pacman")
 screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE))
 # Pacman class
 class Pacman:
@ -100,7 +105,7 @@ class Ghost:
                    pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
 # Draw walls and cookies
-def draw_labyrinth():
+def draw_labyrinth(labyrinth):
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == "#":
@ -110,19 +115,20 @@ def draw_labyrinth():
 # Main game function
-def main(q_values, EPSILON, ALPHA, GAMMA):
+def run_game(q_values, EPSILON, ALPHA, GAMMA):
    clock = pygame.time.Clock()
    labyrinth = labyrinth_init.copy()
    # Initialize Pacman and Ghost positions
    pacman = Pacman(1, 1)
    ghost = Ghost(COLS - 2, ROWS - 2)
    # Game loop
    #? -------------------------MY CODE-----------------------------------
    state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
    reward = 0
    #? -------------------------MY CODE-----------------------------------
    #? GAME LOOP
    running = True
    iter = 0
    while running:
@ -156,7 +162,6 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
            pacman.move(0, 1)
        #? -------------------------MY CODE-----------------------------------
        if iter%3==0:
            # Ghost moves towards Pacman
            ghost.move_towards_pacman(pacman)
@ -165,6 +170,7 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
        if pacman.x == ghost.x and pacman.y == ghost.y:
            print("Game Over! The ghost caught Pacman.")
            running = False
            reward = -10
        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
@ -173,29 +179,44 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            print("You Win! Pacman ate all the cookies.")
            reward = 10
            running = False
        # Draw the labyrinth, pacman, and ghost
        #? -------------------------MY CODE-----------------------------------
        if not running:
            new_state = state
        else:
            new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
-        best_value_new_state, _ = get_best_q_value(q_values, new_state)
+        best_action_new_state, _ = get_best_q_action(q_values, new_state)
        best_value_new_state = q_values[(new_state, best_action_new_state)]
-        current_value = q_values.get((state, action), 0)
+        current_value = q_values.get((state, action))
        adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
        q_values[(state, action)] = current_value + adjusted_value
        state = new_state
        if not running:
            counter = 0
            for y, row in enumerate(labyrinth):
                for x, cell in enumerate(row):
                    if cell == ".":
                        counter += 1
            return 20-counter
        #? -------------------------MY CODE-----------------------------------
-        draw_labyrinth()
+        draw_labyrinth(labyrinth)
        pacman.draw()
        ghost.draw()
        # Update display
-        pygame.display.flip()
+        # pygame.display.flip()
        # Cap the frame rate
-        clock.tick(1)
+        clock.tick(10000)
    pygame.quit()
 if __name__ == "__main__":
-    main()
+    run_game()
--- a/main.py
+++ b/main.py
@ -1,10 +1,13 @@
-from util import epsilon_greedy, get_start_state, test
+import matplotlib.pyplot as plt
 from game import run_game
 from util import initial_q_fill
-AMOUNT_RUNS = 10
+AMOUNT_RUNS = 5000
 EPSILON = 0.1
 ALPHA = 0.1
-GAMMA = 0.1
+GAMMA = 0.9
 """
@ -13,12 +16,15 @@ action: Direction
 q_value: (state, action)
 """
 q_values = {}
 initial_q_fill(q_values)
 cookies_per_run = []
 # Amount of single runs
 for x in range(AMOUNT_RUNS):
-    state = get_start_state()
+    amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
    cookies_per_run.append(amount_cookies_ate)
    print(f"Run {x}: {amount_cookies_ate} cookies ate\n")
    # Single run, until win or death
    while(True):
        action = epsilon_greedy(q_values, state, EPSILON)
 plt.plot(cookies_per_run)
 plt.show()
--- a/util.py
+++ b/util.py
@ -8,6 +8,17 @@ class Direction(Enum):
    LEFT = 3
 def initial_q_fill(q_values):
    for x in range(8):
        for y in range(3):
            for cookie_direction in Direction:
                for action in Direction:
                    state = (x, y, cookie_direction)
                    q_values[(state, action)] = random.random() * 0.2 - 0.1
 def get_start_state():
    first_direction_cookie = random.choice([True, False])
    if first_direction_cookie:
@ -18,68 +29,86 @@ def get_start_state():
 def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
-    # distance pacman - ghost
+    x_ghost_dist = abs(pac_x - ghost_x)
-    x_dist = abs(pac_x - ghost_x)
+    y_ghost_dist = abs(pac_y - ghost_y)
    y_dist = abs(pac_y - ghost_y)
-    # closest cookie
+    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    best_distance = 12
    best_cords = None
-    all_cookie_locations = get_all_cookies_locations(labyrinth)
+    return x_ghost_dist, y_ghost_dist, cookie_direction
    for (cookie_x, cookie_y) in all_cookie_locations:
        dist = abs(pac_x - cookie_x) + abs(pac_y - cookie_y)
        if dist < best_distance:
            best_distance = dist
            best_cords = (cookie_x, cookie_y)
    # closest cookie direction
    cookie_direction = None
    real_dist_x = pac_x - cookie_x
    real_dist_y = pac_y - cookie_y
    #TODO
    if real_dist_x >= 0 & real_dist_y > 0:
        cookie_direction = Direction
    return x_dist, y_dist, cookie_direction
-def get_all_cookies_locations(labyrinth):
+def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
-    cookie_locations = []
+    cookie_distances = []
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == ".":
-                cookie_locations.append((x, y))
+                x_dist = abs(pac_x - x)
                y_dist = abs(pac_y - y)
                dist = x_dist + y_dist
                cookie_distances.append((dist, (x, y)))
-    return cookie_locations
+    closest_cookie = min(cookie_distances, key=lambda item: item[0])
    closest_cookie_cords = closest_cookie[1]
    cookie_x = closest_cookie_cords[0]
    cookie_y = closest_cookie_cords[1]
    dx = cookie_x - pac_x
    dy = cookie_y - pac_y 
    if abs(dx) >= abs(dy):
        #? X distance bigger
        if dy > 0:
            return Direction.DOWN
        elif dy < 0:
            return Direction.UP
        else:
            #? Cookie on the same Y level
            if dx > 0:
                return Direction.RIGHT
            else:
                return Direction.LEFT
    else:
        #? Y distance bigger
        if dx > 0:
            return Direction.RIGHT
        elif dx < 0:
            return Direction.LEFT
        else:
            #? Cookie on the same X level
            if dy > 0:
                return Direction.DOWN
            else:
                return Direction.UP
 def epsilon_greedy(q_values, state, epsilon):
-    best_action, states_for_epsilon = get_best_q_value(q_values, state)
+    best_action, actions_for_epsilon = get_best_q_action(q_values, state)
    if random.random() < epsilon:
-        if not states_for_epsilon:
+        if not actions_for_epsilon:
            best_action = get_random_direction()
            return best_action
-        random_action = random.choice(states_for_epsilon)
+        random_action = random.choice(actions_for_epsilon)
        return random_action
    return best_action
-def get_best_q_value(q_values, state):
+def get_best_q_action(q_values, state):
    best_action = None
    best_value = None
-    states_for_epsilon = []
+    actions_for_epsilon = []
    for (q_state, q_action), value in q_values.items():
        if q_state == state:
-            states_for_epsilon.append(q_action)
+            actions_for_epsilon.append(q_action)
            if best_value is None:
                best_value = value
@ -93,7 +122,7 @@ def get_best_q_value(q_values, state):
    if not best_action:
        best_action = get_random_direction()
-    return best_action, states_for_epsilon
+    return best_action, actions_for_epsilon
 def get_random_direction():