From 93076e24264136ddaf141443fb02e4df347a7eed Mon Sep 17 00:00:00 2001
From: 2wenty1ne <sirdarkvic@gmail.com>
Date: Mon, 1 Dec 2025 15:33:28 +0100
Subject: [PATCH] Finished assignment

---
 game.py | 53 ++++++++++++++++++++----------
 main.py | 20 ++++++++----
 util.py | 99 +++++++++++++++++++++++++++++++++++++--------------------
 3 files changed, 114 insertions(+), 58 deletions(-)

diff --git a/game.py b/game.py
index 1417865..5f18516 100644
--- a/game.py
+++ b/game.py
@@ -1,8 +1,8 @@
 import pygame
-import random
 import math
+import os
 
-from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value
+from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
 
 # Initialize pygame
 pygame.init()
@@ -12,6 +12,8 @@ SCREEN_WIDTH = 400
 SCREEN_HEIGHT = 400
 CELL_SIZE = 40
 
+os.environ['SDL_VIDEODRIVER'] = 'dummy'
+
 # Define colors
 YELLOW = (255, 255, 0)
 RED = (255, 0, 0)
@@ -20,7 +22,7 @@ BLUE = (0, 0, 255)
 BLACK = (0, 0, 0)
 
 # Labyrinth as a string
-labyrinth = [
+labyrinth_init = [
     "##########",
     "#........#",
     "#.##..##.#",
@@ -28,13 +30,16 @@ labyrinth = [
     "##########"
 ]
 
+labyrinth = labyrinth_init.copy()
+
 # Get labyrinth dimensions
 ROWS = len(labyrinth)
 COLS = len(labyrinth[0])
 
 # Initialize game screen
-screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
-pygame.display.set_caption("Micro-Pacman")
+# screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+# pygame.display.set_caption("Micro-Pacman")
+screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE))
 
 # Pacman class
 class Pacman:
@@ -100,7 +105,7 @@ class Ghost:
                     pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
 
 # Draw walls and cookies
-def draw_labyrinth():
+def draw_labyrinth(labyrinth):
     for y, row in enumerate(labyrinth):
         for x, cell in enumerate(row):
             if cell == "#":
@@ -110,19 +115,20 @@ def draw_labyrinth():
 
 
 # Main game function
-def main(q_values, EPSILON, ALPHA, GAMMA):
+def run_game(q_values, EPSILON, ALPHA, GAMMA):
     clock = pygame.time.Clock()
+    labyrinth = labyrinth_init.copy()
 
     # Initialize Pacman and Ghost positions
     pacman = Pacman(1, 1)
     ghost = Ghost(COLS - 2, ROWS - 2)
 
-    # Game loop
     #? -------------------------MY CODE-----------------------------------
     state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
     reward = 0
     #? -------------------------MY CODE-----------------------------------
 
+    #? GAME LOOP
     running = True
     iter = 0
     while running:
@@ -156,7 +162,6 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
             pacman.move(0, 1)
         #? -------------------------MY CODE-----------------------------------
 
-
         if iter%3==0:
             # Ghost moves towards Pacman
             ghost.move_towards_pacman(pacman)
@@ -165,6 +170,7 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
         if pacman.x == ghost.x and pacman.y == ghost.y:
             print("Game Over! The ghost caught Pacman.")
             running = False
+            reward = -10
 
         # Eat cookies
         if labyrinth[pacman.y][pacman.x] == ".":
@@ -173,29 +179,44 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
             print("You Win! Pacman ate all the cookies.")
+            reward = 10
             running = False
 
         # Draw the labyrinth, pacman, and ghost
         #? -------------------------MY CODE-----------------------------------
-        new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
+        if not running:
+            new_state = state
+        else:
+            new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
 
-        best_value_new_state, _ = get_best_q_value(q_values, new_state)
+        best_action_new_state, _ = get_best_q_action(q_values, new_state)
+        best_value_new_state = q_values[(new_state, best_action_new_state)]
 
-        current_value = q_values.get((state, action), 0)
+        current_value = q_values.get((state, action))
         adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
         q_values[(state, action)] = current_value + adjusted_value
+
+        state = new_state
+
+        if not running:
+            counter = 0
+            for y, row in enumerate(labyrinth):
+                for x, cell in enumerate(row):
+                    if cell == ".":
+                        counter += 1
+            return 20-counter
         #? -------------------------MY CODE-----------------------------------
-        draw_labyrinth()
+        draw_labyrinth(labyrinth)
         pacman.draw()
         ghost.draw()
 
         # Update display
-        pygame.display.flip()
+        # pygame.display.flip()
 
         # Cap the frame rate
-        clock.tick(1)
+        clock.tick(10000)
 
     pygame.quit()
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    run_game()
\ No newline at end of file
diff --git a/main.py b/main.py
index 3faf4ff..0187b46 100644
--- a/main.py
+++ b/main.py
@@ -1,10 +1,13 @@
-from util import epsilon_greedy, get_start_state, test
+import matplotlib.pyplot as plt
+
+from game import run_game
+from util import initial_q_fill
 
 
-AMOUNT_RUNS = 10
+AMOUNT_RUNS = 5000
 EPSILON = 0.1
 ALPHA = 0.1
-GAMMA = 0.1
+GAMMA = 0.9
 
 
 """
@@ -13,12 +16,15 @@ action: Direction
 q_value: (state, action)
 """
 q_values = {}
+initial_q_fill(q_values)
 
+cookies_per_run = []
 # Amount of single runs
 for x in range(AMOUNT_RUNS):
-    state = get_start_state()
+    amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
+    cookies_per_run.append(amount_cookies_ate)
+    print(f"Run {x}: {amount_cookies_ate} cookies ate\n")
 
-    # Single run, until win or death
-    while(True):
-        action = epsilon_greedy(q_values, state, EPSILON)
 
+plt.plot(cookies_per_run)
+plt.show()
diff --git a/util.py b/util.py
index ce3373b..f632371 100644
--- a/util.py
+++ b/util.py
@@ -8,6 +8,17 @@ class Direction(Enum):
     LEFT = 3
 
 
+def initial_q_fill(q_values):
+    for x in range(8):
+        for y in range(3):
+            for cookie_direction in Direction:
+                for action in Direction:
+                    state = (x, y, cookie_direction)
+                    q_values[(state, action)] = random.random() * 0.2 - 0.1
+            
+
+
+
 def get_start_state():
     first_direction_cookie = random.choice([True, False])
     if first_direction_cookie:
@@ -18,68 +29,86 @@ def get_start_state():
 
 
 def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
-    # distance pacman - ghost
-    x_dist = abs(pac_x - ghost_x)
-    y_dist = abs(pac_y - ghost_y)
+    x_ghost_dist = abs(pac_x - ghost_x)
+    y_ghost_dist = abs(pac_y - ghost_y)
 
-    # closest cookie
-    best_distance = 12
-    best_cords = None
+    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
 
-    all_cookie_locations = get_all_cookies_locations(labyrinth)
-    for (cookie_x, cookie_y) in all_cookie_locations:
-        dist = abs(pac_x - cookie_x) + abs(pac_y - cookie_y)
-
-        if dist < best_distance:
-            best_distance = dist
-            best_cords = (cookie_x, cookie_y)
-
-    # closest cookie direction
-    cookie_direction = None
-    real_dist_x = pac_x - cookie_x
-    real_dist_y = pac_y - cookie_y
-
-    #TODO
-    if real_dist_x >= 0 & real_dist_y > 0:
-        cookie_direction = Direction
-
-    return x_dist, y_dist, cookie_direction
+    return x_ghost_dist, y_ghost_dist, cookie_direction
 
 
-def get_all_cookies_locations(labyrinth):
-    cookie_locations = []
+def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
+    cookie_distances = []
+
     for y, row in enumerate(labyrinth):
         for x, cell in enumerate(row):
             if cell == ".":
-                cookie_locations.append((x, y))
+                x_dist = abs(pac_x - x)
+                y_dist = abs(pac_y - y)
+                dist = x_dist + y_dist
+                cookie_distances.append((dist, (x, y)))
 
-    return cookie_locations
+    closest_cookie = min(cookie_distances, key=lambda item: item[0])
+    closest_cookie_cords = closest_cookie[1]
+    cookie_x = closest_cookie_cords[0]
+    cookie_y = closest_cookie_cords[1]
+
+
+    dx = cookie_x - pac_x
+    dy = cookie_y - pac_y 
+    
+    if abs(dx) >= abs(dy):
+        #? X distance bigger
+
+        if dy > 0:
+            return Direction.DOWN
+        elif dy < 0:
+            return Direction.UP
+        else:
+            #? Cookie on the same Y level
+            if dx > 0:
+                return Direction.RIGHT
+            else:
+                return Direction.LEFT
+    else:
+        #? Y distance bigger
+
+        if dx > 0:
+            return Direction.RIGHT
+        elif dx < 0:
+            return Direction.LEFT
+        else:
+            #? Cookie on the same X level
+            if dy > 0:
+                return Direction.DOWN
+            else:
+                return Direction.UP
 
 
 
 def epsilon_greedy(q_values, state, epsilon):
-    best_action, states_for_epsilon = get_best_q_value(q_values, state)
+    best_action, actions_for_epsilon = get_best_q_action(q_values, state)
 
     if random.random() < epsilon:
-        if not states_for_epsilon:
+        if not actions_for_epsilon:
             best_action = get_random_direction()
             return best_action
 
-        random_action = random.choice(states_for_epsilon)
+        random_action = random.choice(actions_for_epsilon)
         return random_action
 
     return best_action
 
 
-def get_best_q_value(q_values, state):
+def get_best_q_action(q_values, state):
     best_action = None
     best_value = None
 
-    states_for_epsilon = []
+    actions_for_epsilon = []
 
     for (q_state, q_action), value in q_values.items():
         if q_state == state:
-            states_for_epsilon.append(q_action)
+            actions_for_epsilon.append(q_action)
 
             if best_value is None:
                 best_value = value
@@ -93,7 +122,7 @@ def get_best_q_value(q_values, state):
     if not best_action:
         best_action = get_random_direction()
 
-    return best_action, states_for_epsilon
+    return best_action, actions_for_epsilon
 
 
 def get_random_direction():