refactor

2025-12-10 11:49:20 +01:00 · 2025-12-10 11:49:20 +01:00 · 1082c90fea
parent a52de42507
commit 1082c90fea
7 changed files with 194 additions and 141 deletions
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@ -2,116 +2,24 @@ import pygame
 import math
 import os

-from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
-import conf
-
-# Initialize pygame
-
-# Define constants
-SCREEN_WIDTH = 400
-SCREEN_HEIGHT = 400
-CELL_SIZE = 40
-
-# Define colors
-YELLOW = (255, 255, 0)
-RED = (255, 0, 0)
-WHITE = (255, 255, 255)
-BLUE = (0, 0, 255)
-BLACK = (0, 0, 0)
-
-REWARD_ON_HALF = 50
-
-# Labyrinth as a string
-LABYRINTH_INIT = [
-    "##########",
-    "#........#",
-    "#.##..##.#",
-    "#........#",
-    "##########"
-]
-
-# Get labyrinth dimensions
-ROWS = len(LABYRINTH_INIT)
-COLS = len(LABYRINTH_INIT[0])
-
-
-
-class Pacman:
-    def __init__(self, screen, x, y):
-        self.screen = screen
-        self.x = x
-        self.y = y
-        self.count = 0
-
-    def move(self, labyrinth, dx, dy):
-        new_x, new_y = self.x + dx, self.y + dy
-        if labyrinth[new_y][new_x] != "#":
-            self.x = new_x
-            self.y = new_y
-
-    def draw(self):
-        radius = CELL_SIZE // 2 - 4
-        start_angle = math.pi / 6
-        end_angle = -math.pi / 6
-        pygame.draw.circle(self.screen, YELLOW, (self.x * CELL_SIZE + CELL_SIZE // 2, self.y * CELL_SIZE + CELL_SIZE // 2), CELL_SIZE // 2 - 4)
-            # Calculate the points for the mouth
-        start_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(start_angle)),
-                     self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(start_angle)))
-        end_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(end_angle)),
-                   self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(end_angle)))
-        self.count += 1
-        if self.count%2==0:
-            # Draw the mouth by filling a polygon
-            pygame.draw.polygon(self.screen, BLACK, [(self.x* CELL_SIZE + CELL_SIZE // 2, self.y* CELL_SIZE + CELL_SIZE // 2), start_pos, end_pos])
-
-
-class Ghost:
-    # Define the pixel art for the ghost using strings
-    ghost_pixels = [
-        " #### ",
-        "######",
-        "## # #",
-        "######",
-        "######",
-        "# # # "
-    ]
-
-    def __init__(self, screen, x, y):
-        self.screen = screen
-        self.x = x
-        self.y = y
-
-    def move_towards_pacman(self, labyrinth, pacman):
-        if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#":
-            self.x += 1
-        elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#":
-            self.x -= 1
-        elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#":
-            self.y += 1
-        elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#":
-            self.y -= 1
-
-    def draw(self):
-        pixel_size = CELL_SIZE // len(self.ghost_pixels)  # Size of each pixel in the ghost art
-        for row_idx, row in enumerate(self.ghost_pixels):
-            for col_idx, pixel in enumerate(row):
-                if pixel == "#":
-                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
-                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
-                    pygame.draw.rect(self.screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
-
+from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill
+import data.classes_consts as consts
+import data.conf as conf
+# import data.classes as classes
+from data.classes import Pacman, Ghost


 def start_try(EPSILON, ALPHA, GAMMA):
    #? Learning initial
    q_values = initial_q_fill()
+    print(len(q_values))
    
    #? Game initial
    pygame.init()
    screen = None

    if conf.show_game:
-        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        screen = consts.screen
        pygame.display.set_caption("Micro-Pacman")

    #? Start try
@ -136,7 +44,7 @@ def start_try(EPSILON, ALPHA, GAMMA):
        print(f"Run {x+1}: {iterations_per_run} iterations")

    if conf.show_trained:
-        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        screen = consts.screen
        pygame.display.set_caption("Micro-Pacman")

        while True:
@ -152,24 +60,21 @@ def start_try(EPSILON, ALPHA, GAMMA):

 def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
    clock = pygame.time.Clock()
-    labyrinth = LABYRINTH_INIT.copy()
+    labyrinth = consts.LABYRINTH_INIT.copy()


    # Initialize Pacman and Ghost positions
    pacman = Pacman(screen, 1, 1)
-    ghost = Ghost(screen, COLS - 2, ROWS - 2)
+    ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)

-    #? -------------------------MY CODE-----------------------------------
    state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
-    #? -------------------------MY CODE-----------------------------------

    #? GAME LOOP
    running = True
    iter = 0
    while running:
-        #? -------------------------MY CODE-----------------------------------
        reward = 0
-        #? -------------------------MY CODE-----------------------------------
+        iter = iter + 1

        # Handle events
        for event in pygame.event.get():
@ -177,12 +82,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
                running = False

        if conf.show_game:
-            screen.fill(BLACK)
+            screen.fill(consts.BLACK)

-
-        iter = iter + 1
-
-        # Handle Pacman movement
+        #? Arrow key movements
        keys = pygame.key.get_pressed()
        if keys[pygame.K_LEFT]:
            pacman.move(-1, 0)
@ -193,8 +95,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
        if keys[pygame.K_DOWN]:
            pacman.move(0, 1)

-
-        #? -------------------------MY CODE-----------------------------------
+        #? Agent movements
        action = epsilon_greedy(q_values, state, EPSILON)
        if action == Direction.LEFT:
            pacman.move(labyrinth, -1, 0)
@ -204,7 +105,6 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
            pacman.move(labyrinth, 0, -1)
        if action == Direction.DOWN:
            pacman.move(labyrinth, 0, 1)
-        #? -------------------------MY CODE-----------------------------------


        if iter%3==0:
@ -220,9 +120,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
        if labyrinth[pacman.y][pacman.x] == ".":
            labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]

-        #? -------------------------MY CODE-----------------------------------
            #? half reward
-
            # cookie_counter = 0
            
            # for y, row in enumerate(labyrinth):
@ -234,10 +132,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
            #     # reward = REWARD_ON_HALF
            #     if show_game:
            #         print("Got half reward")
-        #? -------------------------MY CODE-----------------------------------


-        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            # time_reward = calc_time_reward(iter)
            # reward = REWARD_ON_WIN * time_reward
@ -245,11 +141,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
            running = False

            if conf.show_game:
-                # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
                print(f"You Win! Took {iter} iterations")


-        #? -------------------------MY CODE-----------------------------------
        if not running:
            new_state = state
        else:
@ -271,10 +165,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
                    if cell == ".":
                        counter += 1
            return 20-counter, iter
-        #? -------------------------MY CODE-----------------------------------
        

-        # Draw the labyrinth, pacman, and ghost
        if conf.show_game:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
@ -287,16 +179,5 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
            clock.tick(40)


-
-def draw_labyrinth(screen, labyrinth):
-    for y, row in enumerate(labyrinth):
-        for x, cell in enumerate(row):
-            if cell == "#":
-                pygame.draw.rect(screen, BLUE, (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE))
-            elif cell == ".":
-                pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5)
-
-
-
 if __name__ == "__main__":
    run_game()
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@ -5,7 +5,7 @@ import numpy as np
 import pandas as pd

 from ReinforcmentLearning.game import start_try
-import conf
+import data.conf as conf



--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@ -1,5 +1,10 @@
 from enum import Enum
 import random
+import pygame
+
+import numpy as np
+
+import data.classes_consts as consts

 class Direction(Enum):
    UP = 0
@ -17,17 +22,24 @@ def initial_q_fill():
                for action in Direction:
                    state = (x, y, cookie_direction)
                    q_values[(state, action)] = random.random() * 0.2 - 0.1
+                    # q_values[state][action] = random.random() * 0.2 - 0.1

    return q_values


+def initial_q_fill2():
+    """Create a randomly initialised Q-table addressed by linear state index.
+
+    Returns:
+        A tuple ``(q_table, indexer)``. ``q_table`` is a NumPy array of shape
+        ``(indexer.total_states, 4)`` (one row per state, one column per
+        action) filled with uniform random values in ``[-0.1, 0.1)``.
+        ``indexer`` is the shared indexer instance used to address its rows.
+    """
+    # The shared indexer is instantiated in data/conf.py; data/classes_consts.py
+    # defines no ``indexer``, so the former ``consts.indexer`` lookup raised
+    # AttributeError. Imported locally because this module does not import
+    # data.conf at file level.
+    from data.conf import indexer

-def get_start_state():
-    first_direction_cookie = random.choice([True, False])
-    if first_direction_cookie:
-        return (7, 2, Direction.DOWN)
+    # 2D array addressed as [state_index, action]; 4 is the number of actions.
+    q_table = np.random.uniform(
+        low=-0.1,
+        high=0.1,
+        size=(indexer.total_states, 4),
+    )
    
-    return(7, 2, Direction.RIGHT)
+    return q_table, indexer



@ -140,3 +152,17 @@ def calc_time_reward(amount_iterations):
        return 1
    
    return - (1 / 1000) * amount_iterations + 11
+
+
+
+def draw_labyrinth(screen, labyrinth):
+    """Render the maze onto ``screen``.
+
+    Args:
+        screen: Surface handed to the ``pygame.draw`` calls.
+        labyrinth: Sequence of row strings. A ``"#"`` cell is drawn as a blue
+            cell-sized rectangle, a ``"."`` cell as a white circle of radius 5
+            centred in its cell; any other character draws nothing.
+    """
+    cell = consts.CELL_SIZE
+    wall_color = consts.BLUE
+    cookie_color = consts.WHITE
+
+    for row_idx, row in enumerate(labyrinth):
+        top = row_idx * cell
+        for col_idx, tile in enumerate(row):
+            left = col_idx * cell
+            if tile == "#":
+                pygame.draw.rect(screen, wall_color, (left, top, cell, cell))
+            elif tile == ".":
+                center = (left + cell // 2, top + cell // 2)
+                pygame.draw.circle(screen, cookie_color, center, 5)
--- a/data/classes.py
+++ b/data/classes.py
@ -0,0 +1,111 @@
+import math
+import pygame
+
+from data.classes_consts import CELL_SIZE, YELLOW, BLACK, RED
+
+
+class Pacman:
+    """Pacman piece occupying a single grid cell of the labyrinth."""
+
+    def __init__(self, screen, x, y):
+        """Remember the draw target and the starting cell.
+
+        Args:
+            screen: Surface handed to the ``pygame.draw`` calls in ``draw``.
+            x: Starting column (cell units).
+            y: Starting row (cell units).
+        """
+        self.screen = screen
+        self.x, self.y = x, y
+        # Number of draw() calls so far; drives the mouth animation.
+        self.count = 0
+
+    def move(self, labyrinth, dx, dy):
+        """Step by ``(dx, dy)`` cells unless the target cell is a wall (``"#"``)."""
+        target_x = self.x + dx
+        target_y = self.y + dy
+        if labyrinth[target_y][target_x] == "#":
+            return
+        self.x, self.y = target_x, target_y
+
+    def draw(self):
+        """Draw the yellow body; on every second call also draw the open mouth."""
+        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
+        center_y = self.y * CELL_SIZE + CELL_SIZE // 2
+        radius = CELL_SIZE // 2 - 4
+        pygame.draw.circle(self.screen, YELLOW, (center_x, center_y), radius)
+
+        self.count += 1
+        if self.count % 2 != 0:
+            return
+
+        # The mouth is a black triangle from the centre to two points placed
+        # slightly outside the body (1.3 * radius) at +/- 30 degrees.
+        reach = radius * 1.3
+        mouth = [(center_x, center_y)]
+        for angle in (math.pi / 6, -math.pi / 6):
+            mouth.append((center_x + int(reach * math.cos(angle)),
+                          center_y - int(reach * math.sin(angle))))
+        pygame.draw.polygon(self.screen, BLACK, mouth)
+
+
+class Ghost:
+    """Ghost piece that chases Pacman one cell at a time."""
+
+    # Pixel-art sprite: each "#" becomes one filled square when drawn.
+    ghost_pixels = [
+        " #### ",
+        "######",
+        "## # #",
+        "######",
+        "######",
+        "# # # "
+    ]
+
+    def __init__(self, screen, x, y):
+        """Remember the draw target and the starting cell (column ``x``, row ``y``)."""
+        self.screen = screen
+        self.x, self.y = x, y
+
+    def move_towards_pacman(self, labyrinth, pacman):
+        """Take at most one step that reduces the distance to ``pacman``.
+
+        Horizontal moves are tried before vertical ones; a direction is only
+        taken when the neighbouring cell in that direction is not a wall
+        (``"#"``). If no candidate step is possible the ghost stays put.
+        """
+        def is_free(col, row):
+            return labyrinth[row][col] != "#"
+
+        col, row = self.x, self.y
+        if col < pacman.x and is_free(col + 1, row):
+            self.x = col + 1
+            return
+        if col > pacman.x and is_free(col - 1, row):
+            self.x = col - 1
+            return
+        if row < pacman.y and is_free(col, row + 1):
+            self.y = row + 1
+            return
+        if row > pacman.y and is_free(col, row - 1):
+            self.y = row - 1
+
+    def draw(self):
+        """Draw the sprite in red, scaled so its rows span one cell."""
+        # Edge length of one sprite pixel on screen.
+        scale = CELL_SIZE // len(self.ghost_pixels)
+        origin_x = self.x * CELL_SIZE
+        origin_y = self.y * CELL_SIZE
+        for row_idx, line in enumerate(self.ghost_pixels):
+            for col_idx, mark in enumerate(line):
+                if mark != "#":
+                    continue
+                square = (origin_x + col_idx * scale, origin_y + row_idx * scale, scale, scale)
+                pygame.draw.rect(self.screen, RED, square)
+
+
+
+class StateIndexer:
+    """Map ``(x, y, cookie_dir)`` states to unique table indices and back.
+
+    The state space is the box ``x in [-7, 7]``, ``y in [-2, 2]``,
+    ``cookie_dir in [0, 3]`` (15 * 5 * 4 = 300 states). Indices are laid out
+    with ``x`` as the slowest-varying and ``cookie_dir`` as the
+    fastest-varying component.
+    """
+
+    def __init__(self):
+        """Set the inclusive bounds of each component and the derived sizes."""
+        # State space boundaries (inclusive)
+        self.x_min, self.x_max = -7, 7      # 15 values
+        self.y_min, self.y_max = -2, 2      # 5 values
+        self.dir_min, self.dir_max = 0, 3   # 4 directions
+
+        # Number of distinct values per component
+        self.x_range = self.x_max - self.x_min + 1      # 15
+        self.y_range = self.y_max - self.y_min + 1      # 5
+        self.dir_range = self.dir_max - self.dir_min + 1  # 4
+
+        # Multipliers for indexing
+        self.y_dir_product = self.y_range * self.dir_range      # 5 * 4 = 20
+        self.total_states = self.x_range * self.y_dir_product   # 15 * 20 = 300
+
+    def to_index(self, x, y, cookie_dir):
+        """Return the unique index in ``0..total_states - 1`` for a state.
+
+        Raises:
+            ValueError: If any component lies outside its bounds. Without
+                this check an out-of-range component silently aliases
+                another state (e.g. ``y = 3`` lands on the next ``x``) or
+                yields an index past the end of the table.
+        """
+        in_bounds = (
+            self.x_min <= x <= self.x_max
+            and self.y_min <= y <= self.y_max
+            and self.dir_min <= cookie_dir <= self.dir_max
+        )
+        if not in_bounds:
+            raise ValueError(
+                f"state {(x, y, cookie_dir)} is outside the indexed state space"
+            )
+
+        # Shift every component to a zero-based offset.
+        x_idx = x - self.x_min
+        y_idx = y - self.y_min
+        dir_idx = cookie_dir - self.dir_min
+
+        # Linear mapping: (x * y_range * dir_range) + (y * dir_range) + dir
+        return (x_idx * self.y_dir_product) + (y_idx * self.dir_range) + dir_idx
+
+    def from_index(self, idx):
+        """Return the ``(x, y, cookie_dir)`` state for a table index.
+
+        Raises:
+            ValueError: If ``idx`` is not in ``0..total_states - 1``; such an
+                index would otherwise decode to a state outside the bounds.
+        """
+        if not 0 <= idx < self.total_states:
+            raise ValueError(
+                f"index {idx} is outside 0..{self.total_states - 1}"
+            )
+
+        # Peel components off from fastest- to slowest-varying.
+        rest, dir_idx = divmod(idx, self.dir_range)
+        x_idx, y_idx = divmod(rest, self.y_range)
+
+        return (
+            x_idx + self.x_min,
+            y_idx + self.y_min,
+            dir_idx + self.dir_min
+        )
--- a/data/classes_consts.py
+++ b/data/classes_consts.py
@ -0,0 +1,29 @@
+import pygame
+
+
+
+# Initial maze layout, one string per row: "#" is a wall, "." is a cookie.
+LABYRINTH_INIT = [
+    "##########",
+    "#........#",
+    "#.##..##.#",
+    "#........#",
+    "##########"
+]
+
+# Labyrinth dimensions in cells, derived from the layout above.
+ROWS, COLS = len(LABYRINTH_INIT), len(LABYRINTH_INIT[0])
+
+# Edge length of one labyrinth cell in pixels.
+CELL_SIZE = 40
+
+# NOTE(review): the window created below is sized from COLS/ROWS * CELL_SIZE,
+# not from these two values - confirm whether they are still needed.
+SCREEN_WIDTH = SCREEN_HEIGHT = 400
+
+# RGB colors
+BLACK = (0, 0, 0)
+WHITE = (255, 255, 255)
+RED = (255, 0, 0)
+BLUE = (0, 0, 255)
+YELLOW = (255, 255, 0)
+
+
+# Shared display surface. This call runs when the module is imported, so the
+# display is set up as a side effect of importing these constants.
+screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
--- a/data/conf.py
+++ b/data/conf.py
@ -1,3 +1,8 @@
+from data.classes import StateIndexer
+
+
+indexer = StateIndexer()
+
 EPSILON = 0.01
 # EPSILON = 0.005
 ALPHA = 0.2
@ -7,8 +12,9 @@ AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5

 REWARD_ON_WIN = 400
+REWARD_ON_HALF = 50
 REWARD_ON_LOSE = -250

 plot_result = True
-show_game = False
+show_game = True
 show_trained = True
--- a/main.py
+++ b/main.py
@ -1,7 +1,7 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
-import conf
+import data.conf as conf