From 1082c90feab5551e8cb8ad71fe7033d27ffa97d4 Mon Sep 17 00:00:00 2001
From: 2wenty1ne <sirdarkvic@gmail.com>
Date: Wed, 10 Dec 2025 11:49:20 +0100
Subject: [PATCH] refactor: move Pacman/Ghost, constants and conf into data/; add StateIndexer

---
 ReinforcmentLearning/game.py     | 147 +++----------------------------
 ReinforcmentLearning/learning.py |   2 +-
 ReinforcmentLearning/util.py     |  36 ++++++--
 data/classes.py                  | 111 +++++++++++++++++++++++
 data/classes_consts.py           |  29 ++++++
 conf.py => data/conf.py          |   8 +-
 main.py                          |   2 +-
 7 files changed, 194 insertions(+), 141 deletions(-)
 create mode 100644 data/classes.py
 create mode 100644 data/classes_consts.py
 rename conf.py => data/conf.py (56%)

diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py
index be58510..753450f 100644
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@@ -2,116 +2,24 @@ import pygame
 import math
 import os
 
-from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
-import conf
-
-# Initialize pygame
-
-# Define constants
-SCREEN_WIDTH = 400
-SCREEN_HEIGHT = 400
-CELL_SIZE = 40
-
-# Define colors
-YELLOW = (255, 255, 0)
-RED = (255, 0, 0)
-WHITE = (255, 255, 255)
-BLUE = (0, 0, 255)
-BLACK = (0, 0, 0)
-
-REWARD_ON_HALF = 50
-
-# Labyrinth as a string
-LABYRINTH_INIT = [
-    "##########",
-    "#........#",
-    "#.##..##.#",
-    "#........#",
-    "##########"
-]
-
-# Get labyrinth dimensions
-ROWS = len(LABYRINTH_INIT)
-COLS = len(LABYRINTH_INIT[0])
-
-
-
-class Pacman:
-    def __init__(self, screen, x, y):
-        self.screen = screen
-        self.x = x
-        self.y = y
-        self.count = 0
-
-    def move(self, labyrinth, dx, dy):
-        new_x, new_y = self.x + dx, self.y + dy
-        if labyrinth[new_y][new_x] != "#":
-            self.x = new_x
-            self.y = new_y
-
-    def draw(self):
-        radius = CELL_SIZE // 2 - 4
-        start_angle = math.pi / 6
-        end_angle = -math.pi / 6
-        pygame.draw.circle(self.screen, YELLOW, (self.x * CELL_SIZE + CELL_SIZE // 2, self.y * CELL_SIZE + CELL_SIZE // 2), CELL_SIZE // 2 - 4)
-            # Calculate the points for the mouth
-        start_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(start_angle)),
-                     self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(start_angle)))
-        end_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(end_angle)),
-                   self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(end_angle)))
-        self.count += 1
-        if self.count%2==0:
-            # Draw the mouth by filling a polygon
-            pygame.draw.polygon(self.screen, BLACK, [(self.x* CELL_SIZE + CELL_SIZE // 2, self.y* CELL_SIZE + CELL_SIZE // 2), start_pos, end_pos])
-
-
-class Ghost:
-    # Define the pixel art for the ghost using strings
-    ghost_pixels = [
-        " #### ",
-        "######",
-        "## # #",
-        "######",
-        "######",
-        "# # # "
-    ]
-
-    def __init__(self, screen, x, y):
-        self.screen = screen
-        self.x = x
-        self.y = y
-
-    def move_towards_pacman(self, labyrinth, pacman):
-        if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#":
-            self.x += 1
-        elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#":
-            self.x -= 1
-        elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#":
-            self.y += 1
-        elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#":
-            self.y -= 1
-
-    def draw(self):
-        pixel_size = CELL_SIZE // len(self.ghost_pixels)  # Size of each pixel in the ghost art
-        for row_idx, row in enumerate(self.ghost_pixels):
-            for col_idx, pixel in enumerate(row):
-                if pixel == "#":
-                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
-                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
-                    pygame.draw.rect(self.screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
-
+from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill
+import data.classes_consts as consts
+import data.conf as conf
+# import data.classes as classes
+from data.classes import Pacman, Ghost
 
 
 def start_try(EPSILON, ALPHA, GAMMA):
     #? Learning initial
     q_values = initial_q_fill()
+    print(len(q_values))
     
     #? Game initial
     pygame.init()
     screen = None
 
     if conf.show_game:
-        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        screen = consts.screen
         pygame.display.set_caption("Micro-Pacman")
 
     #? Start try
@@ -136,7 +44,7 @@ def start_try(EPSILON, ALPHA, GAMMA):
         print(f"Run {x+1}: {iterations_per_run} iterations")
 
     if conf.show_trained:
-        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        screen = consts.screen
         pygame.display.set_caption("Micro-Pacman")
 
         while True:
@@ -152,24 +60,21 @@ def start_try(EPSILON, ALPHA, GAMMA):
 
 def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
     clock = pygame.time.Clock()
-    labyrinth = LABYRINTH_INIT.copy()
+    labyrinth = consts.LABYRINTH_INIT.copy()
 
 
     # Initialize Pacman and Ghost positions
     pacman = Pacman(screen, 1, 1)
-    ghost = Ghost(screen, COLS - 2, ROWS - 2)
+    ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)
 
-    #? -------------------------MY CODE-----------------------------------
     state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
-    #? -------------------------MY CODE-----------------------------------
 
     #? GAME LOOP
     running = True
     iter = 0
     while running:
-        #? -------------------------MY CODE-----------------------------------
         reward = 0
-        #? -------------------------MY CODE-----------------------------------
+        iter = iter + 1
 
         # Handle events
         for event in pygame.event.get():
@@ -177,12 +82,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
                 running = False
 
         if conf.show_game:
-            screen.fill(BLACK)
+            screen.fill(consts.BLACK)
 
-
-        iter = iter + 1
-
-        # Handle Pacman movement
+        #? Arrow key movements
         keys = pygame.key.get_pressed()
         if keys[pygame.K_LEFT]:
             pacman.move(-1, 0)
@@ -193,8 +95,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
         if keys[pygame.K_DOWN]:
             pacman.move(0, 1)
 
-
-        #? -------------------------MY CODE-----------------------------------
+        #? Agent movements
         action = epsilon_greedy(q_values, state, EPSILON)
         if action == Direction.LEFT:
             pacman.move(labyrinth, -1, 0)
@@ -204,7 +105,6 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
             pacman.move(labyrinth, 0, -1)
         if action == Direction.DOWN:
             pacman.move(labyrinth, 0, 1)
-        #? -------------------------MY CODE-----------------------------------
 
 
         if iter%3==0:
@@ -220,9 +120,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
         if labyrinth[pacman.y][pacman.x] == ".":
             labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
 
-        #? -------------------------MY CODE-----------------------------------
             #? half reward
-
             # cookie_counter = 0
             
             # for y, row in enumerate(labyrinth):
@@ -234,10 +132,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
             #     # reward = REWARD_ON_HALF
             #     if show_game:
             #         print("Got half reward")
-        #? -------------------------MY CODE-----------------------------------
 
 
-        # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
             # time_reward = calc_time_reward(iter)
             # reward = REWARD_ON_WIN * time_reward
@@ -245,11 +141,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
             running = False
 
             if conf.show_game:
-                # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
                 print(f"You Win! Took {iter} iterations")
 
 
-        #? -------------------------MY CODE-----------------------------------
         if not running:
             new_state = state
         else:
@@ -271,10 +165,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
                     if cell == ".":
                         counter += 1
             return 20-counter, iter
-        #? -------------------------MY CODE-----------------------------------
         
 
-        # Draw the labyrinth, pacman, and ghost
         if conf.show_game:
             draw_labyrinth(screen, labyrinth)
             pacman.draw()
@@ -287,16 +179,5 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
             clock.tick(40)
 
 
-
-def draw_labyrinth(screen, labyrinth):
-    for y, row in enumerate(labyrinth):
-        for x, cell in enumerate(row):
-            if cell == "#":
-                pygame.draw.rect(screen, BLUE, (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE))
-            elif cell == ".":
-                pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5)
-
-
-
 if __name__ == "__main__":
     run_game()
\ No newline at end of file
diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py
index 6f9170c..287259e 100644
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@@ -5,7 +5,7 @@ import numpy as np
 import pandas as pd
 
 from ReinforcmentLearning.game import start_try
-import conf
+import data.conf as conf
 
 
 
diff --git a/ReinforcmentLearning/util.py b/ReinforcmentLearning/util.py
index dc670e2..75807de 100644
--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@@ -1,5 +1,10 @@
 from enum import Enum
 import random
+import pygame
+
+import numpy as np
+
+import data.classes_consts as consts
 
 class Direction(Enum):
     UP = 0
@@ -17,17 +22,24 @@ def initial_q_fill():
                 for action in Direction:
                     state = (x, y, cookie_direction)
                     q_values[(state, action)] = random.random() * 0.2 - 0.1
+                    # q_values[state][action] = random.random() * 0.2 - 0.1
 
     return q_values
 
 
+def initial_q_fill2():
+    """Return a randomly initialised Q-table (one row per state, 4 action columns) and its indexer."""
 
-def get_start_state():
-    first_direction_cookie = random.choice([True, False])
-    if first_direction_cookie:
-        return (7, 2, Direction.DOWN)
+    from data.conf import indexer  # classes_consts defines no 'indexer'; the shared instance lives in data.conf
+    # Create 2D array: [state_index, action], values drawn uniformly from [-0.1, 0.1)
+    # 300 states × 4 actions = 1200 entries
+    q_table = np.random.uniform(
+        low=-0.1,
+        high=0.1,
+        size=(indexer.total_states, 4)  # 300 × 4
+    )
     
-    return(7, 2, Direction.RIGHT)
+    return q_table, indexer
 
 
 
@@ -140,3 +152,17 @@ def calc_time_reward(amount_iterations):
         return 1
     
     return - (1 / 1000) * amount_iterations + 11
+
+
+
+def draw_labyrinth(screen, labyrinth):
+    """Paint every wall ("#") as a blue square and every cookie (".") as a white dot on screen."""
+    size = consts.CELL_SIZE
+    half = size // 2
+    for row_no, line in enumerate(labyrinth):
+        for col_no, tile in enumerate(line):
+            left, top = col_no * size, row_no * size
+            if tile == "#":
+                pygame.draw.rect(screen, consts.BLUE, (left, top, size, size))
+            elif tile == ".":
+                pygame.draw.circle(screen, consts.WHITE, (left + half, top + half), 5)
diff --git a/data/classes.py b/data/classes.py
new file mode 100644
index 0000000..c4a7f2c
--- /dev/null
+++ b/data/classes.py
@@ -0,0 +1,111 @@
+import math
+import pygame
+
+from data.classes_consts import CELL_SIZE, YELLOW, BLACK, RED
+
+
+class Pacman:
+    """Player sprite: a grid position plus a call counter that animates the mouth."""
+
+    def __init__(self, screen, x, y):
+        self.screen = screen
+        self.x = x
+        self.y = y
+        self.count = 0
+
+    def move(self, labyrinth, dx, dy):
+        """Step by (dx, dy) unless the target cell is a wall ("#")."""
+        target_x, target_y = self.x + dx, self.y + dy
+        if labyrinth[target_y][target_x] == "#":
+            return
+        self.x, self.y = target_x, target_y
+
+    def draw(self):
+        """Draw the body on every call and the open mouth on every second call."""
+        radius = CELL_SIZE // 2 - 4
+        reach = radius * 1.3  # mouth corners lie slightly outside the body circle
+        cx = self.x * CELL_SIZE + CELL_SIZE // 2
+        cy = self.y * CELL_SIZE + CELL_SIZE // 2
+        pygame.draw.circle(self.screen, YELLOW, (cx, cy), radius)
+        self.count += 1
+        if self.count % 2 == 0:
+            corners = [(cx + int(reach * math.cos(a)), cy - int(reach * math.sin(a))) for a in (math.pi / 6, -math.pi / 6)]
+            pygame.draw.polygon(self.screen, BLACK, [(cx, cy), *corners])
+
+
+class Ghost:
+    """Chaser sprite drawn from string pixel art; steps towards Pacman one cell at a time."""
+    ghost_pixels = [
+        " #### ",
+        "######",
+        "## # #",
+        "######",
+        "######",
+        "# # # "
+    ]
+
+    def __init__(self, screen, x, y):
+        self.screen = screen
+        self.x = x
+        self.y = y
+
+    def move_towards_pacman(self, labyrinth, pacman):
+        """Take at most one step; candidates are tried in x+1, x-1, y+1, y-1 order."""
+        col, row = self.x, self.y
+        if col < pacman.x and labyrinth[row][col + 1] != "#":
+            self.x = col + 1
+        elif col > pacman.x and labyrinth[row][col - 1] != "#":
+            self.x = col - 1
+        elif row < pacman.y and labyrinth[row + 1][col] != "#":
+            self.y = row + 1
+        elif row > pacman.y and labyrinth[row - 1][col] != "#":
+            self.y = row - 1
+
+    def draw(self):
+        """Render the pixel art: one red square per "#" in ghost_pixels."""
+        scale = CELL_SIZE // len(self.ghost_pixels)  # edge length of one art pixel
+        left, top = self.x * CELL_SIZE, self.y * CELL_SIZE
+        for art_row, line in enumerate(self.ghost_pixels):
+            for art_col, mark in enumerate(line):
+                if mark == "#":
+                    pygame.draw.rect(self.screen, RED, (left + art_col * scale, top + art_row * scale, scale, scale))
+
+
+
+class StateIndexer:
+    """Map (x, y, cookie_dir) states to unique flat indices 0..total_states-1 and back."""
+    def __init__(self):
+        # Inclusive bounds of each state component
+        self.x_min, self.x_max = -7, 7      # 15 values: -7 to 7 inclusive
+        self.y_min, self.y_max = -2, 2      # 5 values: -2 to 2 inclusive
+        self.dir_min, self.dir_max = 0, 3   # 4 directions: 0 to 3
+
+        # Number of distinct values per component
+        self.x_range = self.x_max - self.x_min + 1  # 15
+        self.y_range = self.y_max - self.y_min + 1  # 5
+        self.dir_range = self.dir_max - self.dir_min + 1  # 4
+
+        # Strides of the flattened layout: x varies slowest, cookie_dir fastest
+        self.y_dir_product = self.y_range * self.dir_range  # 5 * 4 = 20
+        self.total_states = self.x_range * self.y_dir_product  # 15 * 20 = 300
+
+    def to_index(self, x, y, cookie_dir):
+        """Return the flat index of a state; raise ValueError if any component is out of bounds."""
+        cookie_dir = getattr(cookie_dir, "value", cookie_dir)  # accept Enum members as well as ints
+        # Out-of-range components would silently alias another state's index, so reject them.
+        if not (self.x_min <= x <= self.x_max and self.y_min <= y <= self.y_max
+                and self.dir_min <= cookie_dir <= self.dir_max):
+            raise ValueError(f"state out of range: {(x, y, cookie_dir)}")
+        x_idx, y_idx, dir_idx = x - self.x_min, y - self.y_min, cookie_dir - self.dir_min
+        return (x_idx * self.y_dir_product) + (y_idx * self.dir_range) + dir_idx
+
+    def from_index(self, idx):
+        """Return the (x, y, cookie_dir) state of a flat index; raise ValueError if out of bounds."""
+        if not 0 <= idx < self.total_states:
+            raise ValueError(f"index out of range: {idx}")
+        # Peel the components off from fastest- to slowest-varying.
+        xy_idx, dir_idx = divmod(idx, self.dir_range)
+        x_idx, y_idx = divmod(xy_idx, self.y_range)
+        return (
+            x_idx + self.x_min,
+            y_idx + self.y_min,
+            dir_idx + self.dir_min
+        )
diff --git a/data/classes_consts.py b/data/classes_consts.py
new file mode 100644
index 0000000..0e1b890
--- /dev/null
+++ b/data/classes_consts.py
@@ -0,0 +1,29 @@
+import pygame
+
+
+
+LABYRINTH_INIT = [  # one string per grid row: "#" is a wall, "." is a cookie
+    "##########",
+    "#........#",
+    "#.##..##.#",
+    "#........#",
+    "##########"
+]
+# NOTE(review): SCREEN_WIDTH/SCREEN_HEIGHT are not used for the window size computed below.
+SCREEN_WIDTH = 400
+SCREEN_HEIGHT = 400
+CELL_SIZE = 40  # edge length of one grid cell, in pixels
+
+# RGB colour tuples
+YELLOW = (255, 255, 0)
+RED = (255, 0, 0)
+WHITE = (255, 255, 255)
+BLUE = (0, 0, 255)
+BLACK = (0, 0, 0)
+
+# Labyrinth dimensions in cells
+ROWS = len(LABYRINTH_INIT)
+COLS = len(LABYRINTH_INIT[0])
+
+# NOTE(review): runs at import time - the window opens before pygame.init() and even when conf.show_game is False.
+screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
\ No newline at end of file
diff --git a/conf.py b/data/conf.py
similarity index 56%
rename from conf.py
rename to data/conf.py
index 3ac202a..5abd12a 100644
--- a/conf.py
+++ b/data/conf.py
@@ -1,3 +1,8 @@
+from data.classes import StateIndexer
+
+
+indexer = StateIndexer()
+
 EPSILON = 0.01
 # EPSILON = 0.005
 ALPHA = 0.2
@@ -7,8 +12,9 @@ AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5
 
 REWARD_ON_WIN = 400
+REWARD_ON_HALF = 50
 REWARD_ON_LOSE = -250
 
 plot_result = True
-show_game = False
+show_game = True
 show_trained = True
\ No newline at end of file
diff --git a/main.py b/main.py
index d7e7d00..85ae2d9 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,7 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
-import conf
+import data.conf as conf