Use NumPy arrays for per-state Q-values; refactor and remove unused code

2025-12-10 18:54:43 +01:00 · 2025-12-10 18:54:43 +01:00 · 8aeb8c1449
parent 1082c90fea
commit 8aeb8c1449
8 changed files with 29 additions and 282 deletions
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@ -5,14 +5,12 @@ import os
 from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill
 import data.classes_consts as consts
 import data.conf as conf
-# import data.classes as classes
 from data.classes import Pacman, Ghost


 def start_try(EPSILON, ALPHA, GAMMA):
    #? Learning initial
    q_values = initial_q_fill()
-    print(len(q_values))
    
    #? Game initial
    pygame.init()
@ -41,7 +39,7 @@ def start_try(EPSILON, ALPHA, GAMMA):
        cookies_per_run.append(amount_cookies_ate)
        iterations.append(iterations_per_run)

-        print(f"Run {x+1}: {iterations_per_run} iterations")
+        # print(f"Run {x+1}: {iterations_per_run} iterations")

    if conf.show_trained:
        screen = consts.screen
@ -62,7 +60,6 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
    clock = pygame.time.Clock()
    labyrinth = consts.LABYRINTH_INIT.copy()

-
    # Initialize Pacman and Ghost positions
    pacman = Pacman(screen, 1, 1)
    ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)
@ -149,12 +146,12 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
        else:
            new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)

-        best_action_new_state, _ = get_best_q_action(q_values, new_state)
-        best_value_new_state = q_values[(new_state, best_action_new_state)]
+        best_action_new_state = get_best_q_action(q_values, new_state)
+        best_value_new_state = q_values[new_state][best_action_new_state.value]

-        current_value = q_values.get((state, action))
+        current_value = q_values[state][action.value]
        adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
-        q_values[(state, action)] = current_value + adjusted_value
+        q_values[state][action.value] = current_value + adjusted_value

        state = new_state

--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@ -1,5 +1,3 @@
-# import matplotlib
-# matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@ -19,29 +19,15 @@ def initial_q_fill():
    for x in range(-7, 8):
        for y in range(-2, 3):
            for cookie_direction in Direction:
-                for action in Direction:
-                    state = (x, y, cookie_direction)
-                    q_values[(state, action)] = random.random() * 0.2 - 0.1
-                    # q_values[state][action] = random.random() * 0.2 - 0.1
+                state = (x, y, cookie_direction)
+                q_values[state] = np.zeros(4)
+
+                for action_idx in range(len(Direction)):
+                    q_values[state][action_idx] = random.random() * 0.2 - 0.1

    return q_values


-def initial_q_fill2():
-    indexer = consts.indexer
-
-    """Initialize Q-table using linear indexing"""    
-    # Create 2D array: [state_index, action]
-    # 300 states × 4 actions = 1200 entries
-    q_table = np.random.uniform(
-        low=-0.1, 
-        high=0.1, 
-        size=(indexer.total_states, 4)  # 300 × 4
-    )
-    
-    return q_table, indexer
-
-

 def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    x_ghost_dist = pac_x - ghost_x
@ -70,8 +56,11 @@ def get_closest_cookie_direction(labyrinth, pac_x, pac_y):


    dx = cookie_x - pac_x
-    dy = cookie_y - pac_y 
-    
+    dy = cookie_y - pac_y
+    return cords_to_direction(dx, dy)
+
+
+def cords_to_direction(dx, dy):
    if abs(dx) >= abs(dy):
        #? X distance bigger

@ -102,41 +91,19 @@ def get_closest_cookie_direction(labyrinth, pac_x, pac_y):


 def epsilon_greedy(q_values, state, epsilon):
-    best_action, actions_for_epsilon = get_best_q_action(q_values, state)
-
    if random.random() < epsilon:
-        if not actions_for_epsilon:
-            best_action = get_random_direction()
-            return best_action
-
-        random_action = random.choice(actions_for_epsilon)
+        random_action = get_random_direction()
        return random_action

+    best_action = get_best_q_action(q_values, state)
    return best_action


 def get_best_q_action(q_values, state):
-    best_action = None
-    best_value = None
+    state_q_values = q_values[state]
+    best_action_index = np.argmax(state_q_values)

-    actions_for_epsilon = []
-
-    for (q_state, q_action), value in q_values.items():
-        if q_state == state:
-            actions_for_epsilon.append(q_action)
-            if best_value is None:
-                best_value = value
-                best_action = q_action
-                continue
-
-            if value > best_value:
-                best_value = value
-                best_action = q_action
-
-    if not best_action:
-        best_action = get_random_direction()
-
-    return best_action, actions_for_epsilon
+    return Direction(best_action_index)


 def get_random_direction():
--- a/clean_game.py
+++ b/clean_game.py
@ -1,171 +0,0 @@
-import pygame
-import random
-import math
-
-# Initialize pygame
-pygame.init()
-
-# Define constants
-SCREEN_WIDTH = 400
-SCREEN_HEIGHT = 400
-CELL_SIZE = 40
-
-# Define colors
-YELLOW = (255, 255, 0)
-RED = (255, 0, 0)
-WHITE = (255, 255, 255)
-BLUE = (0, 0, 255)
-BLACK = (0, 0, 0)
-
-# Labyrinth as a string
-labyrinth = [
-    "##########",
-    "#........#",
-    "#.##..##.#",
-    "#........#",
-    "##########"
-]
-
-# Get labyrinth dimensions
-ROWS = len(labyrinth)
-COLS = len(labyrinth[0])
-
-# Initialize game screen
-screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
-pygame.display.set_caption("Micro-Pacman")
-
-# Pacman class
-class Pacman:
-    def __init__(self, x, y):
-        self.x = x
-        self.y = y
-        self.count = 0
-
-    def move(self, dx, dy):
-        new_x, new_y = self.x + dx, self.y + dy
-        if labyrinth[new_y][new_x] != "#":
-            self.x = new_x
-            self.y = new_y
-
-    def draw(self):
-        radius = CELL_SIZE // 2 - 4
-        start_angle = math.pi / 6
-        end_angle = -math.pi / 6
-        pygame.draw.circle(screen, YELLOW, (self.x * CELL_SIZE + CELL_SIZE // 2, self.y * CELL_SIZE + CELL_SIZE // 2), CELL_SIZE // 2 - 4)
-            # Calculate the points for the mouth
-        start_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(start_angle)),
-                     self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(start_angle)))
-        end_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(end_angle)),
-                   self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(end_angle)))
-        self.count += 1
-        if self.count%2==0:
-            # Draw the mouth by filling a polygon
-            pygame.draw.polygon(screen, BLACK, [(self.x* CELL_SIZE + CELL_SIZE // 2, self.y* CELL_SIZE + CELL_SIZE // 2), start_pos, end_pos])
-
-# Ghost class with pixel art
-class Ghost:
-    # Define the pixel art for the ghost using strings
-    ghost_pixels = [
-        " #### ",
-        "######",
-        "## # #",
-        "######",
-        "######",
-        "# # # "
-    ]
-
-    def __init__(self, x, y):
-        self.x = x
-        self.y = y
-
-    def move_towards_pacman(self, pacman):
-        if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#":
-            self.x += 1
-        elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#":
-            self.x -= 1
-        elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#":
-            self.y += 1
-        elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#":
-            self.y -= 1
-
-    def draw(self):
-        pixel_size = CELL_SIZE // len(self.ghost_pixels)  # Size of each pixel in the ghost art
-        for row_idx, row in enumerate(self.ghost_pixels):
-            for col_idx, pixel in enumerate(row):
-                if pixel == "#":
-                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
-                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
-                    pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
-
-# Draw walls and cookies
-def draw_labyrinth():
-    for y, row in enumerate(labyrinth):
-        for x, cell in enumerate(row):
-            if cell == "#":
-                pygame.draw.rect(screen, BLUE, (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE))
-            elif cell == ".":
-                pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5)
-
-# Main game function
-def main():
-    clock = pygame.time.Clock()
-
-    # Initialize Pacman and Ghost positions
-    pacman = Pacman(1, 1)
-    ghost = Ghost(COLS - 2, ROWS - 2)
-
-    # Game loop
-    running = True
-    iter = 0
-    while running:
-        screen.fill(BLACK)
-        iter = iter + 1
-        # Handle events
-        for event in pygame.event.get():
-            if event.type == pygame.QUIT:
-                running = False
-
-        # Handle Pacman movement
-        keys = pygame.key.get_pressed()
-        if keys[pygame.K_LEFT]:
-            pacman.move(-1, 0)
-        if keys[pygame.K_RIGHT]:
-            pacman.move(1, 0)
-        if keys[pygame.K_UP]:
-            pacman.move(0, -1)
-        if keys[pygame.K_DOWN]:
-            pacman.move(0, 1)
-
-        if iter%3==0:
-            # Ghost moves towards Pacman
-            ghost.move_towards_pacman(pacman)
-
-        # Check for collisions (game over if ghost catches pacman)
-        if pacman.x == ghost.x and pacman.y == ghost.y:
-            print("Game Over! The ghost caught Pacman.")
-            running = False
-
-        # Eat cookies
-        if labyrinth[pacman.y][pacman.x] == ".":
-            labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
-
-        # Check if all cookies are eaten (game over)
-        if all("." not in row for row in labyrinth):
-            print("You Win! Pacman ate all the cookies.")
-            running = False
-
-        # Draw the labyrinth, pacman, and ghost
-        draw_labyrinth()
-        pacman.draw()
-        ghost.draw()
-
-        # Update display
-        pygame.display.flip()
-
-        # Cap the frame rate
-        clock.tick(5)
-
-    pygame.quit()
-
-if __name__ == "__main__":
-    main()
--- a/data/classes.py
+++ b/data/classes.py
@ -67,45 +67,3 @@ class Ghost:
                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
                    pygame.draw.rect(self.screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
-
-
-
-class StateIndexer:
-    """Converts (x, y, cookie_dir) states to unique indices"""
-    def __init__(self):
-        # State space boundaries
-        self.x_min, self.x_max = -7, 7      # 15 values: -7 to 7 inclusive
-        self.y_min, self.y_max = -2, 2      # 5 values: -2 to 2 inclusive
-        self.dir_min, self.dir_max = 0, 3   # 4 directions: 0 to 3
-        
-        # Ranges
-        self.x_range = self.x_max - self.x_min + 1  # 15
-        self.y_range = self.y_max - self.y_min + 1  # 5
-        self.dir_range = self.dir_max - self.dir_min + 1  # 4
-        
-        # Multipliers for indexing
-        self.y_dir_product = self.y_range * self.dir_range  # 5 * 4 = 20
-        self.total_states = self.x_range * self.y_dir_product  # 15 * 20 = 300
-        
-    def to_index(self, x, y, cookie_dir):
-        """Convert state to unique index 0..299"""
-        # Convert to zero-based indices
-        x_idx = x - self.x_min          # -7→0, -6→1, ..., 7→14
-        y_idx = y - self.y_min          # -2→0, -1→1, ..., 2→4
-        dir_idx = cookie_dir - self.dir_min  # 0→0, 1→1, 2→2, 3→3
-        
-        # Linear mapping: (x * y_range * dir_range) + (y * dir_range) + dir
-        return (x_idx * self.y_dir_product) + (y_idx * self.dir_range) + dir_idx
-    
-    def from_index(self, idx):
-        """Convert index back to state"""
-        dir_idx = idx % self.dir_range
-        idx //= self.dir_range
-        y_idx = idx % self.y_range
-        x_idx = idx // self.y_range
-        
-        return (
-            x_idx + self.x_min,
-            y_idx + self.y_min,
-            dir_idx + self.dir_min
-        )
--- a/data/classes_consts.py
+++ b/data/classes_consts.py
@ -1,5 +1,7 @@
 import pygame

+from data import conf
+


 LABYRINTH_INIT = [
@ -26,4 +28,6 @@ ROWS = len(LABYRINTH_INIT)
 COLS = len(LABYRINTH_INIT[0])


-screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+screen = None
+if conf.show_game:
+    screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
--- a/data/conf.py
+++ b/data/conf.py
@ -1,10 +1,4 @@
-from data.classes import StateIndexer
-
-
-indexer = StateIndexer()
-
-EPSILON = 0.01
-# EPSILON = 0.005
+EPSILON = 0.005
 ALPHA = 0.2
 GAMMA = 0.8

@ -16,5 +10,6 @@ REWARD_ON_HALF = 50
 REWARD_ON_LOSE = -250

 plot_result = True
-show_game = True
-show_trained = True
+show_game = False
+show_trained = False
+
--- a/main.py
+++ b/main.py
@ -5,7 +5,6 @@ import data.conf as conf



-
 oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
 # multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 # gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)