From 1082c90feab5551e8cb8ad71fe7033d27ffa97d4 Mon Sep 17 00:00:00 2001 From: 2wenty1ne Date: Wed, 10 Dec 2025 11:49:20 +0100 Subject: [PATCH] refactor --- ReinforcmentLearning/game.py | 147 +++---------------------------- ReinforcmentLearning/learning.py | 2 +- ReinforcmentLearning/util.py | 36 ++++++-- data/classes.py | 111 +++++++++++++++++++++++ data/classes_consts.py | 29 ++++++ conf.py => data/conf.py | 8 +- main.py | 2 +- 7 files changed, 194 insertions(+), 141 deletions(-) create mode 100644 data/classes.py create mode 100644 data/classes_consts.py rename conf.py => data/conf.py (56%) diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py index be58510..753450f 100644 --- a/ReinforcmentLearning/game.py +++ b/ReinforcmentLearning/game.py @@ -2,116 +2,24 @@ import pygame import math import os -from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill -import conf - -# Initialize pygame - -# Define constants -SCREEN_WIDTH = 400 -SCREEN_HEIGHT = 400 -CELL_SIZE = 40 - -# Define colors -YELLOW = (255, 255, 0) -RED = (255, 0, 0) -WHITE = (255, 255, 255) -BLUE = (0, 0, 255) -BLACK = (0, 0, 0) - -REWARD_ON_HALF = 50 - -# Labyrinth as a string -LABYRINTH_INIT = [ - "##########", - "#........#", - "#.##..##.#", - "#........#", - "##########" -] - -# Get labyrinth dimensions -ROWS = len(LABYRINTH_INIT) -COLS = len(LABYRINTH_INIT[0]) - - - -class Pacman: - def __init__(self, screen, x, y): - self.screen = screen - self.x = x - self.y = y - self.count = 0 - - def move(self, labyrinth, dx, dy): - new_x, new_y = self.x + dx, self.y + dy - if labyrinth[new_y][new_x] != "#": - self.x = new_x - self.y = new_y - - def draw(self): - radius = CELL_SIZE // 2 - 4 - start_angle = math.pi / 6 - end_angle = -math.pi / 6 - pygame.draw.circle(self.screen, YELLOW, (self.x * CELL_SIZE + CELL_SIZE // 2, self.y * CELL_SIZE + CELL_SIZE // 2), CELL_SIZE // 2 - 4) - # Calculate the points for the mouth - start_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(start_angle)), - self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(start_angle))) - end_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(end_angle)), - self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(end_angle))) - self.count += 1 - if self.count%2==0: - # Draw the mouth by filling a polygon - pygame.draw.polygon(self.screen, BLACK, [(self.x* CELL_SIZE + CELL_SIZE // 2, self.y* CELL_SIZE + CELL_SIZE // 2), start_pos, end_pos]) - - -class Ghost: - # Define the pixel art for the ghost using strings - ghost_pixels = [ - " #### ", - "######", - "## # #", - "######", - "######", - "# # # " - ] - - def __init__(self, screen, x, y): - self.screen = screen - self.x = x - self.y = y - - def move_towards_pacman(self, labyrinth, pacman): - if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#": - self.x += 1 - elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#": - self.x -= 1 - elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#": - self.y += 1 - elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#": - self.y -= 1 - - def draw(self): - pixel_size = CELL_SIZE // len(self.ghost_pixels) # Size of each pixel in the ghost art - for row_idx, row in enumerate(self.ghost_pixels): - for col_idx, pixel in enumerate(row): - if pixel == "#": - pixel_x = self.x * CELL_SIZE + col_idx * pixel_size - pixel_y = self.y * CELL_SIZE + row_idx * pixel_size - pygame.draw.rect(self.screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size)) - +from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill +import data.classes_consts as consts +import data.conf as conf +# import data.classes as classes +from data.classes import Pacman, Ghost def start_try(EPSILON, ALPHA, GAMMA): #? Learning initial q_values = initial_q_fill() + print(len(q_values)) #? Game initial pygame.init() screen = None if conf.show_game: - screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) + screen = consts.screen pygame.display.set_caption("Micro-Pacman") #? Start try @@ -136,7 +44,7 @@ def start_try(EPSILON, ALPHA, GAMMA): print(f"Run {x+1}: {iterations_per_run} iterations") if conf.show_trained: - screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) + screen = consts.screen pygame.display.set_caption("Micro-Pacman") while True: @@ -152,24 +60,21 @@ def start_try(EPSILON, ALPHA, GAMMA): def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): clock = pygame.time.Clock() - labyrinth = LABYRINTH_INIT.copy() + labyrinth = consts.LABYRINTH_INIT.copy() # Initialize Pacman and Ghost positions pacman = Pacman(screen, 1, 1) - ghost = Ghost(screen, COLS - 2, ROWS - 2) + ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2) - #? -------------------------MY CODE----------------------------------- state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y) - #? -------------------------MY CODE----------------------------------- #? GAME LOOP running = True iter = 0 while running: - #? -------------------------MY CODE----------------------------------- reward = 0 - #? -------------------------MY CODE----------------------------------- + iter = iter + 1 # Handle events for event in pygame.event.get(): @@ -177,12 +82,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): running = False if conf.show_game: - screen.fill(BLACK) + screen.fill(consts.BLACK) - - iter = iter + 1 - - # Handle Pacman movement + #? Arrow key movements keys = pygame.key.get_pressed() if keys[pygame.K_LEFT]: pacman.move(-1, 0) @@ -193,8 +95,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): if keys[pygame.K_DOWN]: pacman.move(0, 1) - - #? -------------------------MY CODE----------------------------------- + #? Agent movements action = epsilon_greedy(q_values, state, EPSILON) if action == Direction.LEFT: pacman.move(labyrinth, -1, 0) @@ -204,7 +105,6 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): pacman.move(labyrinth, 0, -1) if action == Direction.DOWN: pacman.move(labyrinth, 0, 1) - #? -------------------------MY CODE----------------------------------- if iter%3==0: @@ -220,9 +120,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): if labyrinth[pacman.y][pacman.x] == ".": labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:] - #? -------------------------MY CODE----------------------------------- #? half reward - # cookie_counter = 0 # for y, row in enumerate(labyrinth): @@ -234,10 +132,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): # # reward = REWARD_ON_HALF # if show_game: # print("Got half reward") - #? -------------------------MY CODE----------------------------------- - # Check if all cookies are eaten (game over) if all("." not in row for row in labyrinth): # time_reward = calc_time_reward(iter) # reward = REWARD_ON_WIN * time_reward @@ -245,11 +141,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): running = False if conf.show_game: - # print(f"You Win! Took {iter} iterations, reward: {time_reward}") print(f"You Win! Took {iter} iterations") - #? -------------------------MY CODE----------------------------------- if not running: new_state = state else: @@ -271,10 +165,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): if cell == ".": counter += 1 return 20-counter, iter - #? -------------------------MY CODE----------------------------------- - # Draw the labyrinth, pacman, and ghost if conf.show_game: draw_labyrinth(screen, labyrinth) pacman.draw() @@ -287,16 +179,5 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen): clock.tick(40) - -def draw_labyrinth(screen, labyrinth): - for y, row in enumerate(labyrinth): - for x, cell in enumerate(row): - if cell == "#": - pygame.draw.rect(screen, BLUE, (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE)) - elif cell == ".": - pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5) - - - if __name__ == "__main__": run_game() \ No newline at end of file diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py index 6f9170c..287259e 100644 --- a/ReinforcmentLearning/learning.py +++ b/ReinforcmentLearning/learning.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd from ReinforcmentLearning.game import start_try -import conf +import data.conf as conf diff --git a/ReinforcmentLearning/util.py b/ReinforcmentLearning/util.py index dc670e2..75807de 100644 --- a/ReinforcmentLearning/util.py +++ b/ReinforcmentLearning/util.py @@ -1,5 +1,10 @@ from enum import Enum import random +import pygame + +import numpy as np + +import data.classes_consts as consts class Direction(Enum): UP = 0 @@ -17,17 +22,24 @@ def initial_q_fill(): for action in Direction: state = (x, y, cookie_direction) q_values[(state, action)] = random.random() * 0.2 - 0.1 + # q_values[state][action] = random.random() * 0.2 - 0.1 return q_values +def initial_q_fill2(): + indexer = consts.indexer -def get_start_state(): - first_direction_cookie = random.choice([True, False]) - if first_direction_cookie: - return (7, 2, Direction.DOWN) + """Initialize Q-table using linear indexing""" + # Create 2D array: [state_index, action] + # 300 states × 4 actions = 1200 entries + q_table = np.random.uniform( + low=-0.1, + high=0.1, + size=(indexer.total_states, 4) # 300 × 4 + ) - return(7, 2, Direction.RIGHT) + return q_table, indexer @@ -140,3 +152,17 @@ def calc_time_reward(amount_iterations): return 1 return - (1 / 1000) * amount_iterations + 11 + + + +def draw_labyrinth(screen, labyrinth): + CELL_SIZE = consts.CELL_SIZE + BLUE = consts.BLUE + WHITE = consts.WHITE + + for y, row in enumerate(labyrinth): + for x, cell in enumerate(row): + if cell == "#": + pygame.draw.rect(screen, BLUE, (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE)) + elif cell == ".": + pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5) diff --git a/data/classes.py b/data/classes.py new file mode 100644 index 0000000..c4a7f2c --- /dev/null +++ b/data/classes.py @@ -0,0 +1,111 @@ +import math +import pygame + +from data.classes_consts import CELL_SIZE, YELLOW, BLACK, RED + + +class Pacman: + def __init__(self, screen, x, y): + self.screen = screen + self.x = x + self.y = y + self.count = 0 + + def move(self, labyrinth, dx, dy): + new_x, new_y = self.x + dx, self.y + dy + if labyrinth[new_y][new_x] != "#": + self.x = new_x + self.y = new_y + + def draw(self): + radius = CELL_SIZE // 2 - 4 + start_angle = math.pi / 6 + end_angle = -math.pi / 6 + pygame.draw.circle(self.screen, YELLOW, (self.x * CELL_SIZE + CELL_SIZE // 2, self.y * CELL_SIZE + CELL_SIZE // 2), CELL_SIZE // 2 - 4) + # Calculate the points for the mouth + start_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(start_angle)), + self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(start_angle))) + end_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(end_angle)), + self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(end_angle))) + self.count += 1 + if self.count%2==0: + # Draw the mouth by filling a polygon + pygame.draw.polygon(self.screen, BLACK, [(self.x* CELL_SIZE + CELL_SIZE // 2, self.y* CELL_SIZE + CELL_SIZE // 2), start_pos, end_pos]) + + +class Ghost: + # Define the pixel art for the ghost using strings + ghost_pixels = [ + " #### ", + "######", + "## # #", + "######", + "######", + "# # # " + ] + + def __init__(self, screen, x, y): + self.screen = screen + self.x = x + self.y = y + + def move_towards_pacman(self, labyrinth, pacman): + if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#": + self.x += 1 + elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#": + self.x -= 1 + elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#": + self.y += 1 + elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#": + self.y -= 1 + + def draw(self): + pixel_size = CELL_SIZE // len(self.ghost_pixels) # Size of each pixel in the ghost art + for row_idx, row in enumerate(self.ghost_pixels): + for col_idx, pixel in enumerate(row): + if pixel == "#": + pixel_x = self.x * CELL_SIZE + col_idx * pixel_size + pixel_y = self.y * CELL_SIZE + row_idx * pixel_size + pygame.draw.rect(self.screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size)) + + + +class StateIndexer: + """Converts (x, y, cookie_dir) states to unique indices""" + def __init__(self): + # State space boundaries + self.x_min, self.x_max = -7, 7 # 15 values: -7 to 7 inclusive + self.y_min, self.y_max = -2, 2 # 5 values: -2 to 2 inclusive + self.dir_min, self.dir_max = 0, 3 # 4 directions: 0 to 3 + + # Ranges + self.x_range = self.x_max - self.x_min + 1 # 15 + self.y_range = self.y_max - self.y_min + 1 # 5 + self.dir_range = self.dir_max - self.dir_min + 1 # 4 + + # Multipliers for indexing + self.y_dir_product = self.y_range * self.dir_range # 5 * 4 = 20 + self.total_states = self.x_range * self.y_dir_product # 15 * 20 = 300 + + def to_index(self, x, y, cookie_dir): + """Convert state to unique index 0..299""" + # Convert to zero-based indices + x_idx = x - self.x_min # -7→0, -6→1, ..., 7→14 + y_idx = y - self.y_min # -2→0, -1→1, ..., 2→4 + dir_idx = cookie_dir - self.dir_min # 0→0, 1→1, 2→2, 3→3 + + # Linear mapping: (x * y_range * dir_range) + (y * dir_range) + dir + return (x_idx * self.y_dir_product) + (y_idx * self.dir_range) + dir_idx + + def from_index(self, idx): + """Convert index back to state""" + dir_idx = idx % self.dir_range + idx //= self.dir_range + y_idx = idx % self.y_range + x_idx = idx // self.y_range + + return ( + x_idx + self.x_min, + y_idx + self.y_min, + dir_idx + self.dir_min + ) diff --git a/data/classes_consts.py b/data/classes_consts.py new file mode 100644 index 0000000..0e1b890 --- /dev/null +++ b/data/classes_consts.py @@ -0,0 +1,29 @@ +import pygame + + + +LABYRINTH_INIT = [ + "##########", + "#........#", + "#.##..##.#", + "#........#", + "##########" +] + +SCREEN_WIDTH = 400 +SCREEN_HEIGHT = 400 +CELL_SIZE = 40 + +# Define colors +YELLOW = (255, 255, 0) +RED = (255, 0, 0) +WHITE = (255, 255, 255) +BLUE = (0, 0, 255) +BLACK = (0, 0, 0) + +# Get labyrinth dimensions +ROWS = len(LABYRINTH_INIT) +COLS = len(LABYRINTH_INIT[0]) + + +screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) \ No newline at end of file diff --git a/conf.py b/data/conf.py similarity index 56% rename from conf.py rename to data/conf.py index 3ac202a..5abd12a 100644 --- a/conf.py +++ b/data/conf.py @@ -1,3 +1,8 @@ +from data.classes import StateIndexer + + +indexer = StateIndexer() + EPSILON = 0.01 # EPSILON = 0.005 ALPHA = 0.2 @@ -7,8 +12,9 @@ AMOUNT_RUNS = 5000 AMOUNT_TRIES = 5 REWARD_ON_WIN = 400 +REWARD_ON_HALF = 50 REWARD_ON_LOSE = -250 plot_result = True -show_game = False +show_game = True show_trained = True \ No newline at end of file diff --git a/main.py b/main.py index d7e7d00..85ae2d9 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ from GenTunic.gen_tuning import gen_tuning_main from ReinforcmentLearning.learning import multipleTries, oneTry from ReinforcmentLearning.util import calc_time_reward -import conf +import data.conf as conf