diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py
index 753450f..7472787 100644
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@@ -5,14 +5,12 @@ import os
 from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill
 import data.classes_consts as consts
 import data.conf as conf
-# import data.classes as classes
 from data.classes import Pacman, Ghost
 
 
 def start_try(EPSILON, ALPHA, GAMMA):
     #? Learning initial
     q_values = initial_q_fill()
-    print(len(q_values))
 
     #? Game initial
     pygame.init()
@@ -41,7 +39,7 @@ def start_try(EPSILON, ALPHA, GAMMA):
         cookies_per_run.append(amount_cookies_ate)
         iterations.append(iterations_per_run)
 
-        print(f"Run {x+1}: {iterations_per_run} iterations")
+        # print(f"Run {x+1}: {iterations_per_run} iterations")
 
     if conf.show_trained:
         screen = consts.screen
@@ -62,7 +60,6 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
     clock = pygame.time.Clock()
     labyrinth = consts.LABYRINTH_INIT.copy()
 
-    # Initialize Pacman and Ghost positions
    pacman = Pacman(screen, 1, 1)
     ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)
 
@@ -149,12 +146,12 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
             else:
                 new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
 
-                best_action_new_state, _ = get_best_q_action(q_values, new_state)
-                best_value_new_state = q_values[(new_state, best_action_new_state)]
+                best_action_new_state = get_best_q_action(q_values, new_state)
+                best_value_new_state = q_values[new_state][best_action_new_state.value]
 
-            current_value = q_values.get((state, action))
+            current_value = q_values[state][action.value]
             adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
-            q_values[(state, action)] = current_value + adjusted_value
+            q_values[state][action.value] = current_value + adjusted_value
 
             state = new_state
diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py
index 287259e..db5d94e 100644
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@@ -1,5 +1,3 @@
-# import matplotlib
-# matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
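
Note: the run_game hunk above rewrites the tabular Q-learning update for the new Q-table layout: q_values now maps each state tuple to a length-4 numpy array indexed by Direction.value, rather than a flat dict keyed by (state, action) pairs. A minimal sketch of the same update rule in isolation (the enum is a Direction-style stand-in with values 0-3; member names and example states are illustrative, not the repo's actual encoding):

    from enum import Enum

    import numpy as np

    class Direction(Enum):
        UP = 0
        DOWN = 1
        LEFT = 2
        RIGHT = 3

    # New layout: state tuple -> array holding one Q-value per action.
    q_values = {
        (0, 0, Direction.UP): np.zeros(4),
        (1, 0, Direction.UP): np.zeros(4),
    }

    def q_update(q_values, state, action, reward, new_state, alpha, gamma):
        # TD(0) target: reward + gamma * max_a' Q(s', a')
        best_value_new_state = np.max(q_values[new_state])
        current_value = q_values[state][action.value]
        q_values[state][action.value] = current_value + alpha * (
            reward + gamma * best_value_new_state - current_value
        )

np.max(q_values[new_state]) reads the same entry as the patch's get_best_q_action followed by q_values[new_state][best_action_new_state.value], so the two formulations are interchangeable.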
diff --git a/ReinforcmentLearning/util.py b/ReinforcmentLearning/util.py
index 75807de..6236a76 100644
--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@@ -19,29 +19,15 @@ def initial_q_fill():
     for x in range(-7, 8):
         for y in range(-2, 3):
             for cookie_direction in Direction:
-                for action in Direction:
-                    state = (x, y, cookie_direction)
-                    q_values[(state, action)] = random.random() * 0.2 - 0.1
-                    # q_values[state][action] = random.random() * 0.2 - 0.1
+                state = (x, y, cookie_direction)
+                q_values[state] = np.zeros(4)
+
+                for action_idx in range(len(Direction)):
+                    q_values[state][action_idx] = random.random() * 0.2 - 0.1
 
     return q_values
 
 
-def initial_q_fill2():
-    indexer = consts.indexer
-
-    """Initialize Q-table using linear indexing"""
-    # Create 2D array: [state_index, action]
-    # 300 states × 4 actions = 1200 entries
-    q_table = np.random.uniform(
-        low=-0.1,
-        high=0.1,
-        size=(indexer.total_states, 4)  # 300 × 4
-    )
-
-    return q_table, indexer
-
-
 def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
     x_ghost_dist = pac_x - ghost_x
@@ -70,8 +56,11 @@ def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
 
     dx = cookie_x - pac_x
-    dy = cookie_y - pac_y
-
+    dy = cookie_y - pac_y
+    return cords_to_direction(dx, dy)
+
+
+def cords_to_direction(dx, dy):
     if abs(dx) >= abs(dy):
         #? X distance bigger
@@ -102,41 +91,19 @@ def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
 
 
 def epsilon_greedy(q_values, state, epsilon):
-    best_action, actions_for_epsilon = get_best_q_action(q_values, state)
-
     if random.random() < epsilon:
-        if not actions_for_epsilon:
-            best_action = get_random_direction()
-            return best_action
-
-        random_action = random.choice(actions_for_epsilon)
+        random_action = get_random_direction()
         return random_action
 
+    best_action = get_best_q_action(q_values, state)
     return best_action
 
 
 def get_best_q_action(q_values, state):
-    best_action = None
-    best_value = None
+    state_q_values = q_values[state]
+    best_action_index = np.argmax(state_q_values)
 
-    actions_for_epsilon = []
-
-    for (q_state, q_action), value in q_values.items():
-        if q_state == state:
-            actions_for_epsilon.append(q_action)
-            if best_value is None:
-                best_value = value
-                best_action = q_action
-                continue
-
-            if value > best_value:
-                best_value = value
-                best_action = q_action
-
-    if not best_action:
-        best_action = get_random_direction()
-
-    return best_action, actions_for_epsilon
+    return Direction(best_action_index)
 
 
 def get_random_direction():
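
Note: the rewritten epsilon_greedy/get_best_q_action pair implements the standard ε-greedy policy over the per-state value arrays: explore uniformly with probability ε, otherwise take the argmax action. A compact equivalent, assuming the Direction-style enum from the previous note:

    import random

    import numpy as np

    def epsilon_greedy(q_values, state, epsilon):
        # Explore: uniform random action with probability epsilon.
        if random.random() < epsilon:
            return Direction(random.randrange(len(Direction)))
        # Exploit: np.argmax returns the index of the first maximal entry.
        return Direction(int(np.argmax(q_values[state])))

One behavioral nuance of the change: np.argmax breaks ties toward the lowest action index, so tie-breaking is now deterministic, whereas the old linear scan kept the first-seen best action. The per-index loop in initial_q_fill could likewise be collapsed to q_values[state] = np.random.uniform(-0.1, 0.1, len(Direction)).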
diff --git a/clean_game.py b/clean_game.py
deleted file mode 100644
index cdea1a7..0000000
--- a/clean_game.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import pygame
-import random
-import math
-
-# Initialize pygame
-pygame.init()
-
-# Define constants
-SCREEN_WIDTH = 400
-SCREEN_HEIGHT = 400
-CELL_SIZE = 40
-
-# Define colors
-YELLOW = (255, 255, 0)
-RED = (255, 0, 0)
-WHITE = (255, 255, 255)
-BLUE = (0, 0, 255)
-BLACK = (0, 0, 0)
-
-# Labyrinth as a string
-labyrinth = [
-    "##########",
-    "#........#",
-    "#.##..##.#",
-    "#........#",
-    "##########"
-]
-
-# Get labyrinth dimensions
-ROWS = len(labyrinth)
-COLS = len(labyrinth[0])
-
-# Initialize game screen
-screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
-pygame.display.set_caption("Micro-Pacman")
-
-# Pacman class
-class Pacman:
-    def __init__(self, x, y):
-        self.x = x
-        self.y = y
-        self.count = 0
-
-    def move(self, dx, dy):
-        new_x, new_y = self.x + dx, self.y + dy
-        if labyrinth[new_y][new_x] != "#":
-            self.x = new_x
-            self.y = new_y
-
-    def draw(self):
-        radius = CELL_SIZE // 2 - 4
-        start_angle = math.pi / 6
-        end_angle = -math.pi / 6
-        pygame.draw.circle(screen, YELLOW, (self.x * CELL_SIZE + CELL_SIZE // 2, self.y * CELL_SIZE + CELL_SIZE // 2), CELL_SIZE // 2 - 4)
-        # Calculate the points for the mouth
-        start_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(start_angle)),
-                     self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(start_angle)))
-        end_pos = (self.x* CELL_SIZE + CELL_SIZE // 2 + int(radius*1.3 * math.cos(end_angle)),
-                   self.y* CELL_SIZE + CELL_SIZE // 2 - int(radius*1.3 * math.sin(end_angle)))
-        self.count += 1
-        if self.count%2==0:
-            # Draw the mouth by filling a polygon
-            pygame.draw.polygon(screen, BLACK, [(self.x* CELL_SIZE + CELL_SIZE // 2, self.y* CELL_SIZE + CELL_SIZE // 2), start_pos, end_pos])
-
-# Ghost class with pixel art
-class Ghost:
-    # Define the pixel art for the ghost using strings
-    ghost_pixels = [
-        " #### ",
-        "######",
-        "## # #",
-        "######",
-        "######",
-        "# # # "
-    ]
-
-    def __init__(self, x, y):
-        self.x = x
-        self.y = y
-
-    def move_towards_pacman(self, pacman):
-        if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#":
-            self.x += 1
-        elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#":
-            self.x -= 1
-        elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#":
-            self.y += 1
-        elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#":
-            self.y -= 1
-
-    def draw(self):
-        pixel_size = CELL_SIZE // len(self.ghost_pixels)  # Size of each pixel in the ghost art
-        for row_idx, row in enumerate(self.ghost_pixels):
-            for col_idx, pixel in enumerate(row):
-                if pixel == "#":
-                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
-                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
-                    pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
-
-# Draw walls and cookies
-def draw_labyrinth():
-    for y, row in enumerate(labyrinth):
-        for x, cell in enumerate(row):
-            if cell == "#":
-                pygame.draw.rect(screen, BLUE, (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE))
-            elif cell == ".":
-                pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5)
-
-# Main game function
-def main():
-    clock = pygame.time.Clock()
-
-    # Initialize Pacman and Ghost positions
-    pacman = Pacman(1, 1)
-    ghost = Ghost(COLS - 2, ROWS - 2)
-
-    # Game loop
-    running = True
-    iter = 0
-    while running:
-        screen.fill(BLACK)
-        iter = iter + 1
-        # Handle events
-        for event in pygame.event.get():
-            if event.type == pygame.QUIT:
-                running = False
-
-        # Handle Pacman movement
-        keys = pygame.key.get_pressed()
-        if keys[pygame.K_LEFT]:
-            pacman.move(-1, 0)
-        if keys[pygame.K_RIGHT]:
-            pacman.move(1, 0)
-        if keys[pygame.K_UP]:
-            pacman.move(0, -1)
-        if keys[pygame.K_DOWN]:
-            pacman.move(0, 1)
-
-        if iter%3==0:
-            # Ghost moves towards Pacman
-            ghost.move_towards_pacman(pacman)
-
-        # Check for collisions (game over if ghost catches pacman)
-        if pacman.x == ghost.x and pacman.y == ghost.y:
-            print("Game Over! The ghost caught Pacman.")
-            running = False
-
-        # Eat cookies
-        if labyrinth[pacman.y][pacman.x] == ".":
-            labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
-
-        # Check if all cookies are eaten (game over)
-        if all("." not in row for row in labyrinth):
-            print("You Win! Pacman ate all the cookies.")
-            running = False
-
-        # Draw the labyrinth, pacman, and ghost
-        draw_labyrinth()
-        pacman.draw()
-        ghost.draw()
-
-        # Update display
-        pygame.display.flip()
-
-        # Cap the frame rate
-        clock.tick(5)
-
-    pygame.quit()
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/data/classes.py b/data/classes.py
index c4a7f2c..c5c3df8 100644
--- a/data/classes.py
+++ b/data/classes.py
@@ -67,45 +67,3 @@ class Ghost:
                     pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
                     pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
                     pygame.draw.rect(self.screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
-
-
-
-class StateIndexer:
-    """Converts (x, y, cookie_dir) states to unique indices"""
-    def __init__(self):
-        # State space boundaries
-        self.x_min, self.x_max = -7, 7    # 15 values: -7 to 7 inclusive
-        self.y_min, self.y_max = -2, 2    # 5 values: -2 to 2 inclusive
-        self.dir_min, self.dir_max = 0, 3 # 4 directions: 0 to 3
-
-        # Ranges
-        self.x_range = self.x_max - self.x_min + 1  # 15
-        self.y_range = self.y_max - self.y_min + 1  # 5
-        self.dir_range = self.dir_max - self.dir_min + 1  # 4
-
-        # Multipliers for indexing
-        self.y_dir_product = self.y_range * self.dir_range  # 5 * 4 = 20
-        self.total_states = self.x_range * self.y_dir_product  # 15 * 20 = 300
-
-    def to_index(self, x, y, cookie_dir):
-        """Convert state to unique index 0..299"""
-        # Convert to zero-based indices
-        x_idx = x - self.x_min  # -7→0, -6→1, ..., 7→14
-        y_idx = y - self.y_min  # -2→0, -1→1, ..., 2→4
-        dir_idx = cookie_dir - self.dir_min  # 0→0, 1→1, 2→2, 3→3
-
-        # Linear mapping: (x * y_range * dir_range) + (y * dir_range) + dir
-        return (x_idx * self.y_dir_product) + (y_idx * self.dir_range) + dir_idx
-
-    def from_index(self, idx):
-        """Convert index back to state"""
-        dir_idx = idx % self.dir_range
-        idx //= self.dir_range
-        y_idx = idx % self.y_range
-        x_idx = idx // self.y_range
-
-        return (
-            x_idx + self.x_min,
-            y_idx + self.y_min,
-            dir_idx + self.dir_min
-        )
diff --git a/data/classes_consts.py b/data/classes_consts.py
index 0e1b890..eac8764 100644
--- a/data/classes_consts.py
+++ b/data/classes_consts.py
@@ -1,5 +1,7 @@
 import pygame
 
+from data import conf
+
 
 
 LABYRINTH_INIT = [
@@ -26,4 +28,6 @@
 ROWS = len(LABYRINTH_INIT)
 COLS = len(LABYRINTH_INIT[0])
 
-screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
\ No newline at end of file
+screen = None
+if conf.show_game:
+    screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
\ No newline at end of file
diff --git a/data/conf.py b/data/conf.py
index 5abd12a..0e72eb2 100644
--- a/data/conf.py
+++ b/data/conf.py
@@ -1,10 +1,4 @@
-from data.classes import StateIndexer
-
-
-indexer = StateIndexer()
-
-EPSILON = 0.01
-# EPSILON = 0.005
+EPSILON = 0.005
 ALPHA = 0.2
 GAMMA = 0.8
 
@@ -16,5 +10,6 @@ REWARD_ON_HALF = 50
 REWARD_ON_LOSE = -250
 
 plot_result = True
-show_game = True
-show_trained = True
\ No newline at end of file
+show_game = False
+show_trained = False
+
diff --git a/main.py b/main.py
index 85ae2d9..aa43777 100644
--- a/main.py
+++ b/main.py
@@ -5,7 +5,6 @@ import data.conf as conf
 
 
-
 oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
 # multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 # gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
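
Note: the deleted StateIndexer packed each (x, y, cookie_dir) state into a mixed-radix index over a 15 × 5 × 4 grid (300 states) so a flat numpy table could hold the Q-values; with the dict-of-arrays Q-table it became dead code, along with its initial_q_fill2 consumer and the conf.indexer global. For reference, numpy provides the same round trip directly; a sketch under the same state-space bounds:

    import numpy as np

    X_MIN, Y_MIN = -7, -2
    DIMS = (15, 5, 4)  # x in [-7, 7], y in [-2, 2], 4 directions

    def to_index(x, y, cookie_dir):
        # Linear index, equivalent to StateIndexer.to_index.
        return int(np.ravel_multi_index((x - X_MIN, y - Y_MIN, cookie_dir), DIMS))

    def from_index(idx):
        # Inverse mapping, equivalent to StateIndexer.from_index.
        x_idx, y_idx, cookie_dir = np.unravel_index(idx, DIMS)
        return int(x_idx) + X_MIN, int(y_idx) + Y_MIN, int(cookie_dir)

    assert from_index(to_index(3, -1, 2)) == (3, -1, 2)  # round trip holds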