From 93076e24264136ddaf141443fb02e4df347a7eed Mon Sep 17 00:00:00 2001 From: 2wenty1ne Date: Mon, 1 Dec 2025 15:33:28 +0100 Subject: [PATCH] Finished assigment --- game.py | 53 ++++++++++++++++++++---------- main.py | 20 ++++++++---- util.py | 99 +++++++++++++++++++++++++++++++++++++-------------------- 3 files changed, 114 insertions(+), 58 deletions(-) diff --git a/game.py b/game.py index 1417865..5f18516 100644 --- a/game.py +++ b/game.py @@ -1,8 +1,8 @@ import pygame -import random import math +import os -from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value +from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action # Initialize pygame pygame.init() @@ -12,6 +12,8 @@ SCREEN_WIDTH = 400 SCREEN_HEIGHT = 400 CELL_SIZE = 40 +os.environ['SDL_VIDEODRIVER'] = 'dummy' + # Define colors YELLOW = (255, 255, 0) RED = (255, 0, 0) @@ -20,7 +22,7 @@ BLUE = (0, 0, 255) BLACK = (0, 0, 0) # Labyrinth as a string -labyrinth = [ +labyrinth_init = [ "##########", "#........#", "#.##..##.#", @@ -28,13 +30,16 @@ labyrinth = [ "##########" ] +labyrinth = labyrinth_init.copy() + # Get labyrinth dimensions ROWS = len(labyrinth) COLS = len(labyrinth[0]) # Initialize game screen -screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) -pygame.display.set_caption("Micro-Pacman") +# screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) +# pygame.display.set_caption("Micro-Pacman") +screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE)) # Pacman class class Pacman: @@ -100,7 +105,7 @@ class Ghost: pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size)) # Draw walls and cookies -def draw_labyrinth(): +def draw_labyrinth(labyrinth): for y, row in enumerate(labyrinth): for x, cell in enumerate(row): if cell == "#": @@ -110,19 +115,20 @@ def draw_labyrinth(): # Main game function -def main(q_values, EPSILON, ALPHA, GAMMA): +def run_game(q_values, EPSILON, ALPHA, GAMMA): clock = pygame.time.Clock() + labyrinth = labyrinth_init.copy() # Initialize Pacman and Ghost positions pacman = Pacman(1, 1) ghost = Ghost(COLS - 2, ROWS - 2) - # Game loop #? -------------------------MY CODE----------------------------------- state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y) reward = 0 #? -------------------------MY CODE----------------------------------- + #? GAME LOOP running = True iter = 0 while running: @@ -156,7 +162,6 @@ def main(q_values, EPSILON, ALPHA, GAMMA): pacman.move(0, 1) #? -------------------------MY CODE----------------------------------- - if iter%3==0: # Ghost moves towards Pacman ghost.move_towards_pacman(pacman) @@ -165,6 +170,7 @@ def main(q_values, EPSILON, ALPHA, GAMMA): if pacman.x == ghost.x and pacman.y == ghost.y: print("Game Over! The ghost caught Pacman.") running = False + reward = -10 # Eat cookies if labyrinth[pacman.y][pacman.x] == ".": @@ -173,29 +179,44 @@ def main(q_values, EPSILON, ALPHA, GAMMA): # Check if all cookies are eaten (game over) if all("." not in row for row in labyrinth): print("You Win! Pacman ate all the cookies.") + reward = 10 running = False # Draw the labyrinth, pacman, and ghost #? -------------------------MY CODE----------------------------------- - new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y) + if not running: + new_state = state + else: + new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y) - best_value_new_state, _ = get_best_q_value(q_values, new_state) + best_action_new_state, _ = get_best_q_action(q_values, new_state) + best_value_new_state = q_values[(new_state, best_action_new_state)] - current_value = q_values.get((state, action), 0) + current_value = q_values.get((state, action)) adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value) q_values[(state, action)] = current_value + adjusted_value + + state = new_state + + if not running: + counter = 0 + for y, row in enumerate(labyrinth): + for x, cell in enumerate(row): + if cell == ".": + counter += 1 + return 20-counter #? -------------------------MY CODE----------------------------------- - draw_labyrinth() + draw_labyrinth(labyrinth) pacman.draw() ghost.draw() # Update display - pygame.display.flip() + # pygame.display.flip() # Cap the frame rate - clock.tick(1) + clock.tick(10000) pygame.quit() if __name__ == "__main__": - main() \ No newline at end of file + run_game() \ No newline at end of file diff --git a/main.py b/main.py index 3faf4ff..0187b46 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,13 @@ -from util import epsilon_greedy, get_start_state, test +import matplotlib.pyplot as plt + +from game import run_game +from util import initial_q_fill -AMOUNT_RUNS = 10 +AMOUNT_RUNS = 5000 EPSILON = 0.1 ALPHA = 0.1 -GAMMA = 0.1 +GAMMA = 0.9 """ @@ -13,12 +16,15 @@ action: Direction q_value: (state, action) """ q_values = {} +initial_q_fill(q_values) +cookies_per_run = [] # Amount of single runs for x in range(AMOUNT_RUNS): - state = get_start_state() + amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA) + cookies_per_run.append(amount_cookies_ate) + print(f"Run {x}: {amount_cookies_ate} cookies ate\n") - # Single run, until win or death - while(True): - action = epsilon_greedy(q_values, state, EPSILON) +plt.plot(cookies_per_run) +plt.show() diff --git a/util.py b/util.py index ce3373b..f632371 100644 --- a/util.py +++ b/util.py @@ -8,6 +8,17 @@ class Direction(Enum): LEFT = 3 +def initial_q_fill(q_values): + for x in range(8): + for y in range(3): + for cookie_direction in Direction: + for action in Direction: + state = (x, y, cookie_direction) + q_values[(state, action)] = random.random() * 0.2 - 0.1 + + + + def get_start_state(): first_direction_cookie = random.choice([True, False]) if first_direction_cookie: @@ -18,68 +29,86 @@ def get_start_state(): def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y): - # distance pacman - ghost - x_dist = abs(pac_x - ghost_x) - y_dist = abs(pac_y - ghost_y) + x_ghost_dist = abs(pac_x - ghost_x) + y_ghost_dist = abs(pac_y - ghost_y) - # closest cookie - best_distance = 12 - best_cords = None + cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y) - all_cookie_locations = get_all_cookies_locations(labyrinth) - for (cookie_x, cookie_y) in all_cookie_locations: - dist = abs(pac_x - cookie_x) + abs(pac_y - cookie_y) - - if dist < best_distance: - best_distance = dist - best_cords = (cookie_x, cookie_y) - - # closest cookie direction - cookie_direction = None - real_dist_x = pac_x - cookie_x - real_dist_y = pac_y - cookie_y - - #TODO - if real_dist_x >= 0 & real_dist_y > 0: - cookie_direction = Direction - - return x_dist, y_dist, cookie_direction + return x_ghost_dist, y_ghost_dist, cookie_direction -def get_all_cookies_locations(labyrinth): - cookie_locations = [] +def get_closest_cookie_direction(labyrinth, pac_x, pac_y): + cookie_distances = [] + for y, row in enumerate(labyrinth): for x, cell in enumerate(row): if cell == ".": - cookie_locations.append((x, y)) + x_dist = abs(pac_x - x) + y_dist = abs(pac_y - y) + dist = x_dist + y_dist + cookie_distances.append((dist, (x, y))) - return cookie_locations + closest_cookie = min(cookie_distances, key=lambda item: item[0]) + closest_cookie_cords = closest_cookie[1] + cookie_x = closest_cookie_cords[0] + cookie_y = closest_cookie_cords[1] + + + dx = cookie_x - pac_x + dy = cookie_y - pac_y + + if abs(dx) >= abs(dy): + #? X distance bigger + + if dy > 0: + return Direction.DOWN + elif dy < 0: + return Direction.UP + else: + #? Cookie on the same Y level + if dx > 0: + return Direction.RIGHT + else: + return Direction.LEFT + else: + #? Y distance bigger + + if dx > 0: + return Direction.RIGHT + elif dx < 0: + return Direction.LEFT + else: + #? Cookie on the same X level + if dy > 0: + return Direction.DOWN + else: + return Direction.UP def epsilon_greedy(q_values, state, epsilon): - best_action, states_for_epsilon = get_best_q_value(q_values, state) + best_action, actions_for_epsilon = get_best_q_action(q_values, state) if random.random() < epsilon: - if not states_for_epsilon: + if not actions_for_epsilon: best_action = get_random_direction() return best_action - random_action = random.choice(states_for_epsilon) + random_action = random.choice(actions_for_epsilon) return random_action return best_action -def get_best_q_value(q_values, state): +def get_best_q_action(q_values, state): best_action = None best_value = None - states_for_epsilon = [] + actions_for_epsilon = [] for (q_state, q_action), value in q_values.items(): if q_state == state: - states_for_epsilon.append(q_action) + actions_for_epsilon.append(q_action) if best_value is None: best_value = value @@ -93,7 +122,7 @@ def get_best_q_value(q_values, state): if not best_action: best_action = get_random_direction() - return best_action, states_for_epsilon + return best_action, actions_for_epsilon def get_random_direction():