From a965dc07ce63f958dccfc27c7aa3a8030a3ff83b Mon Sep 17 00:00:00 2001 From: 2wenty1ne Date: Tue, 9 Dec 2025 18:17:05 +0100 Subject: [PATCH] working version 1.0 --- ReinforcmentLearning/game.py | 64 ++++++++++++++++++++------------ ReinforcmentLearning/learning.py | 45 +++++++--------------- ReinforcmentLearning/util.py | 8 ++-- main.py | 8 ++-- 4 files changed, 63 insertions(+), 62 deletions(-) diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py index 0eabfe3..ad3944b 100644 --- a/ReinforcmentLearning/game.py +++ b/ReinforcmentLearning/game.py @@ -101,10 +101,9 @@ class Ghost: -def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game): +def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game): #? Learning initial - q_values = {} - initial_q_fill(q_values) + q_values = initial_q_fill() #? Game initial pygame.init() @@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, s screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) pygame.display.set_caption("Micro-Pacman") + #? Start try cookies_per_run = [] + iterations = [] for x in range(AMOUNT_RUNS): - amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen) + # if show_game: + if x == AMOUNT_RUNS / 4: + print("1 / 4 done") + + if x == AMOUNT_RUNS / 2: + print("2 / 4 done") + + if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4): + print("3 / 4 done") + amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen) cookies_per_run.append(amount_cookies_ate) + iterations.append(iterations_per_run) pygame.quit() - return cookies_per_run + return cookies_per_run, iterations + -# Main game function def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen): clock = pygame.time.Clock() labyrinth = LABYRINTH_INIT.copy() @@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho reward = 0 #? -------------------------MY CODE----------------------------------- - if show_game: - screen.fill(BLACK) - - iter = iter + 1 # Handle events for event in pygame.event.get(): if event.type == pygame.QUIT: running = False + if show_game: + screen.fill(BLACK) + + + iter = iter + 1 + # Handle Pacman movement keys = pygame.key.get_pressed() if keys[pygame.K_LEFT]: @@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho if keys[pygame.K_DOWN]: pacman.move(0, 1) + #? -------------------------MY CODE----------------------------------- action = epsilon_greedy(q_values, state, EPSILON) if action == Direction.LEFT: @@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho pacman.move(labyrinth, 0, 1) #? -------------------------MY CODE----------------------------------- + if iter%3==0: - # Ghost moves towards Pacman ghost.move_towards_pacman(labyrinth, pacman) - # Check for collisions (game over if ghost catches pacman) if pacman.x == ghost.x and pacman.y == ghost.y: - # print("Game Over! The ghost caught Pacman.") + if show_game: + print("Game Over! The ghost caught Pacman.") running = False reward = REWARD_ON_LOSE @@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:] #? 
-------------------------MY CODE----------------------------------- - cookie_counter = 0 + #? half reward + # cookie_counter = 0 - for y, row in enumerate(labyrinth): - for x, cell in enumerate(row): - if cell == ".": - cookie_counter += 1 + # for y, row in enumerate(labyrinth): + # for x, cell in enumerate(row): + # if cell == ".": + # cookie_counter += 1 - if cookie_counter == 10: - reward = REWARD_ON_HALF - if show_game: - print("Half reward") + # if cookie_counter == 10: + # # reward = REWARD_ON_HALF + # if show_game: + # print("Got half reward") #? -------------------------MY CODE----------------------------------- @@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho for x, cell in enumerate(row): if cell == ".": counter += 1 - return 20-counter + return 20-counter, iter #? -------------------------MY CODE----------------------------------- @@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho pygame.display.flip() # Cap the frame rate - clock.tick(20) + clock.tick(40) + def draw_labyrinth(screen, labyrinth): diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py index 004a8ca..a31bbbd 100644 --- a/ReinforcmentLearning/learning.py +++ b/ReinforcmentLearning/learning.py @@ -2,8 +2,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from ReinforcmentLearning.game import run_game, wrapper -from ReinforcmentLearning.util import initial_q_fill +from ReinforcmentLearning.game import start_try @@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl q_value: (state, action) """ - cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game) - - # if show_game: - # if x == AMOUNT_RUNS / 4: - # print("1 / 4 done") - - # if x == AMOUNT_RUNS / 2: - # print("2 / 4 done") - - # if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4): - # print("3 / 4 done") - + cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game) wins = sum(1 for result in cookies_per_run if result == 20) @@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%") if plot_result: - plot_results(cookies_per_run) + plot_results(cookies_per_run, iterations) return cookies_per_run, wins -def plot_results(cookies_per_run): +def plot_results(cookies_per_run, iterations): wins = [] losses = [] win_count = 0 @@ -63,29 +51,22 @@ def plot_results(cookies_per_run): wins.append(win_count) losses.append((i + 1) - win_count) # Losses count down from top - # Last 700 attempts - last_700_wins = wins[-700:] if len(wins) >= 700 else wins - last_700_losses = losses[-700:] if len(losses) >= 700 else losses - last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1)) - - # Create figure with 2 subplots + # Create figure with 2 subplots (wins/losses and iterations) fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8)) - # Plot 1: All attempts (with thicker lines: linewidth=1.5) + # Plot 1: Wins vs Losses ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins') ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses') ax1.set_xlabel('Attempt') ax1.set_ylabel('Count') - ax1.set_title('All Attempts: Wins vs Losses') + ax1.set_title('Wins vs Losses Over Time') ax1.legend() - # Plot 2: Last 700 attempts (with thicker lines: 
linewidth=1.5) - ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins') - ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses') - ax2.set_xlabel('Attempt') - ax2.set_ylabel('Count') - ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses') - ax2.legend() + # Plot 2: Iterations per run + ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5) + ax2.set_xlabel('Run Number') + ax2.set_ylabel('Iterations') + ax2.set_title('Iterations per Run') plt.tight_layout() - plt.show() + plt.show() \ No newline at end of file diff --git a/ReinforcmentLearning/util.py b/ReinforcmentLearning/util.py index 59cfaee..35ac1ba 100644 --- a/ReinforcmentLearning/util.py +++ b/ReinforcmentLearning/util.py @@ -8,14 +8,17 @@ class Direction(Enum): LEFT = 3 -def initial_q_fill(q_values): +def initial_q_fill(): + q_values = {} + for x in range(-7, 8): for y in range(-2, 3): for cookie_direction in Direction: for action in Direction: state = (x, y, cookie_direction) q_values[(state, action)] = random.random() * 0.2 - 0.1 - + + return q_values @@ -109,7 +112,6 @@ def get_best_q_action(q_values, state): for (q_state, q_action), value in q_values.items(): if q_state == state: actions_for_epsilon.append(q_action) - if best_value is None: best_value = value best_action = q_action diff --git a/main.py b/main.py index 068283e..f14f8d5 100644 --- a/main.py +++ b/main.py @@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry # EPSILON = 0.1618 EPSILON = 0.01 -ALPHA = 0.01 -GAMMA = 0.2713 +# ALPHA = 0.01 +ALPHA = 0.2 +# GAMMA = 0.2713 +GAMMA = 0.8 AMOUNT_RUNS = 5000 AMOUNT_TRIES = 5 @@ -14,7 +16,7 @@ REWARD_ON_WIN = 400 REWARD_ON_LOSE = -250 plot_result = True -show_game = True +show_game = False oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
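
Note on the learning code this patch touches: the Q-table built by initial_q_fill is a flat dict keyed by ((x, y, cookie_direction), action), and run_game picks moves via epsilon_greedy while get_best_q_action scans that dict for the best-valued entry. The diff does not show epsilon_greedy or the Q-update step itself, so the following is a minimal sketch of a standard epsilon-greedy selection and tabular Q-learning update consistent with the EPSILON/ALPHA/GAMMA values set in main.py; it illustrates the technique and is not necessarily the exact code in util.py.

    import random
    from ReinforcmentLearning.util import Direction, get_best_q_action

    def epsilon_greedy(q_values, state, epsilon):
        # Sketch: explore a random direction with probability epsilon,
        # otherwise exploit the best known action for this state.
        if random.random() < epsilon:
            return random.choice(list(Direction))
        return get_best_q_action(q_values, state)

    def q_update(q_values, state, action, reward, next_state, alpha, gamma):
        # Sketch of the standard tabular Q-learning update:
        #   Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        # Assumes every (state, action) pair was pre-filled by initial_q_fill,
        # as the loop over x, y, cookie_direction and action in util.py does.
        best_next = max(q_values[(next_state, a)] for a in Direction)
        old = q_values[(state, action)]
        q_values[(state, action)] = old + alpha * (reward + gamma * best_next - old)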
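
A small robustness note on the quarter-progress prints added to start_try: the check x == AMOUNT_RUNS / 4 compares an int against a float and only fires when AMOUNT_RUNS is divisible by 4 (true for the default 5000). A hedged alternative that works for any run count, written here with a hypothetical helper name rather than as part of the patch:

    def report_progress(x, amount_runs):
        # Print once when the run index reaches each quarter boundary,
        # using integer arithmetic so it fires for any amount_runs.
        for quarter in (1, 2, 3):
            if x == (amount_runs * quarter) // 4:
                print(f"{quarter} / 4 done")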