diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py
index fb1e803..f7cc00c 100644
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@@ -12,8 +12,6 @@
 SCREEN_WIDTH = 400
 SCREEN_HEIGHT = 400
 CELL_SIZE = 40
-os.environ['SDL_VIDEODRIVER'] = 'dummy'
-
 # Define colors
 YELLOW = (255, 255, 0)
 RED = (255, 0, 0)
@@ -21,6 +19,8 @@
 WHITE = (255, 255, 255)
 BLUE = (0, 0, 255)
 BLACK = (0, 0, 0)
+REWARD_ON_HALF = 50
+
 # Labyrinth as a string
 labyrinth_init = [
     "##########",
@@ -37,9 +37,9 @@
 ROWS = len(labyrinth)
 COLS = len(labyrinth[0])
 # Initialize game screen
-# screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
-# pygame.display.set_caption("Micro-Pacman")
+screen = None
 # screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+# pygame.display.set_caption("Micro-Pacman")

 # Pacman class
 class Pacman:
@@ -115,24 +115,34 @@ def draw_labyrinth(labyrinth):


 # Main game function
-def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
     clock = pygame.time.Clock()
     labyrinth = labyrinth_init.copy()

+    if show_game:
+        pygame.display.set_caption("Micro-Pacman")
+        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+
+    # Initialize Pacman and Ghost positions
     pacman = Pacman(1, 1)
     ghost = Ghost(COLS - 2, ROWS - 2)

     #? -------------------------MY CODE-----------------------------------
     state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
-    reward = 0
     #? -------------------------MY CODE-----------------------------------

     #? GAME LOOP
     running = True
     iter = 0
     while running:
-        #screen.fill(BLACK)
+        #? -------------------------MY CODE-----------------------------------
+        reward = 0
+        #? -------------------------MY CODE-----------------------------------
+
+        if show_game:
+            screen.fill(BLACK)
+
         iter = iter + 1

         # Handle events
         for event in pygame.event.get():
@@ -176,9 +186,25 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):

         if labyrinth[pacman.y][pacman.x] == ".":
             labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
+            #? -------------------------MY CODE-----------------------------------
+            cookie_counter = 0
+
+            for y, row in enumerate(labyrinth):
+                for x, cell in enumerate(row):
+                    if cell == ".":
+                        cookie_counter += 1
+
+            if cookie_counter == 10:
+                reward = REWARD_ON_HALF
+                if show_game:
+                    print("Half reward")
+            #? -------------------------MY CODE-----------------------------------
+
+
         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
-            # print("You Win! Pacman ate all the cookies.")
+            if show_game:
+                print("You Win! Pacman ate all the cookies.")
             reward = REWARD_ON_WIN
             running = False
@@ -208,15 +234,16 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
             return 20-counter
         #? -------------------------MY CODE-----------------------------------

-        #draw_labyrinth(labyrinth)
-        #pacman.draw()
-        #ghost.draw()
+        if show_game:
+            draw_labyrinth(labyrinth)
+            pacman.draw()
+            ghost.draw()

-        # Update display
-        # pygame.display.flip()
+            # Update display
+            pygame.display.flip()

-        # Cap the frame rate
-        # clock.tick(5)
+            # Cap the frame rate
+            clock.tick(20)

     pygame.quit()

diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py
index d4c511d..26836c3 100644
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@@ -12,7 +12,8 @@ def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WI
     wins_per_try = []

     for x in range(AMOUNT_TRIES):
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        plot_result = False
+        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result)
         cookies_per_run.append(cookies_per_run)
         wins_per_try.append(amount_wins)
         # print(f"Finished try {x+1}\n")
@@ -20,7 +21,7 @@
     return cookies_per_try, wins_per_try


-def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
     """
     state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
     action: Direction
@@ -34,29 +35,54 @@
     cookies_per_run = []

     # Amount of single runs
     for x in range(AMOUNT_RUNS):
-        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
+        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
         cookies_per_run.append(amount_cookies_ate)

-    wins = 0
-    for element in cookies_per_run:
-        if element == 20:
-            wins += 1
-        toAdd = 1 if element == 20 else 0
-
+    wins = sum(1 for result in cookies_per_run if result == 20)
+
+    print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
+    if plot_result:
+        plot_results(cookies_per_run)
+
     return cookies_per_run, wins


-def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
-    # print("---------DONE---------")
-    # print("Used: ")
-    # print(f"Epsilon: {EPSILON}")
-    # print(f"Gamma: {GAMMA}")
-    # print(f"Alpha: {ALPHA}")
+def plot_results(cookies_per_run):
+    wins = []
+    losses = []
+    win_count = 0

-    # print("---------SUMMARY---------")
     print(f"Average win percantage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")
-    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
-    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")
+    for i, r in enumerate(cookies_per_run):
+        if r == 20:
+            win_count += 1
+        wins.append(win_count)
+        losses.append((i + 1) - win_count) # Losses count down from top
+    # Last 700 attempts
+    last_700_wins = wins[-700:] if len(wins) >= 700 else wins
+    last_700_losses = losses[-700:] if len(losses) >= 700 else losses
+    last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
+
+    # Create figure with 2 subplots
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
+
+    # Plot 1: All attempts (with thicker lines: linewidth=1.5)
+    ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
+    ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
+    ax1.set_xlabel('Attempt')
+    ax1.set_ylabel('Count')
+    ax1.set_title('All Attempts: Wins vs Losses')
+    ax1.legend()
+
+    # Plot 2: Last 700 attempts (with thicker lines: linewidth=1.5)
+    ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins')
+    ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses')
+    ax2.set_xlabel('Attempt')
+    ax2.set_ylabel('Count')
+    ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
+    ax2.legend()
+
+    plt.tight_layout()
+    plt.show()

diff --git a/main.py b/main.py
index 1a63542..e11684d 100644
--- a/main.py
+++ b/main.py
@@ -1,17 +1,22 @@
 from GenTunic.gen_tuning import gen_tuning_main
-from ReinforcmentLearning.learning import multipleTries
+from ReinforcmentLearning.learning import multipleTries, oneTry

-EPSILON = 0.1
-ALPHA = 0.4
-GAMMA = 0.8
+# EPSILON = 0.1618
+EPSILON = 0.01
+ALPHA = 0.01
+GAMMA = 0.2713

 AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5
-REWARD_ON_WIN = 10
+REWARD_ON_WIN = 100
 REWARD_ON_LOSE = -450

+plot_result = True
+show_game = False

-multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+
+oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 #gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
\ No newline at end of file
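
For readers coming to this diff cold: the q_values table and the EPSILON, ALPHA and GAMMA parameters threaded through run_game belong to a tabular Q-learning agent. The update rule itself is not part of this diff, so the sketch below is only an illustration of how such a table is typically driven; choose_action, update_q and the ACTIONS list are hypothetical names invented for this note, not identifiers from the repository.

import random

# Hypothetical action set; the real game presumably uses the Direction values from game.py.
ACTIONS = ["up", "down", "left", "right"]


def choose_action(q_values, state, EPSILON):
    # Epsilon-greedy: with probability EPSILON (or for an unseen state) explore a
    # random action, otherwise exploit the action with the highest Q-value.
    if state not in q_values or random.random() < EPSILON:
        return random.choice(ACTIONS)
    return max(q_values[state], key=q_values[state].get)


def update_q(q_values, state, action, reward, next_state, ALPHA, GAMMA):
    # One tabular Q-learning step:
    #   Q(s, a) <- Q(s, a) + ALPHA * (reward + GAMMA * max_a' Q(s', a') - Q(s, a))
    current = q_values.setdefault(state, {a: 0.0 for a in ACTIONS})
    next_best = max(q_values.get(next_state, {a: 0.0 for a in ACTIONS}).values())
    current[action] += ALPHA * (reward + GAMMA * next_best - current[action])

Inside the game loop, run_game would call something like choose_action before moving Pacman and something like update_q once the next state and reward (REWARD_ON_HALF, REWARD_ON_WIN or REWARD_ON_LOSE) are known. Note also that the new plot_results uses plt, so learning.py presumably already imports matplotlib.pyplot as plt; that import is not part of this diff.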