working version 1.0

2025-12-09 18:17:05 +01:00 · 2025-12-09 18:17:05 +01:00 · a965dc07ce
parent ca843080fe
commit a965dc07ce
4 changed files with 63 additions and 62 deletions
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@@ -101,10 +101,9 @@ class Ghost:
-def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
    #? Learning initial
-    q_values = {}
+    q_values = initial_q_fill()
    initial_q_fill(q_values)
    #? Game initial
    pygame.init()
@@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, s
        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
        pygame.display.set_caption("Micro-Pacman")
    #? Start try
    cookies_per_run = []
    iterations = []
    for x in range(AMOUNT_RUNS):
-        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+        # if show_game:
        if x == AMOUNT_RUNS / 4:
            print("1 / 4 done")
        if x == AMOUNT_RUNS / 2:
            print("2 / 4 done")
        if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
            print("3 / 4 done")
        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
        cookies_per_run.append(amount_cookies_ate)
        iterations.append(iterations_per_run)
    pygame.quit()
-    return cookies_per_run
+    return cookies_per_run, iterations
 # Main game function
 def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
    clock = pygame.time.Clock()
    labyrinth = LABYRINTH_INIT.copy()
@@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
        reward = 0
        #? -------------------------MY CODE-----------------------------------
        if show_game:
            screen.fill(BLACK)
        iter = iter + 1
        # Handle events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
        if show_game:
            screen.fill(BLACK)
        iter = iter + 1
        # Handle Pacman movement
        keys = pygame.key.get_pressed()
        if keys[pygame.K_LEFT]:
@@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
        if keys[pygame.K_DOWN]:
            pacman.move(0, 1)
        #? -------------------------MY CODE-----------------------------------
        action = epsilon_greedy(q_values, state, EPSILON)
        if action == Direction.LEFT:
@@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
            pacman.move(labyrinth, 0, 1)
        #? -------------------------MY CODE-----------------------------------
        if iter%3==0:
            # Ghost moves towards Pacman
            ghost.move_towards_pacman(labyrinth, pacman)
        # Check for collisions (game over if ghost catches pacman)
        if pacman.x == ghost.x and pacman.y == ghost.y:
-            # print("Game Over! The ghost caught Pacman.")
+            if show_game:
                print("Game Over! The ghost caught Pacman.")
            running = False
            reward = REWARD_ON_LOSE
@@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
            labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
        #? -------------------------MY CODE-----------------------------------
-            cookie_counter = 0
+            #? half reward
            # cookie_counter = 0
-            for y, row in enumerate(labyrinth):
+            # for y, row in enumerate(labyrinth):
-                for x, cell in enumerate(row):
+            #     for x, cell in enumerate(row):
-                    if cell == ".":
+            #         if cell == ".":
-                        cookie_counter += 1
+            #             cookie_counter += 1
-            if cookie_counter == 10:
+            # if cookie_counter == 10:
-                reward = REWARD_ON_HALF
+            #     # reward = REWARD_ON_HALF
-                if show_game:
+            #     if show_game:
-                    print("Half reward")
+            #         print("Got half reward")
        #? -------------------------MY CODE-----------------------------------
@@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
                for x, cell in enumerate(row):
                    if cell == ".":
                        counter += 1
-            return 20-counter
+            return 20-counter, iter
        #? -------------------------MY CODE-----------------------------------
@@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
            pygame.display.flip()
            # Cap the frame rate
-            clock.tick(20)
+            clock.tick(40)
 def draw_labyrinth(screen, labyrinth):
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@@ -2,8 +2,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from ReinforcmentLearning.game import run_game, wrapper
+from ReinforcmentLearning.game import start_try
 from ReinforcmentLearning.util import initial_q_fill
@@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
    q_value: (state, action)
    """
-    cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
+    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
        # if show_game:
        #     if x == AMOUNT_RUNS / 4:
        #         print("1 / 4 done")
        #     if x == AMOUNT_RUNS / 2:
        #         print("2 / 4 done")
        #     if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
        #         print("3 / 4 done")
    wins = sum(1 for result in cookies_per_run if result == 20)
@@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
    print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
    if plot_result:
-        plot_results(cookies_per_run)
+        plot_results(cookies_per_run, iterations)
    return cookies_per_run, wins
-def plot_results(cookies_per_run):
+def plot_results(cookies_per_run, iterations):
    wins = []
    losses = []
    win_count = 0
@@ -63,29 +51,22 @@ def plot_results(cookies_per_run):
        wins.append(win_count)
        losses.append((i + 1) - win_count)  # Losses count down from top
-    # Last 700 attempts
+    # Create figure with 2 subplots (wins/losses and iterations)
    last_700_wins = wins[-700:] if len(wins) >= 700 else wins
    last_700_losses = losses[-700:] if len(losses) >= 700 else losses
    last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
    # Create figure with 2 subplots
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
-    # Plot 1: All attempts (with thicker lines: linewidth=1.5)
+    # Plot 1: Wins vs Losses
    ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
    ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
    ax1.set_xlabel('Attempt')
    ax1.set_ylabel('Count')
-    ax1.set_title('All Attempts: Wins vs Losses')
+    ax1.set_title('Wins vs Losses Over Time')
    ax1.legend()
-    # Plot 2: Last 700 attempts (with thicker lines: linewidth=1.5)
+    # Plot 2: Iterations per run
-    ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins')
+    ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5)
-    ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses')
+    ax2.set_xlabel('Run Number')
-    ax2.set_xlabel('Attempt')
+    ax2.set_ylabel('Iterations')
-    ax2.set_ylabel('Count')
+    ax2.set_title('Iterations per Run')
    ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
    ax2.legend()
    plt.tight_layout()
    plt.show()
--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@@ -8,7 +8,9 @@ class Direction(Enum):
    LEFT = 3
-def initial_q_fill(q_values):
+def initial_q_fill():
    q_values = {}
    for x in range(-7, 8):
        for y in range(-2, 3):
            for cookie_direction in Direction:
@@ -16,6 +18,7 @@ def initial_q_fill(q_values):
                    state = (x, y, cookie_direction)
                    q_values[(state, action)] = random.random() * 0.2 - 0.1
    return q_values
@@ -109,7 +112,6 @@ def get_best_q_action(q_values, state):
    for (q_state, q_action), value in q_values.items():
        if q_state == state:
            actions_for_epsilon.append(q_action)
            if best_value is None:
                best_value = value
                best_action = q_action
--- a/main.py
+++ b/main.py
@@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 # EPSILON = 0.1618
 EPSILON = 0.01
-ALPHA = 0.01
+# ALPHA = 0.01
-GAMMA = 0.2713
+ALPHA = 0.2
 # GAMMA = 0.2713
 GAMMA = 0.8
 AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5
@@ -14,7 +16,7 @@ REWARD_ON_WIN = 400
 REWARD_ON_LOSE = -250
 plot_result = True
-show_game = True
+show_game = False
 oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)