commit a965dc07ce
parent ca843080fe

    working version 1.0
ReinforcmentLearning/game.py

@@ -101,10 +101,9 @@ class Ghost:
-def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
     #? Learning initial
-    q_values = {}
-    initial_q_fill(q_values)
+    q_values = initial_q_fill()
 
     #? Game initial
     pygame.init()
@@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
     screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
     pygame.display.set_caption("Micro-Pacman")
 
     #? Start try
     cookies_per_run = []
+    iterations = []
     for x in range(AMOUNT_RUNS):
-        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+        # if show_game:
+        if x == AMOUNT_RUNS / 4:
+            print("1 / 4 done")
+
+        if x == AMOUNT_RUNS / 2:
+            print("2 / 4 done")
+
+        if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            print("3 / 4 done")
+
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
         cookies_per_run.append(amount_cookies_ate)
+        iterations.append(iterations_per_run)
 
     pygame.quit()
 
-    return cookies_per_run
+    return cookies_per_run, iterations
 
 
 # Main game function
 def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
     clock = pygame.time.Clock()
     labyrinth = LABYRINTH_INIT.copy()
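Note on the new progress prints: `AMOUNT_RUNS / 4` is true division in Python 3, so `x == AMOUNT_RUNS / 4` only ever matches when AMOUNT_RUNS is divisible by 4 (it is for the 5000 set in main.py). A minimal sketch of the same milestones with integer division — hypothetical, not part of this commit:

def print_progress(x, amount_runs):
    # Integer division keeps the milestones as ints, so the equality
    # check fires even when amount_runs is not divisible by 4.
    for quarter in (1, 2, 3):
        if x == (amount_runs * quarter) // 4:
            print(f"{quarter} / 4 done")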
@@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         reward = 0
         #? -------------------------MY CODE-----------------------------------
 
-        if show_game:
-            screen.fill(BLACK)
-
-        iter = iter + 1
         # Handle events
         for event in pygame.event.get():
             if event.type == pygame.QUIT:
                 running = False
 
+        if show_game:
+            screen.fill(BLACK)
+
+        iter = iter + 1
+
         # Handle Pacman movement
         keys = pygame.key.get_pressed()
         if keys[pygame.K_LEFT]:
@@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         if keys[pygame.K_DOWN]:
             pacman.move(0, 1)
 
+
         #? -------------------------MY CODE-----------------------------------
         action = epsilon_greedy(q_values, state, EPSILON)
         if action == Direction.LEFT:
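The body of `epsilon_greedy` is outside this diff. It is presumably the standard exploration rule: with probability EPSILON take a random action, otherwise the greedy one. A minimal sketch consistent with the call site here and with `Direction` / `get_best_q_action` from util — an assumption, not the repo's actual implementation:

import random

def epsilon_greedy(q_values, state, epsilon):
    # Explore with probability epsilon: pick a uniformly random direction.
    if random.random() < epsilon:
        return random.choice(list(Direction))
    # Otherwise exploit: the action with the highest Q-value for this state.
    return get_best_q_action(q_values, state)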
@@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
             pacman.move(labyrinth, 0, 1)
         #? -------------------------MY CODE-----------------------------------
 
         if iter%3==0:
             # Ghost moves towards Pacman
             ghost.move_towards_pacman(labyrinth, pacman)
 
         # Check for collisions (game over if ghost catches pacman)
         if pacman.x == ghost.x and pacman.y == ghost.y:
-            # print("Game Over! The ghost caught Pacman.")
+            if show_game:
+                print("Game Over! The ghost caught Pacman.")
             running = False
             reward = REWARD_ON_LOSE
@@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
             labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
 
         #? -------------------------MY CODE-----------------------------------
-        cookie_counter = 0
+        #? half reward
+        # cookie_counter = 0
 
-        for y, row in enumerate(labyrinth):
-            for x, cell in enumerate(row):
-                if cell == ".":
-                    cookie_counter += 1
+        # for y, row in enumerate(labyrinth):
+        #     for x, cell in enumerate(row):
+        #         if cell == ".":
+        #             cookie_counter += 1
 
-        if cookie_counter == 10:
-            reward = REWARD_ON_HALF
-            if show_game:
-                print("Half reward")
+        # if cookie_counter == 10:
+        #     # reward = REWARD_ON_HALF
+        #     if show_game:
+        #         print("Got half reward")
         #? -------------------------MY CODE-----------------------------------
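This hunk disables the intermediate shaping reward that fired once half of the 20 cookies were gone. If it were re-enabled, the remaining-cookie count could be computed in one pass instead of the nested loops; a hypothetical compact version, reusing the REWARD_ON_HALF constant from the old code:

# Count remaining cookies in a single pass over the labyrinth rows.
remaining = sum(row.count(".") for row in labyrinth)
if remaining == 10:  # half of the 20 cookies are left
    reward = REWARD_ON_HALF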
@@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         for x, cell in enumerate(row):
             if cell == ".":
                 counter += 1
-    return 20-counter
+    return 20-counter, iter
     #? -------------------------MY CODE-----------------------------------
@@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         pygame.display.flip()
 
         # Cap the frame rate
-        clock.tick(20)
+        clock.tick(40)
+
 
 
 def draw_labyrinth(screen, labyrinth):
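Raising the cap from `clock.tick(20)` to `clock.tick(40)` doubles the maximum step rate. Since the cap only matters when someone is watching, a hypothetical variant would skip it entirely during headless training:

# Only throttle when the game is actually rendered, so headless
# training runs as fast as the CPU allows.
if show_game:
    clock.tick(40)  # cap at 40 frames per second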
ReinforcmentLearning/learning.py
@@ -2,8 +2,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
-from ReinforcmentLearning.game import run_game, wrapper
-from ReinforcmentLearning.util import initial_q_fill
+from ReinforcmentLearning.game import start_try
 
 
@@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
     q_value: (state, action)
     """
 
-    cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
-
-    # if show_game:
-    # if x == AMOUNT_RUNS / 4:
-    #     print("1 / 4 done")
-
-    # if x == AMOUNT_RUNS / 2:
-    #     print("2 / 4 done")
-
-    # if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
-    #     print("3 / 4 done")
-
+    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
 
     wins = sum(1 for result in cookies_per_run if result == 20)
@@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
     print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
 
     if plot_result:
-        plot_results(cookies_per_run)
+        plot_results(cookies_per_run, iterations)
 
     return cookies_per_run, wins
 
 
-def plot_results(cookies_per_run):
+def plot_results(cookies_per_run, iterations):
     wins = []
     losses = []
     win_count = 0
@@ -63,29 +51,22 @@ def plot_results(cookies_per_run):
         wins.append(win_count)
         losses.append((i + 1) - win_count)  # Losses count down from top
 
-    # Last 700 attempts
-    last_700_wins = wins[-700:] if len(wins) >= 700 else wins
-    last_700_losses = losses[-700:] if len(losses) >= 700 else losses
-    last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
-
-    # Create figure with 2 subplots
+    # Create figure with 2 subplots (wins/losses and iterations)
     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
 
-    # Plot 1: All attempts (with thicker lines: linewidth=1.5)
+    # Plot 1: Wins vs Losses
     ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
     ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
     ax1.set_xlabel('Attempt')
     ax1.set_ylabel('Count')
-    ax1.set_title('All Attempts: Wins vs Losses')
+    ax1.set_title('Wins vs Losses Over Time')
     ax1.legend()
 
-    # Plot 2: Last 700 attempts (with thicker lines: linewidth=1.5)
-    ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins')
-    ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses')
-    ax2.set_xlabel('Attempt')
-    ax2.set_ylabel('Count')
-    ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
-    ax2.legend()
+    # Plot 2: Iterations per run
+    ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5)
+    ax2.set_xlabel('Run Number')
+    ax2.set_ylabel('Iterations')
+    ax2.set_title('Iterations per Run')
 
     plt.tight_layout()
-    plt.show()
+    plt.show()
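The head of the accumulation loop sits just above this hunk. From the visible lines and the `result == 20` win test in oneTry, it is presumably equivalent to this sketch:

# Reconstruction of the loop head above wins.append (not shown in the hunk):
win_count = 0
for i, result in enumerate(cookies_per_run):
    if result == 20:  # a run that ate all 20 cookies counts as a win
        win_count += 1
    wins.append(win_count)              # cumulative wins after run i+1
    losses.append((i + 1) - win_count)  # everything else so far is a loss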
ReinforcmentLearning/util.py
@@ -8,14 +8,17 @@ class Direction(Enum):
     LEFT = 3
 
 
-def initial_q_fill(q_values):
+def initial_q_fill():
+    q_values = {}
+
     for x in range(-7, 8):
         for y in range(-2, 3):
             for cookie_direction in Direction:
                 for action in Direction:
                     state = (x, y, cookie_direction)
                     q_values[(state, action)] = random.random() * 0.2 - 0.1
 
+    return q_values
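The signature change flips the calling convention: the caller no longer allocates the dict for `initial_q_fill` to mutate; the function now builds and returns the table itself (1200 entries: 15 x-offsets × 5 y-offsets × 4 cookie directions × 4 actions, each initialized uniformly in [-0.1, 0.1)). Side by side, as the matching hunk in game.py shows:

# Before this commit: caller-allocated table, filled by mutation.
q_values = {}
initial_q_fill(q_values)

# After this commit: the function owns and returns the table.
q_values = initial_q_fill()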
@@ -109,7 +112,6 @@ def get_best_q_action(q_values, state):
     for (q_state, q_action), value in q_values.items():
         if q_state == state:
-            actions_for_epsilon.append(q_action)
 
             if best_value is None:
                 best_value = value
                 best_action = q_action
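With the `actions_for_epsilon` bookkeeping removed, `get_best_q_action` reduces to a pure argmax over the table. A plausible complete version consistent with the visible fragments; the comparison branch is assumed, since the hunk only shows the first-match case:

def get_best_q_action(q_values, state):
    # Linear scan over all (state, action) keys, keeping the
    # highest-valued action for the requested state.
    best_value = None
    best_action = None
    for (q_state, q_action), value in q_values.items():
        if q_state == state:
            if best_value is None or value > best_value:
                best_value = value
                best_action = q_action
    return best_action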
main.py
@@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 
 # EPSILON = 0.1618
 EPSILON = 0.01
-ALPHA = 0.01
-GAMMA = 0.2713
+# ALPHA = 0.01
+ALPHA = 0.2
+# GAMMA = 0.2713
+GAMMA = 0.8
 
 AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5
@@ -14,7 +16,7 @@ REWARD_ON_WIN = 400
 REWARD_ON_LOSE = -250
 
 plot_result = True
-show_game = True
+show_game = False
 
 
 oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
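The new hyperparameters trade the old tiny learning rate and heavy discounting (ALPHA = 0.01, GAMMA = 0.2713) for faster updates and a longer credit horizon (ALPHA = 0.2, GAMMA = 0.8), with rendering off for the full 5000-run training. For orientation, both constants feed the standard tabular Q-learning update, sketched here generically; the repo's actual update function is not part of this diff:

def q_update(q_values, state, action, reward, next_state, alpha, gamma):
    # Move Q(s, a) toward the bootstrapped target r + gamma * max_a' Q(s', a').
    best_next = max(q_values[(next_state, a)] for a in Direction)
    target = reward + gamma * best_next
    q_values[(state, action)] += alpha * (target - q_values[(state, action)])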