From a965dc07ce63f958dccfc27c7aa3a8030a3ff83b Mon Sep 17 00:00:00 2001 From: 2wenty1ne Date: Tue, 9 Dec 2025 18:17:05 +0100 Subject: [PATCH] working version 1.0 --- ReinforcmentLearning/game.py | 64 ++++++++++++++++++++------------ ReinforcmentLearning/learning.py | 45 +++++++--------------- ReinforcmentLearning/util.py | 8 ++-- main.py | 8 ++-- 4 files changed, 63 insertions(+), 62 deletions(-) diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py index 0eabfe3..ad3944b 100644 --- a/ReinforcmentLearning/game.py +++ b/ReinforcmentLearning/game.py @@ -101,10 +101,9 @@ class Ghost: -def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game): +def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game): #? Learning initial - q_values = {} - initial_q_fill(q_values) + q_values = initial_q_fill() #? Game initial pygame.init() @@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, s screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) pygame.display.set_caption("Micro-Pacman") + #? Start try cookies_per_run = [] + iterations = [] for x in range(AMOUNT_RUNS): - amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen) + # if show_game: + if x == AMOUNT_RUNS / 4: + print("1 / 4 done") + + if x == AMOUNT_RUNS / 2: + print("2 / 4 done") + + if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4): + print("3 / 4 done") + amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen) cookies_per_run.append(amount_cookies_ate) + iterations.append(iterations_per_run) pygame.quit() - return cookies_per_run + return cookies_per_run, iterations + -# Main game function def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen): clock = pygame.time.Clock() labyrinth = LABYRINTH_INIT.copy() @@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho reward = 0 #? -------------------------MY CODE----------------------------------- - if show_game: - screen.fill(BLACK) - - iter = iter + 1 # Handle events for event in pygame.event.get(): if event.type == pygame.QUIT: running = False + if show_game: + screen.fill(BLACK) + + + iter = iter + 1 + # Handle Pacman movement keys = pygame.key.get_pressed() if keys[pygame.K_LEFT]: @@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho if keys[pygame.K_DOWN]: pacman.move(0, 1) + #? -------------------------MY CODE----------------------------------- action = epsilon_greedy(q_values, state, EPSILON) if action == Direction.LEFT: @@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho pacman.move(labyrinth, 0, 1) #? -------------------------MY CODE----------------------------------- + if iter%3==0: - # Ghost moves towards Pacman ghost.move_towards_pacman(labyrinth, pacman) - # Check for collisions (game over if ghost catches pacman) if pacman.x == ghost.x and pacman.y == ghost.y: - # print("Game Over! The ghost caught Pacman.") + if show_game: + print("Game Over! The ghost caught Pacman.") running = False reward = REWARD_ON_LOSE @@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:] #? 
-------------------------MY CODE----------------------------------- - cookie_counter = 0 + #? half reward + # cookie_counter = 0 - for y, row in enumerate(labyrinth): - for x, cell in enumerate(row): - if cell == ".": - cookie_counter += 1 + # for y, row in enumerate(labyrinth): + # for x, cell in enumerate(row): + # if cell == ".": + # cookie_counter += 1 - if cookie_counter == 10: - reward = REWARD_ON_HALF - if show_game: - print("Half reward") + # if cookie_counter == 10: + # # reward = REWARD_ON_HALF + # if show_game: + # print("Got half reward") #? -------------------------MY CODE----------------------------------- @@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho for x, cell in enumerate(row): if cell == ".": counter += 1 - return 20-counter + return 20-counter, iter #? -------------------------MY CODE----------------------------------- @@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho pygame.display.flip() # Cap the frame rate - clock.tick(20) + clock.tick(40) + def draw_labyrinth(screen, labyrinth): diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py index 004a8ca..a31bbbd 100644 --- a/ReinforcmentLearning/learning.py +++ b/ReinforcmentLearning/learning.py @@ -2,8 +2,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from ReinforcmentLearning.game import run_game, wrapper -from ReinforcmentLearning.util import initial_q_fill +from ReinforcmentLearning.game import start_try @@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl q_value: (state, action) """ - cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game) - - # if show_game: - # if x == AMOUNT_RUNS / 4: - # print("1 / 4 done") - - # if x == AMOUNT_RUNS / 2: - # print("2 / 4 done") - - # if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4): - # print("3 / 4 done") - + cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game) wins = sum(1 for result in cookies_per_run if result == 20) @@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%") if plot_result: - plot_results(cookies_per_run) + plot_results(cookies_per_run, iterations) return cookies_per_run, wins -def plot_results(cookies_per_run): +def plot_results(cookies_per_run, iterations): wins = [] losses = [] win_count = 0 @@ -63,29 +51,22 @@ def plot_results(cookies_per_run): wins.append(win_count) losses.append((i + 1) - win_count) # Losses count down from top - # Last 700 attempts - last_700_wins = wins[-700:] if len(wins) >= 700 else wins - last_700_losses = losses[-700:] if len(losses) >= 700 else losses - last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1)) - - # Create figure with 2 subplots + # Create figure with 2 subplots (wins/losses and iterations) fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8)) - # Plot 1: All attempts (with thicker lines: linewidth=1.5) + # Plot 1: Wins vs Losses ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins') ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses') ax1.set_xlabel('Attempt') ax1.set_ylabel('Count') - ax1.set_title('All Attempts: Wins vs Losses') + ax1.set_title('Wins vs Losses Over Time') ax1.legend() - # Plot 2: Last 700 attempts (with thicker lines: 
linewidth=1.5) - ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins') - ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses') - ax2.set_xlabel('Attempt') - ax2.set_ylabel('Count') - ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses') - ax2.legend() + # Plot 2: Iterations per run + ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5) + ax2.set_xlabel('Run Number') + ax2.set_ylabel('Iterations') + ax2.set_title('Iterations per Run') plt.tight_layout() - plt.show() + plt.show() \ No newline at end of file diff --git a/ReinforcmentLearning/util.py b/ReinforcmentLearning/util.py index 59cfaee..35ac1ba 100644 --- a/ReinforcmentLearning/util.py +++ b/ReinforcmentLearning/util.py @@ -8,14 +8,17 @@ class Direction(Enum): LEFT = 3 -def initial_q_fill(q_values): +def initial_q_fill(): + q_values = {} + for x in range(-7, 8): for y in range(-2, 3): for cookie_direction in Direction: for action in Direction: state = (x, y, cookie_direction) q_values[(state, action)] = random.random() * 0.2 - 0.1 - + + return q_values @@ -109,7 +112,6 @@ def get_best_q_action(q_values, state): for (q_state, q_action), value in q_values.items(): if q_state == state: actions_for_epsilon.append(q_action) - if best_value is None: best_value = value best_action = q_action diff --git a/main.py b/main.py index 068283e..f14f8d5 100644 --- a/main.py +++ b/main.py @@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry # EPSILON = 0.1618 EPSILON = 0.01 -ALPHA = 0.01 -GAMMA = 0.2713 +# ALPHA = 0.01 +ALPHA = 0.2 +# GAMMA = 0.2713 +GAMMA = 0.8 AMOUNT_RUNS = 5000 AMOUNT_TRIES = 5 @@ -14,7 +16,7 @@ REWARD_ON_WIN = 400 REWARD_ON_LOSE = -250 plot_result = True -show_game = True +show_game = False oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
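
Note on the learning code this patch touches: the Q-table built by initial_q_fill is a flat dict keyed by ((x, y, cookie_direction), action), and run_game picks moves via epsilon_greedy while get_best_q_action scans that dict for the best-valued entry. The diff does not show epsilon_greedy or the Q-update step itself, so the following is a minimal sketch of a standard epsilon-greedy selection and tabular Q-learning update consistent with the EPSILON/ALPHA/GAMMA values set in main.py; it illustrates the technique and is not necessarily the exact code in util.py.

    import random
    from ReinforcmentLearning.util import Direction, get_best_q_action

    def epsilon_greedy(q_values, state, epsilon):
        # Sketch: explore a random direction with probability epsilon,
        # otherwise exploit the best known action for this state.
        if random.random() < epsilon:
            return random.choice(list(Direction))
        return get_best_q_action(q_values, state)

    def q_update(q_values, state, action, reward, next_state, alpha, gamma):
        # Sketch of the standard tabular Q-learning update:
        #   Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        # Assumes every (state, action) pair was pre-filled by initial_q_fill,
        # as the loop over x, y, cookie_direction and action in util.py does.
        best_next = max(q_values[(next_state, a)] for a in Direction)
        old = q_values[(state, action)]
        q_values[(state, action)] = old + alpha * (reward + gamma * best_next - old)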
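
A small robustness note on the quarter-progress prints added to start_try: the check x == AMOUNT_RUNS / 4 compares an int against a float and only fires when AMOUNT_RUNS is divisible by 4 (true for the default 5000). A hedged alternative that works for any run count, written here with a hypothetical helper name rather than as part of the patch:

    def report_progress(x, amount_runs):
        # Print once when the run index reaches each quarter boundary,
        # using integer arithmetic so it fires for any amount_runs.
        for quarter in (1, 2, 3):
            if x == (amount_runs * quarter) // 4:
                print(f"{quarter} / 4 done")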