From a52de42507dd70e8a4ff212b101639b26a7d4288 Mon Sep 17 00:00:00 2001
From: 2wenty1ne
Date: Wed, 10 Dec 2025 11:01:52 +0100
Subject: [PATCH] Added global conf file, cleaning up

---
 GenTunic/gen_tuning.py           |  4 +--
 ReinforcmentLearning/game.py     | 45 ++++++++++++++++++++------------
 ReinforcmentLearning/learning.py | 44 +++++++++++++++----------------
 conf.py                          | 14 ++++++++++
 main.py                          | 23 +++-------------
 5 files changed, 71 insertions(+), 59 deletions(-)
 create mode 100644 conf.py

diff --git a/GenTunic/gen_tuning.py b/GenTunic/gen_tuning.py
index afb460e..4808a3f 100644
--- a/GenTunic/gen_tuning.py
+++ b/GenTunic/gen_tuning.py
@@ -1,8 +1,8 @@
 import math
 import time
 
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 from matplotlib import pyplot as plt
 import numpy as np
 from GenTunic.gen_math import project_bit
diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py
index a8bccd0..be58510 100644
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@@ -3,6 +3,7 @@ import math
 import os
 
 from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
+import conf
 
 
 # Initialize pygame
@@ -101,7 +102,7 @@ class Ghost:
 
 
 
-def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(EPSILON, ALPHA, GAMMA):
     #? Learning initial
     q_values = initial_q_fill()
 
@@ -109,35 +110,47 @@ def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE,
     pygame.init()
     screen = None
 
-    if show_game:
+    if conf.show_game:
         screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
         pygame.display.set_caption("Micro-Pacman")
 
     #? Start try
     cookies_per_run = []
     iterations = []
-    for x in range(AMOUNT_RUNS):
-        if show_game:
-            if x == AMOUNT_RUNS / 4:
+    for x in range(conf.AMOUNT_RUNS):
+        if conf.show_game:
+            if x == conf.AMOUNT_RUNS / 4:
                 print("1 / 4 done")
-            if x == AMOUNT_RUNS / 2:
+            if x == conf.AMOUNT_RUNS / 2:
                 print("2 / 4 done")
-            if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            if x == (conf.AMOUNT_RUNS / 2) + (conf.AMOUNT_RUNS / 4):
                 print("3 / 4 done")
-
-        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+
+
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
         cookies_per_run.append(amount_cookies_ate)
         iterations.append(iterations_per_run)
 
+        print(f"Run {x+1}: {iterations_per_run} iterations")
+
+    if conf.show_trained:
+        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        pygame.display.set_caption("Micro-Pacman")
+
+        while True:
+            print("After game")
+            run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
+
+
     pygame.quit()
 
     return cookies_per_run, iterations
 
 
-def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
     clock = pygame.time.Clock()
 
     labyrinth = LABYRINTH_INIT.copy()
@@ -163,7 +176,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             if event.type == pygame.QUIT:
                 running = False
 
-        if show_game:
+        if conf.show_game:
             screen.fill(BLACK)
 
 
@@ -198,10 +211,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
         ghost.move_towards_pacman(labyrinth, pacman)
 
         if pacman.x == ghost.x and pacman.y == ghost.y:
-            if show_game:
+            if conf.show_game:
                 print("Game Over! The ghost caught Pacman.")
             running = False
-            reward = REWARD_ON_LOSE
+            reward = conf.REWARD_ON_LOSE
 
         # Eat cookies
         if labyrinth[pacman.y][pacman.x] == ".":
@@ -228,10 +241,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
         if all("." not in row for row in labyrinth):
             # time_reward = calc_time_reward(iter)
             # reward = REWARD_ON_WIN * time_reward
-            reward = REWARD_ON_WIN
+            reward = conf.REWARD_ON_WIN
             running = False
 
-            if show_game:
+            if conf.show_game:
                 # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
                 print(f"You Win! Took {iter} iterations")
@@ -262,7 +275,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
 
         # Draw the labyrinth, pacman, and ghost
-        if show_game:
+        if conf.show_game:
             draw_labyrinth(screen, labyrinth)
             pacman.draw()
             ghost.draw()
diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py
index 1ea08f4..6f9170c 100644
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@@ -1,47 +1,46 @@
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
 from ReinforcmentLearning.game import start_try
+import conf
 
 
-def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
-    cookies_per_try = []
+def multipleTries(EPSILON, ALPHA, GAMMA):
+    conf.show_game = False
+    conf.plot_result = False
+
     wins_per_try = []
 
-    for x in range(AMOUNT_TRIES):
-        plot_result = False
-        show_game = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+    for x in range(conf.AMOUNT_TRIES):
+        cookies_per_run = oneTry(EPSILON, ALPHA, GAMMA)
 
-        last_700_results = cookies_per_run[-700:]
-        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
-        win_probalitiy = (wins_in_last_700 / 700)
+        results_last_700 = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in results_last_700 if result == 20)
+        win_probalitiy_last_700 = (wins_in_last_700 / 700)
+        wins_per_try.append(win_probalitiy_last_700)
 
-        cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")
 
-    return cookies_per_try, wins_per_try
+    return wins_per_try
 
-def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
+
+def oneTry(EPSILON, ALPHA, GAMMA):
     """
     state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
     action: Direction
     q_value: (state, action)
     """
-    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
+    cookies_per_run, iterations = start_try(EPSILON, ALPHA, GAMMA)
 
-    wins = sum(1 for result in cookies_per_run if result == 20)
-
-
-    if plot_result:
-        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+    if conf.plot_result:
+        wins = sum(1 for result in cookies_per_run if result == 20)
+        print(f"Win percentage overall: {(wins/conf.AMOUNT_RUNS)*100}%")
 
         last_700_results = cookies_per_run[-700:]
         wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
@@ -50,7 +49,8 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
 
         plot_results(cookies_per_run, iterations)
 
-    return cookies_per_run, wins
+    return cookies_per_run
+
 
 def plot_results(cookies_per_run, iterations):
diff --git a/conf.py b/conf.py
new file mode 100644
index 0000000..3ac202a
--- /dev/null
+++ b/conf.py
@@ -0,0 +1,14 @@
+EPSILON = 0.01
+# EPSILON = 0.005
+ALPHA = 0.2
+GAMMA = 0.8
+
+AMOUNT_RUNS = 5000
+AMOUNT_TRIES = 5
+
+REWARD_ON_WIN = 400
+REWARD_ON_LOSE = -250
+
+plot_result = True
+show_game = False
+show_trained = True
\ No newline at end of file
diff --git a/main.py b/main.py
index f2ee65b..d7e7d00 100644
--- a/main.py
+++ b/main.py
@@ -1,26 +1,11 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
-
-
-EPSILON = 0.01
-# EPSILON = 0.005
-ALPHA = 0.2
-GAMMA = 0.8
-
-AMOUNT_RUNS = 5000
-AMOUNT_TRIES = 5
-
-REWARD_ON_WIN = 400
-REWARD_ON_LOSE = -250
-
-plot_result = False
-show_game = False
+import conf
 
-
-# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
-#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
+oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
+# multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+# gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
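
Note on the pattern this patch adopts: conf.py acts as a process-wide, mutable
config module. Every importer shares the same module object, so rebinding
conf.show_game in one place (as multipleTries() now does) is immediately
visible to the checks inside game.py, which is what lets the long parameter
lists be dropped. Below is a minimal, self-contained sketch of that behavior;
it emulates the conf module with a SimpleNamespace so it runs as a single
file, and start_try_sketch() is a hypothetical stand-in for start_try() /
run_game() -- only EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, and show_game mirror
real names from this patch.

    import types

    # Stand-in for the conf.py module added by this patch; in the repo these
    # are plain module-level names and callers simply do `import conf`.
    conf = types.SimpleNamespace(
        EPSILON=0.01,
        ALPHA=0.2,
        GAMMA=0.8,
        AMOUNT_RUNS=5000,
        show_game=False,
    )

    def start_try_sketch():
        # Hypothetical stand-in for start_try(): settings are read through
        # the shared module at call time, not passed down the call chain.
        frames_drawn = 0
        for run in range(conf.AMOUNT_RUNS):
            if conf.show_game:
                frames_drawn += 1  # the real loop would draw a frame here
        return frames_drawn

    conf.show_game = False             # headless sweep, as multipleTries() forces
    assert start_try_sketch() == 0

    conf.show_game = True              # windowed run, as show_trained enables
    assert start_try_sketch() == conf.AMOUNT_RUNS

The trade-off of the design is the usual one for module-level state: runs are
no longer parameterized independently, so anything that mutates conf (as
multipleTries() does) affects every later caller in the same process.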