Added global conf file, cleaning up

parent 44b7a4e942
commit a52de42507

@@ -1,8 +1,8 @@
 import math
 import time
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 from matplotlib import pyplot as plt
 import numpy as np
 from GenTunic.gen_math import project_bit
 

@@ -3,6 +3,7 @@ import math
 import os
 
 from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
+import conf
 
 # Initialize pygame
 

@@ -101,7 +102,7 @@ class Ghost:
 
 
-def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(EPSILON, ALPHA, GAMMA):
     #? Learning initial
     q_values = initial_q_fill()
 

@@ -109,35 +110,47 @@ def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE,
     pygame.init()
     screen = None
 
-    if show_game:
+    if conf.show_game:
         screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
         pygame.display.set_caption("Micro-Pacman")
 
     #? Start try
     cookies_per_run = []
     iterations = []
-    for x in range(AMOUNT_RUNS):
-        if show_game:
-            if x == AMOUNT_RUNS / 4:
+    for x in range(conf.AMOUNT_RUNS):
+        if conf.show_game:
+            if x == conf.AMOUNT_RUNS / 4:
                 print("1 / 4 done")
 
-            if x == AMOUNT_RUNS / 2:
+            if x == conf.AMOUNT_RUNS / 2:
                 print("2 / 4 done")
 
-            if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            if x == (conf.AMOUNT_RUNS / 2) + (conf.AMOUNT_RUNS / 4):
                 print("3 / 4 done")
 
-        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
         cookies_per_run.append(amount_cookies_ate)
         iterations.append(iterations_per_run)
 
+        print(f"Run {x+1}: {iterations_per_run} iterations")
+
+    if conf.show_trained:
+        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        pygame.display.set_caption("Micro-Pacman")
+
+        while True:
+            print("After game")
+            run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
+
     pygame.quit()
 
     return cookies_per_run, iterations
 
 
-def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
     clock = pygame.time.Clock()
     labyrinth = LABYRINTH_INIT.copy()

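Note on the progress checks above: conf.AMOUNT_RUNS / 4 is float division in Python 3, so the milestone messages rely on value-based int/float equality. With the configured AMOUNT_RUNS = 5000 that works (1250 == 1250.0), but a run count not divisible by 4 would silently skip the print. A quick illustration:

    print(1250 == 5000 / 4)   # True  -- ints and floats compare by value
    print(1250 == 5001 / 4)   # False -- 5001 / 4 is 1250.25, so "1 / 4 done" would never fire
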
@@ -163,7 +176,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             if event.type == pygame.QUIT:
                 running = False
 
-        if show_game:
+        if conf.show_game:
            screen.fill(BLACK)
 
 

@@ -198,10 +211,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             ghost.move_towards_pacman(labyrinth, pacman)
 
         if pacman.x == ghost.x and pacman.y == ghost.y:
-            if show_game:
+            if conf.show_game:
                 print("Game Over! The ghost caught Pacman.")
             running = False
-            reward = REWARD_ON_LOSE
+            reward = conf.REWARD_ON_LOSE
 
         # Eat cookies
         if labyrinth[pacman.y][pacman.x] == ".":

@@ -228,10 +241,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
         if all("." not in row for row in labyrinth):
             # time_reward = calc_time_reward(iter)
             # reward = REWARD_ON_WIN * time_reward
-            reward = REWARD_ON_WIN
+            reward = conf.REWARD_ON_WIN
             running = False
 
-            if show_game:
+            if conf.show_game:
                 # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
                 print(f"You Win! Took {iter} iterations")
 

@@ -262,7 +275,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
 
         # Draw the labyrinth, pacman, and ghost
-        if show_game:
+        if conf.show_game:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
            ghost.draw()

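For orientation: the EPSILON, ALPHA, and GAMMA parameters threaded through run_game are the standard tabular Q-learning knobs. The actual helpers (epsilon_greedy, get_best_q_action) live in ReinforcmentLearning.util and are not part of this diff; the sketch below shows what such helpers conventionally compute, with illustrative names and signatures rather than the repository's:

    import random

    def epsilon_greedy_sketch(q_values, state, actions, epsilon):
        # Explore with probability epsilon, otherwise exploit the best known action.
        if random.random() < epsilon:
            return random.choice(actions)
        return max(actions, key=lambda a: q_values.get((state, a), 0.0))

    def q_update_sketch(q_values, state, action, reward, next_state, actions, alpha, gamma):
        # Tabular Q-learning: Q(s,a) += alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        best_next = max(q_values.get((next_state, a), 0.0) for a in actions)
        old = q_values.get((state, action), 0.0)
        q_values[(state, action)] = old + alpha * (reward + gamma * best_next - old)
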
@@ -1,47 +1,46 @@
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
 from ReinforcmentLearning.game import start_try
+import conf
 
 
-def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
-    cookies_per_try = []
+def multipleTries(EPSILON, ALPHA, GAMMA):
+    conf.show_game = False
+    conf.plot_result = False
+
     wins_per_try = []
 
-    for x in range(AMOUNT_TRIES):
-        plot_result = False
-        show_game = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+    for x in range(conf.AMOUNT_TRIES):
+        cookies_per_run = oneTry(EPSILON, ALPHA, GAMMA)
 
-        last_700_results = cookies_per_run[-700:]
-        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
-        win_probalitiy = (wins_in_last_700 / 700)
+        results_last_700 = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in results_last_700 if result == 20)
+        win_probalitiy_last_700 = (wins_in_last_700 / 700)
+        wins_per_try.append(win_probalitiy_last_700)
 
-        cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")
 
-    return cookies_per_try, wins_per_try
+    return wins_per_try
 
 
-def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
+def oneTry(EPSILON, ALPHA, GAMMA):
     """
     state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
     action: Direction
     q_value: (state, action)
     """
-    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
+    cookies_per_run, iterations = start_try(EPSILON, ALPHA, GAMMA)
 
-    wins = sum(1 for result in cookies_per_run if result == 20)
-
-    if plot_result:
-        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+    if conf.plot_result:
+        wins = sum(1 for result in cookies_per_run if result == 20)
+        print(f"Win percentage overall: {(wins/conf.AMOUNT_RUNS)*100}%")
 
         last_700_results = cookies_per_run[-700:]
         wins_in_last_700 = sum(1 for result in last_700_results if result == 20)

@@ -50,7 +49,8 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
 
         plot_results(cookies_per_run, iterations)
 
-    return cookies_per_run, wins
+    return cookies_per_run
+
 
 
 def plot_results(cookies_per_run, iterations):

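The win-rate metric in multipleTries and oneTry counts a run as a win when its cookies_per_run entry equals 20 (all cookies eaten) and averages only the last 700 runs, so early untrained episodes don't drag the estimate down. A worked example on invented data:

    cookies_per_run = [5] * 4300 + [20] * 560 + [7] * 140   # hypothetical results for illustration
    last_700 = cookies_per_run[-700:]
    wins = sum(1 for result in last_700 if result == 20)
    print(wins / 700)   # 0.8 -> 80% win rate over the final 700 runs
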
@@ -0,0 +1,14 @@
+EPSILON = 0.01
+# EPSILON = 0.005
+ALPHA = 0.2
+GAMMA = 0.8
+
+AMOUNT_RUNS = 5000
+AMOUNT_TRIES = 5
+
+REWARD_ON_WIN = 400
+REWARD_ON_LOSE = -250
+
+plot_result = True
+show_game = False
+show_trained = True

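The new conf module acts as a process-wide settings singleton: Python caches imported modules in sys.modules, so every import conf yields the same object, and rebinding an attribute (as multipleTries does with conf.show_game = False) is immediately visible to all other importers. A minimal, self-contained sketch of that behavior, with a hypothetical module name built in-process for the demo:

    import sys
    import types

    # Hypothetical stand-in for conf, registered in the module cache.
    settings = types.ModuleType("settings")
    settings.verbose = True
    sys.modules["settings"] = settings

    def report():
        import settings  # always resolves to the one cached module object
        if settings.verbose:
            print("verbose output")

    report()                   # prints "verbose output"
    settings.verbose = False   # rebind the shared attribute...
    report()                   # ...and now prints nothing

Note the access must stay qualified (conf.show_game); a "from conf import show_game" would snapshot the value at import time and miss later changes.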
main.py

@@ -1,26 +1,11 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
+import conf
 
 
 
 
-EPSILON = 0.01
-# EPSILON = 0.005
-ALPHA = 0.2
-GAMMA = 0.8
-
-AMOUNT_RUNS = 5000
-AMOUNT_TRIES = 5
-
-REWARD_ON_WIN = 400
-REWARD_ON_LOSE = -250
-
-plot_result = False
-show_game = False
-
-
-
-# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
-#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
+oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
+# multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+# gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)

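With the constants centralized in conf, switching experiments no longer means threading new arguments through main.py; overriding a conf attribute before the call is enough. A hypothetical variation, not part of this commit:

    import conf
    from ReinforcmentLearning.learning import oneTry

    conf.EPSILON = 0.005    # the commented-out alternative exploration rate
    conf.show_game = True   # render the runs instead of training headless
    oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)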