From a52de42507dd70e8a4ff212b101639b26a7d4288 Mon Sep 17 00:00:00 2001
From: 2wenty1ne
Date: Wed, 10 Dec 2025 11:01:52 +0100
Subject: [PATCH] Added global conf file, cleaning up

---
 GenTunic/gen_tuning.py           |  4 +--
 ReinforcmentLearning/game.py     | 45 ++++++++++++++++++++------------
 ReinforcmentLearning/learning.py | 44 +++++++++++++++----------------
 conf.py                          | 14 ++++++++++
 main.py                          | 23 +++-------------
 5 files changed, 71 insertions(+), 59 deletions(-)
 create mode 100644 conf.py

diff --git a/GenTunic/gen_tuning.py b/GenTunic/gen_tuning.py
index afb460e..4808a3f 100644
--- a/GenTunic/gen_tuning.py
+++ b/GenTunic/gen_tuning.py
@@ -1,8 +1,8 @@
 import math
 import time
 
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 from matplotlib import pyplot as plt
 import numpy as np
 from GenTunic.gen_math import project_bit
diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py
index a8bccd0..be58510 100644
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@@ -3,6 +3,7 @@ import math
 import os
 
 from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
+import conf
 
 
 # Initialize pygame
@@ -101,7 +102,7 @@ class Ghost:
 
 
 
-def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(EPSILON, ALPHA, GAMMA):
     #? Learning initial
     q_values = initial_q_fill()
 
@@ -109,35 +110,47 @@ def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE,
     pygame.init()
     screen = None
 
-    if show_game:
+    if conf.show_game:
         screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
         pygame.display.set_caption("Micro-Pacman")
 
     #? Start try
     cookies_per_run = []
     iterations = []
-    for x in range(AMOUNT_RUNS):
-        if show_game:
-            if x == AMOUNT_RUNS / 4:
+    for x in range(conf.AMOUNT_RUNS):
+        if conf.show_game:
+            if x == conf.AMOUNT_RUNS / 4:
                 print("1 / 4 done")
-            if x == AMOUNT_RUNS / 2:
+            if x == conf.AMOUNT_RUNS / 2:
                 print("2 / 4 done")
-            if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            if x == (conf.AMOUNT_RUNS / 2) + (conf.AMOUNT_RUNS / 4):
                 print("3 / 4 done")
-
-        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+
+
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
         cookies_per_run.append(amount_cookies_ate)
         iterations.append(iterations_per_run)
 
+        print(f"Run {x+1}: {iterations_per_run} iterations")
+
+    if conf.show_trained:
+        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        pygame.display.set_caption("Micro-Pacman")
+
+        while True:
+            print("After game")
+            run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
+
+
     pygame.quit()
 
     return cookies_per_run, iterations
 
 
-def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
     clock = pygame.time.Clock()
 
     labyrinth = LABYRINTH_INIT.copy()
@@ -163,7 +176,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             if event.type == pygame.QUIT:
                 running = False
 
-        if show_game:
+        if conf.show_game:
             screen.fill(BLACK)
 
 
@@ -198,10 +211,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
         ghost.move_towards_pacman(labyrinth, pacman)
 
         if pacman.x == ghost.x and pacman.y == ghost.y:
-            if show_game:
+            if conf.show_game:
                 print("Game Over! The ghost caught Pacman.")
             running = False
-            reward = REWARD_ON_LOSE
+            reward = conf.REWARD_ON_LOSE
 
         # Eat cookies
         if labyrinth[pacman.y][pacman.x] == ".":
@@ -228,10 +241,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
         if all("." not in row for row in labyrinth):
             # time_reward = calc_time_reward(iter)
             # reward = REWARD_ON_WIN * time_reward
-            reward = REWARD_ON_WIN
+            reward = conf.REWARD_ON_WIN
             running = False
 
-            if show_game:
+            if conf.show_game:
                 # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
                 print(f"You Win! Took {iter} iterations")
@@ -262,7 +275,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
 
         # Draw the labyrinth, pacman, and ghost
-        if show_game:
+        if conf.show_game:
             draw_labyrinth(screen, labyrinth)
             pacman.draw()
             ghost.draw()
diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py
index 1ea08f4..6f9170c 100644
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@@ -1,47 +1,46 @@
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
 from ReinforcmentLearning.game import start_try
+import conf
 
 
-def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
-    cookies_per_try = []
+def multipleTries(EPSILON, ALPHA, GAMMA):
+    conf.show_game = False
+    conf.plot_result = False
+
     wins_per_try = []
 
-    for x in range(AMOUNT_TRIES):
-        plot_result = False
-        show_game = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+    for x in range(conf.AMOUNT_TRIES):
+        cookies_per_run = oneTry(EPSILON, ALPHA, GAMMA)
 
-        last_700_results = cookies_per_run[-700:]
-        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
-        win_probalitiy = (wins_in_last_700 / 700)
+        results_last_700 = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in results_last_700 if result == 20)
+        win_probalitiy_last_700 = (wins_in_last_700 / 700)
+        wins_per_try.append(win_probalitiy_last_700)
 
-        cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")
 
-    return cookies_per_try, wins_per_try
+    return wins_per_try
 
-def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
+
+def oneTry(EPSILON, ALPHA, GAMMA):
     """
     state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
     action: Direction
     q_value: (state, action)
     """
-    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
+    cookies_per_run, iterations = start_try(EPSILON, ALPHA, GAMMA)
 
-    wins = sum(1 for result in cookies_per_run if result == 20)
-
-
-    if plot_result:
-        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+    if conf.plot_result:
+        wins = sum(1 for result in cookies_per_run if result == 20)
+        print(f"Win percentage overall: {(wins/conf.AMOUNT_RUNS)*100}%")
 
         last_700_results = cookies_per_run[-700:]
         wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
@@ -50,7 +49,8 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
 
         plot_results(cookies_per_run, iterations)
 
-    return cookies_per_run, wins
+    return cookies_per_run
+
 
 def plot_results(cookies_per_run, iterations):
diff --git a/conf.py b/conf.py
new file mode 100644
index 0000000..3ac202a
--- /dev/null
+++ b/conf.py
@@ -0,0 +1,14 @@
+EPSILON = 0.01
+# EPSILON = 0.005
+ALPHA = 0.2
+GAMMA = 0.8
+
+AMOUNT_RUNS = 5000
+AMOUNT_TRIES = 5
+
+REWARD_ON_WIN = 400
+REWARD_ON_LOSE = -250
+
+plot_result = True
+show_game = False
+show_trained = True
\ No newline at end of file
diff --git a/main.py b/main.py
index f2ee65b..d7e7d00 100644
--- a/main.py
+++ b/main.py
@@ -1,26 +1,11 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
-
-
-EPSILON = 0.01
-# EPSILON = 0.005
-ALPHA = 0.2
-GAMMA = 0.8
-
-AMOUNT_RUNS = 5000
-AMOUNT_TRIES = 5
-
-REWARD_ON_WIN = 400
-REWARD_ON_LOSE = -250
-
-plot_result = False
-show_game = False
+import conf
 
-
-# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
-#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
+oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
+# multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+# gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
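
Note on the pattern this patch adopts: conf.py acts as a process-wide, mutable
config module. Every importer shares the same module object, so rebinding
conf.show_game in one place (as multipleTries() now does) is immediately
visible to the checks inside game.py, which is what lets the long parameter
lists be dropped. Below is a minimal, self-contained sketch of that behavior;
it emulates the conf module with a SimpleNamespace so it runs as a single
file, and start_try_sketch() is a hypothetical stand-in for start_try() /
run_game() -- only EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, and show_game mirror
real names from this patch.

    import types

    # Stand-in for the conf.py module added by this patch; in the repo these
    # are plain module-level names and callers simply do `import conf`.
    conf = types.SimpleNamespace(
        EPSILON=0.01,
        ALPHA=0.2,
        GAMMA=0.8,
        AMOUNT_RUNS=5000,
        show_game=False,
    )

    def start_try_sketch():
        # Hypothetical stand-in for start_try(): settings are read through
        # the shared module at call time, not passed down the call chain.
        frames_drawn = 0
        for run in range(conf.AMOUNT_RUNS):
            if conf.show_game:
                frames_drawn += 1  # the real loop would draw a frame here
        return frames_drawn

    conf.show_game = False             # headless sweep, as multipleTries() forces
    assert start_try_sketch() == 0

    conf.show_game = True              # windowed run, as show_trained enables
    assert start_try_sketch() == conf.AMOUNT_RUNS

The trade-off of the design is the usual one for module-level state: runs are
no longer parameterized independently, so anything that mutates conf (as
multipleTries() does) affects every later caller in the same process.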