Added global conf file, cleaning up

main
2wenty1ne 2025-12-10 11:01:52 +01:00
parent 44b7a4e942
commit a52de42507
5 changed files with 71 additions and 59 deletions

@@ -1,8 +1,8 @@
 import math
 import time
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 from matplotlib import pyplot as plt
 import numpy as np
 from GenTunic.gen_math import project_bit
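
A note on the hunk above: matplotlib.use('Agg') pins matplotlib to its non-interactive Agg backend, which can only write figures to files; commenting it out restores the default backend, which can open plot windows. A minimal sketch of making that choice explicit (the headless flag is illustrative, not a value from this repo):

    # Sketch: pick the backend before pyplot is imported.
    # "headless" is a hypothetical flag, not part of conf.py.
    import matplotlib

    headless = False
    if headless:
        matplotlib.use('Agg')  # file-only backend, works without a display

    from matplotlib import pyplot as plt

    plt.plot([0, 1, 2], [0, 1, 4])
    if headless:
        plt.savefig("out.png")  # Agg cannot show(), only save
    else:
        plt.show()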

ReinforcmentLearning/game.py
@@ -3,6 +3,7 @@ import math
 import os
 from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
+import conf
 # Initialize pygame
@@ -101,7 +102,7 @@ class Ghost:
-def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(EPSILON, ALPHA, GAMMA):
     #? Learning initial
     q_values = initial_q_fill()
@@ -109,35 +110,47 @@ def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE,
     pygame.init()
     screen = None
-    if show_game:
+    if conf.show_game:
         screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
         pygame.display.set_caption("Micro-Pacman")

     #? Start try
     cookies_per_run = []
     iterations = []
-    for x in range(AMOUNT_RUNS):
-        if show_game:
-            if x == AMOUNT_RUNS / 4:
+    for x in range(conf.AMOUNT_RUNS):
+        if conf.show_game:
+            if x == conf.AMOUNT_RUNS / 4:
                 print("1 / 4 done")
-            if x == AMOUNT_RUNS / 2:
+            if x == conf.AMOUNT_RUNS / 2:
                 print("2 / 4 done")
-            if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            if x == (conf.AMOUNT_RUNS / 2) + (conf.AMOUNT_RUNS / 4):
                 print("3 / 4 done")
-        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
         cookies_per_run.append(amount_cookies_ate)
         iterations.append(iterations_per_run)
         print(f"Run {x+1}: {iterations_per_run} iterations")

+    if conf.show_trained:
+        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        pygame.display.set_caption("Micro-Pacman")
+        while True:
+            print("After game")
+            run_game(q_values, EPSILON, ALPHA, GAMMA, screen)

     pygame.quit()
     return cookies_per_run, iterations

-def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
     clock = pygame.time.Clock()
     labyrinth = LABYRINTH_INIT.copy()
@@ -163,7 +176,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
            if event.type == pygame.QUIT:
                running = False

-        if show_game:
+        if conf.show_game:
            screen.fill(BLACK)
@@ -198,10 +211,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
            ghost.move_towards_pacman(labyrinth, pacman)
            if pacman.x == ghost.x and pacman.y == ghost.y:
-                if show_game:
+                if conf.show_game:
                    print("Game Over! The ghost caught Pacman.")
                running = False
-                reward = REWARD_ON_LOSE
+                reward = conf.REWARD_ON_LOSE

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
@@ -228,10 +241,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
        if all("." not in row for row in labyrinth):
            # time_reward = calc_time_reward(iter)
            # reward = REWARD_ON_WIN * time_reward
-            reward = REWARD_ON_WIN
+            reward = conf.REWARD_ON_WIN
            running = False
-            if show_game:
+            if conf.show_game:
                # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
                print(f"You Win! Took {iter} iterations")
@@ -262,7 +275,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
        # Draw the labyrinth, pacman, and ghost
-        if show_game:
+        if conf.show_game:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
            ghost.draw()
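
One detail in start_try above: the quarter-progress checks compare the integer loop index x against conf.AMOUNT_RUNS / 4, which is float division (1250.0 for 5000 runs). Python's == compares the values numerically, so the prints fire, but only when the division is exact. A hedged sketch of an equivalent check that stays in integers for any run count:

    # Sketch: integer-only quarter-progress reporting.
    # AMOUNT_RUNS mirrors conf.py; the loop body stands in for run_game.
    AMOUNT_RUNS = 5000
    quarter = max(1, AMOUNT_RUNS // 4)  # guard against tiny run counts

    for x in range(AMOUNT_RUNS):
        if x > 0 and x % quarter == 0:
            print(f"{x // quarter} / 4 done")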

ReinforcmentLearning/learning.py
@@ -1,47 +1,46 @@
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from ReinforcmentLearning.game import start_try
+import conf

-def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
-    cookies_per_try = []
+def multipleTries(EPSILON, ALPHA, GAMMA):
+    conf.show_game = False
+    conf.plot_result = False
     wins_per_try = []
-    for x in range(AMOUNT_TRIES):
-        plot_result = False
-        show_game = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+    for x in range(conf.AMOUNT_TRIES):
+        cookies_per_run = oneTry(EPSILON, ALPHA, GAMMA)

-        last_700_results = cookies_per_run[-700:]
-        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
-        win_probalitiy = (wins_in_last_700 / 700)
+        results_last_700 = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in results_last_700 if result == 20)
+        win_probalitiy_last_700 = (wins_in_last_700 / 700)
+        wins_per_try.append(win_probalitiy_last_700)

-        cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(win_probalitiy)
        # print(f"Finished try {x+1}\n")

-    return cookies_per_try, wins_per_try
+    return wins_per_try

-def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
+def oneTry(EPSILON, ALPHA, GAMMA):
     """
     state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
     action: Direction
     q_value: (state, action)
     """
-    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
+    cookies_per_run, iterations = start_try(EPSILON, ALPHA, GAMMA)

-    wins = sum(1 for result in cookies_per_run if result == 20)
-    if plot_result:
-        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+    if conf.plot_result:
+        wins = sum(1 for result in cookies_per_run if result == 20)
+        print(f"Win percentage overall: {(wins/conf.AMOUNT_RUNS)*100}%")

        last_700_results = cookies_per_run[-700:]
        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
@@ -50,7 +49,8 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
        plot_results(cookies_per_run, iterations)

-    return cookies_per_run, wins
+    return cookies_per_run

 def plot_results(cookies_per_run, iterations):
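
The last-700 blocks above estimate the post-training win rate: a run counts as a win when all 20 cookies were eaten, and only the tail of the learning curve is sampled so early exploratory runs do not drag the estimate down. A minimal sketch of that computation as a helper (the function name and parameters are illustrative, not from this commit):

    # Sketch: trailing-window win rate, as computed in multipleTries/oneTry.
    # result == 20 means every cookie was eaten, i.e. a won run.
    def trailing_win_rate(cookies_per_run, window=700, cookies_to_win=20):
        tail = cookies_per_run[-window:]
        wins = sum(1 for result in tail if result == cookies_to_win)
        return wins / len(tail)

    print(trailing_win_rate([20, 20, 5, 20], window=4))  # 0.75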

conf.py 100644
@@ -0,0 +1,14 @@
+EPSILON = 0.01
+# EPSILON = 0.005
+ALPHA = 0.2
+GAMMA = 0.8
+
+AMOUNT_RUNS = 5000
+AMOUNT_TRIES = 5
+
+REWARD_ON_WIN = 400
+REWARD_ON_LOSE = -250
+
+plot_result = True
+show_game = False
+show_trained = True
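
The point of this new module: every file does import conf and reads or assigns attributes on the module object, so a value changed anywhere is seen everywhere. A `from conf import show_game` would instead copy the value once and miss later reassignments. A self-contained sketch of the pattern, with a SimpleNamespace standing in for the conf module so the snippet runs on its own:

    # Sketch: the module-as-mutable-config pattern, condensed to one file.
    # SimpleNamespace plays the role of the imported conf module.
    from types import SimpleNamespace

    conf = SimpleNamespace(show_game=False)

    def run_game():
        # Looked up at call time, so callers' reassignments are visible.
        print("show_game =", conf.show_game)

    run_game()             # show_game = False
    conf.show_game = True  # what multipleTries does with the real conf
    run_game()             # show_game = True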

main.py
@@ -1,26 +1,11 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
-EPSILON = 0.01
-# EPSILON = 0.005
-ALPHA = 0.2
-GAMMA = 0.8
-AMOUNT_RUNS = 5000
-AMOUNT_TRIES = 5
-REWARD_ON_WIN = 400
-REWARD_ON_LOSE = -250
-plot_result = False
-show_game = False
+import conf

-# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
-#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
+oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
+# multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+# gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
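
With the constants now in conf.py, a caller can tweak a setting once before kicking off a run instead of threading it through every function. A sketch of how main.py could be driven (the override shown is an example, not part of this commit):

    # Sketch: override a conf value, then run one training attempt.
    import conf
    from ReinforcmentLearning.learning import oneTry

    conf.show_game = True  # example override; the committed default is False
    cookies_per_run = oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
    print(f"Best run ate {max(cookies_per_run)} cookies")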