Added a time reward and a wins-in-the-last-700-runs metric, and used the latter as fitness for a new GA run with a fixed epsilon

main
2wenty1ne 2025-12-09 22:43:34 +01:00
parent 909445135f
commit 07685e42a6
5 changed files with 36 additions and 23 deletions

View File

@@ -11,10 +11,10 @@ POPULATIUON_SIZE = 200
 MUTATION_RATE = 0.05
 CROSSOVER_RATE = 0.65
-GEN_SIZE = 8 * 3
-THRESHOLD = 0.8
+GEN_SIZE = 16 * 2
+THRESHOLD = 0.95

-def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     start_time = time.time()
     population = create_population(POPULATIUON_SIZE, GEN_SIZE)
@@ -26,7 +26,7 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
     while True:
         print(f"Starting eveloution round {counter + 1}")

         #? Calc fitness
-        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
         _, best_fitness = fintess_values
         best_fintess_values.append(best_fitness)
@@ -56,8 +56,8 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
     print("\n=== BEST PARAMETERS ===")
     gen = population[best_fintess_index]["population"]
-    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
-    parameters = [project_bit(x) for x in np.split(gen, 3)]
+    parameter_names = ["Alpha: ", "Gamma: "]
+    parameters = [project_bit(x) for x in np.split(gen, 2)]
     for index, name in enumerate(parameter_names):
         print(f"{name}{parameters[index]}")

View File

@@ -16,14 +16,14 @@ def create_population(size, GEN_SIZE):
     return np.array(population_propability)

-def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     population_fitness_sum = 0
     for i, individual in enumerate(population_propability):
         gen = individual["population"]
-        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
-        _, multiple_tries_wins = multipleTries(alpha, epsilon, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-        multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
+        alpha, gamma = [project_bit(x) for x in np.split(gen, 2)]
+        _, multiple_tries_win_prob = multipleTries(EPSILON, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        # multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
         fitness = np.array(multiple_tries_win_prob).mean()
         individual["probability"] = fitness

View File

@@ -2,7 +2,7 @@ import pygame
 import math
 import os
-from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action, initial_q_fill
+from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill

 # Initialize pygame
@@ -208,6 +208,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
     #? -------------------------MY CODE-----------------------------------
     #? half reward
     # cookie_counter = 0
     # for y, row in enumerate(labyrinth):
@@ -224,11 +225,15 @@
         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
-            if show_game:
-                print("You Win! Pacman ate all the cookies.")
+            # time_reward = calc_time_reward(iter)
+            # reward = REWARD_ON_WIN * time_reward
             reward = REWARD_ON_WIN
             running = False
+            if show_game:
+                # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
+                print(f"You Win! Took {iter} iterations")

         #? -------------------------MY CODE-----------------------------------
         if not running:
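The time reward itself stays commented out in this commit, and calc_time_reward's body is not part of the diff. Purely as a hypothetical sketch of such a shaping term: a multiplier that starts at 1.0 and decays toward a floor as the win takes more iterations, so faster wins would keep more of REWARD_ON_WIN.

import math

def calc_time_reward(iterations, decay=0.001, floor=0.1):
    # Hypothetical time-based multiplier; the repo's real calc_time_reward
    # may use a completely different formula.
    return max(floor, math.exp(-decay * iterations))

# reward = REWARD_ON_WIN * calc_time_reward(iter)
print(calc_time_reward(100))     # ~0.905, a quick win keeps most of the reward
print(calc_time_reward(100000))  # 0.1, clamped at the floor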

View File

@@ -12,9 +12,15 @@ def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WI
     for x in range(AMOUNT_TRIES):
         plot_result = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result)
+        show_game = False
+        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+
+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_probalitiy = (wins_in_last_700 / 700)

         cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(amount_wins)
+        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")

     return cookies_per_try, wins_per_try
@@ -32,9 +38,14 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
     wins = sum(1 for result in cookies_per_run if result == 20)
-    print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")

     if plot_result:
+        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+
+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_percentage = (wins_in_last_700 / 700) * 100
+        print(f"Win percentage in the last 700: {win_percentage:.2f}%\n")
+
         plot_results(cookies_per_run, iterations)

     return cookies_per_run, wins
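The last-700 computation now appears twice, once in multipleTries and once in oneTry, so a small shared helper would remove the duplication. A sketch, assuming (as the code does) that a run counts as a win when all 20 cookies were eaten:

def recent_win_probability(cookies_per_run, window=700, cookies_to_win=20):
    # Fraction of the final `window` runs in which Pacman ate every cookie.
    # Scoring only the tail measures converged behavior instead of averaging
    # in the early exploration phase.
    recent = cookies_per_run[-window:]
    wins = sum(1 for result in recent if result == cookies_to_win)
    return wins / len(recent)

# in multipleTries:  wins_per_try.append(recent_win_probability(cookies_per_run))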

View File

@@ -3,11 +3,9 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward

-# EPSILON = 0.01
-EPSILON = 0.005
-# ALPHA = 0.01
+EPSILON = 0.01
+# EPSILON = 0.005
 ALPHA = 0.2
-# GAMMA = 0.2713
 GAMMA = 0.8

 AMOUNT_RUNS = 5000
@@ -19,8 +17,7 @@ REWARD_ON_LOSE = -250
 plot_result = True
 show_game = False

-print(calc_time_reward(100000))
 # oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-#gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
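With EPSILON pinned at 0.01 here and passed straight through to gen_tuning_main, the GA now tunes only alpha and gamma while exploration stays constant. For reference, epsilon is the exploration rate of the epsilon-greedy policy (epsilon_greedy is imported from ReinforcmentLearning.util but its body is not in this diff); below is a standard sketch of the technique with hypothetical arguments, not the repository's actual implementation.

import random

def epsilon_greedy(q_values, state, actions, epsilon):
    # Standard epsilon-greedy: with probability epsilon pick a random
    # action (explore), otherwise the best-known action (exploit).
    # The repo's real epsilon_greedy may take different arguments.
    if random.random() < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: q_values[(state, a)])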