diff --git a/GenTunic/gen_tuning.py b/GenTunic/gen_tuning.py
index 554f9ec..5a2b160 100644
--- a/GenTunic/gen_tuning.py
+++ b/GenTunic/gen_tuning.py
@@ -11,10 +11,10 @@
 POPULATIUON_SIZE = 200
 MUTATION_RATE = 0.05
 CROSSOVER_RATE = 0.65
-GEN_SIZE = 8 * 3
-THRESHOLD = 0.8
+GEN_SIZE = 16 * 2
+THRESHOLD = 0.95
 
-def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     start_time = time.time()
 
     population = create_population(POPULATIUON_SIZE, GEN_SIZE)
@@ -26,7 +26,7 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
     while True:
         print(f"Starting eveloution round {counter + 1}")
         #? Calc fitness
-        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
 
         _, best_fitness = fintess_values
         best_fintess_values.append(best_fitness)
@@ -56,8 +56,8 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
     print("\n=== BEST PARAMETERS ===")
     gen = population[best_fintess_index]["population"]
 
-    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
-    parameters = [project_bit(x) for x in np.split(gen, 3)]
+    parameter_names = ["Alpha: ", "Gamma: "]
+    parameters = [project_bit(x) for x in np.split(gen, 2)]
 
     for index, name in enumerate(parameter_names):
         print(f"{name}{parameters[index]}")
diff --git a/GenTunic/gen_util.py b/GenTunic/gen_util.py
index 8c95e87..c08ba9f 100644
--- a/GenTunic/gen_util.py
+++ b/GenTunic/gen_util.py
@@ -16,14 +16,14 @@ def create_population(size, GEN_SIZE):
     return np.array(population_propability)
 
 
-def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     population_fitness_sum = 0
 
     for i, individual in enumerate(population_propability):
         gen = individual["population"]
-        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
-        _, multiple_tries_wins = multipleTries(alpha, epsilon, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-        multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
+        alpha, gamma = [project_bit(x) for x in np.split(gen, 2)]
+        _, multiple_tries_win_prob = multipleTries(EPSILON, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        # multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
 
         fitness = np.array(multiple_tries_win_prob).mean()
         individual["probability"] = fitness
diff --git a/ReinforcmentLearning/game.py b/ReinforcmentLearning/game.py
index ad3944b..65a9f2b 100644
--- a/ReinforcmentLearning/game.py
+++ b/ReinforcmentLearning/game.py
@@ -2,7 +2,7 @@ import pygame
 import math
 import os
 
-from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action, initial_q_fill
+from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
 
 
 # Initialize pygame
@@ -208,6 +208,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
         #? -------------------------MY CODE-----------------------------------
         #? half reward
+
         # cookie_counter = 0
         # for y, row in enumerate(labyrinth):
@@ -224,11 +225,15 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
-            if show_game:
-                print("You Win! Pacman ate all the cookies.")
+            # time_reward = calc_time_reward(iter)
+            # reward = REWARD_ON_WIN * time_reward
             reward = REWARD_ON_WIN
             running = False
 
+            if show_game:
+                # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
+                print(f"You Win! Took {iter} iterations")
+
         #? -------------------------MY CODE-----------------------------------
 
         if not running:
diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py
index a31bbbd..bd1a85b 100644
--- a/ReinforcmentLearning/learning.py
+++ b/ReinforcmentLearning/learning.py
@@ -12,9 +12,15 @@ def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WI
     for x in range(AMOUNT_TRIES):
         plot_result = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result)
+        show_game = False
+        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+
+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_probalitiy = (wins_in_last_700 / 700)
+
         cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(amount_wins)
+        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")
 
     return cookies_per_try, wins_per_try
 
@@ -32,9 +38,14 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
 
     wins = sum(1 for result in cookies_per_run if result == 20)
 
-    print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
-
     if plot_result:
+        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+
+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_percentage = (wins_in_last_700 / 700) * 100
+        print(f"Win percentage in the last 700: {win_percentage:.2f}%\n")
+
         plot_results(cookies_per_run, iterations)
 
     return cookies_per_run, wins
diff --git a/main.py b/main.py
index d53ed0a..20f8640 100644
--- a/main.py
+++ b/main.py
@@ -3,11 +3,9 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
 
-# EPSILON = 0.01
-EPSILON = 0.005
-# ALPHA = 0.01
+EPSILON = 0.01
+# EPSILON = 0.005
 ALPHA = 0.2
-# GAMMA = 0.2713
 GAMMA = 0.8
 
 AMOUNT_RUNS = 5000
@@ -19,8 +17,7 @@ REWARD_ON_LOSE = -250
 
 plot_result = True
 show_game = False
 
-print(calc_time_reward(100000))
 # oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-#gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
\ No newline at end of file
+gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)