Added time reward and a wins-in-last-700-runs metric, and used that as the fitness for a new GA run with a fixed epsilon

parent 909445135f
commit 07685e42a6
@@ -11,10 +11,10 @@ POPULATIUON_SIZE = 200
 MUTATION_RATE = 0.05
 CROSSOVER_RATE = 0.65

-GEN_SIZE = 8 * 3
-THRESHOLD = 0.8
+GEN_SIZE = 16 * 2
+THRESHOLD = 0.95

-def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     start_time = time.time()

     population = create_population(POPULATIUON_SIZE, GEN_SIZE)
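For orientation: with GEN_SIZE = 16 * 2 each individual now carries two 16-bit groups that project_bit turns into alpha and gamma (epsilon is no longer evolved). project_bit itself is not part of this diff, so the decoder below is only a sketch of one common choice, with project_bit_sketch as a hypothetical stand-in:

import numpy as np

def project_bit_sketch(bits):
    # Hypothetical stand-in for project_bit (not shown in this commit):
    # read the bit group as an unsigned integer and normalise it to [0, 1].
    value = int("".join(str(int(b)) for b in bits), 2)
    return value / (2 ** len(bits) - 1)

gen = np.random.randint(0, 2, size=16 * 2)               # one individual, GEN_SIZE bits
alpha, gamma = [project_bit_sketch(x) for x in np.split(gen, 2)]
print(f"alpha={alpha:.4f}, gamma={gamma:.4f}")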
@@ -26,7 +26,7 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
     while True:
         print(f"Starting eveloution round {counter + 1}")
         #? Calc fitness
-        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)

         _, best_fitness = fintess_values
         best_fintess_values.append(best_fitness)
@@ -56,8 +56,8 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):

     print("\n=== BEST PARAMETERS ===")
     gen = population[best_fintess_index]["population"]
-    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
-    parameters = [project_bit(x) for x in np.split(gen, 3)]
+    parameter_names = ["Alpha: ", "Gamma: "]
+    parameters = [project_bit(x) for x in np.split(gen, 2)]
     for index, name in enumerate(parameter_names):
         print(f"{name}{parameters[index]}")

@@ -16,14 +16,14 @@ def create_population(size, GEN_SIZE):
     return np.array(population_propability)


-def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     population_fitness_sum = 0

     for i, individual in enumerate(population_propability):
         gen = individual["population"]
-        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
-        _, multiple_tries_wins = multipleTries(alpha, epsilon, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-        multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
+        alpha, gamma = [project_bit(x) for x in np.split(gen, 2)]
+        _, multiple_tries_win_prob = multipleTries(EPSILON, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        # multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
         fitness = np.array(multiple_tries_win_prob).mean()

         individual["probability"] = fitness
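Read as plain code, the reworked fitness of one individual is the mean win probability that multipleTries reports for the decoded (alpha, gamma) pair at the fixed EPSILON. The helper below is only a sketch of that data flow; multipleTries and project_bit are passed in as arguments rather than imported, since their real modules are not part of this diff:

import numpy as np

def individual_fitness(individual, EPSILON, AMOUNT_TRIES, AMOUNT_RUNS,
                       REWARD_ON_WIN, REWARD_ON_LOSE, multipleTries, project_bit):
    # Decode only alpha and gamma from the gene; epsilon is a fixed input now.
    alpha, gamma = [project_bit(x) for x in np.split(individual["population"], 2)]
    # multipleTries is assumed to return (cookies_per_try, win_prob_per_try),
    # where each entry is already a win probability over the last 700 runs.
    _, win_prob_per_try = multipleTries(EPSILON, alpha, gamma, AMOUNT_TRIES,
                                        AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
    return np.array(win_prob_per_try).mean()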
@@ -2,7 +2,7 @@ import pygame
 import math
 import os

-from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action, initial_q_fill
+from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill

 # Initialize pygame

@@ -208,6 +208,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho

         #? -------------------------MY CODE-----------------------------------
+        #? half reward

         # cookie_counter = 0

         # for y, row in enumerate(labyrinth):
@@ -224,11 +225,15 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho

         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
-            if show_game:
-                print("You Win! Pacman ate all the cookies.")
+            # time_reward = calc_time_reward(iter)
+            # reward = REWARD_ON_WIN * time_reward
             reward = REWARD_ON_WIN
             running = False
+
+            if show_game:
+                # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
+                print(f"You Win! Took {iter} iterations")


         #? -------------------------MY CODE-----------------------------------
         if not running:
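The win branch keeps the time-based shaping commented out, and calc_time_reward is defined in ReinforcmentLearning.util rather than in this diff. Purely as an assumption about its shape, it could be a factor that starts near 1.0 for a fast win and decays with the number of iterations, which is what the commented reward = REWARD_ON_WIN * time_reward line would then scale by:

def calc_time_reward_sketch(iterations, half_life=500):
    # Hypothetical stand-in for calc_time_reward (its real definition is not in this diff):
    # 1.0 for an instant win, halved every `half_life` iterations.
    return 0.5 ** (iterations / half_life)

REWARD_ON_WIN = 500                               # placeholder value, not taken from the project
reward = REWARD_ON_WIN * calc_time_reward_sketch(1200)
print(f"shaped win reward after 1200 iterations: {reward:.1f}")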
@@ -12,9 +12,15 @@ def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WI

     for x in range(AMOUNT_TRIES):
         plot_result = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result)
+        show_game = False
+        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+
+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_probalitiy = (wins_in_last_700 / 700)
+
         cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(amount_wins)
+        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")

     return cookies_per_try, wins_per_try
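The new per-try metric only looks at the tail of a try: a run counts as a win when all 20 cookies were eaten, and the win probability is taken over the last 700 runs (a real try has AMOUNT_RUNS = 5000 runs, so the tail always exists). A self-contained toy version with made-up run results:

# Cookies eaten in each run of one try; a run is a win when all 20 were eaten.
cookies_per_run = [12, 20, 20, 17, 20, 20, 9, 20]     # made-up example data

last_700_results = cookies_per_run[-700:]
wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
# The commit divides by a constant 700 because a real try has at least 700 runs;
# len() is used here so the toy list still gives a sensible number.
win_probability = wins_in_last_700 / len(last_700_results)

print(f"{wins_in_last_700} wins in the last {len(last_700_results)} runs "
      f"-> win probability {win_probability:.2f}")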
@@ -32,9 +38,14 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
     wins = sum(1 for result in cookies_per_run if result == 20)


-    print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
-
     if plot_result:
+        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+
+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_percentage = (wins_in_last_700 / 700) * 100
+        print(f"Win percentage in the last 700: {win_percentage:.2f}%\n")
+
         plot_results(cookies_per_run, iterations)

     return cookies_per_run, wins
main.py (9 changed lines)
@@ -3,11 +3,9 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward

-
-# EPSILON = 0.01
-EPSILON = 0.005
-# ALPHA = 0.01
+EPSILON = 0.01
+# EPSILON = 0.005
 ALPHA = 0.2
 # GAMMA = 0.2713
 GAMMA = 0.8

 AMOUNT_RUNS = 5000
@@ -19,8 +17,7 @@ REWARD_ON_LOSE = -250
 plot_result = True
 show_game = False

-print(calc_time_reward(100000))
-
 # oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 #gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)