before time reward

2025-12-09 20:38:07 +01:00 · 2025-12-09 20:38:07 +01:00 · 909445135f
parent a965dc07ce
commit 909445135f
3 changed files with 22 additions and 4 deletions
--- a/GenTunic/gen_util.py
+++ b/GenTunic/gen_util.py
@ -110,3 +110,8 @@ def mutation(population, MUTATION_RATE, GEN_SIZE):
        population[individual_index]["population"] = grey_to_bit(grey_to_mutate)
    return population
 def gen_to_params():
    """Placeholder with no implementation yet.

    Takes no arguments, performs no work and returns ``None``.
    """
    # NOTE(review): presumably meant to turn a genome into tuning
    # parameters -- confirm the intended contract before implementing.
    return None
--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@ -129,3 +129,14 @@ def get_best_q_action(q_values, state):
 def get_random_direction():
    """Return one randomly chosen element of ``Direction``.

    ``Direction`` is defined outside this view; it is iterated into a list
    and ``random.choice`` picks a single entry from that list.
    """
    candidates = list(Direction)
    return random.choice(candidates)
 def calc_time_reward(amount_iterations):
    """Map an iteration count to a reward that shrinks as the count grows.

    * fewer than 1000 iterations  -> 10
    * more than 10000 iterations  -> 1
    * anything in between         -> ``-(1 / 1000) * amount_iterations + 11``,
      a straight line that meets both plateaus at their edges.

    The two plateau values are ints; the linear segment yields a float.
    """
    lower_bound = 1000
    upper_bound = 10000

    if amount_iterations < lower_bound:
        reward = 10
    elif amount_iterations > upper_bound:
        reward = 1
    else:
        # Linear ramp from 10 (at 1000 iterations) down to 1 (at 10000).
        reward = - (1 / 1000) * amount_iterations + 11
    return reward
--- a/main.py
+++ b/main.py
@ -1,9 +1,10 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
-# EPSILON = 0.1618
+# EPSILON = 0.01
-EPSILON = 0.01
+EPSILON = 0.005
 # ALPHA = 0.01
 ALPHA = 0.2
 # GAMMA = 0.2713
@ -18,7 +19,8 @@ REWARD_ON_LOSE = -250
 plot_result = True
 show_game = False
 print(calc_time_reward(100000))
-oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 #gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)