before time reward
parent a965dc07ce
commit 909445135f
@@ -109,4 +109,9 @@ def mutation(population, MUTATION_RATE, GEN_SIZE):
             population[individual_index]["population"] = grey_to_bit(grey_to_mutate)

-        return population
+    return population
+
+
+def gen_to_params():
+    pass
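gen_to_params is added here only as a stub. Judging by its name and by the EPSILON/ALPHA/GAMMA constants tuned in main.py below, it will presumably decode an individual's bit genome into learning parameters. A purely hypothetical sketch — the genome layout, field widths, and scaling are assumptions, not taken from this commit:

def gen_to_params(bits):
    # Hypothetical decoder: split the bit genome into three equal fields
    # and normalise each to [0, 1] as (epsilon, alpha, gamma).
    def to_int(chunk):
        return int("".join(str(b) for b in chunk), 2)

    third = len(bits) // 3
    denom = 2 ** third - 1
    epsilon = to_int(bits[:third]) / denom
    alpha = to_int(bits[third:2 * third]) / denom
    gamma = to_int(bits[2 * third:]) / (2 ** (len(bits) - 2 * third) - 1)
    return epsilon, alpha, gamma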
@@ -129,3 +129,14 @@ def get_best_q_action(q_values, state):
+
+
+def get_random_direction():
+    return random.choice(list(Direction))
+
+
+def calc_time_reward(amount_iterations):
+    if amount_iterations < 1000:
+        return 10
+
+    if amount_iterations > 10000:
+        return 1
+
+    return - (1 / 1000) * amount_iterations + 11
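calc_time_reward is a continuous piecewise-linear ramp: a flat 10 below 1,000 iterations, a flat 1 above 10,000, and the line -(1/1000)*n + 11 in between, which meets both plateaus exactly (at n = 1000 it gives 10, at n = 10000 it gives 1). A quick self-check against the function as added above:

from ReinforcmentLearning.util import calc_time_reward

assert calc_time_reward(500) == 10      # below 1000 iterations: full time reward
assert calc_time_reward(1000) == 10.0   # -(1/1000)*1000 + 11 = 10, continuous at the left edge
assert calc_time_reward(5500) == 5.5    # midpoint of the linear ramp
assert calc_time_reward(10000) == 1.0   # -(1/1000)*10000 + 11 = 1, continuous at the right edge
assert calc_time_reward(20000) == 1     # above 10000 iterations: reward floor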
main.py (8 changes)
@@ -1,9 +1,10 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
+from ReinforcmentLearning.util import calc_time_reward


 # EPSILON = 0.1618
-EPSILON = 0.01
+# EPSILON = 0.01
+EPSILON = 0.005
 # ALPHA = 0.01
 ALPHA = 0.2
 # GAMMA = 0.2713
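EPSILON is presumably the exploration rate of an epsilon-greedy policy, halved here from 0.01 to 0.005 so the agent explores less. A minimal sketch of how it would combine get_random_direction and get_best_q_action from the diff above — the exact wiring inside oneTry is an assumption, not shown in this commit:

import random

def choose_action(q_values, state, epsilon):
    # With probability epsilon, explore with a uniformly random direction;
    # otherwise exploit the greedy action from the Q-table.
    if random.random() < epsilon:
        return get_random_direction()
    return get_best_q_action(q_values, state)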
@@ -18,7 +19,8 @@ REWARD_ON_LOSE = -250
 plot_result = True
 show_game = False

+print(calc_time_reward(100000))
+
-oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 #gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
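Since 100000 > 10000, the new print(calc_time_reward(100000)) should output the floor value 1 — apparently a quick sanity check of the reward cap, run in place of the now commented-out oneTry call.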