diff --git a/GenTunic/gen_util.py b/GenTunic/gen_util.py
index 56c4d7f..8c95e87 100644
--- a/GenTunic/gen_util.py
+++ b/GenTunic/gen_util.py
@@ -109,4 +109,9 @@ def mutation(population, MUTATION_RATE, GEN_SIZE):
 
         population[individual_index]["population"] = grey_to_bit(grey_to_mutate)
 
-    return population
\ No newline at end of file
+    return population
+
+
+
+def gen_to_params():
+    pass
\ No newline at end of file
diff --git a/ReinforcmentLearning/util.py b/ReinforcmentLearning/util.py
index 35ac1ba..dc670e2 100644
--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@@ -129,3 +129,14 @@ def get_best_q_action(q_values, state):
 
 def get_random_direction():
     return random.choice(list(Direction))
+
+
+
+def calc_time_reward(amount_iterations):
+    if amount_iterations < 1000:
+        return 10
+
+    if amount_iterations > 10000:
+        return 1
+
+    return - (1 / 1000) * amount_iterations + 11
diff --git a/main.py b/main.py
index f14f8d5..d53ed0a 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,10 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
+from ReinforcmentLearning.util import calc_time_reward
 
 
-# EPSILON = 0.1618
-EPSILON = 0.01
+# EPSILON = 0.01
+EPSILON = 0.005
 # ALPHA = 0.01
 ALPHA = 0.2
 # GAMMA = 0.2713
@@ -18,7 +19,8 @@ REWARD_ON_LOSE = -250
 
 plot_result = True
 show_game = False
 
+print(calc_time_reward(100000))
-oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 #gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
\ No newline at end of file
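Note on the new reward schedule: calc_time_reward (copied verbatim from the diff above into the standalone sketch below) is a piecewise-linear function of episode length. It plateaus at 10 below 1,000 iterations, at 1 above 10,000, and decays linearly in between; the linear segment -(1/1000)*n + 11 meets both plateaus at the boundaries (10 at n = 1000, 1 at n = 10000). The comments and the boundary-value checks are illustrative additions, not part of the change itself.

# Sketch: the piecewise time-reward introduced in ReinforcmentLearning/util.py,
# reproduced here so the boundary behaviour can be checked in isolation.
def calc_time_reward(amount_iterations):
    # Short episodes (< 1000 iterations) get the full reward.
    if amount_iterations < 1000:
        return 10
    # Very long episodes (> 10000 iterations) get the minimum reward.
    if amount_iterations > 10000:
        return 1
    # Linear decay between the plateaus: 10 at 1000 iterations, 1 at 10000.
    return - (1 / 1000) * amount_iterations + 11

# The linear segment joins the plateaus continuously at both ends.
print(calc_time_reward(999))     # 10  (plateau)
print(calc_time_reward(1000))    # 10.0 (start of linear segment)
print(calc_time_reward(10000))   # 1.0  (end of linear segment)
print(calc_time_reward(100000))  # 1   (plateau; the value main.py now prints)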