before time reward
parent a965dc07ce
commit 909445135f
@@ -109,4 +109,9 @@ def mutation(population, MUTATION_RATE, GEN_SIZE):
             population[individual_index]["population"] = grey_to_bit(grey_to_mutate)

-        return population
+    return population
+
+
+def gen_to_params():
+    pass
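gen_to_params is added here only as a stub. Judging by its name and by the EPSILON/ALPHA/GAMMA constants tuned in main.py below, it will presumably decode an individual's bit genome into learning parameters. A purely hypothetical sketch — the genome layout, field widths, and scaling are assumptions, not taken from this commit:

def gen_to_params(bits):
    # Hypothetical decoder: split the bit genome into three equal fields
    # and normalise each to [0, 1] as (epsilon, alpha, gamma).
    def to_int(chunk):
        return int("".join(str(b) for b in chunk), 2)

    third = len(bits) // 3
    denom = 2 ** third - 1
    epsilon = to_int(bits[:third]) / denom
    alpha = to_int(bits[third:2 * third]) / denom
    gamma = to_int(bits[2 * third:]) / (2 ** (len(bits) - 2 * third) - 1)
    return epsilon, alpha, gamma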
@@ -129,3 +129,14 @@ def get_best_q_action(q_values, state):
+
+
+def get_random_direction():
+    return random.choice(list(Direction))
+
+
+def calc_time_reward(amount_iterations):
+    if amount_iterations < 1000:
+        return 10
+
+    if amount_iterations > 10000:
+        return 1
+
+    return - (1 / 1000) * amount_iterations + 11
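calc_time_reward is a continuous piecewise-linear ramp: a flat 10 below 1,000 iterations, a flat 1 above 10,000, and the line -(1/1000)*n + 11 in between, which meets both plateaus exactly (at n = 1000 it gives 10, at n = 10000 it gives 1). A quick self-check against the function as added above:

from ReinforcmentLearning.util import calc_time_reward

assert calc_time_reward(500) == 10      # below 1000 iterations: full time reward
assert calc_time_reward(1000) == 10.0   # -(1/1000)*1000 + 11 = 10, continuous at the left edge
assert calc_time_reward(5500) == 5.5    # midpoint of the linear ramp
assert calc_time_reward(10000) == 1.0   # -(1/1000)*10000 + 11 = 1, continuous at the right edge
assert calc_time_reward(20000) == 1     # above 10000 iterations: reward floor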
main.py (8 changes)
@@ -1,9 +1,10 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
+from ReinforcmentLearning.util import calc_time_reward


 # EPSILON = 0.1618
-EPSILON = 0.01
+# EPSILON = 0.01
+EPSILON = 0.005
 # ALPHA = 0.01
 ALPHA = 0.2
 # GAMMA = 0.2713
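EPSILON is presumably the exploration rate of an epsilon-greedy policy, halved here from 0.01 to 0.005 so the agent explores less. A minimal sketch of how it would combine get_random_direction and get_best_q_action from the diff above — the exact wiring inside oneTry is an assumption, not shown in this commit:

import random

def choose_action(q_values, state, epsilon):
    # With probability epsilon, explore with a uniformly random direction;
    # otherwise exploit the greedy action from the Q-table.
    if random.random() < epsilon:
        return get_random_direction()
    return get_best_q_action(q_values, state)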
@@ -18,7 +19,8 @@ REWARD_ON_LOSE = -250
 plot_result = True
 show_game = False

+print(calc_time_reward(100000))
+
-oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 #gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
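Since 100000 > 10000, the new print(calc_time_reward(100000)) should output the floor value 1 — apparently a quick sanity check of the reward cap, run in place of the now commented-out oneTry call.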