before time reward
parent
a965dc07ce
commit
909445135f
|
|
@ -110,3 +110,8 @@ def mutation(population, MUTATION_RATE, GEN_SIZE):
|
||||||
population[individual_index]["population"] = grey_to_bit(grey_to_mutate)
|
population[individual_index]["population"] = grey_to_bit(grey_to_mutate)
|
||||||
|
|
||||||
return population
|
return population
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def gen_to_params():
    """Placeholder with no implementation yet; calling it returns None.

    NOTE(review): the name suggests it is meant to turn a genome into
    parameters -- confirm the intended contract before filling it in.
    """
|
||||||
|
|
@ -129,3 +129,14 @@ def get_best_q_action(q_values, state):
|
||||||
|
|
||||||
def get_random_direction():
|
def get_random_direction():
|
||||||
return random.choice(list(Direction))
|
return random.choice(list(Direction))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def calc_time_reward(amount_iterations):
    """Return a reward that decreases as ``amount_iterations`` increases.

    The result is piecewise:

    * below 1000 iterations the reward is capped at 10,
    * above 10000 iterations the reward is floored at 1,
    * in between it falls linearly from 10 to 1.

    Args:
        amount_iterations: The iteration count supplied by the caller
            (any real number that supports comparison and arithmetic).

    Returns:
        The int ``10`` or ``1`` on the two capped ends, otherwise a float
        between 1 and 10.
    """
    if amount_iterations < 1000:
        return 10

    if amount_iterations > 10000:
        return 1

    # Straight line through (1000, 10) and (10000, 1), so the reward is
    # continuous at both thresholds.
    return -(1 / 1000) * amount_iterations + 11
|
||||||
|
|
|
||||||
8
main.py
8
main.py
|
|
@ -1,9 +1,10 @@
|
||||||
from GenTunic.gen_tuning import gen_tuning_main
|
from GenTunic.gen_tuning import gen_tuning_main
|
||||||
from ReinforcmentLearning.learning import multipleTries, oneTry
|
from ReinforcmentLearning.learning import multipleTries, oneTry
|
||||||
|
from ReinforcmentLearning.util import calc_time_reward
|
||||||
|
|
||||||
|
|
||||||
# EPSILON = 0.1618
|
# EPSILON = 0.01
|
||||||
EPSILON = 0.01
|
EPSILON = 0.005
|
||||||
# ALPHA = 0.01
|
# ALPHA = 0.01
|
||||||
ALPHA = 0.2
|
ALPHA = 0.2
|
||||||
# GAMMA = 0.2713
|
# GAMMA = 0.2713
|
||||||
|
|
@ -18,7 +19,8 @@ REWARD_ON_LOSE = -250
|
||||||
plot_result = True
|
plot_result = True
|
||||||
show_game = False
|
show_game = False
|
||||||
|
|
||||||
|
print(calc_time_reward(100000))
|
||||||
|
|
||||||
oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
|
# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
|
||||||
#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
|
#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
|
||||||
#gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
|
#gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
|
||||||
Loading…
Reference in New Issue