Added time reward, wins in last 700 runs and used that for new GA with fixed epsilon

parent 909445135f
commit 07685e42a6
@@ -11,10 +11,10 @@ POPULATIUON_SIZE = 200
 MUTATION_RATE = 0.05
 CROSSOVER_RATE = 0.65

-GEN_SIZE = 8 * 3
+GEN_SIZE = 16 * 2
-THRESHOLD = 0.8
+THRESHOLD = 0.95

-def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     start_time = time.time()

     population = create_population(POPULATIUON_SIZE, GEN_SIZE)
@@ -26,7 +26,7 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
     while True:
         print(f"Starting eveloution round {counter + 1}")
         #? Calc fitness
-        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)

         _, best_fitness = fintess_values
         best_fintess_values.append(best_fitness)
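For context on what one pass of this `while True` loop does: every round scores each individual with calc_population_fitness (fitness is now a win probability, so THRESHOLD = 0.95 presumably means "stop once the best individual wins 95 % of its evaluation runs") and then breeds the next generation. The selection, crossover, and mutation code lives elsewhere in the repository and is not part of this diff; the snippet below is only a generic sketch of one such round using the constants from this file, not the author's implementation.

```python
import random

import numpy as np

MUTATION_RATE = 0.05
CROSSOVER_RATE = 0.65

def evolve_one_round(population, fitnesses):
    """Generic GA generation: roulette selection, one-point crossover, bit-flip mutation.

    `population` is a list of 0/1 numpy arrays, `fitnesses` the matching win probabilities.
    Illustrative sketch only; not the repository's actual code.
    """
    probs = np.array(fitnesses, dtype=float)
    probs = probs / probs.sum()
    next_generation = []
    while len(next_generation) < len(population):
        # Roulette-wheel selection: higher win probability -> more likely parent.
        i, j = np.random.choice(len(population), size=2, p=probs)
        a, b = population[i].copy(), population[j].copy()
        if random.random() < CROSSOVER_RATE:
            # One-point crossover: swap the tails of the two genes.
            point = random.randrange(1, len(a))
            a[point:], b[point:] = b[point:].copy(), a[point:].copy()
        for child in (a, b):
            # Bit-flip mutation: flip each gene position with probability MUTATION_RATE.
            flips = np.random.random(len(child)) < MUTATION_RATE
            child[flips] = 1 - child[flips]
            next_generation.append(child)
    return next_generation[:len(population)]
```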
@@ -56,8 +56,8 @@ def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):

     print("\n=== BEST PARAMETERS ===")
     gen = population[best_fintess_index]["population"]
-    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
+    parameter_names = ["Alpha: ", "Gamma: "]
-    parameters = [project_bit(x) for x in np.split(gen, 3)]
+    parameters = [project_bit(x) for x in np.split(gen, 2)]
     for index, name in enumerate(parameter_names):
         print(f"{name}{parameters[index]}")

@@ -16,14 +16,14 @@ def create_population(size, GEN_SIZE):
     return np.array(population_propability)


-def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON):
     population_fitness_sum = 0

     for i, individual in enumerate(population_propability):
         gen = individual["population"]
-        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
+        alpha, gamma = [project_bit(x) for x in np.split(gen, 2)]
-        _, multiple_tries_wins = multipleTries(alpha, epsilon, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        _, multiple_tries_win_prob = multipleTries(EPSILON, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-        multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
+        # multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
         fitness = np.array(multiple_tries_win_prob).mean()

         individual["probability"] = fitness
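With GEN_SIZE = 16 * 2 each gene is a 32-bit array that np.split breaks into two 16-bit halves, one per remaining tunable parameter (alpha and gamma); epsilon is no longer encoded in the gene but passed in as the fixed EPSILON. The repository's project_bit is not shown in this diff, so the decoder below is only an assumed sketch of one common choice: read the bits as an unsigned integer and scale it into [0, 1].

```python
import numpy as np

def project_bit_sketch(bits: np.ndarray) -> float:
    """Assumed decoder: map a bit array to a float in [0, 1].

    NOT the repository's project_bit, just a plausible stand-in for illustration.
    """
    value = int("".join(str(int(b)) for b in bits), 2)
    return value / (2 ** len(bits) - 1)

gen = np.random.randint(0, 2, size=16 * 2)             # one individual's 32-bit gene
alpha, gamma = [project_bit_sketch(x) for x in np.split(gen, 2)]
print(f"alpha={alpha:.4f}, gamma={gamma:.4f}")
```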
@@ -2,7 +2,7 @@ import pygame
 import math
 import os

-from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action, initial_q_fill
+from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill

 # Initialize pygame

@@ -208,6 +208,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho

         #? -------------------------MY CODE-----------------------------------
         #? half reward

         # cookie_counter = 0

         # for y, row in enumerate(labyrinth):
@@ -224,11 +225,15 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho

         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
-            if show_game:
-                print("You Win! Pacman ate all the cookies.")
+            # time_reward = calc_time_reward(iter)
+            # reward = REWARD_ON_WIN * time_reward
             reward = REWARD_ON_WIN
             running = False

+            if show_game:
+                # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
+                print(f"You Win! Took {iter} iterations")

         #? -------------------------MY CODE-----------------------------------
         if not running:
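The commented-out lines mark where the time reward would be applied: scale REWARD_ON_WIN by a factor that shrinks as the number of iterations grows, so faster wins earn more. calc_time_reward itself is not included in this diff, so the shape below (a linear decay with a floor, capped at a large iteration count) is only an assumed illustration, not the repository's implementation.

```python
def calc_time_reward_sketch(iterations: int, max_iterations: int = 100_000) -> float:
    """Assumed time reward: 1.0 for an instant win, decaying linearly toward a
    small floor as the win takes longer. Illustrative only."""
    factor = 1.0 - min(iterations, max_iterations) / max_iterations
    return max(factor, 0.1)  # keep some reward even for very slow wins

REWARD_ON_WIN = 500                      # assumed value, just for the example
time_reward = calc_time_reward_sketch(250)
reward = REWARD_ON_WIN * time_reward     # mirrors the commented-out line above
print(f"time_reward={time_reward:.4f}, reward={reward:.1f}")
```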
@@ -12,9 +12,15 @@ def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WI

     for x in range(AMOUNT_TRIES):
         plot_result = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result)
+        show_game = False
+        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)

+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_probalitiy = (wins_in_last_700 / 700)

         cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(amount_wins)
+        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")

     return cookies_per_try, wins_per_try
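The fitness signal appended to wins_per_try is therefore the fraction of wins (a run that collects all 20 cookies) over the last 700 of the AMOUNT_RUNS training runs, so the agent is scored on its converged behaviour rather than on early exploration episodes. A minimal, self-contained sketch of that computation, using made-up dummy run results:

```python
# Dummy stand-in for cookies_per_run: 1000 runs, where a run counts as a win
# when all 20 cookies were eaten.
cookies_per_run = [12] * 300 + [20, 15, 20, 20, 18] * 140   # last 700 runs hold 420 wins

last_700_results = cookies_per_run[-700:]
wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
win_probability = wins_in_last_700 / 700

print(f"Win probability over the last 700 runs: {win_probability:.2f}")  # 0.60
```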
@@ -32,9 +38,14 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
     wins = sum(1 for result in cookies_per_run if result == 20)

-    print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")

     if plot_result:
+        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")

+        last_700_results = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
+        win_percentage = (wins_in_last_700 / 700) * 100
+        print(f"Win percentage in the last 700: {win_percentage:.2f}%\n")

         plot_results(cookies_per_run, iterations)

     return cookies_per_run, wins
main.py (9 changed lines)
@@ -3,11 +3,9 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward


-# EPSILON = 0.01
+EPSILON = 0.01
-EPSILON = 0.005
+# EPSILON = 0.005
-# ALPHA = 0.01
 ALPHA = 0.2
-# GAMMA = 0.2713
 GAMMA = 0.8

 AMOUNT_RUNS = 5000
@@ -19,8 +17,7 @@ REWARD_ON_LOSE = -250
 plot_result = True
 show_game = False

-print(calc_time_reward(100000))

 # oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
 #multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-#gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)