Added GA to tune constants
parent
0e154fc55a
commit
738b122f43
@ -0,0 +1,37 @@
import numpy as np


def project_bit(bit):
    # Map a bit string onto a float in [q_min, q_max]
    n = len(bit)
    q_min = 0.1
    q_max = 0.5

    reverse_bit = np.flip(bit)
    dec = np.uint64(0)

    # Interpret the (reversed) bits as an unsigned integer
    for i in range(n):
        dec += np.uint64(2)**i * reverse_bit[i]

    q = q_min + ((q_max - q_min) / (2**n - 1)) * dec

    return q


def bit_to_grey(bit):
    # Convert a plain binary array to its Gray-code representation
    grey = [bit[0]]

    for i in range(1, len(bit)):
        grey.append(bit[i-1] ^ bit[i])

    return np.array(grey)


def grey_to_bit(grey):
    # Convert a Gray-code array back to plain binary
    bit = [grey[0]]

    for i in range(1, len(grey)):
        bit.append(bit[i-1] ^ grey[i])

    return np.array(bit)
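A quick sanity check of these helpers (a hypothetical usage sketch, not part of the committed files): an all-ones 8-bit segment should project to q_max, and a Gray-code encode/decode round trip should reproduce the original bits.

import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit

segment = np.ones(8, dtype=np.int32)
print(project_bit(segment))        # dec = 255 -> 0.1 + (0.4 / 255) * 255 ≈ 0.5 (q_max)

bits = np.array([0, 1, 1, 0, 1, 0, 0, 1])
assert np.array_equal(grey_to_bit(bit_to_grey(bits)), bits)   # round trip restores the bits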
@ -0,0 +1,64 @@
import math
import time

from matplotlib import pyplot as plt
import numpy as np

from GenTunic.gen_math import project_bit
from GenTunic.gen_util import calc_population_fitness, create_population, crossover, mutation, tournament_selection


POPULATION_SIZE = 200
MUTATION_RATE = 0.05
CROSSOVER_RATE = 0.65

GEN_SIZE = 8 * 3   # 8 bits per parameter: alpha, epsilon, gamma
THRESHOLD = 0.5


def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    start_time = time.time()

    population = create_population(POPULATION_SIZE, GEN_SIZE)

    best_fitness_values = []
    best_fitness = 0

    while True:
        #? Calc fitness
        population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)

        _, best_fitness = fitness_values
        best_fitness_values.append(best_fitness)
        print(best_fitness)

        if best_fitness > THRESHOLD:
            print("Breaking")
            break

        #? Selection
        amount_selections = math.floor((1 - CROSSOVER_RATE) * len(population_probability))
        amount_crossover = POPULATION_SIZE - amount_selections

        new_population = tournament_selection(population_probability, amount_selections)

        #? Crossover
        new_population = crossover(population_probability, new_population, amount_crossover, GEN_SIZE)

        #? Mutation
        population = mutation(new_population, MUTATION_RATE, GEN_SIZE)

    population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
    best_fitness_index, best_fitness = fitness_values

    print("\n=== BEST PARAMETERS ===")
    gen = population[best_fitness_index]["population"]
    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
    parameters = [project_bit(x) for x in np.split(gen, 3)]
    for index, name in enumerate(parameter_names):
        print(f"{name}{parameters[index]}")

    time_amount = time.time() - start_time
    print(f"\nTook {time_amount}s")

    plt.plot(best_fitness_values)
    plt.show()
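For intuition about how those constants split a generation (an illustrative calculation, not part of the committed files): with a population of 200 and a crossover rate of 0.65, roughly a third of the next generation is carried over by tournament selection and the rest is filled with crossover offspring.

import math

POPULATION_SIZE = 200
CROSSOVER_RATE = 0.65

amount_selections = math.floor((1 - CROSSOVER_RATE) * POPULATION_SIZE)  # 70 individuals kept via tournament selection
amount_crossover = POPULATION_SIZE - amount_selections                  # 130 slots filled with crossover offspring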
@ -0,0 +1,106 @@
import random
import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit
from ReinforcmentLearning.learning import multipleTries


def create_population(size, GEN_SIZE):
    # Structured array: each individual holds its bit string and its (normalized) fitness
    dtype = [("population", np.int32, (GEN_SIZE,)), ("probability", np.float64)]
    population_probability = np.zeros(size, dtype=dtype)

    for i in range(size):
        gen = np.random.randint(0, 2, GEN_SIZE)
        population_probability[i] = (gen, 0)

    return population_probability


def calc_population_fitness(population_probability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    population_fitness_sum = 0

    for individual in population_probability:
        gen = individual["population"]
        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
        # multipleTries expects (EPSILON, ALPHA, GAMMA, ...), so epsilon is passed first
        _, multiple_tries_win_prob = multipleTries(epsilon, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        fitness = np.array(multiple_tries_win_prob).mean()

        individual["probability"] = fitness
        population_fitness_sum += fitness

    best_fitness_index = np.argmax(population_probability["probability"])
    best_fitness = population_probability[best_fitness_index]["probability"]

    population_probability["probability"] = population_probability["probability"] / population_fitness_sum

    return population_probability, (best_fitness_index, best_fitness)


def tournament_selection(population_probability, amount_selections):
    selected_population = []

    # Elitism: always keep the current best individual
    best_fitness_index = np.argmax(population_probability["probability"])
    selected_population.append(population_probability[best_fitness_index])

    while len(selected_population) < amount_selections:
        pair_indices = random.sample(range(len(population_probability)), 2)

        if population_probability[pair_indices[0]]["probability"] > population_probability[pair_indices[1]]["probability"]:
            selected_population.append(population_probability[pair_indices[0]])
        else:
            selected_population.append(population_probability[pair_indices[1]])

    return np.array(selected_population)


def crossover(population_probability, selected_population, amount_crossover, GEN_SIZE):
    crossover_population = tournament_selection(population_probability, amount_crossover)

    select_one_parent = False

    if amount_crossover % 2 == 1:
        amount_crossover -= 1
        select_one_parent = True

    for i in range(0, amount_crossover, 2):
        # Single-point crossover between parent i ("mother") and parent i+1 ("father")
        crossover_point = np.random.randint(1, GEN_SIZE)

        mother_a = crossover_population[i]["population"][:crossover_point]
        mother_b = crossover_population[i]["population"][crossover_point:]

        father_a = crossover_population[i+1]["population"][:crossover_point]
        father_b = crossover_population[i+1]["population"][crossover_point:]

        child_one = np.empty(1, dtype=selected_population.dtype)
        child_one["population"] = np.concatenate((mother_a, father_b))
        child_one["probability"] = 0

        child_two = np.empty(1, dtype=selected_population.dtype)
        child_two["population"] = np.concatenate((father_a, mother_b))
        child_two["probability"] = 0

        selected_population = np.concatenate((selected_population, child_one))
        selected_population = np.concatenate((selected_population, child_two))

        is_last_iteration = (i >= amount_crossover - 2)
        if is_last_iteration and select_one_parent:
            # Odd number of crossover slots: carry the unpaired parent over unchanged
            selected_population = np.append(selected_population, crossover_population[-1])

    return selected_population


def mutation(population, MUTATION_RATE, GEN_SIZE):
    amount_mutation = len(population) * MUTATION_RATE
    mutation_indices = np.random.choice(len(population), int(amount_mutation), replace=False)

    for individual_index in mutation_indices:
        # Flip one bit of the gene in Gray-code space, then map back to plain binary
        bit_index = np.random.randint(0, GEN_SIZE)
        grey = bit_to_grey(population[individual_index]["population"])
        grey[bit_index] ^= 1
        population[individual_index]["population"] = grey_to_bit(grey)

    return population
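A minimal decoding sketch (hypothetical usage, mirroring what calc_population_fitness does internally): each 24-bit chromosome splits into three 8-bit segments, and project_bit maps each segment into the [0.1, 0.5] parameter range.

import numpy as np

from GenTunic.gen_math import project_bit
from GenTunic.gen_util import create_population

population = create_population(size=4, GEN_SIZE=24)

gene = population[0]["population"]                               # 24-bit chromosome of the first individual
alpha, epsilon, gamma = [project_bit(seg) for seg in np.split(gene, 3)]
print(alpha, epsilon, gamma)                                     # three values in [0.1, 0.5]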
@ -2,7 +2,7 @@ import pygame
import math
import os

-from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
+from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action

# Initialize pygame
pygame.init()

@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth):

# Main game function
-def run_game(q_values, EPSILON, ALPHA, GAMMA):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
    clock = pygame.time.Clock()
    labyrinth = labyrinth_init.copy()

@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
        if pacman.x == ghost.x and pacman.y == ghost.y:
            # print("Game Over! The ghost caught Pacman.")
            running = False
-           reward = -10
+           reward = REWARD_ON_LOSE

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":

@ -179,10 +179,11 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            # print("You Win! Pacman ate all the cookies.")
-           reward = 10
+           reward = REWARD_ON_WIN
            running = False

        # Draw the labyrinth, pacman, and ghost

        #? -------------------------MY CODE-----------------------------------
        if not running:
            new_state = state

@ -206,6 +207,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
            counter += 1
            return 20-counter
        #? -------------------------MY CODE-----------------------------------

        draw_labyrinth(labyrinth)
        pacman.draw()
        ghost.draw()
@ -0,0 +1,62 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game
from ReinforcmentLearning.util import initial_q_fill


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    cookies_per_try = []
    wins_per_try = []

    for x in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)
        # print(f"Finished try {x+1}\n")

    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_run.append(amount_cookies_ate)

    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    # print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")

    return cookies_per_run, wins


def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    # print("---------DONE---------")
    # print("Used: ")
    # print(f"Epsilon: {EPSILON}")
    # print(f"Gamma: {GAMMA}")
    # print(f"Alpha: {ALPHA}")

    # print("---------SUMMARY---------")
    print(f"Average win percentage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")
    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")
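The docstring above describes the q-table layout; here is a small illustration (the specific Direction member names used below are assumptions, not taken from the repo):

from ReinforcmentLearning.util import Direction

q_values = {}

# One entry per (state, action) pair; state = (x_dist_to_ghost, y_dist_to_ghost, next_cookie_direction)
state = (3, -1, Direction.UP)              # Direction.UP is an assumed member name
q_values[(state, Direction.LEFT)] = 0.0    # Direction.LEFT is an assumed member name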
@ -9,8 +9,8 @@ class Direction(Enum):
def initial_q_fill(q_values):
-    for x in range(8):
-        for y in range(3):
+    for x in range(-7, 8):
+        for y in range(-2, 3):
            for cookie_direction in Direction:
                for action in Direction:
                    state = (x, y, cookie_direction)

@ -29,8 +29,8 @@ def get_start_state():

def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
-    x_ghost_dist = abs(pac_x - ghost_x)
-    y_ghost_dist = abs(pac_y - ghost_y)
+    x_ghost_dist = pac_x - ghost_x
+    y_ghost_dist = pac_y - ghost_y

    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
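To illustrate the switch from absolute to signed ghost distances (hypothetical positions, not from the repo): the sign now tells the agent which side the ghost is on, which is why initial_q_fill above iterates over range(-7, 8) and range(-2, 3) instead of the non-negative ranges.

pac_x, pac_y = 3, 2
ghost_x, ghost_y = 5, 1

x_ghost_dist = pac_x - ghost_x   # -2: ghost is two tiles away in the positive x direction; abs() would have given 2
y_ghost_dist = pac_y - ghost_y   #  1: signed distance along y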
@ -1,11 +0,0 @@
import numpy as np

from learning import runTry


EPSILON = 0.5
ALPHA = 0.5
GAMMA = 0.5

STEPS = 10


for alpha in np.linspace(0.1, 0.5, 10):
    runTry(EPSILON, alpha, GAMMA)
data.txt
@ -1,16 +1,20 @@
-E: 0,1; A: 0.1; G: 0.9; 200/5000
-E: 0,1; A: 0.1; G: 0.9; 150/5000
+# High Gamma
+E: 0.1; A: 0.1; G: 0.9; 0.04%
+E: 0.1; A: 0.1; G: 0.9; 0.03%

-E: 0,5; A: 0.1; G: 0.9; 0.0034%
-E: 0,5; A: 0.1; G: 0.9; 0.002%
+E: 0.5; A: 0.1; G: 0.9; 0.0034%
+E: 0.5; A: 0.1; G: 0.9; 0.002%

-E: 0,5; A: 0.5; G: 0.5; 0.0012%
-E: 0,5; A: 0.5; G: 0.5; 0.0002%
-E: 0,5; A: 0.5; G: 0.5; 0.001%
+E: 0.5; A: 0.5; G: 0.5; 0.0012%
+E: 0.5; A: 0.5; G: 0.5; 0.0002%
+E: 0.5; A: 0.5; G: 0.5; 0.001%

-E: 0,5; A: 0.3; G: 0.5; 0.0018%
-E: 0,5; A: 0.3; G: 0.5; 0.0022%
-E: 0,5; A: 0.3; G: 0.5; 0.0014%
-E: 0,5; A: 0.3; G: 0.5; 0.0016%
-E: 0,5; A: 0.3; G: 0.5; 0.0022%
+E: 0.5; A: 0.3; G: 0.5; 0.0018%
+E: 0.5; A: 0.3; G: 0.5; 0.0022%
+E: 0.5; A: 0.3; G: 0.5; 0.0014%
+E: 0.5; A: 0.3; G: 0.5; 0.0016%
+E: 0.5; A: 0.3; G: 0.5; 0.0022%

+# AFTER ABSOLUT CHANGE
learning.py
@ -1,58 +0,0 @@
import matplotlib.pyplot as plt
import pandas as pd

from game import run_game
from util import initial_q_fill


EPSILON = 0.5
ALPHA = 0.3
GAMMA = 0.8

def runTry(EPSILON, ALPHA, GAMMA):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    AMOUNT_RUNS = 5000
    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
        cookies_per_run.append(amount_cookies_ate)
        # print(f"Run {x}: {amount_cookies_ate} cookies ate\n")

    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    print(f"Win percentage: {wins/AMOUNT_RUNS}%")

    return cookies_per_run


cookies_per_run = runTry(EPSILON, ALPHA, GAMMA)


window_size = 100  # Adjust based on your needs
rolling_avg = pd.Series(cookies_per_run).rolling(window=window_size, center=True).mean()

plt.figure(figsize=(12, 6))
plt.plot(cookies_per_run, alpha=0.2, label='Raw Data', linewidth=0.5, color='gray')
plt.plot(rolling_avg, label=f'{window_size}-point Moving Average',
         linewidth=2, color='blue')
plt.title("Data with Rolling Average")
plt.xlabel("Index")
plt.ylabel("Value")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
@ -0,0 +1,17 @@
from GenTunic.gen_tuning import gen_tuning_main
from ReinforcmentLearning.learning import multipleTries


EPSILON = 0.3
ALPHA = 0.3
GAMMA = 0.8

AMOUNT_RUNS = 5000
AMOUNT_TRIES = 10

REWARD_ON_WIN = 10
REWARD_ON_LOSE = -10


#multipleTries(EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)