Added GA to tune constants
parent
0e154fc55a
commit
738b122f43
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,37 @@
import numpy as np


def project_bit(bit):
    # Decode a bit string into a real value in [q_min, q_max].
    n = len(bit)
    q_min = 0.1
    q_max = 0.5

    reverse_bit = np.flip(bit)
    dec = np.uint64(0)

    # Interpret the (reversed) bits as an unsigned integer.
    for i in range(n):
        dec += np.uint64(2)**i * reverse_bit[i]

    # Scale the integer linearly onto [q_min, q_max].
    q = q_min + ((q_max - q_min) / (2**n - 1)) * dec

    return q


def bit_to_grey(bit):
    # Convert a binary-coded array to its Gray-code representation.
    grey = [bit[0]]

    for i in range(1, len(bit)):
        grey.append(bit[i-1] ^ bit[i])

    return np.array(grey)


def grey_to_bit(grey):
    # Convert a Gray-coded array back to plain binary.
    bit = [grey[0]]

    for i in range(1, len(grey)):
        bit.append(bit[i-1] ^ grey[i])

    return np.array(bit)
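A quick usage sketch (not part of the commit) showing the projection bounds and the Gray-code round trip; the 8-bit segment length matches GEN_SIZE = 8 * 3 used below, and the import path assumes the repo layout:

import numpy as np
from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit  # assumes the repo root is on sys.path

print(project_bit(np.zeros(8, dtype=np.int32)))  # 0.1, all zeros decode to q_min
print(project_bit(np.ones(8, dtype=np.int32)))   # 0.5, all ones decode to q_max

gene = np.random.randint(0, 2, 8)
assert np.array_equal(grey_to_bit(bit_to_grey(gene)), gene)  # encode/decode round-trips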
@ -0,0 +1,64 @@
import math
import time
from matplotlib import pyplot as plt
import numpy as np
from GenTunic.gen_math import project_bit
from GenTunic.gen_util import calc_population_fitness, create_population, crossover, mutation, turnament_selection


POPULATIUON_SIZE = 200
MUTATION_RATE = 0.05
CROSSOVER_RATE = 0.65

GEN_SIZE = 8 * 3
THRESHOLD = 0.5


def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    start_time = time.time()

    population = create_population(POPULATIUON_SIZE, GEN_SIZE)

    best_fintess_values = []
    best_fitness = 0

    while True:
        #? Calc fitness
        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)

        _, best_fitness = fintess_values
        best_fintess_values.append(best_fitness)
        print(best_fitness)

        if best_fitness > THRESHOLD:
            print("Breaking")
            break

        #? Selection
        amount_selections = math.floor((1 - CROSSOVER_RATE) * len(population_propability))
        amount_crossover = POPULATIUON_SIZE - amount_selections

        new_population = turnament_selection(population_propability, amount_selections)

        #? Crossover
        new_population = crossover(population_propability, new_population, amount_crossover, GEN_SIZE)

        #? Mutation
        population = mutation(new_population, MUTATION_RATE, GEN_SIZE)

    population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
    best_fintess_index, best_fitness = fintess_values

    print("\n=== BEST PARAMETERS ===")
    gen = population[best_fintess_index]["population"]
    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
    parameters = [project_bit(x) for x in np.split(gen, 3)]
    for index, name in enumerate(parameter_names):
        print(f"{name}{parameters[index]}")

    time_amount = time.time() - start_time
    print(f"\nTook {time_amount}s")

    plt.plot(best_fintess_values)
    plt.show()
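For a sense of the split (worked numbers, not part of the commit): with POPULATIUON_SIZE = 200 and CROSSOVER_RATE = 0.65, and since len(population_propability) equals POPULATIUON_SIZE, each generation keeps 70 individuals via tournament selection and fills the remaining 130 through crossover:

import math

POPULATIUON_SIZE = 200
CROSSOVER_RATE = 0.65

amount_selections = math.floor((1 - CROSSOVER_RATE) * POPULATIUON_SIZE)  # 70 kept by selection
amount_crossover = POPULATIUON_SIZE - amount_selections                  # 130 created by crossover
print(amount_selections, amount_crossover)  # 70 130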
@ -0,0 +1,106 @@
import random
import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit
from ReinforcmentLearning.learning import multipleTries


def create_population(size, GEN_SIZE):
    # Structured array: each entry holds a bit-string gene and its (later normalised) fitness.
    dtype = [("population", np.int32, (GEN_SIZE,)), ("probability", np.float64)]
    population_propability = np.zeros(size, dtype=dtype)

    for i in range(size):
        gen = np.random.randint(0, 2, GEN_SIZE)
        population_propability[i] = (gen, 0)

    return np.array(population_propability)


def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    population_fitness_sum = 0

    for individual in population_propability:
        gen = individual["population"]
        # Gene layout: 8 bits each for alpha, epsilon, gamma.
        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
        # multipleTries expects (EPSILON, ALPHA, GAMMA, ...), so pass epsilon first.
        _, multiple_tries_win_prob = multipleTries(epsilon, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        # wins per try (raw counts), averaged over the tries
        fitness = np.array(multiple_tries_win_prob).mean()

        individual["probability"] = fitness
        population_fitness_sum += fitness

    best_fitness_index = np.argmax(population_propability["probability"])
    best_fitness = population_propability[best_fitness_index]["probability"]

    population_propability["probability"] = population_propability["probability"] / population_fitness_sum

    return population_propability, (best_fitness_index, best_fitness)


def turnament_selection(population_propability, amount_selections):
    selected_population = []

    # Elitism: always keep the current best individual.
    best_fitness_index = np.argmax(population_propability["probability"])
    selected_population.append(population_propability[best_fitness_index])

    while len(selected_population) < amount_selections:
        pair_indecies = random.sample(range(len(population_propability)), 2)

        if population_propability[pair_indecies[0]]["probability"] > population_propability[pair_indecies[1]]["probability"]:
            selected_population.append(population_propability[pair_indecies[0]])
        else:
            selected_population.append(population_propability[pair_indecies[1]])

    return np.array(selected_population)
def crossover(population_propability, selected_population, amount_crossover, GEN_SIZE):
    # Pick parents by tournament, then create children via single-point crossover.
    crossover_population = turnament_selection(population_propability, amount_crossover)

    select_one_parent = False

    if amount_crossover % 2 == 1:
        amount_crossover -= 1
        select_one_parent = True

    for i in range(0, amount_crossover, 2):
        crossover_point = np.random.randint(1, GEN_SIZE)

        mother_a = crossover_population[i]["population"][:crossover_point]
        mother_b = crossover_population[i]["population"][crossover_point:]

        father_a = crossover_population[i+1]["population"][:crossover_point]
        father_b = crossover_population[i+1]["population"][crossover_point:]

        child_one = np.empty(1, dtype=selected_population.dtype)
        child_one["population"] = np.concatenate((mother_a, father_b))
        child_one["probability"] = 0

        child_two = np.empty(1, dtype=selected_population.dtype)
        # Keep gene positions aligned: head of the father, tail of the mother.
        child_two["population"] = np.concatenate((father_a, mother_b))
        child_two["probability"] = 0

        selected_population = np.concatenate((selected_population, child_one))
        selected_population = np.concatenate((selected_population, child_two))

        is_last_iteration = (i >= amount_crossover - 2)
        if is_last_iteration and select_one_parent:
            # Odd number of children requested: carry one parent over unchanged.
            selected_population = np.append(selected_population, crossover_population[i])

    return selected_population


def mutation(population, MUTATION_RATE, GEN_SIZE):
    amount_mutation = len(population) * MUTATION_RATE
    mutation_indecies = np.random.choice(len(population), int(amount_mutation), replace=False)

    for individual_index in mutation_indecies:
        # Flip one bit of the Gray-coded gene, then map back to plain binary.
        bit_index = np.random.randint(0, GEN_SIZE)
        grey = bit_to_grey(population[individual_index]["population"])
        grey[bit_index] ^= 1
        population[individual_index]["population"] = grey_to_bit(grey)

    return population
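To make the single-point scheme concrete, a small hand-run example (not part of the commit) with two 6-bit parents and a cut after position 2; each child keeps head positions from one parent and tail positions from the other:

import numpy as np

mother = np.array([1, 1, 1, 1, 1, 1])
father = np.array([0, 0, 0, 0, 0, 0])
crossover_point = 2  # example cut position

child_one = np.concatenate((mother[:crossover_point], father[crossover_point:]))  # [1 1 0 0 0 0]
child_two = np.concatenate((father[:crossover_point], mother[crossover_point:]))  # [0 0 1 1 1 1]
print(child_one, child_two)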
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -2,7 +2,7 @@ import pygame
import math
import os

from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action

# Initialize pygame
pygame.init()

@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth):

# Main game function
def run_game(q_values, EPSILON, ALPHA, GAMMA):
def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
    clock = pygame.time.Clock()
    labyrinth = labyrinth_init.copy()

@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
if pacman.x == ghost.x and pacman.y == ghost.y:
    # print("Game Over! The ghost caught Pacman.")
    running = False
    reward = -10
    reward = REWARD_ON_LOSE

# Eat cookies
if labyrinth[pacman.y][pacman.x] == ".":

@ -179,10 +179,11 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
# Check if all cookies are eaten (game over)
if all("." not in row for row in labyrinth):
    # print("You Win! Pacman ate all the cookies.")
    reward = 10
    reward = REWARD_ON_WIN
    running = False

# Draw the labyrinth, pacman, and ghost

#? -------------------------MY CODE-----------------------------------
if not running:
    new_state = state

@ -206,6 +207,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
counter += 1
return 20-counter
#? -------------------------MY CODE-----------------------------------

draw_labyrinth(labyrinth)
pacman.draw()
ghost.draw()
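For orientation (not shown in these hunks): EPSILON, ALPHA and GAMMA are the usual tabular Q-learning knobs, i.e. exploration rate, learning rate and discount factor. A generic sketch of the update they parameterise, not the repository's exact code:

def q_update(q_values, state, action, reward, new_state, ALPHA, GAMMA, actions):
    # standard Q-learning step over a dict keyed by (state, action), the layout described in learning.py below
    best_next = max(q_values[(new_state, a)] for a in actions)
    q_values[(state, action)] += ALPHA * (reward + GAMMA * best_next - q_values[(state, action)])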
@ -0,0 +1,62 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game
from ReinforcmentLearning.util import initial_q_fill


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    cookies_per_try = []
    wins_per_try = []

    for x in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)
        # print(f"Finished try {x+1}\n")

    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_run.append(amount_cookies_ate)

    # A run counts as a win when all 20 cookies were eaten.
    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    # print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")

    return cookies_per_run, wins


def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    # print("---------DONE---------")
    # print("Used: ")
    # print(f"Epsilon: {EPSILON}")
    # print(f"Gamma: {GAMMA}")
    # print(f"Alpha: {ALPHA}")

    # print("---------SUMMARY---------")
    print(f"Average win percentage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")
    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")
@ -9,8 +9,8 @@ class Direction(Enum):

def initial_q_fill(q_values):
    for x in range(8):
        for y in range(3):
    for x in range(-7, 8):
        for y in range(-2, 3):
            for cookie_direction in Direction:
                for action in Direction:
                    state = (x, y, cookie_direction)

@ -29,8 +29,8 @@ def get_start_state():

def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    x_ghost_dist = abs(pac_x - ghost_x)
    y_ghost_dist = abs(pac_y - ghost_y)
    x_ghost_dist = pac_x - ghost_x
    y_ghost_dist = pac_y - ghost_y

    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
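A quick count of what the signed distances do to the table size (worked numbers, not part of the commit, assuming Direction holds the four grid moves): the distance grid grows from 8 x 3 to 15 x 5 combinations:

old_states = 8 * 3 * 4          # x in 0..7, y in 0..2, 4 cookie directions -> 96 states
new_states = 15 * 5 * 4         # x in -7..7, y in -2..2, 4 cookie directions -> 300 states
print(old_states * 4, new_states * 4)  # 384 vs 1200 (state, action) entries with 4 actions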
@ -1,11 +0,0 @@
import numpy as np
from learning import runTry

EPSILON = 0.5
ALPHA = 0.5
GAMMA = 0.5

STEPS = 10

for alpha in np.linspace(0.1, 0.5, 10):
    runTry(EPSILON, alpha, GAMMA)
data.txt
@ -1,16 +1,20 @@
E: 0,1; A: 0.1; G: 0.9; 200/5000
E: 0,1; A: 0.1; G: 0.9; 150/5000
# High Gamma
E: 0.1; A: 0.1; G: 0.9; 0.04%
E: 0.1; A: 0.1; G: 0.9; 0.03%

E: 0,5; A: 0.1; G: 0.9; 0.0034%
E: 0,5; A: 0.1; G: 0.9; 0.002%
E: 0.5; A: 0.1; G: 0.9; 0.0034%
E: 0.5; A: 0.1; G: 0.9; 0.002%

E: 0,5; A: 0.5; G: 0.5; 0.0012%
E: 0,5; A: 0.5; G: 0.5; 0.0002%
E: 0,5; A: 0.5; G: 0.5; 0.001%
E: 0.5; A: 0.5; G: 0.5; 0.0012%
E: 0.5; A: 0.5; G: 0.5; 0.0002%
E: 0.5; A: 0.5; G: 0.5; 0.001%

E: 0,5; A: 0.3; G: 0.5; 0.0018%
E: 0,5; A: 0.3; G: 0.5; 0.0022%
E: 0,5; A: 0.3; G: 0.5; 0.0014%
E: 0,5; A: 0.3; G: 0.5; 0.0016%
E: 0,5; A: 0.3; G: 0.5; 0.0022%
E: 0.5; A: 0.3; G: 0.5; 0.0018%
E: 0.5; A: 0.3; G: 0.5; 0.0022%
E: 0.5; A: 0.3; G: 0.5; 0.0014%
E: 0.5; A: 0.3; G: 0.5; 0.0016%
E: 0.5; A: 0.3; G: 0.5; 0.0022%


# AFTER ABSOLUT CHANGE
learning.py
@ -1,58 +0,0 @@
import matplotlib.pyplot as plt
import pandas as pd

from game import run_game
from util import initial_q_fill


EPSILON = 0.5
ALPHA = 0.3
GAMMA = 0.8

def runTry(EPSILON, ALPHA, GAMMA):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    AMOUNT_RUNS = 5000
    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
        cookies_per_run.append(amount_cookies_ate)
        # print(f"Run {x}: {amount_cookies_ate} cookies ate\n")

    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    print(f"Win percentage: {wins/AMOUNT_RUNS}%")

    return cookies_per_run


cookies_per_run = runTry(EPSILON, ALPHA, GAMMA)


window_size = 100  # Adjust based on your needs
rolling_avg = pd.Series(cookies_per_run).rolling(window=window_size, center=True).mean()

plt.figure(figsize=(12, 6))
plt.plot(cookies_per_run, alpha=0.2, label='Raw Data', linewidth=0.5, color='gray')
plt.plot(rolling_avg, label=f'{window_size}-point Moving Average',
         linewidth=2, color='blue')
plt.title("Data with Rolling Average")
plt.xlabel("Index")
plt.ylabel("Value")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
@ -0,0 +1,17 @@
from GenTunic.gen_tuning import gen_tuning_main
from ReinforcmentLearning.learning import multipleTries


EPSILON = 0.3
ALPHA = 0.3
GAMMA = 0.8

AMOUNT_RUNS = 5000
AMOUNT_TRIES = 10

REWARD_ON_WIN = 10
REWARD_ON_LOSE = -10


#multipleTries(EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)