Added GA to tune constants

main
2wenty1ne 2025-12-03 15:50:46 +01:00
parent 0e154fc55a
commit 738b122f43
16 changed files with 312 additions and 89 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,37 @@
import numpy as np


def project_bit(bit):
    """Project a bit array onto a real value in [q_min, q_max]."""
    n = len(bit)
    q_min = 0.1
    q_max = 0.5
    reverse_bit = np.flip(bit)
    dec = np.uint64(0)
    for i in range(n):
        # Interpret the bit array as an unsigned integer (bit[0] is the MSB).
        dec += np.uint64(2) ** i * reverse_bit[i]
    # Scale the integer linearly into the parameter range.
    q = q_min + ((q_max - q_min) / (2 ** n - 1)) * dec
    return q


def bit_to_grey(bit):
    """Convert a binary array to its Gray-code representation."""
    grey = [bit[0]]
    for i in range(1, len(bit)):
        grey.append(bit[i - 1] ^ bit[i])
    return np.array(grey)


def grey_to_bit(grey):
    """Convert a Gray-code array back to a binary array."""
    bit = [grey[0]]
    for i in range(1, len(grey)):
        bit.append(bit[i - 1] ^ grey[i])
    return np.array(bit)
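As a quick sanity check on the encoding above (a usage sketch, not part of the commit; it assumes the GenTunic package from this diff is importable), an all-zero 8-bit chunk should project to q_min, an all-one chunk to q_max, and the Gray conversion should round-trip:

import numpy as np

from GenTunic.gen_math import project_bit, bit_to_grey, grey_to_bit

zeros = np.zeros(8, dtype=np.int32)
ones = np.ones(8, dtype=np.int32)
print(project_bit(zeros))  # 0.1  (q_min)
print(project_bit(ones))   # ~0.5 (q_max)

gene = np.random.randint(0, 2, 8)
assert np.array_equal(grey_to_bit(bit_to_grey(gene)), gene)  # Gray code round-trips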

View File

@@ -0,0 +1,64 @@
import math
import time

from matplotlib import pyplot as plt
import numpy as np

from GenTunic.gen_math import project_bit
from GenTunic.gen_util import calc_population_fitness, create_population, crossover, mutation, tournament_selection

POPULATION_SIZE = 200
MUTATION_RATE = 0.05
CROSSOVER_RATE = 0.65
GEN_SIZE = 8 * 3  # three parameters, 8 bits each
THRESHOLD = 0.5


def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    start_time = time.time()
    population = create_population(POPULATION_SIZE, GEN_SIZE)
    best_fitness_values = []
    best_fitness = 0

    while True:
        #? Calc fitness
        population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        _, best_fitness = fitness_values
        best_fitness_values.append(best_fitness)
        print(best_fitness)
        if best_fitness > THRESHOLD:
            print("Breaking")
            break

        #? Selection
        amount_selections = math.floor((1 - CROSSOVER_RATE) * len(population_probability))
        amount_crossover = POPULATION_SIZE - amount_selections
        new_population = tournament_selection(population_probability, amount_selections)

        #? Crossover
        new_population = crossover(population_probability, new_population, amount_crossover, GEN_SIZE)

        #? Mutation
        population = mutation(new_population, MUTATION_RATE, GEN_SIZE)

    population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
    best_fitness_index, best_fitness = fitness_values

    print("\n=== BEST PARAMETERS ===")
    gen = population[best_fitness_index]["population"]
    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
    parameters = [project_bit(x) for x in np.split(gen, 3)]
    for index, name in enumerate(parameter_names):
        print(f"{name}{parameters[index]}")

    time_amount = time.time() - start_time
    print(f"\nTook {time_amount}s")

    plt.plot(best_fitness_values)
    plt.show()
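For the constants committed above, the split between tournament survivors and crossover children works out as follows (a worked check, not part of the diff; the names mirror POPULATION_SIZE and CROSSOVER_RATE from the file):

import math

POPULATION_SIZE = 200
CROSSOVER_RATE = 0.65

amount_selections = math.floor((1 - CROSSOVER_RATE) * POPULATION_SIZE)
amount_crossover = POPULATION_SIZE - amount_selections
print(amount_selections, amount_crossover)  # 70 130: 70 individuals copied by tournament, 130 slots refilled via crossover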

View File

@@ -0,0 +1,106 @@
import random

import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit
from ReinforcmentLearning.learning import multipleTries


def create_population(size, GEN_SIZE):
    # Structured array: each entry holds a bit gene and its (later normalized) fitness.
    dtype = [("population", np.int32, (GEN_SIZE,)), ("probability", np.float64)]
    population_probability = np.zeros(size, dtype=dtype)
    for i in range(size):
        gen = np.random.randint(0, 2, GEN_SIZE)
        population_probability[i] = (gen, 0)
    return np.array(population_probability)
def calc_population_fitness(population_probability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    population_fitness_sum = 0
    for individual in population_probability:
        gen = individual["population"]
        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
        # multipleTries expects (EPSILON, ALPHA, GAMMA, ...); pass them in that order
        # so the Alpha/Epsilon/Gamma labels printed in gen_tuning stay correct.
        _, multiple_tries_win_prob = multipleTries(epsilon, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        fitness = np.array(multiple_tries_win_prob).mean()
        individual["probability"] = fitness
        population_fitness_sum += fitness
    best_fitness_index = np.argmax(population_probability["probability"])
    best_fitness = population_probability[best_fitness_index]["probability"]
    # Normalize the fitness values so they can be used as selection probabilities.
    population_probability["probability"] = population_probability["probability"] / population_fitness_sum
    return population_probability, (best_fitness_index, best_fitness)
def tournament_selection(population_probability, amount_selections):
    selected_population = []
    # Elitism: always carry the current best individual over.
    best_fitness_index = np.argmax(population_probability["probability"])
    selected_population.append(population_probability[best_fitness_index])
    while len(selected_population) < amount_selections:
        # Pick two random individuals and keep the fitter one.
        pair_indices = random.sample(range(len(population_probability)), 2)
        if population_probability[pair_indices[0]]["probability"] > population_probability[pair_indices[1]]["probability"]:
            selected_population.append(population_probability[pair_indices[0]])
        else:
            selected_population.append(population_probability[pair_indices[1]])
    return np.array(selected_population)
def crossover(population_probability, selected_population, amount_crossover, GEN_SIZE):
    # Parents are themselves chosen by tournament selection.
    crossover_population = tournament_selection(population_probability, amount_crossover)
    select_one_parent = False
    if amount_crossover % 2 == 1:
        # With an odd count, the last parent is copied over unchanged.
        amount_crossover -= 1
        select_one_parent = True
    for i in range(0, amount_crossover, 2):
        # Single-point crossover between parent i and parent i+1.
        crossover_point = np.random.randint(1, GEN_SIZE)
        mother_a = crossover_population[i]["population"][:crossover_point]
        mother_b = crossover_population[i]["population"][crossover_point:]
        father_a = crossover_population[i + 1]["population"][:crossover_point]
        father_b = crossover_population[i + 1]["population"][crossover_point:]

        child_one = np.empty(1, dtype=selected_population.dtype)
        child_one["population"] = np.concatenate((mother_a, father_b))
        child_one["probability"] = 0

        child_two = np.empty(1, dtype=selected_population.dtype)
        child_two["population"] = np.concatenate((mother_b, father_a))
        child_two["probability"] = 0

        selected_population = np.concatenate((selected_population, child_one))
        selected_population = np.concatenate((selected_population, child_two))

        is_last_iteration = (i >= amount_crossover - 2)
        if is_last_iteration and select_one_parent:
            # Carry the leftover (unpaired) parent over unchanged.
            selected_population = np.append(selected_population, crossover_population[-1])
    return selected_population
def mutation(population, MUTATION_RATE, GEN_SIZE):
    amount_mutation = len(population) * MUTATION_RATE
    mutation_indices = np.random.choice(len(population), int(amount_mutation), replace=False)
    for individual_index in mutation_indices:
        # Flip one bit of the gene in Gray-code space, then map back to binary.
        bit_index = np.random.randint(0, GEN_SIZE)
        grey = bit_to_grey(population[individual_index]["population"])
        grey[bit_index] ^= 1
        population[individual_index]["population"] = grey_to_bit(grey)
    return population
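To make the recombination above concrete, here is an illustrative sketch with a hypothetical 6-bit gene and a fixed crossover point (the real code draws the point with np.random.randint(1, GEN_SIZE)):

import numpy as np

GEN_SIZE = 6
crossover_point = 2

mother = np.array([1, 1, 1, 1, 1, 1])
father = np.array([0, 0, 0, 0, 0, 0])

# child_one = mother_a + father_b, child_two = mother_b + father_a, as in crossover() above.
child_one = np.concatenate((mother[:crossover_point], father[crossover_point:]))  # [1 1 0 0 0 0]
child_two = np.concatenate((mother[crossover_point:], father[:crossover_point]))  # [1 1 1 1 0 0]
print(child_one, child_two)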

View File

@@ -2,7 +2,7 @@ import pygame
import math
import os
from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
# Initialize pygame
pygame.init()
@@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth):
# Main game function
def run_game(q_values, EPSILON, ALPHA, GAMMA):
def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
clock = pygame.time.Clock()
labyrinth = labyrinth_init.copy()
@@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
if pacman.x == ghost.x and pacman.y == ghost.y:
# print("Game Over! The ghost caught Pacman.")
running = False
reward = -10
reward = REWARD_ON_LOSE
# Eat cookies
if labyrinth[pacman.y][pacman.x] == ".":
@@ -179,10 +179,11 @@
# Check if all cookies are eaten (game over)
if all("." not in row for row in labyrinth):
# print("You Win! Pacman ate all the cookies.")
reward = 10
reward = REWARD_ON_WIN
running = False
# Draw the labyrinth, pacman, and ghost
#? -------------------------MY CODE-----------------------------------
if not running:
new_state = state
@@ -206,6 +207,7 @@
counter += 1
return 20-counter
#? -------------------------MY CODE-----------------------------------
draw_labyrinth(labyrinth)
pacman.draw()
ghost.draw()

View File

@@ -0,0 +1,62 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game
from ReinforcmentLearning.util import initial_q_fill


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    cookies_per_try = []
    wins_per_try = []
    for x in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)
        # print(f"Finished try {x+1}\n")
    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """
    q_values = {}
    initial_q_fill(q_values)
    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_run.append(amount_cookies_ate)
    wins = 0
    for element in cookies_per_run:
        # A run counts as a win when all 20 cookies were eaten.
        if element == 20:
            wins += 1
    # print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
    return cookies_per_run, wins


def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    # print("---------DONE---------")
    # print("Used: ")
    # print(f"Epsilon: {EPSILON}")
    # print(f"Gamma: {GAMMA}")
    # print(f"Alpha: {ALPHA}")
    # print("---------SUMMARY---------")
    print(f"Average win percentage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS) * 100}%\n")
    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")

View File

@@ -9,8 +9,8 @@ class Direction(Enum):
def initial_q_fill(q_values):
for x in range(8):
for y in range(3):
for x in range(-7, 8):
for y in range(-2, 3):
for cookie_direction in Direction:
for action in Direction:
state = (x, y, cookie_direction)
@@ -29,8 +29,8 @@ def get_start_state():
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
x_ghost_dist = abs(pac_x - ghost_x)
y_ghost_dist = abs(pac_y - ghost_y)
x_ghost_dist = pac_x - ghost_x
y_ghost_dist = pac_y - ghost_y
cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
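The switch from absolute to signed ghost distances enlarges the Q-table accordingly; a rough count (assuming Direction has four members, which is not shown in this hunk):

# Rough Q-table sizes before and after this change.
old_states = 8 * 3 * 4    # x in range(8), y in range(3), 4 cookie directions
new_states = 15 * 5 * 4   # x in range(-7, 8), y in range(-2, 3), 4 cookie directions
print(old_states * 4, new_states * 4)  # 384 vs. 1200 (state, action) entries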

View File

@@ -1,11 +0,0 @@
import numpy as np

from learning import runTry

EPSILON = 0.5
ALPHA = 0.5
GAMMA = 0.5
STEPS = 10

for alpha in np.linspace(0.1, 0.5, 10):
    runTry(EPSILON, alpha, GAMMA)

View File

@@ -1,16 +1,20 @@
E: 0,1; A: 0.1; G: 0.9; 200/5000
E: 0,1; A: 0.1; G: 0.9; 150/5000
# High Gamma
E: 0.1; A: 0.1; G: 0.9; 0.04%
E: 0.1; A: 0.1; G: 0.9; 0.03%
E: 0,5; A: 0.1; G: 0.9; 0.0034%
E: 0,5; A: 0.1; G: 0.9; 0.002%
E: 0.5; A: 0.1; G: 0.9; 0.0034%
E: 0.5; A: 0.1; G: 0.9; 0.002%
E: 0,5; A: 0.5; G: 0.5; 0.0012%
E: 0,5; A: 0.5; G: 0.5; 0.0002%
E: 0,5; A: 0.5; G: 0.5; 0.001%
E: 0.5; A: 0.5; G: 0.5; 0.0012%
E: 0.5; A: 0.5; G: 0.5; 0.0002%
E: 0.5; A: 0.5; G: 0.5; 0.001%
E: 0,5; A: 0.3; G: 0.5; 0.0018%
E: 0,5; A: 0.3; G: 0.5; 0.0022%
E: 0,5; A: 0.3; G: 0.5; 0.0014%
E: 0,5; A: 0.3; G: 0.5; 0.0016%
E: 0,5; A: 0.3; G: 0.5; 0.0022%
E: 0.5; A: 0.3; G: 0.5; 0.0018%
E: 0.5; A: 0.3; G: 0.5; 0.0022%
E: 0.5; A: 0.3; G: 0.5; 0.0014%
E: 0.5; A: 0.3; G: 0.5; 0.0016%
E: 0.5; A: 0.3; G: 0.5; 0.0022%
# AFTER ABSOLUTE CHANGE

View File

@@ -1,58 +0,0 @@
import matplotlib.pyplot as plt
import pandas as pd

from game import run_game
from util import initial_q_fill

EPSILON = 0.5
ALPHA = 0.3
GAMMA = 0.8


def runTry(EPSILON, ALPHA, GAMMA):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """
    AMOUNT_RUNS = 5000
    q_values = {}
    initial_q_fill(q_values)
    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
        cookies_per_run.append(amount_cookies_ate)
        # print(f"Run {x}: {amount_cookies_ate} cookies ate\n")
    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1
    print(f"Win percentage: {wins/AMOUNT_RUNS}%")
    return cookies_per_run


cookies_per_run = runTry(EPSILON, ALPHA, GAMMA)

window_size = 100  # Adjust based on your needs
rolling_avg = pd.Series(cookies_per_run).rolling(window=window_size, center=True).mean()

plt.figure(figsize=(12, 6))
plt.plot(cookies_per_run, alpha=0.2, label='Raw Data', linewidth=0.5, color='gray')
plt.plot(rolling_avg, label=f'{window_size}-point Moving Average',
         linewidth=2, color='blue')
plt.title("Data with Rolling Average")
plt.xlabel("Index")
plt.ylabel("Value")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

main.py 100644
View File

@@ -0,0 +1,17 @@
from GenTunic.gen_tuning import gen_tuning_main
from ReinforcmentLearning.learning import multipleTries

EPSILON = 0.3
ALPHA = 0.3
GAMMA = 0.8
AMOUNT_RUNS = 5000
AMOUNT_TRIES = 10
REWARD_ON_WIN = 10
REWARD_ON_LOSE = -10

# multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)