Added GA to tune constants
parent
0e154fc55a
commit
738b122f43
@ -0,0 +1,37 @@
import numpy as np


def project_bit(bit):
    # Map a bit string onto a float in [q_min, q_max]
    n = len(bit)
    q_min = 0.1
    q_max = 0.5

    reverse_bit = np.flip(bit)
    dec = np.uint64(0)

    # Interpret the (reversed) bits as an unsigned integer
    for i in range(n):
        dec += np.uint64(2)**i * reverse_bit[i]

    q = q_min + ((q_max - q_min) / (2**n - 1)) * dec

    return q


def bit_to_grey(bit):
    # Convert a plain binary array to its Gray-code representation
    grey = [bit[0]]

    for i in range(1, len(bit)):
        grey.append(bit[i-1] ^ bit[i])

    return np.array(grey)


def grey_to_bit(grey):
    # Convert a Gray-code array back to plain binary
    bit = [grey[0]]

    for i in range(1, len(grey)):
        bit.append(bit[i-1] ^ grey[i])

    return np.array(bit)
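A quick sanity check of these helpers (a hypothetical usage sketch, not part of the committed files): an all-ones 8-bit segment should project to q_max, and a Gray-code encode/decode round trip should reproduce the original bits.

import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit

segment = np.ones(8, dtype=np.int32)
print(project_bit(segment))        # dec = 255 -> 0.1 + (0.4 / 255) * 255 ≈ 0.5 (q_max)

bits = np.array([0, 1, 1, 0, 1, 0, 0, 1])
assert np.array_equal(grey_to_bit(bit_to_grey(bits)), bits)   # round trip restores the bits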
@ -0,0 +1,64 @@
import math
import time

from matplotlib import pyplot as plt
import numpy as np

from GenTunic.gen_math import project_bit
from GenTunic.gen_util import calc_population_fitness, create_population, crossover, mutation, tournament_selection


POPULATION_SIZE = 200
MUTATION_RATE = 0.05
CROSSOVER_RATE = 0.65

GEN_SIZE = 8 * 3   # 8 bits per parameter: alpha, epsilon, gamma
THRESHOLD = 0.5


def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    start_time = time.time()

    population = create_population(POPULATION_SIZE, GEN_SIZE)

    best_fitness_values = []
    best_fitness = 0

    while True:
        #? Calc fitness
        population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)

        _, best_fitness = fitness_values
        best_fitness_values.append(best_fitness)
        print(best_fitness)

        if best_fitness > THRESHOLD:
            print("Breaking")
            break

        #? Selection
        amount_selections = math.floor((1 - CROSSOVER_RATE) * len(population_probability))
        amount_crossover = POPULATION_SIZE - amount_selections

        new_population = tournament_selection(population_probability, amount_selections)

        #? Crossover
        new_population = crossover(population_probability, new_population, amount_crossover, GEN_SIZE)

        #? Mutation
        population = mutation(new_population, MUTATION_RATE, GEN_SIZE)

    population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
    best_fitness_index, best_fitness = fitness_values

    print("\n=== BEST PARAMETERS ===")
    gen = population[best_fitness_index]["population"]
    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
    parameters = [project_bit(x) for x in np.split(gen, 3)]
    for index, name in enumerate(parameter_names):
        print(f"{name}{parameters[index]}")

    time_amount = time.time() - start_time
    print(f"\nTook {time_amount}s")

    plt.plot(best_fitness_values)
    plt.show()
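For intuition about how those constants split a generation (an illustrative calculation, not part of the committed files): with a population of 200 and a crossover rate of 0.65, roughly a third of the next generation is carried over by tournament selection and the rest is filled with crossover offspring.

import math

POPULATION_SIZE = 200
CROSSOVER_RATE = 0.65

amount_selections = math.floor((1 - CROSSOVER_RATE) * POPULATION_SIZE)  # 70 individuals kept via tournament selection
amount_crossover = POPULATION_SIZE - amount_selections                  # 130 slots filled with crossover offspring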
@ -0,0 +1,106 @@
import random
import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit
from ReinforcmentLearning.learning import multipleTries


def create_population(size, GEN_SIZE):
    # Structured array: each individual holds its bit string and its (normalized) fitness
    dtype = [("population", np.int32, (GEN_SIZE,)), ("probability", np.float64)]
    population_probability = np.zeros(size, dtype=dtype)

    for i in range(size):
        gen = np.random.randint(0, 2, GEN_SIZE)
        population_probability[i] = (gen, 0)

    return population_probability


def calc_population_fitness(population_probability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    population_fitness_sum = 0

    for individual in population_probability:
        gen = individual["population"]
        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
        # multipleTries expects (EPSILON, ALPHA, GAMMA, ...), so epsilon is passed first
        _, multiple_tries_win_prob = multipleTries(epsilon, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        fitness = np.array(multiple_tries_win_prob).mean()

        individual["probability"] = fitness
        population_fitness_sum += fitness

    best_fitness_index = np.argmax(population_probability["probability"])
    best_fitness = population_probability[best_fitness_index]["probability"]

    population_probability["probability"] = population_probability["probability"] / population_fitness_sum

    return population_probability, (best_fitness_index, best_fitness)


def tournament_selection(population_probability, amount_selections):
    selected_population = []

    # Elitism: always keep the current best individual
    best_fitness_index = np.argmax(population_probability["probability"])
    selected_population.append(population_probability[best_fitness_index])

    while len(selected_population) < amount_selections:
        pair_indices = random.sample(range(len(population_probability)), 2)

        if population_probability[pair_indices[0]]["probability"] > population_probability[pair_indices[1]]["probability"]:
            selected_population.append(population_probability[pair_indices[0]])
        else:
            selected_population.append(population_probability[pair_indices[1]])

    return np.array(selected_population)


def crossover(population_probability, selected_population, amount_crossover, GEN_SIZE):
    crossover_population = tournament_selection(population_probability, amount_crossover)

    select_one_parent = False

    if amount_crossover % 2 == 1:
        amount_crossover -= 1
        select_one_parent = True

    for i in range(0, amount_crossover, 2):
        # Single-point crossover between parent i ("mother") and parent i+1 ("father")
        crossover_point = np.random.randint(1, GEN_SIZE)

        mother_a = crossover_population[i]["population"][:crossover_point]
        mother_b = crossover_population[i]["population"][crossover_point:]

        father_a = crossover_population[i+1]["population"][:crossover_point]
        father_b = crossover_population[i+1]["population"][crossover_point:]

        child_one = np.empty(1, dtype=selected_population.dtype)
        child_one["population"] = np.concatenate((mother_a, father_b))
        child_one["probability"] = 0

        child_two = np.empty(1, dtype=selected_population.dtype)
        child_two["population"] = np.concatenate((father_a, mother_b))
        child_two["probability"] = 0

        selected_population = np.concatenate((selected_population, child_one))
        selected_population = np.concatenate((selected_population, child_two))

        is_last_iteration = (i >= amount_crossover - 2)
        if is_last_iteration and select_one_parent:
            # Odd number of crossover slots: carry the unpaired parent over unchanged
            selected_population = np.append(selected_population, crossover_population[-1])

    return selected_population


def mutation(population, MUTATION_RATE, GEN_SIZE):
    amount_mutation = len(population) * MUTATION_RATE
    mutation_indices = np.random.choice(len(population), int(amount_mutation), replace=False)

    for individual_index in mutation_indices:
        # Flip one bit of the gene in Gray-code space, then map back to plain binary
        bit_index = np.random.randint(0, GEN_SIZE)
        grey = bit_to_grey(population[individual_index]["population"])
        grey[bit_index] ^= 1
        population[individual_index]["population"] = grey_to_bit(grey)

    return population
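A minimal decoding sketch (hypothetical usage, mirroring what calc_population_fitness does internally): each 24-bit chromosome splits into three 8-bit segments, and project_bit maps each segment into the [0.1, 0.5] parameter range.

import numpy as np

from GenTunic.gen_math import project_bit
from GenTunic.gen_util import create_population

population = create_population(size=4, GEN_SIZE=24)

gene = population[0]["population"]                               # 24-bit chromosome of the first individual
alpha, epsilon, gamma = [project_bit(seg) for seg in np.split(gene, 3)]
print(alpha, epsilon, gamma)                                     # three values in [0.1, 0.5]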
@ -2,7 +2,7 @@ import pygame
import math
import os

-from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
+from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action

# Initialize pygame
pygame.init()

@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth):

# Main game function
-def run_game(q_values, EPSILON, ALPHA, GAMMA):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
    clock = pygame.time.Clock()
    labyrinth = labyrinth_init.copy()

@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
        if pacman.x == ghost.x and pacman.y == ghost.y:
            # print("Game Over! The ghost caught Pacman.")
            running = False
-           reward = -10
+           reward = REWARD_ON_LOSE

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":

@ -179,10 +179,11 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            # print("You Win! Pacman ate all the cookies.")
-           reward = 10
+           reward = REWARD_ON_WIN
            running = False

        # Draw the labyrinth, pacman, and ghost

        #? -------------------------MY CODE-----------------------------------
        if not running:
            new_state = state

@ -206,6 +207,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
            counter += 1
            return 20-counter
        #? -------------------------MY CODE-----------------------------------

        draw_labyrinth(labyrinth)
        pacman.draw()
        ghost.draw()
@ -0,0 +1,62 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game
from ReinforcmentLearning.util import initial_q_fill


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    cookies_per_try = []
    wins_per_try = []

    for x in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)
        # print(f"Finished try {x+1}\n")

    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_run.append(amount_cookies_ate)

    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    # print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")

    return cookies_per_run, wins


def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    # print("---------DONE---------")
    # print("Used: ")
    # print(f"Epsilon: {EPSILON}")
    # print(f"Gamma: {GAMMA}")
    # print(f"Alpha: {ALPHA}")

    # print("---------SUMMARY---------")
    print(f"Average win percentage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")
    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")
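The docstring above describes the q-table layout; here is a small illustration (the specific Direction member names used below are assumptions, not taken from the repo):

from ReinforcmentLearning.util import Direction

q_values = {}

# One entry per (state, action) pair; state = (x_dist_to_ghost, y_dist_to_ghost, next_cookie_direction)
state = (3, -1, Direction.UP)              # Direction.UP is an assumed member name
q_values[(state, Direction.LEFT)] = 0.0    # Direction.LEFT is an assumed member name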
@ -9,8 +9,8 @@ class Direction(Enum):
def initial_q_fill(q_values):
-    for x in range(8):
-        for y in range(3):
+    for x in range(-7, 8):
+        for y in range(-2, 3):
            for cookie_direction in Direction:
                for action in Direction:
                    state = (x, y, cookie_direction)

@ -29,8 +29,8 @@ def get_start_state():

def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
-    x_ghost_dist = abs(pac_x - ghost_x)
-    y_ghost_dist = abs(pac_y - ghost_y)
+    x_ghost_dist = pac_x - ghost_x
+    y_ghost_dist = pac_y - ghost_y

    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
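To illustrate the switch from absolute to signed ghost distances (hypothetical positions, not from the repo): the sign now tells the agent which side the ghost is on, which is why initial_q_fill above iterates over range(-7, 8) and range(-2, 3) instead of the non-negative ranges.

pac_x, pac_y = 3, 2
ghost_x, ghost_y = 5, 1

x_ghost_dist = pac_x - ghost_x   # -2: ghost is two tiles away in the positive x direction; abs() would have given 2
y_ghost_dist = pac_y - ghost_y   #  1: signed distance along y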
@ -1,11 +0,0 @@
import numpy as np

from learning import runTry


EPSILON = 0.5
ALPHA = 0.5
GAMMA = 0.5

STEPS = 10


for alpha in np.linspace(0.1, 0.5, 10):
    runTry(EPSILON, alpha, GAMMA)
data.txt
@ -1,16 +1,20 @@
-E: 0,1; A: 0.1; G: 0.9; 200/5000
-E: 0,1; A: 0.1; G: 0.9; 150/5000
+# High Gamma
+E: 0.1; A: 0.1; G: 0.9; 0.04%
+E: 0.1; A: 0.1; G: 0.9; 0.03%

-E: 0,5; A: 0.1; G: 0.9; 0.0034%
-E: 0,5; A: 0.1; G: 0.9; 0.002%
+E: 0.5; A: 0.1; G: 0.9; 0.0034%
+E: 0.5; A: 0.1; G: 0.9; 0.002%

-E: 0,5; A: 0.5; G: 0.5; 0.0012%
-E: 0,5; A: 0.5; G: 0.5; 0.0002%
-E: 0,5; A: 0.5; G: 0.5; 0.001%
+E: 0.5; A: 0.5; G: 0.5; 0.0012%
+E: 0.5; A: 0.5; G: 0.5; 0.0002%
+E: 0.5; A: 0.5; G: 0.5; 0.001%

-E: 0,5; A: 0.3; G: 0.5; 0.0018%
-E: 0,5; A: 0.3; G: 0.5; 0.0022%
-E: 0,5; A: 0.3; G: 0.5; 0.0014%
-E: 0,5; A: 0.3; G: 0.5; 0.0016%
-E: 0,5; A: 0.3; G: 0.5; 0.0022%
+E: 0.5; A: 0.3; G: 0.5; 0.0018%
+E: 0.5; A: 0.3; G: 0.5; 0.0022%
+E: 0.5; A: 0.3; G: 0.5; 0.0014%
+E: 0.5; A: 0.3; G: 0.5; 0.0016%
+E: 0.5; A: 0.3; G: 0.5; 0.0022%

+# AFTER ABSOLUT CHANGE
learning.py
@ -1,58 +0,0 @@
import matplotlib.pyplot as plt
import pandas as pd

from game import run_game
from util import initial_q_fill


EPSILON = 0.5
ALPHA = 0.3
GAMMA = 0.8

def runTry(EPSILON, ALPHA, GAMMA):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    AMOUNT_RUNS = 5000
    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
        cookies_per_run.append(amount_cookies_ate)
        # print(f"Run {x}: {amount_cookies_ate} cookies ate\n")

    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    print(f"Win percentage: {wins/AMOUNT_RUNS}%")

    return cookies_per_run


cookies_per_run = runTry(EPSILON, ALPHA, GAMMA)


window_size = 100  # Adjust based on your needs
rolling_avg = pd.Series(cookies_per_run).rolling(window=window_size, center=True).mean()

plt.figure(figsize=(12, 6))
plt.plot(cookies_per_run, alpha=0.2, label='Raw Data', linewidth=0.5, color='gray')
plt.plot(rolling_avg, label=f'{window_size}-point Moving Average',
         linewidth=2, color='blue')
plt.title("Data with Rolling Average")
plt.xlabel("Index")
plt.ylabel("Value")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
@ -0,0 +1,17 @@
from GenTunic.gen_tuning import gen_tuning_main
from ReinforcmentLearning.learning import multipleTries


EPSILON = 0.3
ALPHA = 0.3
GAMMA = 0.8

AMOUNT_RUNS = 5000
AMOUNT_TRIES = 10

REWARD_ON_WIN = 10
REWARD_ON_LOSE = -10


#multipleTries(EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)