Added GA to tune constants
parent
0e154fc55a
commit
738b122f43
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,37 @@
import numpy as np


def project_bit(bit):
    # Decode a bit string into a real value in [q_min, q_max].
    n = len(bit)
    q_min = 0.1
    q_max = 0.5

    reverse_bit = np.flip(bit)
    dec = np.uint64(0)

    # Interpret the (reversed) bits as an unsigned integer.
    for i in range(n):
        dec += np.uint64(2)**i * reverse_bit[i]

    # Scale the integer linearly onto [q_min, q_max].
    q = q_min + ((q_max - q_min) / (2**n - 1)) * dec

    return q


def bit_to_grey(bit):
    # Convert a binary-coded array to its Gray-code representation.
    grey = [bit[0]]

    for i in range(1, len(bit)):
        grey.append(bit[i-1] ^ bit[i])

    return np.array(grey)


def grey_to_bit(grey):
    # Convert a Gray-coded array back to plain binary.
    bit = [grey[0]]

    for i in range(1, len(grey)):
        bit.append(bit[i-1] ^ grey[i])

    return np.array(bit)
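A quick usage sketch (not part of the commit) showing the projection bounds and the Gray-code round trip; the 8-bit segment length matches GEN_SIZE = 8 * 3 used below, and the import path assumes the repo layout:

import numpy as np
from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit  # assumes the repo root is on sys.path

print(project_bit(np.zeros(8, dtype=np.int32)))  # 0.1, all zeros decode to q_min
print(project_bit(np.ones(8, dtype=np.int32)))   # 0.5, all ones decode to q_max

gene = np.random.randint(0, 2, 8)
assert np.array_equal(grey_to_bit(bit_to_grey(gene)), gene)  # encode/decode round-trips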
@ -0,0 +1,64 @@
import math
import time
from matplotlib import pyplot as plt
import numpy as np
from GenTunic.gen_math import project_bit
from GenTunic.gen_util import calc_population_fitness, create_population, crossover, mutation, turnament_selection


POPULATIUON_SIZE = 200
MUTATION_RATE = 0.05
CROSSOVER_RATE = 0.65

GEN_SIZE = 8 * 3
THRESHOLD = 0.5


def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    start_time = time.time()

    population = create_population(POPULATIUON_SIZE, GEN_SIZE)

    best_fintess_values = []
    best_fitness = 0

    while True:
        #? Calc fitness
        population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)

        _, best_fitness = fintess_values
        best_fintess_values.append(best_fitness)
        print(best_fitness)

        if best_fitness > THRESHOLD:
            print("Breaking")
            break

        #? Selection
        amount_selections = math.floor((1 - CROSSOVER_RATE) * len(population_propability))
        amount_crossover = POPULATIUON_SIZE - amount_selections

        new_population = turnament_selection(population_propability, amount_selections)

        #? Crossover
        new_population = crossover(population_propability, new_population, amount_crossover, GEN_SIZE)

        #? Mutation
        population = mutation(new_population, MUTATION_RATE, GEN_SIZE)

    population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
    best_fintess_index, best_fitness = fintess_values

    print("\n=== BEST PARAMETERS ===")
    gen = population[best_fintess_index]["population"]
    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
    parameters = [project_bit(x) for x in np.split(gen, 3)]
    for index, name in enumerate(parameter_names):
        print(f"{name}{parameters[index]}")

    time_amount = time.time() - start_time
    print(f"\nTook {time_amount}s")

    plt.plot(best_fintess_values)
    plt.show()
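For a sense of the split (worked numbers, not part of the commit): with POPULATIUON_SIZE = 200 and CROSSOVER_RATE = 0.65, and since len(population_propability) equals POPULATIUON_SIZE, each generation keeps 70 individuals via tournament selection and fills the remaining 130 through crossover:

import math

POPULATIUON_SIZE = 200
CROSSOVER_RATE = 0.65

amount_selections = math.floor((1 - CROSSOVER_RATE) * POPULATIUON_SIZE)  # 70 kept by selection
amount_crossover = POPULATIUON_SIZE - amount_selections                  # 130 created by crossover
print(amount_selections, amount_crossover)  # 70 130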
@ -0,0 +1,106 @@
import random
import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit
from ReinforcmentLearning.learning import multipleTries


def create_population(size, GEN_SIZE):
    # Structured array: each entry holds a bit-string gene and its (later normalised) fitness.
    dtype = [("population", np.int32, (GEN_SIZE,)), ("probability", np.float64)]
    population_propability = np.zeros(size, dtype=dtype)

    for i in range(size):
        gen = np.random.randint(0, 2, GEN_SIZE)
        population_propability[i] = (gen, 0)

    return np.array(population_propability)


def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    population_fitness_sum = 0

    for individual in population_propability:
        gen = individual["population"]
        # Gene layout: 8 bits each for alpha, epsilon, gamma.
        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
        # multipleTries expects (EPSILON, ALPHA, GAMMA, ...), so pass epsilon first.
        _, multiple_tries_win_prob = multipleTries(epsilon, alpha, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        # wins per try (raw counts), averaged over the tries
        fitness = np.array(multiple_tries_win_prob).mean()

        individual["probability"] = fitness
        population_fitness_sum += fitness

    best_fitness_index = np.argmax(population_propability["probability"])
    best_fitness = population_propability[best_fitness_index]["probability"]

    population_propability["probability"] = population_propability["probability"] / population_fitness_sum

    return population_propability, (best_fitness_index, best_fitness)


def turnament_selection(population_propability, amount_selections):
    selected_population = []

    # Elitism: always keep the current best individual.
    best_fitness_index = np.argmax(population_propability["probability"])
    selected_population.append(population_propability[best_fitness_index])

    while len(selected_population) < amount_selections:
        pair_indecies = random.sample(range(len(population_propability)), 2)

        if population_propability[pair_indecies[0]]["probability"] > population_propability[pair_indecies[1]]["probability"]:
            selected_population.append(population_propability[pair_indecies[0]])
        else:
            selected_population.append(population_propability[pair_indecies[1]])

    return np.array(selected_population)
def crossover(population_propability, selected_population, amount_crossover, GEN_SIZE):
    # Pick parents by tournament, then create children via single-point crossover.
    crossover_population = turnament_selection(population_propability, amount_crossover)

    select_one_parent = False

    if amount_crossover % 2 == 1:
        amount_crossover -= 1
        select_one_parent = True

    for i in range(0, amount_crossover, 2):
        crossover_point = np.random.randint(1, GEN_SIZE)

        mother_a = crossover_population[i]["population"][:crossover_point]
        mother_b = crossover_population[i]["population"][crossover_point:]

        father_a = crossover_population[i+1]["population"][:crossover_point]
        father_b = crossover_population[i+1]["population"][crossover_point:]

        child_one = np.empty(1, dtype=selected_population.dtype)
        child_one["population"] = np.concatenate((mother_a, father_b))
        child_one["probability"] = 0

        child_two = np.empty(1, dtype=selected_population.dtype)
        # Keep gene positions aligned: head of the father, tail of the mother.
        child_two["population"] = np.concatenate((father_a, mother_b))
        child_two["probability"] = 0

        selected_population = np.concatenate((selected_population, child_one))
        selected_population = np.concatenate((selected_population, child_two))

        is_last_iteration = (i >= amount_crossover - 2)
        if is_last_iteration and select_one_parent:
            # Odd number of children requested: carry one parent over unchanged.
            selected_population = np.append(selected_population, crossover_population[i])

    return selected_population


def mutation(population, MUTATION_RATE, GEN_SIZE):
    amount_mutation = len(population) * MUTATION_RATE
    mutation_indecies = np.random.choice(len(population), int(amount_mutation), replace=False)

    for individual_index in mutation_indecies:
        # Flip one bit of the Gray-coded gene, then map back to plain binary.
        bit_index = np.random.randint(0, GEN_SIZE)
        grey = bit_to_grey(population[individual_index]["population"])
        grey[bit_index] ^= 1
        population[individual_index]["population"] = grey_to_bit(grey)

    return population
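To make the single-point scheme concrete, a small hand-run example (not part of the commit) with two 6-bit parents and a cut after position 2; each child keeps head positions from one parent and tail positions from the other:

import numpy as np

mother = np.array([1, 1, 1, 1, 1, 1])
father = np.array([0, 0, 0, 0, 0, 0])
crossover_point = 2  # example cut position

child_one = np.concatenate((mother[:crossover_point], father[crossover_point:]))  # [1 1 0 0 0 0]
child_two = np.concatenate((father[:crossover_point], mother[crossover_point:]))  # [0 0 1 1 1 1]
print(child_one, child_two)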
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -2,7 +2,7 @@ import pygame
import math
import os

from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action

# Initialize pygame
pygame.init()

@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth):

# Main game function
def run_game(q_values, EPSILON, ALPHA, GAMMA):
def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
    clock = pygame.time.Clock()
    labyrinth = labyrinth_init.copy()

@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
if pacman.x == ghost.x and pacman.y == ghost.y:
    # print("Game Over! The ghost caught Pacman.")
    running = False
    reward = -10
    reward = REWARD_ON_LOSE

# Eat cookies
if labyrinth[pacman.y][pacman.x] == ".":

@ -179,10 +179,11 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
# Check if all cookies are eaten (game over)
if all("." not in row for row in labyrinth):
    # print("You Win! Pacman ate all the cookies.")
    reward = 10
    reward = REWARD_ON_WIN
    running = False

# Draw the labyrinth, pacman, and ghost

#? -------------------------MY CODE-----------------------------------
if not running:
    new_state = state

@ -206,6 +207,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
counter += 1
return 20-counter
#? -------------------------MY CODE-----------------------------------

draw_labyrinth(labyrinth)
pacman.draw()
ghost.draw()
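For orientation (not shown in these hunks): EPSILON, ALPHA and GAMMA are the usual tabular Q-learning knobs, i.e. exploration rate, learning rate and discount factor. A generic sketch of the update they parameterise, not the repository's exact code:

def q_update(q_values, state, action, reward, new_state, ALPHA, GAMMA, actions):
    # standard Q-learning step over a dict keyed by (state, action), the layout described in learning.py below
    best_next = max(q_values[(new_state, a)] for a in actions)
    q_values[(state, action)] += ALPHA * (reward + GAMMA * best_next - q_values[(state, action)])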
@ -0,0 +1,62 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game
from ReinforcmentLearning.util import initial_q_fill


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    cookies_per_try = []
    wins_per_try = []

    for x in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)
        # print(f"Finished try {x+1}\n")

    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        cookies_per_run.append(amount_cookies_ate)

    # A run counts as a win when all 20 cookies were eaten.
    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    # print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")

    return cookies_per_run, wins


def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    # print("---------DONE---------")
    # print("Used: ")
    # print(f"Epsilon: {EPSILON}")
    # print(f"Gamma: {GAMMA}")
    # print(f"Alpha: {ALPHA}")

    # print("---------SUMMARY---------")
    print(f"Average win percentage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")
    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")
@ -9,8 +9,8 @@ class Direction(Enum):

def initial_q_fill(q_values):
    for x in range(8):
        for y in range(3):
    for x in range(-7, 8):
        for y in range(-2, 3):
            for cookie_direction in Direction:
                for action in Direction:
                    state = (x, y, cookie_direction)

@ -29,8 +29,8 @@ def get_start_state():

def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    x_ghost_dist = abs(pac_x - ghost_x)
    y_ghost_dist = abs(pac_y - ghost_y)
    x_ghost_dist = pac_x - ghost_x
    y_ghost_dist = pac_y - ghost_y

    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
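A quick count of what the signed distances do to the table size (worked numbers, not part of the commit, assuming Direction holds the four grid moves): the distance grid grows from 8 x 3 to 15 x 5 combinations:

old_states = 8 * 3 * 4          # x in 0..7, y in 0..2, 4 cookie directions -> 96 states
new_states = 15 * 5 * 4         # x in -7..7, y in -2..2, 4 cookie directions -> 300 states
print(old_states * 4, new_states * 4)  # 384 vs 1200 (state, action) entries with 4 actions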
@ -1,11 +0,0 @@
import numpy as np
from learning import runTry

EPSILON = 0.5
ALPHA = 0.5
GAMMA = 0.5

STEPS = 10

for alpha in np.linspace(0.1, 0.5, 10):
    runTry(EPSILON, alpha, GAMMA)
data.txt
@ -1,16 +1,20 @@
E: 0,1; A: 0.1; G: 0.9; 200/5000
E: 0,1; A: 0.1; G: 0.9; 150/5000
# High Gamma
E: 0.1; A: 0.1; G: 0.9; 0.04%
E: 0.1; A: 0.1; G: 0.9; 0.03%

E: 0,5; A: 0.1; G: 0.9; 0.0034%
E: 0,5; A: 0.1; G: 0.9; 0.002%
E: 0.5; A: 0.1; G: 0.9; 0.0034%
E: 0.5; A: 0.1; G: 0.9; 0.002%

E: 0,5; A: 0.5; G: 0.5; 0.0012%
E: 0,5; A: 0.5; G: 0.5; 0.0002%
E: 0,5; A: 0.5; G: 0.5; 0.001%
E: 0.5; A: 0.5; G: 0.5; 0.0012%
E: 0.5; A: 0.5; G: 0.5; 0.0002%
E: 0.5; A: 0.5; G: 0.5; 0.001%

E: 0,5; A: 0.3; G: 0.5; 0.0018%
E: 0,5; A: 0.3; G: 0.5; 0.0022%
E: 0,5; A: 0.3; G: 0.5; 0.0014%
E: 0,5; A: 0.3; G: 0.5; 0.0016%
E: 0,5; A: 0.3; G: 0.5; 0.0022%
E: 0.5; A: 0.3; G: 0.5; 0.0018%
E: 0.5; A: 0.3; G: 0.5; 0.0022%
E: 0.5; A: 0.3; G: 0.5; 0.0014%
E: 0.5; A: 0.3; G: 0.5; 0.0016%
E: 0.5; A: 0.3; G: 0.5; 0.0022%


# AFTER ABSOLUT CHANGE
learning.py
@ -1,58 +0,0 @@
import matplotlib.pyplot as plt
import pandas as pd

from game import run_game
from util import initial_q_fill


EPSILON = 0.5
ALPHA = 0.3
GAMMA = 0.8

def runTry(EPSILON, ALPHA, GAMMA):
    """
    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)
    """

    AMOUNT_RUNS = 5000
    q_values = {}

    initial_q_fill(q_values)

    cookies_per_run = []
    # Amount of single runs
    for x in range(AMOUNT_RUNS):
        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
        cookies_per_run.append(amount_cookies_ate)
        # print(f"Run {x}: {amount_cookies_ate} cookies ate\n")

    wins = 0
    for element in cookies_per_run:
        if element == 20:
            wins += 1

    print(f"Win percentage: {wins/AMOUNT_RUNS}%")

    return cookies_per_run


cookies_per_run = runTry(EPSILON, ALPHA, GAMMA)


window_size = 100  # Adjust based on your needs
rolling_avg = pd.Series(cookies_per_run).rolling(window=window_size, center=True).mean()

plt.figure(figsize=(12, 6))
plt.plot(cookies_per_run, alpha=0.2, label='Raw Data', linewidth=0.5, color='gray')
plt.plot(rolling_avg, label=f'{window_size}-point Moving Average',
         linewidth=2, color='blue')
plt.title("Data with Rolling Average")
plt.xlabel("Index")
plt.ylabel("Value")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
@ -0,0 +1,17 @@
from GenTunic.gen_tuning import gen_tuning_main
from ReinforcmentLearning.learning import multipleTries


EPSILON = 0.3
ALPHA = 0.3
GAMMA = 0.8

AMOUNT_RUNS = 5000
AMOUNT_TRIES = 10

REWARD_ON_WIN = 10
REWARD_ON_LOSE = -10


#multipleTries(EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)