diff --git a/GenTunic/__pycache__/gen_math.cpython-313.pyc b/GenTunic/__pycache__/gen_math.cpython-313.pyc
new file mode 100644
index 0000000..670104d
Binary files /dev/null and b/GenTunic/__pycache__/gen_math.cpython-313.pyc differ
diff --git a/GenTunic/__pycache__/gen_tuning.cpython-313.pyc b/GenTunic/__pycache__/gen_tuning.cpython-313.pyc
new file mode 100644
index 0000000..a610856
Binary files /dev/null and b/GenTunic/__pycache__/gen_tuning.cpython-313.pyc differ
diff --git a/GenTunic/__pycache__/gen_util.cpython-313.pyc b/GenTunic/__pycache__/gen_util.cpython-313.pyc
new file mode 100644
index 0000000..549b9ae
Binary files /dev/null and b/GenTunic/__pycache__/gen_util.cpython-313.pyc differ
diff --git a/GenTunic/gen_math.py b/GenTunic/gen_math.py
new file mode 100644
index 0000000..867f7ed
--- /dev/null
+++ b/GenTunic/gen_math.py
@@ -0,0 +1,37 @@
+import numpy as np
+
+
+def project_bit(bit):
+    n = len(bit)
+    q_min = 0.1
+    q_max = 0.5
+
+    reverse_bit = np.flip(bit)
+    dec = np.uint64(0)
+
+    for i in range(n):
+        dec += np.uint64(2)**i * reverse_bit[i]
+
+    q = q_min + ((q_max - q_min) / (2**n - 1)) * dec
+
+    return q
+
+
+
+def bit_to_grey(bit):
+    grey = [bit[0]]
+
+    for i in range(1, len(bit)):
+        grey.append(bit[i-1] ^ bit[i])
+
+    return np.array(grey)
+
+
+
+def grey_to_bit(grey):
+    bit = [grey[0]]
+
+    for i in range(1, len(grey)):
+        bit.append(bit[i-1] ^ grey[i])
+
+    return np.array(bit)
\ No newline at end of file
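Note: a quick sanity check of the helpers in gen_math.py. This is a minimal sketch, assuming it is run from the repository root so that GenTunic is importable; the input bits and the printed value are illustrative only. project_bit maps an 8-bit string linearly onto [0.1, 0.5], and bit_to_grey / grey_to_bit are exact inverses of each other.

```python
import numpy as np
from GenTunic.gen_math import project_bit, bit_to_grey, grey_to_bit

bits = np.array([1, 0, 0, 0, 0, 0, 0, 1])       # reads as decimal 129
q = project_bit(bits)                            # 0.1 + (0.4 / 255) * 129 ≈ 0.302
print(q)

grey = bit_to_grey(bits)                         # binary -> Gray code
assert np.array_equal(grey_to_bit(grey), bits)   # round trip is lossless
```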
diff --git a/GenTunic/gen_tuning.py b/GenTunic/gen_tuning.py
new file mode 100644
index 0000000..43036e8
--- /dev/null
+++ b/GenTunic/gen_tuning.py
@@ -0,0 +1,64 @@
+
+import math
+import time
+from matplotlib import pyplot as plt
+import numpy as np
+from GenTunic.gen_math import project_bit
+from GenTunic.gen_util import calc_population_fitness, create_population, crossover, mutation, tournament_selection
+
+
+POPULATION_SIZE = 200
+MUTATION_RATE = 0.05
+CROSSOVER_RATE = 0.65
+
+GEN_SIZE = 8 * 3
+THRESHOLD = 0.5
+
+def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+    start_time = time.time()
+
+    population = create_population(POPULATION_SIZE, GEN_SIZE)
+
+    best_fitness_values = []
+    best_fitness = 0
+
+    while True:
+        #? Calc fitness
+        population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+
+        _, best_fitness = fitness_values
+        best_fitness_values.append(best_fitness)
+        print(best_fitness)
+
+        if best_fitness > THRESHOLD:
+            print("Breaking")
+            break
+
+        #? Selection
+        amount_selections = math.floor((1 - CROSSOVER_RATE) * len(population_probability))
+        amount_crossover = POPULATION_SIZE - amount_selections
+
+        new_population = tournament_selection(population_probability, amount_selections)
+
+        #? Crossover
+        new_population = crossover(population_probability, new_population, amount_crossover, GEN_SIZE)
+
+        #? Mutation
+        population = mutation(new_population, MUTATION_RATE, GEN_SIZE)
+
+
+    population_probability, fitness_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+    best_fitness_index, best_fitness = fitness_values
+
+    print("\n=== BEST PARAMETERS ===")
+    gen = population[best_fitness_index]["population"]
+    parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "]
+    parameters = [project_bit(x) for x in np.split(gen, 3)]
+    for index, name in enumerate(parameter_names):
+        print(f"{name}{parameters[index]}")
+
+    time_amount = time.time() - start_time
+    print(f"\nTook {time_amount}s")
+
+    plt.plot(best_fitness_values)
+    plt.show()
\ No newline at end of file
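Note: the decoding used at the end of gen_tuning_main (and again inside calc_population_fitness) splits the 24-bit genome into three 8-bit chunks, one per Q-learning parameter, and projects each chunk onto [0.1, 0.5]. A minimal sketch with a random genome; the printed values are illustrative:

```python
import numpy as np
from GenTunic.gen_math import project_bit

GEN_SIZE = 8 * 3                       # 8 bits per parameter

genome = np.random.randint(0, 2, GEN_SIZE)
alpha, epsilon, gamma = [project_bit(chunk) for chunk in np.split(genome, 3)]
print(f"Alpha: {alpha}, Epsilon: {epsilon}, Gamma: {gamma}")
```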
crossover_population[i+1]["population"][crossover_point:] + + child_one = np.empty(1, dtype=selected_population.dtype) + child_one["population"] = np.concatenate((mother_a, father_b)) + child_one["probability"] = 0 + + child_two = np.empty(1, dtype=selected_population.dtype) + child_two["population"] = np.concatenate((mother_b, father_a)) + child_two["probability"] = 0 + + selected_population = np.concatenate((selected_population, child_one)) + selected_population = np.concatenate((selected_population, child_two)) + + is_last_iteration = (i >= amount_crossover - 2) + if is_last_iteration and select_one_parent: + selected_population = np.append(selected_population, crossover_population[i]) + + return selected_population + + + +def mutation(population, MUTATION_RATE, GEN_SIZE): + amount_mutation = len(population) * MUTATION_RATE + mutation_indecies = np.random.choice(len(population), int(amount_mutation), replace=False) + + for individual_index in mutation_indecies: + bit_index = np.random.randint(0, GEN_SIZE) + bit_to_mutate = population[individual_index]["population"][bit_index] + mutated_grey = bit_to_grey(bit_to_mutate) ^ 1 + population[individual_index]["population"][bit_index] = grey_to_bit(mutated_grey) + + return population \ No newline at end of file diff --git a/ReinforcmentLearning/__pycache__/game.cpython-313.pyc b/ReinforcmentLearning/__pycache__/game.cpython-313.pyc new file mode 100644 index 0000000..135a7f9 Binary files /dev/null and b/ReinforcmentLearning/__pycache__/game.cpython-313.pyc differ diff --git a/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc b/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc new file mode 100644 index 0000000..c41db0b Binary files /dev/null and b/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc differ diff --git a/ReinforcmentLearning/__pycache__/util.cpython-313.pyc b/ReinforcmentLearning/__pycache__/util.cpython-313.pyc new file mode 100644 index 0000000..86dac8c Binary files /dev/null and b/ReinforcmentLearning/__pycache__/util.cpython-313.pyc differ diff --git a/game.py b/ReinforcmentLearning/game.py similarity index 96% rename from game.py rename to ReinforcmentLearning/game.py index ada0948..47cc11a 100644 --- a/game.py +++ b/ReinforcmentLearning/game.py @@ -2,7 +2,7 @@ import pygame import math import os -from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action +from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action # Initialize pygame pygame.init() @@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth): # Main game function -def run_game(q_values, EPSILON, ALPHA, GAMMA): +def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE): clock = pygame.time.Clock() labyrinth = labyrinth_init.copy() @@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA): if pacman.x == ghost.x and pacman.y == ghost.y: # print("Game Over! The ghost caught Pacman.") running = False - reward = -10 + reward = REWARD_ON_LOSE # Eat cookies if labyrinth[pacman.y][pacman.x] == ".": @@ -179,10 +179,11 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA): # Check if all cookies are eaten (game over) if all("." not in row for row in labyrinth): # print("You Win! Pacman ate all the cookies.") - reward = 10 + reward = REWARD_ON_WIN running = False # Draw the labyrinth, pacman, and ghost + #? 
diff --git a/ReinforcmentLearning/__pycache__/game.cpython-313.pyc b/ReinforcmentLearning/__pycache__/game.cpython-313.pyc
new file mode 100644
index 0000000..135a7f9
Binary files /dev/null and b/ReinforcmentLearning/__pycache__/game.cpython-313.pyc differ
diff --git a/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc b/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc
new file mode 100644
index 0000000..c41db0b
Binary files /dev/null and b/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc differ
diff --git a/ReinforcmentLearning/__pycache__/util.cpython-313.pyc b/ReinforcmentLearning/__pycache__/util.cpython-313.pyc
new file mode 100644
index 0000000..86dac8c
Binary files /dev/null and b/ReinforcmentLearning/__pycache__/util.cpython-313.pyc differ
diff --git a/game.py b/ReinforcmentLearning/game.py
similarity index 96%
rename from game.py
rename to ReinforcmentLearning/game.py
index ada0948..47cc11a 100644
--- a/game.py
+++ b/ReinforcmentLearning/game.py
@@ -2,7 +2,7 @@ import pygame
 import math
 import os
 
-from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
+from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
 
 # Initialize pygame
 pygame.init()
@@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth):
 
 
 # Main game function
-def run_game(q_values, EPSILON, ALPHA, GAMMA):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
     clock = pygame.time.Clock()
 
     labyrinth = labyrinth_init.copy()
@@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
         if pacman.x == ghost.x and pacman.y == ghost.y:
             # print("Game Over! The ghost caught Pacman.")
             running = False
-            reward = -10
+            reward = REWARD_ON_LOSE
 
         # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
@@ -179,10 +179,11 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
             # print("You Win! Pacman ate all the cookies.")
-            reward = 10
+            reward = REWARD_ON_WIN
             running = False
 
         # Draw the labyrinth, pacman, and ghost
+        #? -------------------------MY CODE-----------------------------------
         if not running:
             new_state = state
 
@@ -206,6 +207,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA):
         counter += 1
         return 20-counter
         #? -------------------------MY CODE-----------------------------------
+
         draw_labyrinth(labyrinth)
         pacman.draw()
         ghost.draw()
diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py
new file mode 100644
index 0000000..44f8d33
--- /dev/null
+++ b/ReinforcmentLearning/learning.py
@@ -0,0 +1,62 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from ReinforcmentLearning.game import run_game
+from ReinforcmentLearning.util import initial_q_fill
+
+
+
+
+def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+    cookies_per_try = []
+    wins_per_try = []
+
+    for x in range(AMOUNT_TRIES):
+        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        cookies_per_try.append(cookies_per_run)
+        wins_per_try.append(amount_wins)
+        # print(f"Finished try {x+1}\n")
+
+    return cookies_per_try, wins_per_try
+
+
+def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+    """
+    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
+    action: Direction
+    q_value: (state, action)
+    """
+
+    q_values = {}
+
+    initial_q_fill(q_values)
+
+    cookies_per_run = []
+    # Amount of single runs
+    for x in range(AMOUNT_RUNS):
+        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
+        cookies_per_run.append(amount_cookies_ate)
+
+    wins = 0
+    for element in cookies_per_run:
+        if element == 20:
+            wins += 1
+
+    # print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
+
+    return cookies_per_run, wins
+
+
+def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
+    # print("---------DONE---------")
+    # print("Used: ")
+    # print(f"Epsilon: {EPSILON}")
+    # print(f"Gamma: {GAMMA}")
+    # print(f"Alpha: {ALPHA}")
+
+    # print("---------SUMMARY---------")
+    print(f"Average win percentage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")
+    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
+    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")
+
diff --git a/util.py b/ReinforcmentLearning/util.py
similarity index 96%
rename from util.py
rename to ReinforcmentLearning/util.py
index f632371..59cfaee 100644
--- a/util.py
+++ b/ReinforcmentLearning/util.py
@@ -9,8 +9,8 @@ class Direction(Enum):
 
 
 def initial_q_fill(q_values):
-    for x in range(8):
-        for y in range(3):
+    for x in range(-7, 8):
+        for y in range(-2, 3):
             for cookie_direction in Direction:
                 for action in Direction:
                     state = (x, y, cookie_direction)
@@ -29,8 +29,8 @@ def get_start_state():
 
 
 def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
-    x_ghost_dist = abs(pac_x - ghost_x)
-    y_ghost_dist = abs(pac_y - ghost_y)
+    x_ghost_dist = pac_x - ghost_x
+    y_ghost_dist = pac_y - ghost_y
 
     cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
 
diff --git a/argument_swapper.py b/argument_swapper.py
deleted file mode 100644
index 9dbd365..0000000
--- a/argument_swapper.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import numpy as np
-from learning import runTry
-
-EPSILON = 0.5
-ALPHA = 0.5
-GAMMA = 0.5
-
-STEPS = 10
-
-for alpha in np.linspace(0.1, 0.5, 10):
-    runTry(EPSILON, alpha, GAMMA)
\ No newline at end of file
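Note: switching calc_current_state from absolute to signed ghost distances is what enlarges the state space, and initial_q_fill now pre-fills the Q-table over the signed ranges accordingly. A rough size check of the resulting table, assuming the Direction enum has the usual four members (the enum body is not shown in this diff):

```python
x_values = range(-7, 8)    # signed x distance to the ghost
y_values = range(-2, 3)    # signed y distance to the ghost
directions = 4             # assumed number of Direction members

states = len(x_values) * len(y_values) * directions   # 15 * 5 * 4 = 300 states
q_entries = states * directions                       # one entry per (state, action) = 1200
print(states, q_entries)
```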
diff --git a/data.txt b/data.txt
index 0116767..520be21 100644
--- a/data.txt
+++ b/data.txt
@@ -1,16 +1,20 @@
-E: 0,1; A: 0.1; G: 0.9; 200/5000
-E: 0,1; A: 0.1; G: 0.9; 150/5000
+# High Gamma
+E: 0.1; A: 0.1; G: 0.9; 0.04%
+E: 0.1; A: 0.1; G: 0.9; 0.03%
 
-E: 0,5; A: 0.1; G: 0.9; 0.0034%
-E: 0,5; A: 0.1; G: 0.9; 0.002%
+E: 0.5; A: 0.1; G: 0.9; 0.0034%
+E: 0.5; A: 0.1; G: 0.9; 0.002%
 
-E: 0,5; A: 0.5; G: 0.5; 0.0012%
-E: 0,5; A: 0.5; G: 0.5; 0.0002%
-E: 0,5; A: 0.5; G: 0.5; 0.001%
+E: 0.5; A: 0.5; G: 0.5; 0.0012%
+E: 0.5; A: 0.5; G: 0.5; 0.0002%
+E: 0.5; A: 0.5; G: 0.5; 0.001%
 
-E: 0,5; A: 0.3; G: 0.5; 0.0018%
-E: 0,5; A: 0.3; G: 0.5; 0.0022%
-E: 0,5; A: 0.3; G: 0.5; 0.0014%
-E: 0,5; A: 0.3; G: 0.5; 0.0016%
-E: 0,5; A: 0.3; G: 0.5; 0.0022%
+E: 0.5; A: 0.3; G: 0.5; 0.0018%
+E: 0.5; A: 0.3; G: 0.5; 0.0022%
+E: 0.5; A: 0.3; G: 0.5; 0.0014%
+E: 0.5; A: 0.3; G: 0.5; 0.0016%
+E: 0.5; A: 0.3; G: 0.5; 0.0022%
 
+
+# AFTER ABSOLUTE CHANGE
diff --git a/learning.py b/learning.py
deleted file mode 100644
index 0719915..0000000
--- a/learning.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from game import run_game
-from util import initial_q_fill
-
-
-EPSILON = 0.5
-ALPHA = 0.3
-GAMMA = 0.8
-
-def runTry(EPSILON, ALPHA, GAMMA):
-    """
-    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
-    action: Direction
-    q_value: (state, action)
-    """
-
-    AMOUNT_RUNS = 5000
-    q_values = {}
-
-    initial_q_fill(q_values)
-
-    cookies_per_run = []
-    # Amount of single runs
-    for x in range(AMOUNT_RUNS):
-        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
-        cookies_per_run.append(amount_cookies_ate)
-        # print(f"Run {x}: {amount_cookies_ate} cookies ate\n")
-
-    wins = 0
-    for element in cookies_per_run:
-        if element == 20:
-            wins += 1
-
-    print(f"Win percentage: {wins/AMOUNT_RUNS}%")
-
-    return cookies_per_run
-
-
-
-cookies_per_run = runTry(EPSILON, ALPHA, GAMMA)
-
-
-window_size = 100  # Adjust based on your needs
-rolling_avg = pd.Series(cookies_per_run).rolling(window=window_size, center=True).mean()
-
-plt.figure(figsize=(12, 6))
-plt.plot(cookies_per_run, alpha=0.2, label='Raw Data', linewidth=0.5, color='gray')
-plt.plot(rolling_avg, label=f'{window_size}-point Moving Average', 
-         linewidth=2, color='blue')
-plt.title("Data with Rolling Average")
-plt.xlabel("Index")
-plt.ylabel("Value")
-plt.legend()
-plt.grid(True, alpha=0.3)
-plt.show()
-
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..6d0bca7
--- /dev/null
+++ b/main.py
@@ -0,0 +1,17 @@
+from GenTunic.gen_tuning import gen_tuning_main
+from ReinforcmentLearning.learning import multipleTries
+
+
+EPSILON = 0.3
+ALPHA = 0.3
+GAMMA = 0.8
+
+AMOUNT_RUNS = 5000
+AMOUNT_TRIES = 10
+
+REWARD_ON_WIN = 10
+REWARD_ON_LOSE = -10
+
+
+#multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
\ No newline at end of file
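Note: with POPULATION_SIZE = 200, AMOUNT_TRIES = 10 and AMOUNT_RUNS = 5000, a single call to calc_population_fitness plays 200 * 10 * 5000 = 10,000,000 games, and that happens once per generation, so full runs are expensive. A minimal invocation sketch with deliberately small, illustrative values; it assumes execution from the repository root so both packages are importable:

```python
from GenTunic.gen_tuning import gen_tuning_main

# Same call main.py makes, but with tiny settings for a quick smoke test.
gen_tuning_main(AMOUNT_TRIES=2, AMOUNT_RUNS=500, REWARD_ON_WIN=10, REWARD_ON_LOSE=-10)
```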