"""Genetic-algorithm building blocks operating on a structured NumPy array.

Every individual is one record with two fields:

* ``"population"``  -- the genome, ``GEN_SIZE`` values of 0/1 (int32)
* ``"probability"`` -- the fitness, normalised over the population by
  ``calc_population_fitness``
"""
import random

import numpy as np

from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit
from ReinforcmentLearning.learning import multipleTries


def create_population(size, GEN_SIZE):
    """Create a population of random genomes.

    Args:
        size: Number of individuals to create.
        GEN_SIZE: Number of bits in each genome.

    Returns:
        Structured array of length ``size`` with the fields ``"population"``
        (``GEN_SIZE`` random 0/1 values) and ``"probability"`` (set to 0).
    """
    dtype = [("population", np.int32, (GEN_SIZE,)), ("probability", np.float64)]
    population_propability = np.zeros(size, dtype=dtype)
    genomes = population_propability["population"]  # view into the array
    for i in range(size):
        genomes[i] = np.random.randint(0, 2, GEN_SIZE)
    return population_propability


def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS,
                            REWARD_ON_WIN, REWARD_ON_LOSE):
    """Evaluate every individual and store its normalised fitness in place.

    Each genome is split into three equally sized parts which are decoded
    with ``project_bit`` into ``alpha``, ``epsilon`` and ``gamma``. These are
    passed to ``multipleTries``; the mean of the returned win counts divided
    by ``AMOUNT_RUNS`` is the individual's fitness.

    Args:
        population_propability: Structured population array; its
            ``"probability"`` field is overwritten.
        AMOUNT_TRIES: Forwarded to ``multipleTries``.
        AMOUNT_RUNS: Forwarded to ``multipleTries`` and used as the divisor
            that turns win counts into win rates.
        REWARD_ON_WIN: Forwarded to ``multipleTries``.
        REWARD_ON_LOSE: Forwarded to ``multipleTries``.

    Returns:
        Tuple ``(population_propability, (best_fitness_index, best_fitness))``
        where ``best_fitness`` is the best fitness *before* normalisation.

    Raises:
        ValueError: If the genome length is not divisible by 3 (raised by
            ``np.split``) or the population is empty (raised by ``np.argmax``).
    """
    population_fitness_sum = 0
    fitness_values = population_propability["probability"]  # view into the array
    for i, individual in enumerate(population_propability):
        gen = individual["population"]
        alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)]
        _, multiple_tries_wins = multipleTries(
            alpha, epsilon, gamma,
            AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE,
        )
        multiple_tries_win_prob = np.divide(np.array(multiple_tries_wins), AMOUNT_RUNS)
        fitness = multiple_tries_win_prob.mean()
        fitness_values[i] = fitness
        population_fitness_sum += fitness
        print(f"Individual {i}: {fitness}")

    # Capture the best raw fitness before the values are normalised.
    best_fitness_index = np.argmax(population_propability["probability"])
    best_fitness = population_propability[best_fitness_index]["probability"]

    if population_fitness_sum == 0:
        # Nobody scored: dividing would turn every entry into NaN and break
        # all later comparisons, so treat the individuals as equally likely.
        population_propability["probability"] = 1.0 / len(population_propability)
    else:
        population_propability["probability"] = (
            population_propability["probability"] / population_fitness_sum
        )
    return population_propability, (best_fitness_index, best_fitness)


def turnament_selection(population_propability, amount_selections):
    """Select individuals by binary tournament, always keeping the best one.

    The individual with the highest ``"probability"`` is selected first.
    After that, two distinct individuals are drawn at random and the one with
    the strictly higher ``"probability"`` wins (the second one wins ties)
    until ``amount_selections`` individuals have been collected.

    Args:
        population_propability: Structured population array with at least two
            individuals when more than one selection is requested.
        amount_selections: Number of individuals to select; at least one
            individual is returned even if this is smaller than 1.

    Returns:
        Structured array containing copies of the selected individuals.
    """
    probabilities = population_propability["probability"]
    selected_population = [population_propability[np.argmax(probabilities)]]
    while len(selected_population) < amount_selections:
        first, second = random.sample(range(len(population_propability)), 2)
        winner = first if probabilities[first] > probabilities[second] else second
        selected_population.append(population_propability[winner])
    return np.array(selected_population)


def crossover(population_propability, selected_population, amount_crossover, GEN_SIZE):
    """Extend ``selected_population`` with offspring from one-point crossover.

    ``amount_crossover`` parents are chosen with ``turnament_selection`` and
    paired up in order. Each pair yields two complementary children: one with
    the mother's head and the father's tail, one with the father's head and
    the mother's tail, so every bit keeps its position in the genome. If
    ``amount_crossover`` is odd, the unpaired last parent is carried over
    unchanged so that exactly ``amount_crossover`` individuals are added.

    Args:
        population_propability: Population the parents are selected from.
        selected_population: Structured array the new individuals are
            appended to (not modified in place).
        amount_crossover: Number of individuals to add.
        GEN_SIZE: Genome length; must be at least 2 so that a crossover point
            exists.

    Returns:
        New structured array: ``selected_population`` followed by the
        children (``"probability"`` 0) and, for an odd ``amount_crossover``,
        the unpaired parent.
    """
    crossover_population = turnament_selection(population_propability, amount_crossover)
    if amount_crossover <= 0:
        return selected_population

    has_unpaired_parent = amount_crossover % 2 == 1
    amount_children = amount_crossover - 1 if has_unpaired_parent else amount_crossover

    # Allocate all children at once instead of re-concatenating per pair;
    # np.zeros also initialises their "probability" to 0.
    children = np.zeros(amount_children, dtype=selected_population.dtype)
    child_genomes = children["population"]  # view into the children array
    for i in range(0, amount_children, 2):
        crossover_point = np.random.randint(1, GEN_SIZE)
        mother = crossover_population[i]["population"]
        father = crossover_population[i + 1]["population"]
        child_genomes[i] = np.concatenate(
            (mother[:crossover_point], father[crossover_point:])
        )
        child_genomes[i + 1] = np.concatenate(
            (father[:crossover_point], mother[crossover_point:])
        )

    parts = [selected_population, children]
    if has_unpaired_parent:
        parts.append(crossover_population[-1:])
    return np.concatenate(parts)


def mutation(population, MUTATION_RATE, GEN_SIZE):
    """Flip one random bit in a random subset of the population, in place.

    ``int(len(population) * MUTATION_RATE)`` distinct individuals are chosen.
    For each of them the genome is converted with ``bit_to_grey``, one random
    position is flipped and the result is converted back with ``grey_to_bit``.

    Args:
        population: Structured population array; modified in place.
        MUTATION_RATE: Fraction of the population to mutate.
        GEN_SIZE: Genome length, used as the exclusive upper bound for the
            position that is flipped.

    Returns:
        The same ``population`` array.

    Raises:
        ValueError: If more individuals are requested than exist
            (``MUTATION_RATE`` greater than 1), raised by ``np.random.choice``.
    """
    amount_mutation = int(len(population) * MUTATION_RATE)
    mutation_indecies = np.random.choice(len(population), amount_mutation, replace=False)
    for individual_index in mutation_indecies:
        bit_index = np.random.randint(0, GEN_SIZE)
        grey_to_mutate = bit_to_grey(population[individual_index]["population"])
        grey_to_mutate[bit_index] ^= 1
        population[individual_index]["population"] = grey_to_bit(grey_to_mutate)
    return population


def gen_to_params():
    """Placeholder without an implementation; currently returns ``None``."""