"""Q-learning helpers: state encoding, action selection, rewards and drawing."""

from enum import Enum
import random

import numpy as np
import pygame

import data.classes_consts as consts


class Direction(Enum):
    """The four movement actions; the value doubles as the Q-array index."""

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def _random_action_values():
    """Return one Q-value per ``Direction``, each drawn from [-0.1, 0.1)."""
    return np.array([random.random() * 0.2 - 0.1 for _ in Direction])


def initial_q_fill():
    """Build a randomly initialised Q-table for the full-offset state space.

    Keys are ``(x, y, ghost_direction, cookie_direction)`` tuples with
    ``x`` in [-7, 7] and ``y`` in [-2, 2]; values are NumPy arrays holding
    one entry per ``Direction``.

    Returns:
        dict mapping each state tuple to its array of action values.
    """
    # NOTE(review): the x/y ranges presumably mirror the labyrinth size;
    # verify against the caller if the map dimensions change.
    q_values = {}
    for x in range(-7, 8):
        for y in range(-2, 3):
            for ghost_direction in Direction:
                for cookie_direction in Direction:
                    state = (x, y, ghost_direction, cookie_direction)
                    q_values[state] = _random_action_values()
    return q_values


def initial_q_fill_only_surroundings():
    """Build a randomly initialised Q-table for the reduced state space.

    Keys are ``(ghost_distance, ghost_direction, cookie_direction)`` tuples
    with ``ghost_distance`` in ``{1, 2}``; values are NumPy arrays holding
    one entry per ``Direction``.

    Returns:
        dict mapping each state tuple to its array of action values.
    """
    q_values = {}
    for ghost_distance in (1, 2):
        for ghost_direction in Direction:
            for cookie_direction in Direction:
                state = (ghost_distance, ghost_direction, cookie_direction)
                q_values[state] = _random_action_values()
    return q_values


def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Encode the current situation as a full-offset state tuple.

    Returns:
        ``(x_offset, y_offset, ghost_direction, cookie_direction)`` where
        the offsets are ``pac - ghost`` on each axis.

    Raises:
        ValueError: if the labyrinth contains no cookie cell.
    """
    x_ghost_dist = pac_x - ghost_x
    y_ghost_dist = pac_y - ghost_y
    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    ghost_direction = cords_to_direction(x_ghost_dist, y_ghost_dist)
    return x_ghost_dist, y_ghost_dist, ghost_direction, cookie_direction


def calc_current_state_surroundings(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Encode the current situation as a reduced state tuple.

    The ghost distance is bucketed: a Manhattan distance of exactly 1 maps
    to ``1``, every other distance (including 0) maps to ``2``.

    Returns:
        ``(ghost_distance, ghost_direction, cookie_direction)``.

    Raises:
        ValueError: if the labyrinth contains no cookie cell.
    """
    # Keep the signed offsets for the direction: taking abs() first would
    # discard the sign, so UP could never be reported and LEFT only when
    # both offsets are zero.
    # NOTE(review): the offset is pac - ghost, matching calc_current_state;
    # confirm this is the intended orientation of "ghost direction".
    x_ghost_offset = pac_x - ghost_x
    y_ghost_offset = pac_y - ghost_y

    manhattan_distance = abs(x_ghost_offset) + abs(y_ghost_offset)
    ghost_distance = 1 if manhattan_distance == 1 else 2

    ghost_direction = cords_to_direction(x_ghost_offset, y_ghost_offset)
    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    return ghost_distance, ghost_direction, cookie_direction


def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the direction from ``(pac_x, pac_y)`` to the nearest cookie.

    Cookies are cells equal to ``"."``; ``labyrinth`` is iterated row by
    row (``y``) and cell by cell (``x``). Distance is Manhattan distance,
    and ties are resolved in favour of the first cookie in that scan order.

    Raises:
        ValueError: if the labyrinth contains no cookie cell.
    """
    cookies = (
        (abs(pac_x - x) + abs(pac_y - y), x, y)
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    )
    # Comparing on the distance only keeps the first-found cookie on ties.
    closest = min(cookies, key=lambda item: item[0], default=None)
    if closest is None:
        raise ValueError("labyrinth contains no cookies ('.')")

    _, cookie_x, cookie_y = closest
    return cords_to_direction(cookie_x - pac_x, cookie_y - pac_y)


def cords_to_direction(dx, dy):
    """Collapse a ``(dx, dy)`` offset into a single ``Direction``.

    Positive ``dx`` maps to RIGHT and positive ``dy`` to DOWN. The axis
    with the smaller absolute offset is preferred as long as that offset
    is non-zero; otherwise the other axis decides. ``(0, 0)`` yields LEFT.
    """
    # NOTE(review): preferring the *smaller* axis first is what the code
    # does; confirm that this is the intended heuristic.
    if abs(dx) >= abs(dy):
        if dy > 0:
            return Direction.DOWN
        if dy < 0:
            return Direction.UP
        # Target is on the same row: only the horizontal offset is left.
        return Direction.RIGHT if dx > 0 else Direction.LEFT

    if dx > 0:
        return Direction.RIGHT
    if dx < 0:
        return Direction.LEFT
    # Target is in the same column: only the vertical offset is left.
    return Direction.DOWN if dy > 0 else Direction.UP


def epsilon_greedy(q_values, state, epsilon):
    """Pick an action: random with probability ``epsilon``, else greedy.

    Raises:
        KeyError: on the greedy path, if ``state`` is not in ``q_values``.
    """
    if random.random() < epsilon:
        return get_random_direction()
    return get_best_q_action(q_values, state)


def get_best_q_action(q_values, state):
    """Return the ``Direction`` with the highest Q-value for ``state``.

    ``np.argmax`` returns the first index on ties.

    Raises:
        KeyError: if ``state`` is not in ``q_values``.
    """
    best_action_index = int(np.argmax(q_values[state]))
    return Direction(best_action_index)


def get_random_direction():
    """Return a uniformly random ``Direction``."""
    return random.choice(list(Direction))


def calc_time_reward(amount_iterations):
    """Return a reward that shrinks as the iteration count grows.

    Below 1000 iterations the reward is 10, above 10000 it is 1, and in
    between it falls linearly (10 at 1000, 1 at 10000).
    """
    if amount_iterations < 1000:
        return 10
    if amount_iterations > 10000:
        return 1
    return - (1 / 1000) * amount_iterations + 11


def draw_labyrinth(screen, labyrinth):
    """Draw walls and cookies of ``labyrinth`` onto ``screen``.

    Cells equal to ``"#"`` are drawn as filled squares in ``consts.BLUE``;
    cells equal to ``"."`` as circles of radius 5 in ``consts.WHITE``,
    centred in their cell. Other cells are left untouched.
    """
    cell_size = consts.CELL_SIZE
    half_cell = cell_size // 2

    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            left = x * cell_size
            top = y * cell_size
            if cell == "#":
                pygame.draw.rect(
                    screen, consts.BLUE, (left, top, cell_size, cell_size)
                )
            elif cell == ".":
                pygame.draw.circle(
                    screen, consts.WHITE, (left + half_cell, top + half_cell), 5
                )