# MLE-Pacman/ReinforcmentLearning/util.py

from enum import Enum
import random
import pygame

import numpy as np

import data.classes_consts as consts

class Direction(Enum):
    """The four movement directions on the grid, numbered clockwise from UP.

    The integer values double as indices into the per-state Q-value arrays
    (see ``get_best_q_action``, which maps an argmax index back to a member).
    """

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def initial_q_fill():
    """Create a randomly initialised Q-table for the full relative-position state.

    Keys are ``(x, y, ghost_direction, cookie_direction)`` tuples with ``x`` in
    -7..7, ``y`` in -2..2 and both directions ranging over ``Direction``.
    Each value is a float array holding one entry per ``Direction`` member,
    drawn uniformly from [-0.1, 0.1).

    Returns:
        dict mapping every state tuple to its array of initial Q-values.
    """
    table = {}
    action_count = len(Direction)

    for x_offset in range(-7, 8):
        for y_offset in range(-2, 3):
            for ghost_dir in Direction:
                for cookie_dir in Direction:
                    # One small random value per action, drawn in action order.
                    initial_values = [
                        random.random() * 0.2 - 0.1 for _ in range(action_count)
                    ]
                    key = (x_offset, y_offset, ghost_dir, cookie_dir)
                    table[key] = np.array(initial_values)

    return table


def initial_q_fill_only_surroundings():
    """Create a randomly initialised Q-table for the compact "surroundings" state.

    Keys are ``(ghost_distance, ghost_direction, cookie_direction)`` tuples with
    ``ghost_distance`` being 1 or 2 and both directions ranging over
    ``Direction``. Each value is a float array holding one entry per
    ``Direction`` member, drawn uniformly from [-0.1, 0.1).

    Returns:
        dict mapping every state tuple to its array of initial Q-values.
    """
    table = {}
    action_count = len(Direction)

    for distance_bucket in (1, 2):
        for ghost_dir in Direction:
            for cookie_dir in Direction:
                # One small random value per action, drawn in action order.
                initial_values = [
                    random.random() * 0.2 - 0.1 for _ in range(action_count)
                ]
                table[(distance_bucket, ghost_dir, cookie_dir)] = np.array(initial_values)

    return table


def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the full state tuple for the current positions.

    Args:
        labyrinth: Grid of rows whose cells are compared against ``"."`` to
            find cookies.
        pac_x, pac_y: Pac-Man's grid coordinates.
        ghost_x, ghost_y: The ghost's grid coordinates.

    Returns:
        ``(x_offset, y_offset, ghost_direction, cookie_direction)`` where the
        offsets are Pac-Man's position minus the ghost's position and
        ``ghost_direction`` is derived from those same signed offsets.
    """
    ghost_offset = (pac_x - ghost_x, pac_y - ghost_y)

    cookie_heading = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    ghost_heading = cords_to_direction(*ghost_offset)

    return (*ghost_offset, ghost_heading, cookie_heading)


def calc_current_state_surroundings(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the compact "surroundings" state tuple for the current positions.

    Args:
        labyrinth: Grid of rows whose cells are compared against ``"."`` to
            find cookies.
        pac_x, pac_y: Pac-Man's grid coordinates.
        ghost_x, ghost_y: The ghost's grid coordinates.

    Returns:
        ``(ghost_distance, ghost_direction, cookie_direction)`` where
        ``ghost_distance`` is 1 when the Manhattan distance to the ghost is
        exactly 1 and 2 otherwise.
    """
    # Keep the sign of the offsets (Pac-Man minus ghost, the same convention
    # as calc_current_state). Feeding absolute values into cords_to_direction
    # would collapse the result to DOWN/RIGHT and make UP unreachable.
    x_ghost_offset = pac_x - ghost_x
    y_ghost_offset = pac_y - ghost_y

    # Only the distance bucket is sign-independent.
    ghost_distance_sum = abs(x_ghost_offset) + abs(y_ghost_offset)
    ghost_distance = 1 if ghost_distance_sum == 1 else 2

    ghost_direction = cords_to_direction(x_ghost_offset, y_ghost_offset)

    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)

    return ghost_distance, ghost_direction, cookie_direction


def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the direction from Pac-Man towards the nearest cookie.

    "Nearest" is measured in Manhattan distance. When several cookies are
    equally close, the first one in row-major scan order (top row first,
    left to right) wins.

    Args:
        labyrinth: Grid of rows; a cell equal to ``"."`` is a cookie.
        pac_x, pac_y: Pac-Man's grid coordinates.

    Returns:
        The ``Direction`` produced by ``cords_to_direction`` for the offset
        from Pac-Man to the chosen cookie.

    Raises:
        ValueError: If the labyrinth contains no cookie at all.
    """
    # Stream (distance, x, y) candidates instead of materialising a list.
    candidates = (
        (abs(pac_x - x) + abs(pac_y - y), x, y)
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    )

    # Compare on distance only so that ties keep the first cookie encountered.
    closest = min(candidates, key=lambda item: item[0], default=None)
    if closest is None:
        raise ValueError("labyrinth contains no cookies ('.')")

    _, cookie_x, cookie_y = closest
    return cords_to_direction(cookie_x - pac_x, cookie_y - pac_y)


def cords_to_direction(dx, dy):
    """Reduce a signed (dx, dy) offset to a single ``Direction``.

    The axis with the smaller absolute offset (the "minor" axis) is consulted
    first: if its offset is non-zero, the direction along that axis is
    returned. Only when the minor offset is zero does the other ("major")
    axis decide. Ties between the axes count the x axis as major.

    Positive ``dx`` maps to RIGHT, negative to LEFT; positive ``dy`` maps to
    DOWN, negative to UP. A zero major offset yields LEFT (x major) or UP
    (y major).
    """
    horizontal = (dx, Direction.RIGHT, Direction.LEFT)
    vertical = (dy, Direction.DOWN, Direction.UP)

    if abs(dx) >= abs(dy):
        major, minor = horizontal, vertical
    else:
        major, minor = vertical, horizontal

    # A non-zero offset along the minor axis takes precedence.
    delta, positive, negative = minor
    if delta > 0:
        return positive
    if delta < 0:
        return negative

    # Already aligned on the minor axis: the major axis decides.
    delta, positive, negative = major
    return positive if delta > 0 else negative


def epsilon_greedy(q_values, state, epsilon):
    """Pick an action using the epsilon-greedy rule.

    Args:
        q_values: Mapping from state to an array of per-action Q-values.
        state: Key of the current state in ``q_values``.
        epsilon: Probability of choosing a random action.

    Returns:
        A random ``Direction`` when a fresh ``random.random()`` draw is below
        ``epsilon``, otherwise the highest-valued action for ``state``.
    """
    explore = random.random() < epsilon
    if not explore:
        return get_best_q_action(q_values, state)
    return get_random_direction()


def get_best_q_action(q_values, state):
    """Return the ``Direction`` whose Q-value is highest for ``state``.

    The index chosen by ``np.argmax`` over ``q_values[state]`` is converted
    to the ``Direction`` member with that value.
    """
    return Direction(np.argmax(q_values[state]))


def get_random_direction():
    """Return one ``Direction`` member chosen uniformly at random."""
    all_directions = [*Direction]
    return random.choice(all_directions)


def calc_time_reward(amount_iterations):
    """Return a reward that shrinks as more iterations are used.

    Below 1000 iterations the reward is a flat 10, above 10000 it is a flat
    1, and in between it falls linearly from 10 to 1.
    """
    finished_quickly = amount_iterations < 1000
    took_too_long = amount_iterations > 10000

    if finished_quickly:
        return 10
    if took_too_long:
        return 1

    # Linear ramp: 10 at 1000 iterations down to 1 at 10000 iterations.
    slope = -(1 / 1000)
    return slope * amount_iterations + 11


def draw_labyrinth(screen, labyrinth):
    """Draw the walls and cookies of ``labyrinth`` onto ``screen``.

    Cells equal to ``"#"`` are drawn as filled squares of ``consts.CELL_SIZE``
    in ``consts.BLUE``; cells equal to ``"."`` are drawn as circles of radius
    5 in ``consts.WHITE`` centred in their cell. Other cells are left alone.

    Args:
        screen: Surface handed to the ``pygame.draw`` calls.
        labyrinth: Grid of rows of single-character cells.
    """
    cell_size = consts.CELL_SIZE
    wall_colour = consts.BLUE
    cookie_colour = consts.WHITE
    half_cell = cell_size // 2

    for row_index, row in enumerate(labyrinth):
        top = row_index * cell_size
        for col_index, cell in enumerate(row):
            left = col_index * cell_size
            if cell == "#":
                wall_rect = (left, top, cell_size, cell_size)
                pygame.draw.rect(screen, wall_colour, wall_rect)
            elif cell == ".":
                centre = (left + half_cell, top + half_cell)
                pygame.draw.circle(screen, cookie_colour, centre, 5)