# MLE-Pacman/util.py

from enum import Enum
import random

class Direction(Enum):
    """The four cardinal directions, numbered clockwise starting at UP."""

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def get_start_state():
    """Return the initial state tuple with a randomly chosen direction.

    The first two entries are always ``7`` and ``2``; the third is
    ``Direction.DOWN`` or ``Direction.RIGHT`` with equal probability.

    NOTE(review): presumably laid out like the tuple returned by
    ``calc_current_state`` (x distance, y distance, cookie direction) --
    confirm against the caller.
    """
    coin_flip = random.choice([True, False])
    start_direction = Direction.DOWN if coin_flip else Direction.RIGHT
    return (7, 2, start_direction)


def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the state tuple for Pac-Man's current position.

    Args:
        labyrinth: Grid rows as accepted by ``get_all_cookies_locations``;
            cells equal to ``"."`` are cookies.
        pac_x: Pac-Man's x coordinate (column index of the grid).
        pac_y: Pac-Man's y coordinate (row index of the grid).
        ghost_x: The ghost's x coordinate.
        ghost_y: The ghost's y coordinate.

    Returns:
        ``(x_dist, y_dist, cookie_direction)``. The first two entries are
        the absolute per-axis distances between Pac-Man and the ghost.
        ``cookie_direction`` is the ``Direction`` pointing at the nearest
        cookie (Manhattan distance, no distance limit), or ``None`` when no
        cookie is left or Pac-Man already stands on the nearest one.
    """
    # distance pacman - ghost
    x_dist = abs(pac_x - ghost_x)
    y_dist = abs(pac_y - ghost_y)

    # closest cookie; min() keeps the first one in scan order on ties and
    # yields None for an empty board instead of raising.
    nearest_cookie = min(
        get_all_cookies_locations(labyrinth),
        key=lambda pos: abs(pac_x - pos[0]) + abs(pac_y - pos[1]),
        default=None,
    )
    if nearest_cookie is None:
        return x_dist, y_dist, None

    # closest cookie direction: signed offset from Pac-Man to that cookie
    offset_x = nearest_cookie[0] - pac_x
    offset_y = nearest_cookie[1] - pac_y

    if offset_x == 0 and offset_y == 0:
        cookie_direction = None
    elif abs(offset_x) >= abs(offset_y):
        # Follow the axis with the larger offset; horizontal wins ties.
        cookie_direction = Direction.RIGHT if offset_x > 0 else Direction.LEFT
    else:
        # NOTE(review): assumes row 0 is the top of the board, so a larger
        # y means further down -- confirm against the rendering code.
        cookie_direction = Direction.DOWN if offset_y > 0 else Direction.UP

    return x_dist, y_dist, cookie_direction


def get_all_cookies_locations(labyrinth):
    """Return the ``(x, y)`` position of every cookie cell in the labyrinth.

    A cookie is any cell equal to ``"."``. ``x`` is the index within a row
    and ``y`` is the row index; positions are listed row by row, left to
    right.
    """
    return [
        (col_index, row_index)
        for row_index, cells in enumerate(labyrinth)
        for col_index, content in enumerate(cells)
        if content == "."
    ]


def epsilon_greedy(q_values, state, epsilon):
    """Pick an action for ``state`` using an epsilon-greedy policy.

    With probability ``epsilon`` a random action is returned: one of the
    actions already recorded for ``state`` in ``q_values`` or, when none is
    recorded, a random direction. Otherwise the action reported as best by
    ``get_best_q_value`` is returned.
    """
    # The greedy lookup runs first, before the exploration roll.
    greedy_action, known_actions = get_best_q_value(q_values, state)

    exploring = random.random() < epsilon
    if not exploring:
        return greedy_action

    if known_actions:
        return random.choice(known_actions)

    return get_random_direction()


def get_best_q_value(q_values, state):
    """Find the highest-valued action recorded for ``state``.

    Args:
        q_values: Mapping of ``(state, action)`` keys to numeric values.
        state: The state whose entries are looked up (compared with ``==``).

    Returns:
        ``(best_action, actions)`` where ``actions`` lists every action
        recorded for ``state`` in mapping order and ``best_action`` is the
        one with the greatest value (the first such entry on ties). When
        nothing is recorded for ``state``, ``best_action`` is a random
        direction and ``actions`` is empty.
    """
    matches = [
        (action, value)
        for (q_state, action), value in q_values.items()
        if q_state == state
    ]
    states_for_epsilon = [action for action, _ in matches]

    # Decide on "nothing recorded" by the absence of entries, not by the
    # truthiness of the winning action: a falsy action such as 0 is valid.
    if not matches:
        return get_random_direction(), states_for_epsilon

    best_action, _ = max(matches, key=lambda pair: pair[1])
    return best_action, states_for_epsilon


def get_random_direction():
    """Return one ``Direction`` member chosen uniformly at random."""
    all_directions = list(Direction)
    return random.choice(all_directions)