MLE-Pacman/ReinforcmentLearning/util.py

168 lines
4.4 KiB
Python

from enum import Enum
import random
import pygame
import numpy as np
import data.classes_consts as consts
class Direction(Enum):
    """The four movement directions, also used as action indices.

    The integer value of each member is the index of its entry in the
    per-state Q-value arrays (see ``get_best_q_action``, which converts an
    ``argmax`` index back into a member).
    """

    # Values are consecutive from 0 so they can index a length-4 array.
    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3
def initial_q_fill():
    """Create a randomly initialised Q-table for the full ghost-offset state.

    Keys are tuples ``(x, y, ghost_direction, cookie_direction)`` -- the same
    layout that ``calc_current_state`` returns -- with ``x`` in ``-7..7`` and
    ``y`` in ``-2..2`` and both directions ranging over every ``Direction``.

    Returns:
        dict mapping each state tuple to a NumPy array holding one value per
        ``Direction``, each drawn uniformly from ``[-0.1, 0.1)``.
    """
    table = {}
    for dx in range(-7, 8):
        for dy in range(-2, 3):
            for ghost_dir in Direction:
                for cookie_dir in Direction:
                    # One small random start value per action, drawn in
                    # action-index order.
                    table[(dx, dy, ghost_dir, cookie_dir)] = np.array(
                        [random.random() * 0.2 - 0.1 for _ in Direction]
                    )
    return table
def initial_q_fill_only_surroundings():
    """Create a randomly initialised Q-table for the reduced state space.

    Keys are tuples ``(ghost_distance, ghost_direction, cookie_direction)``
    -- the layout ``calc_current_state_surroundings`` returns -- where
    ``ghost_distance`` is ``1`` or ``2`` and both directions range over every
    ``Direction``.

    Returns:
        dict mapping each state tuple to a NumPy array holding one value per
        ``Direction``, each drawn uniformly from ``[-0.1, 0.1)``.
    """
    table = {}
    for distance_bucket in (1, 2):
        for ghost_dir in Direction:
            for cookie_dir in Direction:
                # One small random start value per action, drawn in
                # action-index order.
                table[(distance_bucket, ghost_dir, cookie_dir)] = np.array(
                    [random.random() * 0.2 - 0.1 for _ in Direction]
                )
    return table
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the full state tuple for the current positions.

    Args:
        labyrinth: iterable of rows; passed through to
            ``get_closest_cookie_direction``.
        pac_x, pac_y: Pac-Man's cell coordinates.
        ghost_x, ghost_y: the ghost's cell coordinates.

    Returns:
        ``(x_offset, y_offset, ghost_direction, cookie_direction)`` where the
        offsets are Pac-Man's position minus the ghost's position and
        ``ghost_direction`` is ``cords_to_direction`` applied to those
        offsets.
    """
    # NOTE(review): the offset is pac - ghost, i.e. it points from the ghost
    # towards Pac-Man, whereas the cookie offset is cookie - pac. Confirm
    # this asymmetry is intended before changing it; stored Q-tables depend
    # on it.
    offset_x = pac_x - ghost_x
    offset_y = pac_y - ghost_y
    towards_cookie = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    return (
        offset_x,
        offset_y,
        cords_to_direction(offset_x, offset_y),
        towards_cookie,
    )
def calc_current_state_surroundings(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the reduced state tuple for the current positions.

    Args:
        labyrinth: iterable of rows; passed through to
            ``get_closest_cookie_direction``.
        pac_x, pac_y: Pac-Man's cell coordinates.
        ghost_x, ghost_y: the ghost's cell coordinates.

    Returns:
        ``(ghost_distance, ghost_direction, cookie_direction)`` where
        ``ghost_distance`` is ``1`` when the Manhattan distance between
        Pac-Man and the ghost is exactly 1 and ``2`` otherwise (including a
        distance of 0).
    """
    # Keep the SIGNED offsets for the direction. Feeding absolute values to
    # cords_to_direction (as this function used to) discards the sign, so the
    # ghost direction could never be UP and was LEFT only when both offsets
    # were zero. The sign convention (pac - ghost) matches calc_current_state.
    x_ghost_offset = pac_x - ghost_x
    y_ghost_offset = pac_y - ghost_y

    # Only the distance bucket uses absolute values.
    ghost_distance_sum = abs(x_ghost_offset) + abs(y_ghost_offset)
    ghost_distance = 1 if ghost_distance_sum == 1 else 2

    ghost_direction = cords_to_direction(x_ghost_offset, y_ghost_offset)
    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    return ghost_distance, ghost_direction, cookie_direction
def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the direction from Pac-Man towards the nearest cookie.

    Cookies are the cells equal to ``"."``. "Nearest" is measured by
    Manhattan distance; on ties the first cookie found in row-major order
    (top row first, left to right) wins.

    Args:
        labyrinth: iterable of rows, each an iterable of single-cell values.
        pac_x, pac_y: Pac-Man's cell coordinates (column, row).

    Returns:
        The ``Direction`` that ``cords_to_direction`` yields for the offset
        ``(cookie_x - pac_x, cookie_y - pac_y)``. When the labyrinth contains
        no cookie at all, the offset ``(0, 0)`` is used instead of raising,
        so callers still receive a valid ``Direction`` for the state lookup.
    """
    # Lazily enumerate the offset of every cookie relative to Pac-Man; no
    # intermediate list is needed just to pick the minimum.
    cookie_offsets = (
        (x - pac_x, y - pac_y)
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    )
    # min() keeps the first minimal element, preserving the row-major
    # tie-break. The default avoids ValueError once every cookie is eaten.
    dx, dy = min(
        cookie_offsets,
        key=lambda offset: abs(offset[0]) + abs(offset[1]),
        default=(0, 0),
    )
    return cords_to_direction(dx, dy)
def cords_to_direction(dx, dy):
    """Collapse a 2-D offset into a single ``Direction``.

    Positive ``dx`` maps to RIGHT and negative to LEFT; positive ``dy`` maps
    to DOWN and negative to UP. The axis with the *smaller* absolute offset
    is reported first as long as that offset is non-zero; only when it is
    zero does the other axis decide. With equal absolute offsets the vertical
    axis is checked first.

    Args:
        dx: horizontal offset to the target.
        dy: vertical offset to the target.

    Returns:
        The resulting ``Direction``. ``(0, 0)`` yields ``Direction.LEFT``.
    """
    if abs(dx) >= abs(dy):
        # Horizontal gap is at least as large: resolve the vertical gap first.
        if dy > 0:
            return Direction.DOWN
        if dy < 0:
            return Direction.UP
        # Same row as the target; fall back to the horizontal axis.
        return Direction.RIGHT if dx > 0 else Direction.LEFT

    # Vertical gap is strictly larger: resolve the horizontal gap first.
    if dx > 0:
        return Direction.RIGHT
    if dx < 0:
        return Direction.LEFT
    # Same column as the target; fall back to the vertical axis.
    return Direction.DOWN if dy > 0 else Direction.UP
def epsilon_greedy(q_values, state, epsilon):
    """Pick an action with an epsilon-greedy policy.

    Args:
        q_values: mapping from state to an array of per-action Q-values.
        state: key into ``q_values`` for the current state.
        epsilon: exploration threshold compared against ``random.random()``.

    Returns:
        A random ``Direction`` when the drawn number is below ``epsilon``,
        otherwise the action ``get_best_q_action`` reports for ``state``.
    """
    explore = random.random() < epsilon
    # The conditional expression evaluates only the chosen branch, so the
    # Q-table is not touched at all on an exploration step.
    return get_random_direction() if explore else get_best_q_action(q_values, state)
def get_best_q_action(q_values, state):
    """Return the action with the highest Q-value for ``state``.

    Args:
        q_values: mapping from state to an array of per-action Q-values.
        state: key into ``q_values``; a missing key raises ``KeyError``.

    Returns:
        The ``Direction`` whose value equals the ``np.argmax`` index of the
        state's Q-value array.
    """
    return Direction(np.argmax(q_values[state]))
def get_random_direction():
    """Return one ``Direction`` member chosen by ``random.choice``."""
    all_directions = tuple(Direction)
    return random.choice(all_directions)
def calc_time_reward(amount_iterations):
    """Map an iteration count to a reward that shrinks as the count grows.

    Args:
        amount_iterations: number of iterations used.

    Returns:
        ``10`` for fewer than 1000 iterations, ``1`` for more than 10000, and
        in between the linear value ``-(1 / 1000) * amount_iterations + 11``
        (which equals 10 at 1000 and 1 at 10000).
    """
    if amount_iterations < 1000:
        reward = 10
    elif amount_iterations > 10000:
        reward = 1
    else:
        # Linear ramp joining the two plateaus.
        slope = -(1 / 1000)
        reward = slope * amount_iterations + 11
    return reward
def draw_labyrinth(screen, labyrinth):
    """Draw the labyrinth's walls and cookies onto ``screen``.

    Cells equal to ``"#"`` are drawn as filled squares in ``consts.BLUE`` and
    cells equal to ``"."`` as circles of radius 5 in ``consts.WHITE``, centred
    in their cell. Every other cell value is skipped.

    Args:
        screen: surface handed to ``pygame.draw.rect`` / ``pygame.draw.circle``.
        labyrinth: iterable of rows, each an iterable of single-cell values.
    """
    cell_px = consts.CELL_SIZE
    wall_color = consts.BLUE
    cookie_color = consts.WHITE
    half_cell = cell_px // 2

    for row_idx, row in enumerate(labyrinth):
        top = row_idx * cell_px
        for col_idx, tile in enumerate(row):
            left = col_idx * cell_px
            if tile == "#":
                pygame.draw.rect(screen, wall_color, (left, top, cell_px, cell_px))
            elif tile == ".":
                pygame.draw.circle(
                    screen, cookie_color, (left + half_cell, top + half_cell), 5
                )