168 lines
4.4 KiB
Python
168 lines
4.4 KiB
Python
from enum import Enum
|
|
import random
|
|
import pygame
|
|
|
|
import numpy as np
|
|
|
|
import data.classes_consts as consts
|
|
|
|
class Direction(Enum):
    """Four grid directions, numbered clockwise starting from UP.

    The integer values double as action indices: ``get_best_q_action``
    turns an ``argmax`` position back into a member via ``Direction(index)``,
    so the values need to stay contiguous and start at 0.
    """

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3
|
|
|
|
|
|
def initial_q_fill():
    """Create a randomly initialised Q-table for the full-offset state space.

    States are ``(x, y, ghost_direction, cookie_direction)`` tuples, the same
    layout ``calc_current_state`` returns. ``x`` covers -7..7 and ``y``
    covers -2..2.
    NOTE(review): the offset ranges presumably mirror the labyrinth size;
    confirm against the map, since offsets outside them have no entry.

    Returns:
        dict: state tuple -> NumPy float array holding one Q-value per
        ``Direction``, each drawn uniformly from [-0.1, 0.1).
    """
    action_count = len(Direction)
    table = {}

    for x_offset in range(-7, 8):
        for y_offset in range(-2, 3):
            for ghost_dir in Direction:
                for cookie_dir in Direction:
                    key = (x_offset, y_offset, ghost_dir, cookie_dir)
                    # Small symmetric noise so no action starts out favoured.
                    table[key] = np.array(
                        [random.random() * 0.2 - 0.1 for _ in range(action_count)]
                    )

    return table
|
|
|
|
|
|
def initial_q_fill_only_surroundings():
    """Create a randomly initialised Q-table for the reduced state space.

    States are ``(ghost_distance, ghost_direction, cookie_direction)`` tuples,
    the same layout ``calc_current_state_surroundings`` returns, with
    ``ghost_distance`` limited to the two buckets 1 and 2.

    Returns:
        dict: state tuple -> NumPy float array holding one Q-value per
        ``Direction``, each drawn uniformly from [-0.1, 0.1).
    """
    action_count = len(Direction)
    table = {}

    for distance_bucket in (1, 2):
        for ghost_dir in Direction:
            for cookie_dir in Direction:
                key = (distance_bucket, ghost_dir, cookie_dir)
                # Small symmetric noise so no action starts out favoured.
                table[key] = np.array(
                    [random.random() * 0.2 - 0.1 for _ in range(action_count)]
                )

    return table
|
|
|
|
|
|
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Encode the current situation as a full-offset Q-table state.

    Args:
        labyrinth: Iterable of rows; cells equal to ``"."`` are cookies.
        pac_x, pac_y: Pac-Man's grid position.
        ghost_x, ghost_y: The ghost's grid position.

    Returns:
        tuple: ``(x_offset, y_offset, ghost_direction, cookie_direction)``
        where the offsets are Pac-Man's position minus the ghost's position.
    """
    ghost_dx = pac_x - ghost_x
    ghost_dy = pac_y - ghost_y

    return (
        ghost_dx,
        ghost_dy,
        cords_to_direction(ghost_dx, ghost_dy),
        get_closest_cookie_direction(labyrinth, pac_x, pac_y),
    )
|
|
|
|
|
|
def calc_current_state_surroundings(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Encode the current situation as a reduced (surroundings-only) state.

    Args:
        labyrinth: Iterable of rows; cells equal to ``"."`` are cookies.
        pac_x, pac_y: Pac-Man's grid position.
        ghost_x, ghost_y: The ghost's grid position.

    Returns:
        tuple: ``(ghost_distance, ghost_direction, cookie_direction)``.
        ``ghost_distance`` is 1 when the Manhattan distance to the ghost is
        exactly 1 and 2 otherwise.
    """
    # Keep the signed offsets (Pac-Man minus ghost, as in calc_current_state).
    # Feeding absolute values into cords_to_direction would discard the sign,
    # so UP could never be reported and LEFT only when both offsets are 0.
    ghost_dx = pac_x - ghost_x
    ghost_dy = pac_y - ghost_y

    manhattan_distance = abs(ghost_dx) + abs(ghost_dy)
    ghost_distance = 1 if manhattan_distance == 1 else 2

    ghost_direction = cords_to_direction(ghost_dx, ghost_dy)
    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)

    return ghost_distance, ghost_direction, cookie_direction
|
|
|
|
|
|
|
|
def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the direction from Pac-Man towards the nearest cookie.

    Distance is measured as Manhattan distance. When several cookies are
    equally close, the first one met while scanning rows top to bottom and
    cells left to right is used.

    Args:
        labyrinth: Iterable of rows; cells equal to ``"."`` are cookies.
        pac_x, pac_y: Pac-Man's grid position (column, row).

    Returns:
        Direction: result of ``cords_to_direction`` for the offset from
        Pac-Man to the chosen cookie.

    Raises:
        ValueError: If the labyrinth contains no cookie at all.
    """
    best_distance = None
    cookie_x = cookie_y = None

    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell != ".":
                continue
            distance = abs(pac_x - x) + abs(pac_y - y)
            # Strict "<" keeps the first cookie found among equal distances.
            if best_distance is None or distance < best_distance:
                best_distance, cookie_x, cookie_y = distance, x, y

    if best_distance is None:
        # Same exception type min() raised on an empty list, but with a
        # message that names the actual problem.
        raise ValueError("labyrinth contains no cookies ('.')")

    return cords_to_direction(cookie_x - pac_x, cookie_y - pac_y)
|
|
|
|
|
|
def cords_to_direction(dx, dy):
    """Reduce a 2-D offset to a single ``Direction``.

    Positive ``dx`` maps to RIGHT, otherwise LEFT; positive ``dy`` maps to
    DOWN, otherwise UP. Which axis decides:

    * ``abs(dx) >= abs(dy)``: the vertical direction, unless ``dy`` is 0,
      in which case the horizontal one.
    * otherwise: the horizontal direction, unless ``dx`` is 0, in which case
      the vertical one.

    A zero offset on both axes yields LEFT.

    NOTE(review): this favours the axis with the *smaller* non-zero offset,
    while the earlier comments read "X distance bigger"; confirm this is the
    intended preference.
    """
    vertical = Direction.DOWN if dy > 0 else Direction.UP
    horizontal = Direction.RIGHT if dx > 0 else Direction.LEFT

    if abs(dx) >= abs(dy):
        # X offset is at least as large: go vertical unless already level.
        return vertical if dy != 0 else horizontal

    # Y offset is larger: go horizontal unless already in the same column.
    return horizontal if dx != 0 else vertical
|
|
|
|
|
|
|
|
def epsilon_greedy(q_values, state, epsilon):
    """Choose an action using the epsilon-greedy rule.

    Args:
        q_values: Mapping from state to an array of per-action Q-values.
        state: Key of the current state in ``q_values``.
        epsilon: Probability of picking a random direction instead of the
            best-valued one.

    Returns:
        Direction: a random direction when a uniform draw falls below
        ``epsilon``, otherwise the direction with the highest Q-value.
    """
    should_explore = random.random() < epsilon
    if should_explore:
        return get_random_direction()
    return get_best_q_action(q_values, state)
|
|
|
|
|
|
def get_best_q_action(q_values, state):
    """Return the ``Direction`` with the highest Q-value for ``state``.

    Args:
        q_values: Mapping from state to an array of per-action Q-values,
            indexed by ``Direction`` value.
        state: Key to look up; a missing key raises ``KeyError``.

    Returns:
        Direction: the member whose value equals the ``argmax`` index
        (``np.argmax`` returns the first index on ties).
    """
    winning_index = np.argmax(q_values[state])
    return Direction(winning_index)
|
|
|
|
|
|
def get_random_direction():
    """Return one of the ``Direction`` members chosen uniformly at random."""
    candidates = tuple(Direction)
    return random.choice(candidates)
|
|
|
|
|
|
|
|
def calc_time_reward(amount_iterations):
    """Map an iteration count to a reward that decreases with time.

    Args:
        amount_iterations: Number of iterations used.

    Returns:
        10 for fewer than 1000 iterations, 1 for more than 10000, and a
        linear interpolation (float) between those two values in between.
    """
    fast_limit, slow_limit = 1000, 10000

    if amount_iterations > slow_limit:
        return 1
    if amount_iterations < fast_limit:
        return 10

    # Straight line through (1000, 10) and (10000, 1).
    return 11 + -(1 / 1000) * amount_iterations
|
|
|
|
|
|
|
|
def draw_labyrinth(screen, labyrinth):
    """Draw the labyrinth's walls and cookies onto ``screen``.

    Cells equal to ``"#"`` are drawn as filled squares in ``consts.BLUE``;
    cells equal to ``"."`` as circles of radius 5 in ``consts.WHITE``,
    centred in the cell. All other cells are left untouched.

    Args:
        screen: Target passed to ``pygame.draw`` calls.
        labyrinth: Iterable of rows, each an iterable of cell symbols.
    """
    cell_size = consts.CELL_SIZE
    wall_color = consts.BLUE
    cookie_color = consts.WHITE
    half_cell = cell_size // 2

    for row_index, row in enumerate(labyrinth):
        top = row_index * cell_size
        for col_index, symbol in enumerate(row):
            left = col_index * cell_size
            if symbol == "#":
                pygame.draw.rect(screen, wall_color, (left, top, cell_size, cell_size))
            elif symbol == ".":
                pygame.draw.circle(
                    screen, cookie_color, (left + half_cell, top + half_cell), 5
                )
|