130 lines
3.2 KiB
Python
130 lines
3.2 KiB
Python
from enum import Enum
|
|
import random
|
|
|
|
class Direction(Enum):
    """One of the four grid directions.

    Members are used in this module both as the agent's actions and as the
    "direction of the closest cookie" component of a state tuple.
    Iteration order is the definition order: UP, RIGHT, DOWN, LEFT.
    """

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3
|
|
|
|
|
|
def initial_q_fill(q_values, x_range=range(-7, 8), y_range=range(-2, 3)):
    """Seed ``q_values`` in place with small random values.

    One entry is written for every ``(state, action)`` pair, where
    ``state = (x, y, cookie_direction)`` and both ``cookie_direction`` and
    ``action`` run over all ``Direction`` members.  Each value is drawn
    uniformly from ``[-0.1, 0.1)``.

    Args:
        q_values: Mutable mapping to fill; existing entries with the same
            key are overwritten, other entries are left alone.
        x_range: Re-iterable of x components to cover.  Defaults to
            ``range(-7, 8)``.
        y_range: Re-iterable of y components to cover (it is traversed once
            per x value).  Defaults to ``range(-2, 3)``.

    Returns:
        None.  The mapping is modified in place.
    """
    for x in x_range:
        for y in y_range:
            for cookie_direction in Direction:
                # The state does not depend on the action, so build it once.
                state = (x, y, cookie_direction)
                for action in Direction:
                    q_values[(state, action)] = random.random() * 0.2 - 0.1
|
|
|
|
|
|
|
|
|
|
def get_start_state():
    """Return a start state ``(7, 2, direction)``.

    The first two components are fixed at 7 and 2; the third is
    ``Direction.DOWN`` or ``Direction.RIGHT``, each with equal probability.
    """
    # DOWN is listed first so a seeded generator yields the same sequence as
    # the former True/False coin flip (True -> DOWN, False -> RIGHT).
    return (7, 2, random.choice((Direction.DOWN, Direction.RIGHT)))
|
|
|
|
|
|
|
|
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the state tuple for the given positions.

    Args:
        labyrinth: Grid passed through to ``get_closest_cookie_direction``.
        pac_x, pac_y: Position used as the reference point.
        ghost_x, ghost_y: Position subtracted from the reference point.

    Returns:
        ``(pac_x - ghost_x, pac_y - ghost_y, cookie_direction)`` where the
        first two components are signed offsets and ``cookie_direction`` is
        the result of ``get_closest_cookie_direction(labyrinth, pac_x, pac_y)``.
    """
    x_ghost_dist = pac_x - ghost_x
    y_ghost_dist = pac_y - ghost_y
    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    return x_ghost_dist, y_ghost_dist, cookie_direction
|
|
|
|
|
|
def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the direction from ``(pac_x, pac_y)`` towards the nearest cookie.

    A cookie is any cell equal to ``"."``.  ``labyrinth`` is iterated as rows
    of cells; ``y`` is the row index and ``x`` the index within the row.
    "Nearest" means smallest Manhattan distance; among equally distant
    cookies the first one met in row-by-row scan order is used.

    With ``dx``/``dy`` the signed offsets to that cookie (positive ``dy``
    maps to ``Direction.DOWN``, positive ``dx`` to ``Direction.RIGHT``):

    * if ``|dx| >= |dy|`` the vertical direction is returned unless
      ``dy == 0``, in which case the horizontal one is returned;
    * otherwise the horizontal direction is returned unless ``dx == 0``,
      in which case the vertical one is returned.

    When the cookie is on the queried cell itself (``dx == dy == 0``) the
    result is ``Direction.LEFT``.

    Raises:
        ValueError: If ``labyrinth`` contains no ``"."`` cell.
    """
    cookie_cells = (
        (x, y)
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    )
    # min() keeps the first minimal element, matching the scan-order
    # tie-break; default=None lets us report an empty board explicitly.
    closest = min(
        cookie_cells,
        key=lambda pos: abs(pac_x - pos[0]) + abs(pac_y - pos[1]),
        default=None,
    )
    if closest is None:
        raise ValueError("labyrinth contains no cookie ('.') cells")

    cookie_x, cookie_y = closest
    dx = cookie_x - pac_x
    dy = cookie_y - pac_y

    vertical = Direction.DOWN if dy > 0 else Direction.UP
    horizontal = Direction.RIGHT if dx > 0 else Direction.LEFT

    if abs(dx) >= abs(dy):
        # X offset is at least as large: step vertically unless already
        # on the cookie's row.
        return vertical if dy != 0 else horizontal
    # Y offset is larger: step horizontally unless already in the
    # cookie's column.
    return horizontal if dx != 0 else vertical
|
|
|
|
|
|
|
|
def epsilon_greedy(q_values, state, epsilon):
    """Pick an action for ``state`` using an epsilon-greedy rule.

    Args:
        q_values: Mapping keyed by ``(state, action)``.
        state: State to choose an action for.
        epsilon: Threshold compared against ``random.random()``; a draw
            below it selects the exploration branch.

    Returns:
        On exploration, a uniformly random choice among the actions that
        ``get_best_q_action`` found for ``state`` (or the result of
        ``get_random_direction()`` when it found none).  Otherwise the best
        action reported by ``get_best_q_action``.
    """
    best_action, actions_for_epsilon = get_best_q_action(q_values, state)

    if random.random() < epsilon:
        # Explore: sample among the known actions, which include the best one.
        if actions_for_epsilon:
            return random.choice(actions_for_epsilon)
        # No entry for this state: fall back to any direction.
        return get_random_direction()

    # Exploit.
    return best_action
|
|
|
|
|
|
def get_best_q_action(q_values, state):
    """Find the highest-valued action recorded for ``state``.

    Args:
        q_values: Mapping keyed by ``(state, action)`` tuples.
        state: State whose entries are looked up (compared with ``==``).

    Returns:
        A pair ``(best_action, actions)``.  ``actions`` lists every action
        that has an entry for ``state``, in the mapping's iteration order.
        ``best_action`` is the one with the greatest value; on ties the
        first such action wins.  If ``state`` has no entries the pair is
        ``(get_random_direction(), [])``.
    """
    # NOTE: this scans the whole mapping on every call.
    matches = [
        (q_action, value)
        for (q_state, q_action), value in q_values.items()
        if q_state == state
    ]

    # Decide "nothing found" from the matches themselves, not from the
    # truthiness of the best action, so a falsy action (e.g. 0) is kept.
    if not matches:
        return get_random_direction(), []

    # max() keeps the first maximal element, i.e. ties go to the earliest entry.
    best_action, _ = max(matches, key=lambda pair: pair[1])
    return best_action, [q_action for q_action, _ in matches]
|
|
|
|
|
|
def get_random_direction():
    """Return one ``Direction`` member chosen uniformly at random."""
    return random.choice(list(Direction))
|