101 lines
2.4 KiB
Python
101 lines
2.4 KiB
Python
from enum import Enum
|
|
import random
|
|
|
|
class Direction(Enum):
    """The four movement directions, numbered clockwise starting at UP."""

    # Unpacking assigns UP=0, RIGHT=1, DOWN=2, LEFT=3.
    UP, RIGHT, DOWN, LEFT = range(4)
|
|
|
|
|
|
def get_start_state():
    """Return the initial state tuple ``(7, 2, direction)``.

    The direction component is drawn at random, with equal probability,
    from ``Direction.DOWN`` and ``Direction.RIGHT``.
    """
    # NOTE(review): 7 and 2 presumably are the starting x/y distances that
    # calc_current_state would report -- confirm against the level layout.
    start_direction = random.choice([Direction.DOWN, Direction.RIGHT])
    return (7, 2, start_direction)
|
|
|
|
|
|
|
|
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the state tuple ``(x_dist, y_dist, cookie_direction)``.

    Args:
        labyrinth: Grid of rows, scanned by ``get_all_cookies_locations``.
        pac_x: Pac-Man's x coordinate (index within a row).
        pac_y: Pac-Man's y coordinate (row index).
        ghost_x: The ghost's x coordinate.
        ghost_y: The ghost's y coordinate.

    Returns:
        A 3-tuple of:
        * the absolute x distance between Pac-Man and the ghost,
        * the absolute y distance between Pac-Man and the ghost,
        * the ``Direction`` in which the closest cookie (by Manhattan
          distance) lies, or ``None`` when the labyrinth holds no cookie
          or Pac-Man is standing on the closest one.
    """
    # distance pacman - ghost
    x_dist = abs(pac_x - ghost_x)
    y_dist = abs(pac_y - ghost_y)

    # closest cookie: min() keeps the first cookie found on ties and, unlike
    # a fixed starting distance, never ignores cookies that are far away.
    closest_cookie = min(
        get_all_cookies_locations(labyrinth),
        key=lambda cookie: abs(pac_x - cookie[0]) + abs(pac_y - cookie[1]),
        default=None,
    )
    if closest_cookie is None:
        # No cookies left, so there is no direction to report.
        return x_dist, y_dist, None

    # closest cookie direction: signed offsets from the cookie to Pac-Man.
    cookie_x, cookie_y = closest_cookie
    offset_x = pac_x - cookie_x
    offset_y = pac_y - cookie_y

    # The plane around Pac-Man is split into four rotationally symmetric
    # regions so that every non-zero offset maps to exactly one direction.
    # A positive offset_y means the cookie is in an earlier row, which is
    # treated as UP (assumes row 0 is drawn at the top -- TODO confirm).
    # NOTE(review): only the first branch existed in the unfinished code;
    # the other three complete it by symmetry -- confirm this is the
    # intended state encoding.
    if offset_x >= 0 and offset_y > 0:
        cookie_direction = Direction.UP
    elif offset_x < 0 and offset_y >= 0:
        cookie_direction = Direction.RIGHT
    elif offset_x <= 0 and offset_y < 0:
        cookie_direction = Direction.DOWN
    elif offset_x > 0 and offset_y <= 0:
        cookie_direction = Direction.LEFT
    else:
        # Both offsets are zero: Pac-Man is on the cookie's cell.
        cookie_direction = None

    return x_dist, y_dist, cookie_direction
|
|
|
|
|
|
def get_all_cookies_locations(labyrinth):
    """Return the ``(x, y)`` position of every ``"."`` cell in *labyrinth*.

    *labyrinth* is iterated row by row; ``y`` is the row index and ``x`` is
    the index of the cell within its row. Positions are listed in scan
    order (top row first, left to right within a row).
    """
    return [
        (col_idx, row_idx)
        for row_idx, cells in enumerate(labyrinth)
        for col_idx, cell in enumerate(cells)
        if cell == "."
    ]
|
|
|
|
|
|
|
|
def epsilon_greedy(q_values, state, epsilon):
    """Choose an action for *state* with an epsilon-greedy policy.

    With probability *epsilon* an exploratory action is returned: a random
    pick among the actions ``get_best_q_value`` reports for *state*, or a
    random direction when it reports none. Otherwise the best action found
    by ``get_best_q_value`` is returned.
    """
    greedy_action, known_actions = get_best_q_value(q_values, state)

    # Exploit unless the random draw falls below epsilon.
    if not random.random() < epsilon:
        return greedy_action

    # Explore among the recorded actions when there are any.
    if known_actions:
        return random.choice(known_actions)

    return get_random_direction()
|
|
|
|
|
|
def get_best_q_value(q_values, state):
    """Find the highest-valued action recorded for *state*.

    Args:
        q_values: Mapping of ``(state, action)`` keys to numeric values.
        state: The state whose entries are looked up.

    Returns:
        A tuple ``(best_action, states_for_epsilon)``:
        * ``best_action`` is the action with the largest value for *state*
          (the first one encountered on ties), or a random direction when
          *state* has no entry in *q_values*.
        * ``states_for_epsilon`` lists every action recorded for *state*,
          in table order (despite its name it holds actions, not states).
    """
    # All (action, value) pairs recorded for this state, in table order.
    candidates = [
        (q_action, value)
        for (q_state, q_action), value in q_values.items()
        if q_state == state
    ]
    states_for_epsilon = [q_action for q_action, _ in candidates]

    # Fall back to a random direction only when nothing was recorded; a
    # truthiness test on the action would wrongly discard falsy actions
    # such as 0.
    if not candidates:
        return get_random_direction(), states_for_epsilon

    # max() returns the first maximal pair, so ties keep the earliest entry.
    best_action, _ = max(candidates, key=lambda pair: pair[1])
    return best_action, states_for_epsilon
|
|
|
|
|
|
def get_random_direction():
    """Return one member of ``Direction`` chosen uniformly at random."""
    all_directions = [*Direction]
    return random.choice(all_directions)
|