MLE-Pacman/util.py

from enum import Enum
import random


class Direction(Enum):
    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def get_start_state():
    # Start at (7, 2), heading DOWN or RIGHT at random.
    first_direction_cookie = random.choice([True, False])
    if first_direction_cookie:
        return (7, 2, Direction.DOWN)
    return (7, 2, Direction.RIGHT)


def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    # distance pacman - ghost
    x_dist = abs(pac_x - ghost_x)
    y_dist = abs(pac_y - ghost_y)
    # closest cookie: only consider cookies within a Manhattan distance of 12
    best_distance = 12
    best_coords = None
    all_cookie_locations = get_all_cookies_locations(labyrinth)
    for (cookie_x, cookie_y) in all_cookie_locations:
        dist = abs(pac_x - cookie_x) + abs(pac_y - cookie_y)
        if dist < best_distance:
            best_distance = dist
            best_coords = (cookie_x, cookie_y)
    # direction towards the closest cookie
    cookie_direction = None
    if best_coords is not None:
        real_dist_x = pac_x - best_coords[0]
        real_dist_y = pac_y - best_coords[1]
        # Move along the axis with the larger offset. Assumption: y is the
        # row index (grows downward), so a positive real_dist_y means the
        # cookie lies above Pac-Man.
        if abs(real_dist_x) >= abs(real_dist_y):
            cookie_direction = Direction.LEFT if real_dist_x > 0 else Direction.RIGHT
        else:
            cookie_direction = Direction.UP if real_dist_y > 0 else Direction.DOWN
    return x_dist, y_dist, cookie_direction


def get_all_cookies_locations(labyrinth):
    cookie_locations = []
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == ".":
                cookie_locations.append((x, y))
    return cookie_locations


def epsilon_greedy(q_values, state, epsilon):
    best_action, states_for_epsilon = get_best_q_value(q_values, state)
    if random.random() < epsilon:
        # Explore: pick a random action already seen for this state, or any
        # direction if the state has no entries in the Q-table yet.
        if not states_for_epsilon:
            return get_random_direction()
        return random.choice(states_for_epsilon)
    # Exploit: take the action with the highest Q-value.
    return best_action


def get_best_q_value(q_values, state):
    # q_values is a flat dict keyed by (state, action) pairs.
    best_action = None
    best_value = None
    states_for_epsilon = []
    for (q_state, q_action), value in q_values.items():
        if q_state == state:
            states_for_epsilon.append(q_action)
            if best_value is None or value > best_value:
                best_value = value
                best_action = q_action
    if best_action is None:
        # Unseen state: fall back to a random direction.
        best_action = get_random_direction()
    return best_action, states_for_epsilon


def get_random_direction():
    return random.choice(list(Direction))
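

if __name__ == "__main__":
    # Minimal usage sketch. The labyrinth, coordinates, and Q-table entries
    # below are made-up examples, assuming the labyrinth is a list of strings
    # in which "." marks a cookie.
    labyrinth = [
        "#######",
        "#..   #",
        "#  .  #",
        "#######",
    ]
    print(get_start_state())
    print(get_all_cookies_locations(labyrinth))
    print(calc_current_state(labyrinth, pac_x=3, pac_y=2, ghost_x=5, ghost_y=1))

    # Toy Q-table keyed by (state, action); the values are arbitrary.
    state = (2, 1, Direction.UP)
    q_values = {
        (state, Direction.LEFT): 0.5,
        (state, Direction.RIGHT): 1.0,
    }
    print(epsilon_greedy(q_values, state, epsilon=0.1))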