# MLE-Pacman/ReinforcmentLearning/util.py
from enum import Enum
import random
class Direction(Enum):
    """Absolute grid directions, numbered 0-3 clockwise from UP.

    The members double as the agent's action space: Q-table keys are
    ((state, action)) pairs whose action component is a Direction.
    """
    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3
def initial_q_fill(q_values, x_range=range(-7, 8), y_range=range(-2, 3)):
    """Populate *q_values* with small random initial Q-estimates.

    Writes one entry per (state, action) pair, where a state is the tuple
    (x_ghost_dist, y_ghost_dist, cookie_direction) and the action is any
    Direction.  Values are drawn uniformly from [-0.1, 0.1) so that early
    greedy action selection is effectively random and unbiased.

    Args:
        q_values: dict mutated in place; keyed by ((x, y, dir), action).
        x_range: iterable of x ghost-distance components to cover.
            Defaults to the original hard-coded grid, range(-7, 8).
        y_range: iterable of y ghost-distance components to cover.
            Defaults to the original hard-coded grid, range(-2, 3).
    """
    for x in x_range:
        for y in y_range:
            for cookie_direction in Direction:
                state = (x, y, cookie_direction)
                for action in Direction:
                    # Small symmetric noise around 0 breaks ties without
                    # favoring any particular action.
                    q_values[(state, action)] = random.random() * 0.2 - 0.1
def get_start_state():
    """Return a random episode start state.

    Pac-Man always starts at ghost offset (7, 2); the nearest cookie is
    reported as either DOWN or RIGHT with equal probability.
    """
    start_states = [
        (7, 2, Direction.DOWN),
        (7, 2, Direction.RIGHT),
    ]
    return random.choice(start_states)
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the observable state tuple for the agent.

    Returns (x ghost distance, y ghost distance, closest-cookie direction),
    with distances signed relative to Pac-Man's position.
    """
    ghost_dx = pac_x - ghost_x
    ghost_dy = pac_y - ghost_y
    return ghost_dx, ghost_dy, get_closest_cookie_direction(labyrinth, pac_x, pac_y)
def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the Direction pointing toward the nearest cookie ('.') cell.

    Distance is Manhattan; ties go to the first cookie encountered when
    scanning the labyrinth row by row.  Raises ValueError if the labyrinth
    contains no cookie at all.

    NOTE(review): when the horizontal gap is at least the vertical gap,
    the returned direction follows the *vertical* offset first (and vice
    versa) — this reads inverted relative to the axis comments but is
    preserved as-is; confirm it is the intended steering rule.
    """
    candidates = (
        (abs(pac_x - col) + abs(pac_y - row), (col, row))
        for row, line in enumerate(labyrinth)
        for col, cell in enumerate(line)
        if cell == "."
    )
    # key= keeps the first minimal element, matching row-major tie-breaking.
    _, (cookie_x, cookie_y) = min(candidates, key=lambda pair: pair[0])

    dx = cookie_x - pac_x
    dy = cookie_y - pac_y

    if abs(dx) >= abs(dy):
        # Horizontal gap dominates (or ties): steer by the vertical offset,
        # falling back to horizontal when the cookie shares Pac-Man's row.
        if dy > 0:
            return Direction.DOWN
        if dy < 0:
            return Direction.UP
        return Direction.RIGHT if dx > 0 else Direction.LEFT

    # Vertical gap dominates: steer by the horizontal offset, falling back
    # to vertical when the cookie shares Pac-Man's column.
    if dx > 0:
        return Direction.RIGHT
    if dx < 0:
        return Direction.LEFT
    return Direction.DOWN if dy > 0 else Direction.UP
def epsilon_greedy(q_values, state, epsilon):
    """Select an action: explore with probability *epsilon*, else exploit.

    Exploitation returns the highest-valued action known for *state*.
    Exploration draws uniformly from the actions recorded for *state*,
    or a completely random direction when the state has no Q-entries.
    """
    greedy_action, known_actions = get_best_q_action(q_values, state)
    if random.random() >= epsilon:
        # Exploit: take the greedy choice.
        return greedy_action
    # Explore.
    if known_actions:
        return random.choice(known_actions)
    return get_random_direction()
def get_best_q_action(q_values, state):
    """Return (best_action, known_actions) for *state*.

    best_action is the action with the highest Q-value among the entries
    stored for *state* (the first wins on ties, in Direction declaration
    order — the same order initial_q_fill inserts them); known_actions
    lists every action that has a Q-entry for the state.  When the state
    has no entries at all, best_action falls back to a uniformly random
    direction and known_actions is empty.
    """
    best_action = None
    best_value = None
    known_actions = []
    # Direct O(1) lookups per action instead of scanning the entire
    # Q-table on every call.  Assumes every Q-table key's action component
    # is a Direction member, as produced by initial_q_fill.
    for action in Direction:
        key = (state, action)
        if key not in q_values:
            continue
        value = q_values[key]
        known_actions.append(action)
        if best_value is None or value > best_value:
            best_value = value
            best_action = action
    # Explicit None check: Enum members are always truthy, but testing
    # `not best_action` would silently break for falsy member values
    # (e.g. under IntEnum).
    if best_action is None:
        best_action = get_random_direction()
    return best_action, known_actions
def get_random_direction():
    """Draw one of the four directions uniformly at random."""
    directions = [member for member in Direction]
    return random.choice(directions)