from enum import Enum
import random


class Direction(Enum):
    """Grid direction, used both as an action and as a state component."""

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def initial_q_fill(q_values, x_range=8, y_range=3):
    """Fill *q_values* in place with small random start values.

    One entry is written for every combination of ``x in range(x_range)``,
    ``y in range(y_range)``, cookie direction and action. Keys have the form
    ``((x, y, cookie_direction), action)`` and values are drawn uniformly
    from ``[-0.1, 0.1)``.

    Args:
        q_values: Mutable mapping that receives the entries.
        x_range: Number of distinct x values in the state space.
        y_range: Number of distinct y values in the state space.
    """
    for x in range(x_range):
        for y in range(y_range):
            for cookie_direction in Direction:
                for action in Direction:
                    state = (x, y, cookie_direction)
                    q_values[(state, action)] = random.random() * 0.2 - 0.1


def get_start_state():
    """Return the start state ``(7, 2, direction)``.

    The direction is chosen at random between ``Direction.DOWN`` and
    ``Direction.RIGHT``. The tuple uses the same three-slot layout as the
    value returned by ``calc_current_state``.
    """
    return (7, 2, random.choice([Direction.DOWN, Direction.RIGHT]))


def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the state tuple for the given positions.

    Args:
        labyrinth: Sequence of rows; a cell equal to ``"."`` is a cookie.
        pac_x, pac_y: Player position.
        ghost_x, ghost_y: Ghost position.

    Returns:
        ``(x_ghost_dist, y_ghost_dist, cookie_direction)`` where the first
        two are absolute per-axis distances between player and ghost and the
        last is the result of ``get_closest_cookie_direction``.
    """
    x_ghost_dist = abs(pac_x - ghost_x)
    y_ghost_dist = abs(pac_y - ghost_y)
    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    return x_ghost_dist, y_ghost_dist, cookie_direction


def _find_closest_cookie(labyrinth, pac_x, pac_y):
    """Return ``(x, y)`` of the cookie nearest by Manhattan distance, or None.

    Ties are resolved in favour of the first cookie met when scanning the
    labyrinth row by row, left to right.
    """
    cookies = (
        (abs(pac_x - x) + abs(pac_y - y), (x, y))
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    )
    closest = min(cookies, key=lambda item: item[0], default=None)
    return None if closest is None else closest[1]


def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the direction to step towards the nearest cookie.

    The nearest cookie is the one with the smallest Manhattan distance to
    ``(pac_x, pac_y)``. The returned direction follows the axis with the
    smaller offset first, unless that offset is zero, in which case the other
    axis is used. When both offsets have equal magnitude the vertical
    direction wins.

    Args:
        labyrinth: Sequence of rows; a cell equal to ``"."`` is a cookie.
        pac_x, pac_y: Player position (column, row).

    Returns:
        A ``Direction``. If the labyrinth contains no cookie,
        ``Direction.LEFT`` is returned as a deterministic placeholder so the
        resulting state is still a valid Q-table key (previously this case
        raised ``ValueError`` from ``min()`` on an empty sequence).
    """
    closest = _find_closest_cookie(labyrinth, pac_x, pac_y)
    if closest is None:
        # Same value the function yields for the degenerate zero-offset case.
        return Direction.LEFT

    cookie_x, cookie_y = closest
    dx = cookie_x - pac_x
    dy = cookie_y - pac_y

    if abs(dx) >= abs(dy):
        # Horizontal offset is at least as large: close the vertical gap first.
        if dy > 0:
            return Direction.DOWN
        if dy < 0:
            return Direction.UP
        # Cookie is on the same row.
        return Direction.RIGHT if dx > 0 else Direction.LEFT

    # Vertical offset is larger: close the horizontal gap first.
    if dx > 0:
        return Direction.RIGHT
    if dx < 0:
        return Direction.LEFT
    # Cookie is in the same column.
    return Direction.DOWN if dy > 0 else Direction.UP


def epsilon_greedy(q_values, state, epsilon):
    """Pick an action for *state* with an epsilon-greedy policy.

    With probability *epsilon* a random action is returned: one of the
    actions that have an entry for *state* in *q_values*, or any
    ``Direction`` if there is none. Otherwise the action reported as best by
    ``get_best_q_action`` is returned.

    Args:
        q_values: Mapping from ``(state, action)`` to a numeric value.
        state: State to act in.
        epsilon: Exploration probability, compared against ``random.random()``.
    """
    best_action, known_actions = get_best_q_action(q_values, state)
    if random.random() < epsilon:
        if not known_actions:
            return get_random_direction()
        return random.choice(known_actions)
    return best_action


def get_best_q_action(q_values, state):
    """Find the highest-valued action stored for *state*.

    Args:
        q_values: Mapping from ``(state, action)`` to a numeric value.
        state: State whose entries are inspected.

    Returns:
        ``(best_action, actions)`` where *actions* lists every action that has
        an entry for *state*, in mapping iteration order, and *best_action* is
        the one with the largest value (the first one on ties). If *state* has
        no entries, *best_action* is a random ``Direction`` and *actions* is
        empty.
    """
    best_action = None
    best_value = None
    actions_for_epsilon = []
    for (q_state, q_action), value in q_values.items():
        if q_state != state:
            continue
        actions_for_epsilon.append(q_action)
        # Strict comparison keeps the first action seen among equal values.
        if best_value is None or value > best_value:
            best_value = value
            best_action = q_action
    if best_action is None:
        best_action = get_random_direction()
    return best_action, actions_for_epsilon


def get_random_direction():
    """Return a uniformly chosen ``Direction`` member."""
    return random.choice(list(Direction))