"""State encoding and epsilon-greedy action selection for a grid-based
Pac-Man style Q-learning agent."""

from enum import Enum
import random


class Direction(Enum):
    """The four movement directions an agent can take on the grid."""

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def get_start_state():
    """Return the initial state tuple ``(7, 2, direction)``.

    ``direction`` is picked uniformly at random from ``Direction.DOWN`` and
    ``Direction.RIGHT``.

    NOTE(review): the tuple has the same shape as the value returned by
    ``calc_current_state`` (x distance, y distance, cookie direction), so
    7 and 2 are presumably the start distances to the ghost -- confirm.
    """
    return (7, 2, random.choice((Direction.DOWN, Direction.RIGHT)))


def _direction_towards(delta_x, delta_y):
    """Map an offset ``(delta_x, delta_y)`` to the Direction that reduces it.

    The axis with the larger absolute offset wins; ties go to the horizontal
    axis. Returns ``None`` for a zero offset.

    NOTE(review): assumes y grows downwards (y is the row index of the
    labyrinth), so a positive ``delta_y`` maps to ``Direction.DOWN`` --
    confirm against the rendering/movement code.
    """
    if delta_x == 0 and delta_y == 0:
        return None
    if abs(delta_x) >= abs(delta_y):
        return Direction.RIGHT if delta_x > 0 else Direction.LEFT
    return Direction.DOWN if delta_y > 0 else Direction.UP


def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y,
                       cookie_search_radius=12):
    """Encode the current game situation as a Q-table state.

    Args:
        labyrinth: Sequence of rows; a cell equal to ``"."`` is a cookie.
        pac_x, pac_y: Pac-Man position (column, row).
        ghost_x, ghost_y: Ghost position (column, row).
        cookie_search_radius: Exclusive upper bound on the Manhattan distance
            of cookies that are considered. Defaults to 12, the value that
            was previously hard-coded.

    Returns:
        ``(x_dist, y_dist, cookie_direction)`` where the first two entries
        are the absolute axis distances between Pac-Man and the ghost, and
        ``cookie_direction`` is the ``Direction`` towards the nearest cookie,
        or ``None`` when no cookie lies within ``cookie_search_radius`` (or
        Pac-Man already stands on the nearest cookie).
    """
    # Distance Pac-Man <-> ghost, per axis.
    x_dist = abs(pac_x - ghost_x)
    y_dist = abs(pac_y - ghost_y)

    def manhattan(cookie):
        return abs(pac_x - cookie[0]) + abs(pac_y - cookie[1])

    # min() keeps the first of several equally near cookies, matching the
    # strict "<" comparison of a manual scan.
    nearest = min(get_all_cookies_locations(labyrinth), key=manhattan,
                  default=None)
    if nearest is None or manhattan(nearest) >= cookie_search_radius:
        return x_dist, y_dist, None

    # The direction must be derived from the *nearest* cookie, not from
    # whichever cookie happened to be visited last while scanning.
    cookie_direction = _direction_towards(nearest[0] - pac_x,
                                          nearest[1] - pac_y)
    return x_dist, y_dist, cookie_direction


def get_all_cookies_locations(labyrinth):
    """Return the ``(x, y)`` position of every ``"."`` cell in *labyrinth*.

    ``x`` is the index within a row and ``y`` is the row index; positions
    are listed row by row, left to right.
    """
    return [
        (x, y)
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    ]


def epsilon_greedy(q_values, state, epsilon):
    """Choose an action for *state* with an epsilon-greedy policy.

    With probability ``epsilon`` an exploratory action is returned: a random
    pick among the actions already recorded for *state* in ``q_values``, or
    a random ``Direction`` when none are recorded. Otherwise the action with
    the highest Q-value is returned (see ``get_best_q_value``).
    """
    best_action, known_actions = get_best_q_value(q_values, state)
    explore = random.random() < epsilon
    if not explore:
        return best_action
    if known_actions:
        return random.choice(known_actions)
    return get_random_direction()


def get_best_q_value(q_values, state):
    """Find the highest-valued action recorded for *state*.

    Args:
        q_values: Mapping from ``(state, action)`` to a comparable value.
        state: The state to look up.

    Returns:
        ``(best_action, known_actions)``. ``known_actions`` lists every
        action recorded for *state* in iteration order. ``best_action`` is
        the first action with the maximal value, or a random ``Direction``
        when nothing is recorded for *state*.
    """
    known_actions = []
    best_action = None
    best_value = None
    for (q_state, q_action), value in q_values.items():
        if q_state != state:
            continue
        known_actions.append(q_action)
        if best_value is None or value > best_value:
            best_value = value
            best_action = q_action

    # Compare against None explicitly so a falsy action (e.g. 0) is not
    # mistaken for "no action found".
    if best_action is None:
        best_action = get_random_direction()
    return best_action, known_actions


def get_random_direction():
    """Return a uniformly random member of ``Direction``."""
    return random.choice(list(Direction))