"""Q-learning helpers for a Pac-Man style agent.

A *state* is the tuple ``(x_ghost_dist, y_ghost_dist, cookie_direction)``
produced by :func:`calc_current_state`; an *action* is a :class:`Direction`.
The Q-table is a plain ``dict`` keyed by ``(state, action)``.
"""

from enum import Enum
import random


class Direction(Enum):
    """The four moves available to the agent (also used as cookie heading)."""

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def initial_q_fill(max_x_dist: int = 7, max_y_dist: int = 2) -> dict:
    """Build a Q-table with small random values for every (state, action).

    Args:
        max_x_dist: Largest absolute horizontal ghost offset to cover; the
            table spans ``-max_x_dist .. max_x_dist`` inclusive.
        max_y_dist: Largest absolute vertical ghost offset to cover; the
            table spans ``-max_y_dist .. max_y_dist`` inclusive.

    Returns:
        A dict mapping ``((x, y, cookie_direction), action)`` to a float
        drawn uniformly from ``[-0.1, 0.1)``.
    """
    q_values = {}
    for x in range(-max_x_dist, max_x_dist + 1):
        for y in range(-max_y_dist, max_y_dist + 1):
            for cookie_direction in Direction:
                state = (x, y, cookie_direction)
                for action in Direction:
                    # Small symmetric noise so no action starts out favoured.
                    q_values[(state, action)] = random.random() * 0.2 - 0.1
    return q_values


def get_start_state() -> tuple:
    """Return the starting state ``(7, 2, direction)``.

    The direction is ``Direction.DOWN`` or ``Direction.RIGHT``, chosen with
    equal probability.
    """
    # NOTE(review): (7, 2) presumably matches the ghost offset of the initial
    # board layout - confirm against the caller.
    return (7, 2, random.choice([Direction.DOWN, Direction.RIGHT]))


def calc_current_state(labyrinth, pac_x: int, pac_y: int,
                       ghost_x: int, ghost_y: int) -> tuple:
    """Return the state tuple for the given positions.

    Args:
        labyrinth: Sequence of rows; a cell equal to ``"."`` is a cookie.
        pac_x, pac_y: Pac-Man's column and row.
        ghost_x, ghost_y: The ghost's column and row.

    Returns:
        ``(pac_x - ghost_x, pac_y - ghost_y, cookie_direction)``.

    Raises:
        ValueError: If the labyrinth contains no cookie.
    """
    x_ghost_dist = pac_x - ghost_x
    y_ghost_dist = pac_y - ghost_y
    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    return x_ghost_dist, y_ghost_dist, cookie_direction


def get_closest_cookie_direction(labyrinth, pac_x: int, pac_y: int) -> Direction:
    """Return the direction towards the nearest cookie (Manhattan distance).

    Ties between equally distant cookies go to the first one found when
    scanning rows top to bottom and cells left to right.

    Args:
        labyrinth: Sequence of rows; a cell equal to ``"."`` is a cookie.
        pac_x, pac_y: Pac-Man's column and row.

    Returns:
        The :class:`Direction` to head in.

    Raises:
        ValueError: If the labyrinth contains no cookie.
    """
    candidates = (
        (abs(pac_x - x) + abs(pac_y - y), (x, y))
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    )
    # min() keeps the first minimum, preserving the row-major tie-break.
    closest_cookie = min(candidates, key=lambda item: item[0], default=None)
    if closest_cookie is None:
        raise ValueError("labyrinth contains no cookies ('.')")

    cookie_x, cookie_y = closest_cookie[1]
    dx = cookie_x - pac_x
    dy = cookie_y - pac_y

    if abs(dx) >= abs(dy):
        # Horizontal gap is at least as large as the vertical one: close any
        # vertical gap first, and only then move horizontally.
        # NOTE(review): this heads along the *shorter* axis first - confirm
        # that this is the intended policy.
        if dy > 0:
            return Direction.DOWN
        if dy < 0:
            return Direction.UP
        # Cookie is on the same row.
        return Direction.RIGHT if dx > 0 else Direction.LEFT

    # Vertical gap is strictly larger: close any horizontal gap first.
    if dx > 0:
        return Direction.RIGHT
    if dx < 0:
        return Direction.LEFT
    # Cookie is in the same column.
    return Direction.DOWN if dy > 0 else Direction.UP


def epsilon_greedy(q_values: dict, state: tuple, epsilon: float) -> Direction:
    """Pick an action for ``state`` using an epsilon-greedy policy.

    With probability ``epsilon`` a random action is returned (drawn from the
    actions the Q-table knows for ``state``, or from all directions when it
    knows none); otherwise the greedy action is returned.

    Args:
        q_values: Q-table keyed by ``(state, action)``.
        state: Hashable state tuple.
        epsilon: Exploration probability in ``[0, 1]``.
    """
    best_action, actions_for_epsilon = get_best_q_action(q_values, state)
    if random.random() >= epsilon:
        return best_action
    if actions_for_epsilon:
        return random.choice(actions_for_epsilon)
    return get_random_direction()


def get_best_q_action(q_values: dict, state: tuple) -> tuple:
    """Return the greedy action for ``state`` and all actions known for it.

    Looks up each ``(state, action)`` key directly instead of scanning the
    whole table.

    Args:
        q_values: Q-table keyed by ``(state, action)``.
        state: Hashable state tuple.

    Returns:
        ``(best_action, actions)`` where ``actions`` lists, in ``Direction``
        order, every action that has an entry for ``state``. ``best_action``
        is the first action with the highest value, or a random direction
        when ``state`` has no entries (``actions`` is then empty).
    """
    best_action = None
    best_value = None
    actions_for_epsilon = []
    for action in Direction:
        key = (state, action)
        if key not in q_values:
            continue
        value = q_values[key]
        actions_for_epsilon.append(action)
        # Strict '>' keeps the earliest action on ties.
        if best_value is None or value > best_value:
            best_value = value
            best_action = action

    if best_action is None:
        best_action = get_random_direction()
    return best_action, actions_for_epsilon


def get_random_direction() -> Direction:
    """Return a uniformly random :class:`Direction`."""
    return random.choice(list(Direction))


def calc_time_reward(amount_iterations: int) -> float:
    """Return a reward that shrinks as an episode takes longer.

    The reward is 10 below 1000 iterations, 1 above 10000 iterations, and
    falls linearly from 10 to 1 in between.
    """
    if amount_iterations < 1000:
        return 10
    if amount_iterations > 10000:
        return 1
    return - (1 / 1000) * amount_iterations + 11