from enum import Enum
import random


class Direction(Enum):
    """The four directions used in a state, numbered 0-3."""

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3


def get_start_state():
    """Return a random starting state.

    Returns:
        A tuple ``(7, 2, direction)`` where ``direction`` is picked at random
        between ``Direction.DOWN`` and ``Direction.RIGHT``.
    """
    # NOTE(review): 7 and 2 are presumably grid coordinates -- confirm.
    return (7, 2, random.choice((Direction.DOWN, Direction.RIGHT)))


def epsilon_greedy(q_values, state, epsilon):
    """Choose an action for ``state`` with an epsilon-greedy policy.

    Args:
        q_values: Mapping of ``(state, action)`` pairs to numeric values.
        state: The state to choose an action for.
        epsilon: Probability of picking a random known action instead of the
            highest-valued one.

    Returns:
        The chosen action, or ``None`` when ``q_values`` holds no entry for
        ``state``.
    """
    candidates = [
        (action, value)
        for (q_state, action), value in q_values.items()
        if q_state == state
    ]

    # Nothing is known about this state. Without this guard the exploration
    # branch would call random.choice on an empty list and raise IndexError,
    # while the greedy branch would return None; return None in both cases.
    if not candidates:
        return None

    if random.random() < epsilon:
        return random.choice([action for action, _ in candidates])

    # max() keeps the first maximal item, so ties go to the action that
    # appears first in q_values.
    best_action, _ = max(candidates, key=lambda pair: pair[1])
    return best_action


def take_action(state, action):
    """Placeholder: not implemented yet, so it currently returns ``None``."""
    # TODO: implement the state transition for (state, action).