46 lines
995 B
Python
46 lines
995 B
Python
from enum import Enum
|
|
import random
|
|
|
|
class Direction(Enum):
    """The four headings a state can face.

    Members are numbered 0..3 in the order UP, RIGHT, DOWN, LEFT.
    """

    # Unpacking assigns UP=0, RIGHT=1, DOWN=2, LEFT=3 and keeps that
    # definition order when the enum is iterated.
    UP, RIGHT, DOWN, LEFT = range(4)
|
|
|
|
|
|
def get_start_state():
    """Return the initial state as a ``(7, 2, direction)`` tuple.

    The first two components are always 7 and 2. The third is
    ``Direction.DOWN`` or ``Direction.RIGHT``, picked with equal
    probability by a single ``random.choice`` call.
    """
    face_down = random.choice([True, False])
    heading = Direction.DOWN if face_down else Direction.RIGHT
    return (7, 2, heading)
|
|
|
|
|
|
def epsilon_greedy(q_values, state, epsilon):
    """Pick an action for ``state`` with an epsilon-greedy rule.

    Args:
        q_values: Mapping from ``(state, action)`` pairs to a comparable
            value.
        state: State to choose an action for; matched with ``==`` against
            the state part of every key in ``q_values``.
        epsilon: Exploration threshold. When a draw from
            ``random.random()`` is below this value, a uniformly random
            known action is returned instead of the greedy one.

    Returns:
        One of the actions recorded for ``state`` in ``q_values``: a random
        one when exploring, otherwise the one with the highest value (the
        first one encountered wins ties). ``None`` when ``q_values`` holds
        no entry for ``state``.
    """
    # Collect every (action, value) pair known for this state, in the
    # mapping's iteration order so tie-breaking stays "first seen wins".
    candidates = [
        (q_action, value)
        for (q_state, q_action), value in q_values.items()
        if q_state == state
    ]

    # Draw before the emptiness check so the random stream advances exactly
    # once per call, regardless of which branch is taken.
    explore = random.random() < epsilon

    if not candidates:
        # Nothing known for this state: there is neither a greedy nor a
        # random action to return. Avoids random.choice([]) -> IndexError.
        return None

    if explore:
        return random.choice(candidates)[0]

    # max() returns the first maximal item, matching a strict ">" scan.
    best_action, _ = max(candidates, key=lambda pair: pair[1])
    return best_action
|
|
|
|
|
|
def take_action(state, action):
    """Placeholder with no implementation yet.

    Both ``state`` and ``action`` are currently ignored and the function
    returns ``None``.
    """