diff --git a/ReinforcmentLearning/util.py b/ReinforcmentLearning/util.py
index 6236a76..ab53a52 100644
--- a/ReinforcmentLearning/util.py
+++ b/ReinforcmentLearning/util.py
@@ -18,8 +18,25 @@ def initial_q_fill():
 
     for x in range(-7, 8):
         for y in range(-2, 3):
+            for ghost_direction in Direction:
+                for cookie_direction in Direction:
+                    state = (x, y, ghost_direction, cookie_direction)
+                    q_values[state] = np.zeros(4)
+
+                    for action_idx in range(len(Direction)):
+                        q_values[state][action_idx] = random.random() * 0.2 - 0.1
+
+    return q_values
+
+
+def initial_q_fill_only_surroundings():
+    """Build q_values for every (distance, ghost dir, cookie dir) state."""
+    q_values = {}
+
+    for ghost_distance in [1, 2]:
+        for ghost_direction in Direction:
             for cookie_direction in Direction:
-                state = (x, y, cookie_direction)
+                state = (ghost_distance, ghost_direction, cookie_direction)
                 q_values[state] = np.zeros(4)
 
                 for action_idx in range(len(Direction)):
@@ -28,14 +45,42 @@ def initial_q_fill():
     return q_values
 
 
-
 def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
+    """Return (x_ghost_dist, y_ghost_dist, ghost_direction, cookie_direction).
+
+    The ghost offsets are signed (pac_x - ghost_x, pac_y - ghost_y); the same
+    signed offsets are passed to cords_to_direction to get ghost_direction.
+    """
     x_ghost_dist = pac_x - ghost_x
     y_ghost_dist = pac_y - ghost_y
 
     cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
 
-    return x_ghost_dist, y_ghost_dist, cookie_direction
+    ghost_direction = cords_to_direction(x_ghost_dist, y_ghost_dist)
+
+    return x_ghost_dist, y_ghost_dist, ghost_direction, cookie_direction
+
+
+def calc_current_state_surroundings(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
+    """Return (ghost_distance, ghost_direction, cookie_direction).
+
+    ghost_distance is 1 when the Manhattan distance to the ghost is exactly
+    1 and 2 for every other distance.
+    """
+    # Keep the offsets signed, exactly like calc_current_state: taking abs()
+    # before cords_to_direction would discard on which side the ghost is.
+    x_ghost_dist = pac_x - ghost_x
+    y_ghost_dist = pac_y - ghost_y
+
+    ghost_distance_sum = abs(x_ghost_dist) + abs(y_ghost_dist)
+    ghost_distance = 1 if ghost_distance_sum == 1 else 2
+
+    ghost_direction = cords_to_direction(x_ghost_dist, y_ghost_dist)
+
+    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
+
+    return ghost_distance, ghost_direction, cookie_direction
+
 
 
 def get_closest_cookie_direction(labyrinth, pac_x, pac_y):