diff --git a/game.py b/game.py index cdea1a7..1417865 100644 --- a/game.py +++ b/game.py @@ -2,6 +2,8 @@ import pygame import random import math +from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value + # Initialize pygame pygame.init() @@ -106,8 +108,9 @@ def draw_labyrinth(): elif cell == ".": pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5) + # Main game function -def main(): +def main(q_values, EPSILON, ALPHA, GAMMA): clock = pygame.time.Clock() # Initialize Pacman and Ghost positions @@ -115,6 +118,11 @@ def main(): ghost = Ghost(COLS - 2, ROWS - 2) # Game loop + #? -------------------------MY CODE----------------------------------- + state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y) + reward = 0 + #? -------------------------MY CODE----------------------------------- + running = True iter = 0 while running: @@ -136,6 +144,19 @@ def main(): if keys[pygame.K_DOWN]: pacman.move(0, 1) + #? -------------------------MY CODE----------------------------------- + action = epsilon_greedy(q_values, state, EPSILON) + if action == Direction.LEFT: + pacman.move(-1, 0) + if action == Direction.RIGHT: + pacman.move(1, 0) + if action == Direction.UP: + pacman.move(0, -1) + if action == Direction.DOWN: + pacman.move(0, 1) + #? -------------------------MY CODE----------------------------------- + + if iter%3==0: # Ghost moves towards Pacman ghost.move_towards_pacman(pacman) @@ -155,6 +176,15 @@ def main(): running = False # Draw the labyrinth, pacman, and ghost + #? 
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the Q-learning state for the current board position.

    Args:
        labyrinth: Sequence of rows; a cell equal to "." holds a cookie.
        pac_x, pac_y: Pac-Man's column and row.
        ghost_x, ghost_y: The ghost's column and row.

    Returns:
        A tuple ``(x_dist, y_dist, cookie_direction)``. ``x_dist`` and
        ``y_dist`` are the absolute per-axis distances between Pac-Man and
        the ghost. ``cookie_direction`` is the ``Direction`` member pointing
        from Pac-Man towards the nearest cookie (Manhattan distance), or
        ``None`` when no cookie is left or the cookie is on Pac-Man's cell.
    """
    # Distance Pac-Man <-> ghost, per axis.
    x_dist = abs(pac_x - ghost_x)
    y_dist = abs(pac_y - ghost_y)

    cookies = get_all_cookies_locations(labyrinth)
    if not cookies:
        # Nothing left to chase; avoids touching loop variables that were
        # never bound.
        return x_dist, y_dist, None

    # min() keeps the first minimal element, so ties resolve in scan order
    # (row by row, left to right).
    cookie_x, cookie_y = min(
        cookies,
        key=lambda pos: abs(pac_x - pos[0]) + abs(pac_y - pos[1]),
    )

    cookie_direction = _direction_towards(cookie_x - pac_x, cookie_y - pac_y)
    return x_dist, y_dist, cookie_direction


def _direction_towards(delta_x, delta_y):
    """Map an offset (target minus origin) to the Direction of its dominant axis.

    x grows to the right and y grows downwards. Ties between the axes are
    resolved in favour of the horizontal axis. Returns ``None`` for a zero
    offset.
    """
    if delta_x == 0 and delta_y == 0:
        return None
    if abs(delta_x) >= abs(delta_y):
        return Direction.RIGHT if delta_x > 0 else Direction.LEFT
    return Direction.DOWN if delta_y > 0 else Direction.UP


def get_all_cookies_locations(labyrinth):
    """Return the ``(x, y)`` position of every "." cell, scanning row by row."""
    return [
        (x, y)
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    ]


def epsilon_greedy(q_values, state, epsilon):
    """Choose an action for ``state`` with an epsilon-greedy policy.

    With probability ``epsilon`` an exploratory action is returned, otherwise
    the best action reported by ``get_best_q_value``.

    Args:
        q_values: Mapping keyed by ``(state, action)``.
        state: The current state tuple.
        epsilon: Exploration probability in ``[0, 1]``.
    """
    best_action, states_for_epsilon = get_best_q_value(q_values, state)

    # Exploit: keep the greedy choice.
    if random.random() >= epsilon:
        return best_action

    # Explore. NOTE(review): states_for_epsilon is presumably the list of
    # actions already recorded for this state -- confirm in get_best_q_value.
    if not states_for_epsilon:
        return get_random_direction()
    return random.choice(states_for_epsilon)


def get_random_direction():
    """Return a uniformly random member of ``Direction``."""
    return random.choice(list(Direction))