diff --git a/04_pacman_rl/pacman.py b/04_pacman_rl/pacman.py
index 673ca34..42b98b0 100644
--- a/04_pacman_rl/pacman.py
+++ b/04_pacman_rl/pacman.py
@@ -149,7 +149,6 @@
 def train(q, num_iterations=10000):
     """Train the agent for num_iterations without pygame visualization."""
     global labyrinth
-    outer_iter = 0
     total_iterations = 0
 
     while total_iterations < num_iterations:
@@ -168,12 +167,25 @@ def train(q, num_iterations=10000):
         ghost_x, ghost_y = COLS - 2, ROWS - 2
         s = (pacman_x, pacman_y, ghost_x, ghost_y)
 
-        while running and total_iterations < num_iterations:
+        while running:
             iter = iter + 1
-            total_iterations += 1
+
+            # Check for collisions
+            if pacman_x == ghost_x and pacman_y == ghost_y:
+                running = False
+                # total_iterations += 1
+
+            # Eat cookies
+            if labyrinth[pacman_y][pacman_x] == ".":
+                labyrinth[pacman_y] = labyrinth[pacman_y][:pacman_x] + " " + labyrinth[pacman_y][pacman_x+1:]
+
+            # Check if all cookies are eaten
+            if all("." not in row for row in labyrinth):
+                running = False
+                total_iterations += 1
 
             # Q-Learning
-            a = rl.epsilon_greedy(q, s)
+            a = rl.epsilon_greedy(q, s, 0.025)
             s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
             q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
             s = s_new
@@ -200,24 +212,9 @@ def train(q, num_iterations=10000):
                 ghost_y -= 1
 
             s = (pacman_x, pacman_y, ghost_x, ghost_y)
-
-            # Check for collisions
-            if pacman_x == ghost_x and pacman_y == ghost_y:
-                running = False
-                break
-
-            # Eat cookies
-            if labyrinth[pacman_y][pacman_x] == ".":
-                labyrinth[pacman_y] = labyrinth[pacman_y][:pacman_x] + " " + labyrinth[pacman_y][pacman_x+1:]
-
-            # Check if all cookies are eaten
-            if all("." not in row for row in labyrinth):
-                running = False
-                break
 
-        outer_iter += 1
-        if outer_iter % 100 == 0:
-            print(f"Training iteration {outer_iter}, Total steps: {total_iterations}")
+        if total_iterations % 500 == 0:
+            print(f"Training iteration {total_iterations}")
 
     return q
 
@@ -226,6 +223,9 @@
 def visualize(q, num_games=10):
     """Visualize the trained agent playing the game."""
     global labyrinth
+    games_won = 0
+    games_lost = 0
+
     clock = pygame.time.Clock()
 
     for game_num in range(num_games):
@@ -246,12 +246,30 @@ def visualize(q, num_games=10):
 
         print(f"Game {game_num + 1}/{num_games}")
 
-        while running or iter < 100:
+        while running or iter < 300:
             screen.fill(BLACK)
             iter = iter + 1
 
+            # Check for collisions
+            if pacman.x == ghost.x and pacman.y == ghost.y:
+                print("Game Over! The ghost caught Pacman.")
+                running = False
+                games_lost += 1
+                break
+
+            # Eat cookies
+            if labyrinth[pacman.y][pacman.x] == ".":
+                labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
+
+            # Check if all cookies are eaten
+            if all("." not in row for row in labyrinth):
+                print("You Win! Pacman ate all the cookies.")
+                running = False
+                games_won += 1
+                break
+
             # Q-Learning
-            a = rl.epsilon_greedy(q, s, epsilon=0.025)
+            a = rl.epsilon_greedy(q, s, 0.025)
             s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
             q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
             s = s_new
@@ -263,30 +281,15 @@ def visualize(q, num_games=10):
 
             s = (pacman.x, pacman.y, ghost.x, ghost.y)
 
-            # Check for collisions
-            if pacman.x == ghost.x and pacman.y == ghost.y:
-                print("Game Over! The ghost caught Pacman.")
-                running = False
-                break
-
-            # Eat cookies
-            if labyrinth[pacman.y][pacman.x] == ".":
-                labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
-
-            # Check if all cookies are eaten
-            if all("." not in row for row in labyrinth):
-                print("You Win! Pacman ate all the cookies.")
-                running = False
-                break
-
             # Draw
             draw_labyrinth()
             pacman.draw()
             ghost.draw()
             pygame.display.flip()
 
-            tick_speed = 10  # if game_num % 20 == 0 else 100
+            tick_speed = 200  # if game_num % 20 == 0 else 100
             clock.tick(tick_speed)
 
+    print(f"winrate: {games_won / num_games}")
 # Main function
 def main():
@@ -298,10 +301,10 @@ def main():
     q = rl.q_init()
 
     print("Training for 10000 iterations...")
-    q = train(q, num_iterations=5000)
+    q = train(q, num_iterations=10000)
 
     print("\nTraining complete! Starting visualization...")
-    visualize(q, num_games=10)
+    visualize(q, num_games=100)
 
     pygame.quit()
 
diff --git a/04_pacman_rl/reinforcement_learning.py b/04_pacman_rl/reinforcement_learning.py
index 6993440..7999b9d 100644
--- a/04_pacman_rl/reinforcement_learning.py
+++ b/04_pacman_rl/reinforcement_learning.py
@@ -127,6 +127,17 @@ def take_action(s, a, labyrinth):
         # print("Invalid action")
     else:
         r = calc_reward(tuple(s_new), labyrinth)
+
+        # Boost reward if moving closer to the nearest cookie
+        # (assumes get_nearest_cookie returns the cookie's (x, y) coordinates)
+        cookie_x, cookie_y = get_nearest_cookie(s[0], s[1], labyrinth)
+        old_distance = abs(cookie_x - s[0]) + abs(cookie_y - s[1])
+
+        new_cookie_x, new_cookie_y = get_nearest_cookie(s_new[0], s_new[1], labyrinth)
+        new_distance = abs(new_cookie_x - s_new[0]) + abs(new_cookie_y - s_new[1])
+
+        if new_distance < old_distance:
+            r += 2  # Bonus for moving closer to the cookie
 
     # Mark new Pacman position as eaten (if it's a cookie)
     if labyrinth[s_new[1]][s_new[0]] == ".":
@@ -135,11 +146,6 @@ def take_action(s, a, labyrinth):
         row_list[s_new[0]] = " "
         labyrinth[s_new[1]] = "".join(row_list)
 
-    # Check if all cookies are eaten
-    if all("." not in row for row in labyrinth):
-        r = 100.0
-        #print("All cookies eaten")
-
     return tuple(s_new), r, labyrinth
 
 def max_q(q, s_new, labyrinth):
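
For context: both call sites in this patch now pass epsilon positionally, as rl.epsilon_greedy(q, s, 0.025). A minimal sketch of such a helper, assuming q[s] maps actions to values as the q[s][a] updates above suggest (the actual implementation in reinforcement_learning.py may represent actions differently):

    import random

    def epsilon_greedy(q, s, epsilon=0.1):
        """Pick a random action with probability epsilon, else the greedy one."""
        actions = list(q[s].keys())              # assumes q[s] maps action -> value
        if random.random() < epsilon:
            return random.choice(actions)        # explore
        return max(actions, key=lambda a: q[s][a])  # exploit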