"""Micro-Pacman: a grid maze game where a tabular Q-learning agent is trained
headless, then visualized playing against a chasing ghost.

State is the tuple (pacman_x, pacman_y, ghost_x, ghost_y); actions are
0=left, 1=right, 2=up, 3=down. The Q-table is persisted to JSON between runs.
"""

import ast
import json
import math
import os

import pygame

import reinforcement_learning as rl

# Initialize pygame
pygame.init()

# Screen / grid constants
SCREEN_WIDTH = 400
SCREEN_HEIGHT = 400
CELL_SIZE = 40

# Colors (RGB)
YELLOW = (255, 255, 0)
RED = (255, 0, 0)
WHITE = (255, 255, 255)
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)

# Pristine maze layout: '#' = wall, '.' = cookie, ' ' = eaten cell.
# Kept immutable so each episode can start from a fresh copy.
INITIAL_LABYRINTH = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########",
]

# Mutable working copy; rows are rebuilt as cookies are eaten.
labyrinth = list(INITIAL_LABYRINTH)

# Maze dimensions in cells
ROWS = len(labyrinth)
COLS = len(labyrinth[0])

# Q-learning hyperparameters
GAMMA = 0.90  # discount factor
ALPHA = 0.2   # learning rate

# Game window sized to the maze, not SCREEN_WIDTH/HEIGHT
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman")


class Pacman:
    """The player sprite: grid position plus an animation frame counter."""

    def __init__(self, x, y):
        self.x = x  # column index in the maze
        self.y = y  # row index in the maze
        self.count = 0  # frame counter used to animate the mouth (open every other frame)

    def move(self, dx, dy):
        """Move one cell by (dx, dy) unless the destination is a wall."""
        new_x, new_y = self.x + dx, self.y + dy
        if labyrinth[new_y][new_x] != "#":
            self.x = new_x
            self.y = new_y

    def draw(self):
        """Draw Pacman as a yellow circle with a black wedge for the mouth."""
        radius = CELL_SIZE // 2 - 4
        start_angle = math.pi / 6
        end_angle = -math.pi / 6
        cx = self.x * CELL_SIZE + CELL_SIZE // 2
        cy = self.y * CELL_SIZE + CELL_SIZE // 2
        pygame.draw.circle(screen, YELLOW, (cx, cy), radius)
        # Mouth corner points, pushed slightly past the radius (factor 1.3)
        # so the wedge fully covers the circle's edge.
        start_pos = (cx + int(radius * 1.3 * math.cos(start_angle)),
                     cy - int(radius * 1.3 * math.sin(start_angle)))
        end_pos = (cx + int(radius * 1.3 * math.cos(end_angle)),
                   cy - int(radius * 1.3 * math.sin(end_angle)))
        self.count += 1
        # Open the mouth every other frame for a simple chomping animation.
        if self.count % 2 == 0:
            pygame.draw.polygon(screen, BLACK, [(cx, cy), start_pos, end_pos])


class Ghost:
    """The chasing ghost, rendered from a small pixel-art bitmap."""

    # '#' cells are drawn as red pixels; spaces are transparent.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # ",
    ]

    def __init__(self, x, y):
        self.x = x  # column index in the maze
        self.y = y  # row index in the maze

    def move_towards_pacman(self, pacman):
        """Greedy one-step chase: close the x gap first, then the y gap.

        Only the first legal move in that fixed priority order is taken,
        so the ghost can be blocked by walls rather than pathfind around them.
        """
        if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#":
            self.x += 1
        elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#":
            self.x -= 1
        elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#":
            self.y += 1
        elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#":
            self.y -= 1

    def draw(self):
        """Blit the pixel-art bitmap into this ghost's cell, scaled to fit."""
        pixel_size = CELL_SIZE // len(self.ghost_pixels)  # size of one art pixel
        for row_idx, row in enumerate(self.ghost_pixels):
            for col_idx, pixel in enumerate(row):
                if pixel == "#":
                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
                    pygame.draw.rect(screen, RED,
                                     (pixel_x, pixel_y, pixel_size, pixel_size))


def draw_labyrinth():
    """Draw walls as blue squares and remaining cookies as white dots."""
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == "#":
                pygame.draw.rect(screen, BLUE,
                                 (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE))
            elif cell == ".":
                pygame.draw.circle(screen, WHITE,
                                   (x * CELL_SIZE + CELL_SIZE // 2,
                                    y * CELL_SIZE + CELL_SIZE // 2), 5)


def move_pacman(pacman, a):
    """Apply action `a` (0=left, 1=right, 2=up, 3=down) to `pacman`."""
    if a == 0:  # left
        pacman.move(-1, 0)
    if a == 1:  # right
        pacman.move(1, 0)
    if a == 2:  # up
        pacman.move(0, -1)
    if a == 3:  # down
        pacman.move(0, 1)


def save_q_table(q, filename="q_table.json"):
    """Save Q-table to a JSON file.

    Tuple state keys are stringified because JSON object keys must be strings.
    """
    q_json = {str(k): v for k, v in q.items()}
    with open(filename, 'w') as f:
        json.dump(q_json, f)
    # FIX: original f-string printed the literal text instead of the filename.
    print(f"Q-table saved to {filename}")


def load_q_table(filename="q_table.json"):
    """Load a Q-table from JSON, or return None if the file doesn't exist.

    String keys are parsed back into state tuples.
    """
    if not os.path.exists(filename):
        # FIX: original f-string printed the literal text instead of the filename.
        print(f"No saved Q-table found at {filename}. Starting fresh.")
        return None
    with open(filename, 'r') as f:
        q_json = json.load(f)
    # FIX: was eval(k) — ast.literal_eval safely parses the tuple reprs
    # without executing arbitrary code from the file.
    q = {ast.literal_eval(k): v for k, v in q_json.items()}
    print(f"Q-table loaded from {filename}")
    return q


def train(q, num_iterations=10000):
    """Train the agent for `num_iterations` winning episodes, without pygame.

    NOTE: only episodes where all cookies are eaten increment
    total_iterations (the increment on ghost-collision is commented out in
    the original), so losses do not count toward the quota.
    """
    global labyrinth
    total_iterations = 0
    while total_iterations < num_iterations:
        labyrinth = list(INITIAL_LABYRINTH)  # fresh maze each episode
        running = True
        step = 0  # renamed from `iter` to avoid shadowing the builtin

        # Episode start positions (no sprite objects needed headless).
        pacman_x, pacman_y = 1, 1
        ghost_x, ghost_y = COLS - 2, ROWS - 2
        s = (pacman_x, pacman_y, ghost_x, ghost_y)

        while running:
            step += 1

            # Ghost caught Pacman: end the episode (loss, not counted).
            if pacman_x == ghost_x and pacman_y == ghost_y:
                running = False
                # total_iterations += 1

            # Eat the cookie on the current cell (strings are immutable,
            # so rebuild the row).
            if labyrinth[pacman_y][pacman_x] == ".":
                labyrinth[pacman_y] = (labyrinth[pacman_y][:pacman_x] + " "
                                       + labyrinth[pacman_y][pacman_x + 1:])

            # All cookies eaten: winning episode, counted.
            if all("." not in row for row in labyrinth):
                running = False
                total_iterations += 1

            # Q-learning update (epsilon-greedy action, standard TD target).
            a = rl.epsilon_greedy(q, s, 0.025)
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new

            # Mirror the action on the tracked Pacman position (wall-checked).
            if a == 0:  # left
                pacman_x = max(1, pacman_x - 1) if labyrinth[pacman_y][pacman_x - 1] != "#" else pacman_x
            elif a == 1:  # right
                pacman_x = min(COLS - 2, pacman_x + 1) if labyrinth[pacman_y][pacman_x + 1] != "#" else pacman_x
            elif a == 2:  # up
                pacman_y = max(1, pacman_y - 1) if labyrinth[pacman_y - 1][pacman_x] != "#" else pacman_y
            elif a == 3:  # down
                pacman_y = min(ROWS - 2, pacman_y + 1) if labyrinth[pacman_y + 1][pacman_x] != "#" else pacman_y

            # Ghost moves every third step, greedily closing x first then y.
            if step % 3 == 0:
                if ghost_x < pacman_x and labyrinth[ghost_y][ghost_x + 1] != "#":
                    ghost_x += 1
                elif ghost_x > pacman_x and labyrinth[ghost_y][ghost_x - 1] != "#":
                    ghost_x -= 1
                elif ghost_y < pacman_y and labyrinth[ghost_y + 1][ghost_x] != "#":
                    ghost_y += 1
                elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
                    ghost_y -= 1

            s = (pacman_x, pacman_y, ghost_x, ghost_y)

            if total_iterations % 500 == 0:
                print(f"Training iteration {total_iterations}")

    return q


def visualize(q, num_games=10):
    """Visualize the trained agent playing `num_games` games with pygame.

    The agent keeps learning (same Q-update as in train) while it plays.
    """
    global labyrinth
    games_won = 0
    games_lost = 0
    clock = pygame.time.Clock()

    for game_num in range(num_games):
        labyrinth = list(INITIAL_LABYRINTH)  # fresh maze each game
        running = True
        step = 0  # renamed from `iter` to avoid shadowing the builtin

        pacman = Pacman(1, 1)
        ghost = Ghost(COLS - 2, ROWS - 2)
        s = (pacman.x, pacman.y, ghost.x, ghost.y)

        print(f"Game {game_num + 1}/{num_games}")

        # FIX: was `running or step < 300` — `running` only turns False right
        # before a break, so the 300-step cap was dead code and a stalemate
        # game looped forever. With `and`, games are capped at 300 steps.
        while running and step < 300:
            # Keep the OS happy: drain the event queue so the window
            # doesn't get flagged as unresponsive.
            pygame.event.pump()

            screen.fill(BLACK)
            step += 1

            # Ghost caught Pacman: loss.
            if pacman.x == ghost.x and pacman.y == ghost.y:
                print("Game Over! The ghost caught Pacman.")
                running = False
                games_lost += 1
                break

            # Eat the cookie on the current cell.
            if labyrinth[pacman.y][pacman.x] == ".":
                labyrinth[pacman.y] = (labyrinth[pacman.y][:pacman.x] + " "
                                       + labyrinth[pacman.y][pacman.x + 1:])

            # All cookies eaten: win.
            if all("." not in row for row in labyrinth):
                print("You Win! Pacman ate all the cookies.")
                running = False
                games_won += 1
                break

            # Q-learning step (agent continues learning during visualization).
            a = rl.epsilon_greedy(q, s, 0.025)
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new

            move_pacman(pacman, a)
            if step % 3 == 0:  # ghost moves every third step
                ghost.move_towards_pacman(pacman)
            s = (pacman.x, pacman.y, ghost.x, ghost.y)

            # Render the frame.
            draw_labyrinth()
            pacman.draw()
            ghost.draw()
            pygame.display.flip()

            tick_speed = 200  # frames per second cap
            clock.tick(tick_speed)

    print("winrate: " + str(games_won / num_games))


def main():
    """Load or train a Q-table, visualize it, then persist it."""
    # Load existing Q-table or create a new one and train it.
    q = load_q_table("q_table.json")
    if q is None:
        q = rl.q_init()
        print("Training for 10000 iterations...")
        q = train(q, num_iterations=10000)

    print("\nTraining complete! Starting visualization...")
    visualize(q, num_games=100)
    pygame.quit()

    # Save the (possibly further-trained) Q-table on exit.
    save_q_table(q, "q_table.json")


if __name__ == "__main__":
    main()