"""Micro-Pacman: a tiny pygame maze in which Pacman is driven by tabular Q-learning.

``start_try`` runs ``conf.AMOUNT_RUNS`` training episodes on one shared
Q-table and optionally replays the agent afterwards; ``run_game`` plays a
single episode and updates the Q-table after every step.
"""

import math
import os

import pygame

from ReinforcmentLearning.util import (
    Direction,
    calc_current_state,
    calc_time_reward,
    epsilon_greedy,
    get_best_q_action,
    initial_q_fill,
)
import conf

# Define constants
SCREEN_WIDTH = 400
SCREEN_HEIGHT = 400
CELL_SIZE = 40

# Define colors
YELLOW = (255, 255, 0)
RED = (255, 0, 0)
WHITE = (255, 255, 255)
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)

REWARD_ON_HALF = 50

# Labyrinth as a list of strings: "#" is a wall, "." is a cookie,
# and " " (written by run_game) is an emptied cell.
LABYRINTH_INIT = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########",
]

# Get labyrinth dimensions
ROWS = len(LABYRINTH_INIT)
COLS = len(LABYRINTH_INIT[0])

# Number of cookies in a fresh labyrinth; derived so that editing the maze
# keeps the "cookies eaten" statistic correct.
TOTAL_COOKIES = sum(row.count(".") for row in LABYRINTH_INIT)

# Grid offset (dx, dy) applied to Pacman for each agent action.
_ACTION_DELTAS = {
    Direction.LEFT: (-1, 0),
    Direction.RIGHT: (1, 0),
    Direction.UP: (0, -1),
    Direction.DOWN: (0, 1),
}

# Arrow keys and their grid offsets, in the order they are checked.
_KEY_DELTAS = (
    (pygame.K_LEFT, (-1, 0)),
    (pygame.K_RIGHT, (1, 0)),
    (pygame.K_UP, (0, -1)),
    (pygame.K_DOWN, (0, 1)),
)


class Pacman:
    """The player character, positioned on the labyrinth grid."""

    def __init__(self, screen, x, y):
        """Store the drawing surface and the starting cell (x = column, y = row)."""
        self.screen = screen
        self.x = x
        self.y = y
        # Number of draw() calls so far; used to animate the mouth.
        self.count = 0

    def move(self, labyrinth, dx, dy):
        """Move by (dx, dy) cells unless the target cell is a wall ("#")."""
        new_x, new_y = self.x + dx, self.y + dy
        if labyrinth[new_y][new_x] != "#":
            self.x = new_x
            self.y = new_y

    def draw(self):
        """Draw Pacman as a yellow disc with a mouth wedge on every second call."""
        radius = CELL_SIZE // 2 - 4
        start_angle = math.pi / 6
        end_angle = -math.pi / 6
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2

        pygame.draw.circle(self.screen, YELLOW, (center_x, center_y), radius)

        # Calculate the points for the mouth; the 1.3 factor places them
        # outside the disc so the black wedge cuts through its edge.
        start_pos = (
            center_x + int(radius * 1.3 * math.cos(start_angle)),
            center_y - int(radius * 1.3 * math.sin(start_angle)),
        )
        end_pos = (
            center_x + int(radius * 1.3 * math.cos(end_angle)),
            center_y - int(radius * 1.3 * math.sin(end_angle)),
        )

        self.count += 1
        if self.count % 2 == 0:
            # Draw the mouth by filling a polygon
            pygame.draw.polygon(
                self.screen, BLACK, [(center_x, center_y), start_pos, end_pos]
            )


class Ghost:
    """The opponent that chases Pacman through the labyrinth."""

    # Define the pixel art for the ghost using strings ("#" = filled pixel)
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # ",
    ]

    def __init__(self, screen, x, y):
        """Store the drawing surface and the starting cell (x = column, y = row)."""
        self.screen = screen
        self.x = x
        self.y = y

    def move_towards_pacman(self, labyrinth, pacman):
        """Take at most one step towards Pacman.

        The horizontal direction is tried first. Only the first branch whose
        direction points at Pacman and is not blocked by a wall is taken, so
        the ghost may stay where it is.
        """
        if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#":
            self.x += 1
        elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#":
            self.x -= 1
        elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#":
            self.y += 1
        elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#":
            self.y -= 1

    def draw(self):
        """Draw the ghost pixel art in red inside its current cell."""
        # Size of each pixel in the ghost art
        pixel_size = CELL_SIZE // len(self.ghost_pixels)
        for row_idx, row in enumerate(self.ghost_pixels):
            for col_idx, pixel in enumerate(row):
                if pixel == "#":
                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
                    pygame.draw.rect(
                        self.screen,
                        RED,
                        (pixel_x, pixel_y, pixel_size, pixel_size),
                    )


def start_try(EPSILON, ALPHA, GAMMA):
    """Train on ``conf.AMOUNT_RUNS`` episodes and return per-episode statistics.

    Args:
        EPSILON: value passed to ``epsilon_greedy`` on every step.
        ALPHA: step size of the Q-value update.
        GAMMA: factor applied to the best Q-value of the next state.

    Returns:
        ``(cookies_per_run, iterations)``: two lists with one entry per
        training episode (cookies eaten, loop iterations played).

    When ``conf.show_trained`` is set, episodes are replayed in a window after
    training until the window is closed. The replay still uses EPSILON and
    still updates the Q-table.
    """
    # Learning initial
    q_values = initial_q_fill()

    # Game initial
    pygame.init()
    screen = None

    if conf.show_game:
        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
        pygame.display.set_caption("Micro-Pacman")

    # Start try
    cookies_per_run = []
    iterations = []

    for run_index in range(conf.AMOUNT_RUNS):
        if conf.show_game:
            # NOTE(review): these are exact float comparisons, so a message
            # only appears when AMOUNT_RUNS divides evenly; confirm that the
            # show_game condition is intended here.
            if run_index == conf.AMOUNT_RUNS / 4:
                print("1 / 4 done")
            if run_index == conf.AMOUNT_RUNS / 2:
                print("2 / 4 done")
            if run_index == (conf.AMOUNT_RUNS / 2) + (conf.AMOUNT_RUNS / 4):
                print("3 / 4 done")

        amount_cookies_ate, iterations_per_run = run_game(
            q_values, EPSILON, ALPHA, GAMMA, screen
        )
        cookies_per_run.append(amount_cookies_ate)
        iterations.append(iterations_per_run)
        print(f"Run {run_index+1}: {iterations_per_run} iterations")

    if conf.show_trained:
        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
        pygame.display.set_caption("Micro-Pacman")

        while True:
            print("After game")
            _, _, quit_requested = _play_episode(
                q_values, EPSILON, ALPHA, GAMMA, screen
            )
            # Closing the window ends the replay; without this check the loop
            # could never terminate and the cleanup below was unreachable.
            if quit_requested:
                break

    pygame.quit()

    return cookies_per_run, iterations


def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
    """Play one episode, updating ``q_values`` in place after every step.

    Args:
        q_values: mapping from ``(state, action)`` to a Q-value.
        EPSILON: value passed to ``epsilon_greedy`` on every step.
        ALPHA: step size of the Q-value update.
        GAMMA: factor applied to the best Q-value of the next state.
        screen: pygame surface to draw on, or ``None`` to run without drawing.

    Returns:
        ``(cookies_eaten, iterations)`` for the episode.
    """
    cookies_eaten, iteration, _ = _play_episode(
        q_values, EPSILON, ALPHA, GAMMA, screen
    )
    return cookies_eaten, iteration


def _play_episode(q_values, EPSILON, ALPHA, GAMMA, screen):
    """Run one episode; return ``(cookies_eaten, iterations, quit_requested)``.

    ``quit_requested`` is True when a ``pygame.QUIT`` event ended the episode.
    """
    clock = pygame.time.Clock()
    # A list copy is enough: rows are immutable strings that get replaced,
    # never modified in place, so LABYRINTH_INIT stays untouched.
    labyrinth = LABYRINTH_INIT.copy()

    # Initialize Pacman and Ghost positions
    pacman = Pacman(screen, 1, 1)
    ghost = Ghost(screen, COLS - 2, ROWS - 2)

    # Draw whenever a surface is available. Previously drawing depended on
    # conf.show_game alone, which left the conf.show_trained replay window
    # blank when show_game was off.
    render = screen is not None

    state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)

    # GAME LOOP
    running = True
    quit_requested = False
    iteration = 0

    while running:
        reward = 0

        # Handle events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                quit_requested = True

        if render:
            screen.fill(BLACK)

        iteration += 1

        # Handle manual Pacman movement. These moves happen in addition to
        # the agent's action below. The labyrinth argument was missing here,
        # so any held arrow key raised TypeError.
        keys = pygame.key.get_pressed()
        for key, (dx, dy) in _KEY_DELTAS:
            if keys[key]:
                pacman.move(labyrinth, dx, dy)

        # Agent action
        action = epsilon_greedy(q_values, state, EPSILON)
        delta = _ACTION_DELTAS.get(action)
        if delta is not None:
            pacman.move(labyrinth, *delta)

        # The ghost only moves on every third iteration.
        if iteration % 3 == 0:
            ghost.move_towards_pacman(labyrinth, pacman)

        # Check for collisions (episode ends if the ghost catches Pacman)
        if pacman.x == ghost.x and pacman.y == ghost.y:
            if conf.show_game:
                print("Game Over! The ghost caught Pacman.")
            running = False
            reward = conf.REWARD_ON_LOSE

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
            row = labyrinth[pacman.y]
            labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]

        # A bonus for eating half of the cookies (REWARD_ON_HALF) was tried
        # at this point and is currently disabled.

        # Check if all cookies are eaten (episode ends). This runs after the
        # collision check, so a win on the same step overrides the lose reward.
        if all("." not in row for row in labyrinth):
            # Scaling the win reward with calc_time_reward(iteration) was
            # tried here and is currently disabled.
            reward = conf.REWARD_ON_WIN
            running = False
            if conf.show_game:
                print(f"You Win! Took {iteration} iterations")

        # Q-value update.
        # NOTE(review): when the episode has just ended, the pre-move state is
        # reused as the successor, so the update bootstraps from that state's
        # best Q-value instead of a terminal value. Confirm this is intended.
        if not running:
            new_state = state
        else:
            new_state = calc_current_state(
                labyrinth, pacman.x, pacman.y, ghost.x, ghost.y
            )

        best_action_new_state, _ = get_best_q_action(q_values, new_state)
        best_value_new_state = q_values[(new_state, best_action_new_state)]

        current_value = q_values.get((state, action))
        adjusted_value = ALPHA * (
            reward + GAMMA * best_value_new_state - current_value
        )
        q_values[(state, action)] = current_value + adjusted_value

        state = new_state

        if not running:
            cookies_left = sum(row.count(".") for row in labyrinth)
            return TOTAL_COOKIES - cookies_left, iteration, quit_requested

        # Draw the labyrinth, pacman, and ghost
        if render:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
            ghost.draw()

            # Update display
            pygame.display.flip()

            # Cap the frame rate
            clock.tick(40)


def draw_labyrinth(screen, labyrinth):
    """Draw walls as blue squares and cookies as small white circles."""
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == "#":
                pygame.draw.rect(
                    screen,
                    BLUE,
                    (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE),
                )
            elif cell == ".":
                pygame.draw.circle(
                    screen,
                    WHITE,
                    (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2),
                    5,
                )


if __name__ == "__main__":
    # The previous entry point called run_game() without its five required
    # arguments (and before pygame.init()), which raised TypeError.
    # TODO(review): the hyperparameters below are placeholders; replace them
    # with the values the project normally trains with.
    start_try(EPSILON=0.1, ALPHA=0.1, GAMMA=0.9)