"""Q-learning training loop for the Micro-Pacman pygame game."""

import math
import os

import pygame

from ReinforcmentLearning.util import (
    Direction,
    calc_current_state,
    calc_time_reward,
    draw_labyrinth,
    epsilon_greedy,
    get_best_q_action,
    initial_q_fill,
)
import data.classes_consts as consts
import data.conf as conf
# import data.classes as classes
from data.classes import Pacman, Ghost

# Arrow key -> (dx, dy) step applied to Pacman while that key is held down.
_ARROW_KEY_MOVES = (
    (pygame.K_LEFT, -1, 0),
    (pygame.K_RIGHT, 1, 0),
    (pygame.K_UP, 0, -1),
    (pygame.K_DOWN, 0, 1),
)


def _count_cookies(labyrinth):
    """Return how many "." cells are left in the labyrinth rows."""
    return sum(row.count(".") for row in labyrinth)


def start_try(EPSILON, ALPHA, GAMMA):
    """Train a freshly initialised Q-table for ``conf.AMOUNT_RUNS`` games.

    Args:
        EPSILON: Forwarded to ``epsilon_greedy`` on every step (presumably
            the exploration rate -- confirm against ``util``).
        ALPHA: Factor applied to the temporal-difference error in the
            Q-value update.
        GAMMA: Factor applied to the best Q-value of the successor state.

    Returns:
        ``(cookies_per_run, iterations)``: two lists with one entry per run,
        holding the cookies eaten and the loop iterations of that run.

    Note:
        When ``conf.show_trained`` is set, the function replays games
        forever after training and never returns normally.
    """
    #? Learning initial
    q_values = initial_q_fill()
    print(len(q_values))

    #? Game initial
    pygame.init()
    try:
        screen = None
        if conf.show_game:
            screen = consts.screen
            pygame.display.set_caption("Micro-Pacman")

        #? Start try
        cookies_per_run = []
        iterations = []
        for x in range(conf.AMOUNT_RUNS):
            if conf.show_game:
                if x == conf.AMOUNT_RUNS / 4:
                    print("1 / 4 done")
                if x == conf.AMOUNT_RUNS / 2:
                    print("2 / 4 done")
                if x == (conf.AMOUNT_RUNS / 2) + (conf.AMOUNT_RUNS / 4):
                    print("3 / 4 done")

            amount_cookies_ate, iterations_per_run = run_game(
                q_values, EPSILON, ALPHA, GAMMA, screen
            )
            cookies_per_run.append(amount_cookies_ate)
            iterations.append(iterations_per_run)
            print(f"Run {x+1}: {iterations_per_run} iterations")

        if conf.show_trained:
            screen = consts.screen
            pygame.display.set_caption("Micro-Pacman")

            # Endless replay with the trained table; only an exception
            # (e.g. Ctrl-C) leaves this loop.
            while True:
                print("After game")
                run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
    finally:
        # Runs even when the replay loop above is interrupted; the plain
        # trailing call was unreachable in that case.
        pygame.quit()

    return cookies_per_run, iterations


def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
    """Play one game and update ``q_values`` in place after every step.

    Args:
        q_values: Mapping ``(state, action) -> value``; mutated by this call.
        EPSILON: Forwarded to ``epsilon_greedy`` when picking the action.
        ALPHA: Factor applied to the temporal-difference error.
        GAMMA: Factor applied to the best Q-value of the successor state.
        screen: Surface handed to ``Pacman``/``Ghost`` and drawn on, or
            ``None`` to run without rendering.

    Returns:
        ``(cookies_eaten, iterations)`` for this game.
    """
    clock = pygame.time.Clock()
    labyrinth = consts.LABYRINTH_INIT.copy()
    # Counted up front so the result does not rely on a hard-coded total.
    total_cookies = _count_cookies(labyrinth)

    # Initialize Pacman and Ghost positions
    pacman = Pacman(screen, 1, 1)
    ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)

    state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)

    # Render whenever a surface was supplied, so the trained replay is
    # visible even if conf.show_game is off.
    render = screen is not None

    #? GAME LOOP
    running = True
    iteration = 0
    while running:
        reward = 0
        terminal = False  # True once the game is won or lost this step
        iteration += 1

        # Handle events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        if render:
            screen.fill(consts.BLACK)

        #? Arrow key movements
        # NOTE(review): manual moves are applied on top of the agent's action
        # and therefore distort the learned transition -- confirm this input
        # path is still wanted during training.
        keys = pygame.key.get_pressed()
        for key, dx, dy in _ARROW_KEY_MOVES:
            if keys[key]:
                # Same call shape as the agent moves below.
                pacman.move(labyrinth, dx, dy)

        #? Agent movements
        action = epsilon_greedy(q_values, state, EPSILON)
        if action == Direction.LEFT:
            pacman.move(labyrinth, -1, 0)
        elif action == Direction.RIGHT:
            pacman.move(labyrinth, 1, 0)
        elif action == Direction.UP:
            pacman.move(labyrinth, 0, -1)
        elif action == Direction.DOWN:
            pacman.move(labyrinth, 0, 1)

        # The ghost only moves on every third iteration.
        if iteration % 3 == 0:
            ghost.move_towards_pacman(labyrinth, pacman)

        if pacman.x == ghost.x and pacman.y == ghost.y:
            if conf.show_game:
                print("Game Over! The ghost caught Pacman.")
            running = False
            terminal = True
            reward = conf.REWARD_ON_LOSE

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
            row = labyrinth[pacman.y]
            labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]

        #? half reward (disabled)
        # cookie_counter = _count_cookies(labyrinth)
        # if cookie_counter == 10:
        #     reward = REWARD_ON_HALF
        #     if show_game:
        #         print("Got half reward")

        if all("." not in row for row in labyrinth):
            # time_reward = calc_time_reward(iteration)
            # reward = REWARD_ON_WIN * time_reward
            reward = conf.REWARD_ON_WIN
            running = False
            terminal = True
            if conf.show_game:
                print(f"You Win! Took {iteration} iterations")

        #? Q-learning update
        current_value = q_values[(state, action)]
        if terminal:
            # No successor after a win/loss: the target is the reward alone.
            # Bootstrapping from the current state here would dilute it.
            future_value = 0.0
        else:
            new_state = calc_current_state(
                labyrinth, pacman.x, pacman.y, ghost.x, ghost.y
            )
            best_action_new_state, _ = get_best_q_action(q_values, new_state)
            future_value = q_values[(new_state, best_action_new_state)]

        adjusted_value = ALPHA * (reward + GAMMA * future_value - current_value)
        q_values[(state, action)] = current_value + adjusted_value

        if not running:
            return total_cookies - _count_cookies(labyrinth), iteration

        # Only reached while the game continues, so new_state is always set.
        state = new_state

        if render:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
            ghost.draw()

            # Update display
            pygame.display.flip()

            # Cap the frame rate
            clock.tick(40)


if __name__ == "__main__":
    # run_game() cannot be called without a Q-table and hyperparameters, so
    # start a full training session instead. The values are generic
    # placeholders -- tune as needed.
    start_try(EPSILON=0.1, ALPHA=0.1, GAMMA=0.9)