# Source: MLE-Pacman/ReinforcmentLearning/game.py (Python, 180 lines, 5.1 KiB)

import pygame
import math
import os
from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill
import data.classes_consts as consts
import data.conf as conf
from data.classes import Pacman, Ghost
def start_try(EPSILON, ALPHA, GAMMA):
    """Train a Q-table over ``conf.AMOUNT_RUNS`` games and collect statistics.

    A fresh Q-table is created with ``initial_q_fill()`` and then updated in
    place by every call to ``run_game``.

    Args:
        EPSILON: Forwarded to ``run_game`` (used there for epsilon-greedy
            action selection).
        ALPHA: Forwarded to ``run_game`` (step size of the Q-update).
        GAMMA: Forwarded to ``run_game`` (weight of the next state's value).

    Returns:
        A tuple ``(cookies_per_run, iterations)``: two lists with one entry
        per training run, holding the number of cookies eaten and the number
        of game-loop iterations of that run.

    Note:
        When ``conf.show_trained`` is set, games are replayed endlessly after
        training; press Ctrl+C to stop the replay and get the results back.
    """
    #? Learning initial
    q_values = initial_q_fill()

    #? Game initial
    pygame.init()
    screen = None
    if conf.show_game:
        screen = consts.screen
        pygame.display.set_caption("Micro-Pacman")

    # Progress milestones as integer run indices. Comparing the loop index
    # against float divisions (AMOUNT_RUNS / 4) only matches when AMOUNT_RUNS
    # is divisible by 4; floor division reports progress for every run count.
    total_runs = conf.AMOUNT_RUNS
    milestones = {
        total_runs // 4: "1 / 4 done",
        total_runs // 2: "2 / 4 done",
        3 * total_runs // 4: "3 / 4 done",
    }

    #? Start try
    cookies_per_run = []
    iterations = []
    for run_index in range(total_runs):
        # Index 0 is skipped so tiny run counts do not announce progress
        # before anything has been played.
        if conf.show_game and run_index > 0 and run_index in milestones:
            print(milestones[run_index])

        amount_cookies_ate, iterations_per_run = run_game(
            q_values, EPSILON, ALPHA, GAMMA, screen
        )
        cookies_per_run.append(amount_cookies_ate)
        iterations.append(iterations_per_run)

    if conf.show_trained:
        screen = consts.screen
        pygame.display.set_caption("Micro-Pacman")
        try:
            while True:
                print("After game")
                run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
        except KeyboardInterrupt:
            # The replay loop has no other exit; treating Ctrl+C as "stop the
            # demo" keeps the cleanup and the training results reachable.
            pass

    pygame.quit()
    return cookies_per_run, iterations
def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
    """Play one game of Micro-Pacman and update ``q_values`` after every step.

    Pacman starts at (1, 1) and the ghost at (COLS - 2, ROWS - 2). On every
    iteration the agent picks an action with ``epsilon_greedy``, the ghost
    moves on every third iteration, and the Q-table entry of the
    (state, action) pair just played is updated. The game ends when the ghost
    reaches Pacman, when no cookie is left, or when the window is closed.

    Args:
        q_values: Q-table indexed as ``q_values[state][action.value]``;
            modified in place.
        EPSILON: Passed to ``epsilon_greedy`` for action selection.
        ALPHA: Step size applied to the temporal-difference error.
        GAMMA: Weight of the best next-state value in the update target.
        screen: Surface handed to ``Pacman``/``Ghost`` and used for drawing
            when ``conf.show_game`` is set; may be ``None`` otherwise.

    Returns:
        A tuple ``(cookies_eaten, iterations)`` for this game.
    """
    clock = pygame.time.Clock()
    labyrinth = consts.LABYRINTH_INIT.copy()
    # Count the cookies of the actual map instead of assuming a fixed total,
    # so the "cookies eaten" result stays correct if the labyrinth changes.
    total_cookies = sum(row.count(".") for row in labyrinth)

    # Initialize Pacman and Ghost positions
    pacman = Pacman(screen, 1, 1)
    ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)

    state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)

    key_moves = (
        (pygame.K_LEFT, (-1, 0)),
        (pygame.K_RIGHT, (1, 0)),
        (pygame.K_UP, (0, -1)),
        (pygame.K_DOWN, (0, 1)),
    )
    agent_moves = (
        (Direction.LEFT, (-1, 0)),
        (Direction.RIGHT, (1, 0)),
        (Direction.UP, (0, -1)),
        (Direction.DOWN, (0, 1)),
    )

    #? GAME LOOP
    running = True
    episode_over = False  # True only for a real win/loss, not a window close
    iteration = 0
    while running:
        reward = 0
        iteration += 1

        # Handle events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        if conf.show_game:
            screen.fill(consts.BLACK)

            #? Arrow key movements
            # Same call shape as the agent moves below: move() is given the
            # labyrinth first, then the step.
            pressed = pygame.key.get_pressed()
            for key, (dx, dy) in key_moves:
                if pressed[key]:
                    pacman.move(labyrinth, dx, dy)

        #? Agent movements
        action = epsilon_greedy(q_values, state, EPSILON)
        for direction, (dx, dy) in agent_moves:
            if action == direction:
                pacman.move(labyrinth, dx, dy)

        # The ghost only moves on every third iteration.
        if iteration % 3 == 0:
            ghost.move_towards_pacman(labyrinth, pacman)

        if pacman.x == ghost.x and pacman.y == ghost.y:
            if conf.show_game:
                print("Game Over! The ghost caught Pacman.")
            running = False
            episode_over = True
            reward = conf.REWARD_ON_LOSE

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
            row = labyrinth[pacman.y]
            labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]

        # (A half-way bonus and a time-scaled win reward were tried here and
        # are currently disabled.)
        if all("." not in row for row in labyrinth):
            reward = conf.REWARD_ON_WIN
            running = False
            episode_over = True
            if conf.show_game:
                print(f"You Win! Took {iteration} iterations")

        #? Q-learning update
        if episode_over:
            # Terminal transition: there is no successor state, so the target
            # is the reward alone. Bootstrapping from the state just left
            # would feed the entry's own estimate back into its target.
            new_state = state
            best_value_new_state = 0
        else:
            new_state = calc_current_state(
                labyrinth, pacman.x, pacman.y, ghost.x, ghost.y
            )
            best_action_new_state = get_best_q_action(q_values, new_state)
            best_value_new_state = q_values[new_state][best_action_new_state.value]

        current_value = q_values[state][action.value]
        td_error = reward + GAMMA * best_value_new_state - current_value
        q_values[state][action.value] = current_value + ALPHA * td_error

        state = new_state

        if not running:
            remaining_cookies = sum(row.count(".") for row in labyrinth)
            return total_cookies - remaining_cookies, iteration

        if conf.show_game:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
            ghost.draw()

            # Update display
            pygame.display.flip()

            # Cap the frame rate
            clock.tick(40)
if __name__ == "__main__":
    # run_game() needs a Q-table, the hyperparameters and a screen, all of
    # which start_try() sets up, so the script entry point goes through it.
    # NOTE(review): the hyperparameter values below are illustrative
    # placeholders - replace them with the project's intended settings.
    start_try(EPSILON=0.1, ALPHA=0.1, GAMMA=0.9)