MLE-Pacman/ReinforcmentLearning/game.py

import pygame
import math
import os

from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
import conf

# NOTE: pygame is initialized inside start_try(), not at import time

# Nominal screen size and the edge length of one grid cell
SCREEN_WIDTH = SCREEN_HEIGHT = 400
CELL_SIZE = 40

# RGB colour tuples used by the drawing code
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
BLUE = (0, 0, 255)
YELLOW = (255, 255, 0)

REWARD_ON_HALF = 50

# Maze layout, one string per row: "#" is a wall, "." is a cookie
LABYRINTH_INIT = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########",
]

# Grid dimensions, taken from the layout itself
ROWS, COLS = len(LABYRINTH_INIT), len(LABYRINTH_INIT[0])


class Pacman:
    """The player sprite: a grid position plus an animation frame counter."""

    def __init__(self, screen, x, y):
        self.screen, self.x, self.y = screen, x, y
        # Incremented on every draw() call; drives the mouth animation.
        self.count = 0

    def move(self, labyrinth, dx, dy):
        """Step by (dx, dy) unless the target cell of *labyrinth* is a wall."""
        target_x = self.x + dx
        target_y = self.y + dy
        if labyrinth[target_y][target_x] == "#":
            return
        self.x, self.y = target_x, target_y

    def draw(self):
        """Draw the body and, on every second call, a wedge-shaped mouth."""
        centre_x = self.x * CELL_SIZE + CELL_SIZE // 2
        centre_y = self.y * CELL_SIZE + CELL_SIZE // 2
        radius = CELL_SIZE // 2 - 4
        pygame.draw.circle(self.screen, YELLOW, (centre_x, centre_y), radius)

        self.count += 1
        if self.count % 2 != 0:
            return

        # The wedge reaches slightly past the body so it cuts the outline.
        reach = radius * 1.3
        wedge = [(centre_x, centre_y)]
        for angle in (math.pi / 6, -math.pi / 6):
            wedge.append((centre_x + int(reach * math.cos(angle)),
                          centre_y - int(reach * math.sin(angle))))
        pygame.draw.polygon(self.screen, BLACK, wedge)


class Ghost:
    """The chaser sprite, rendered from a small pixel-art template."""

    # Pixel-art template: "#" marks a filled pixel, anything else is blank.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # "
    ]

    def __init__(self, screen, x, y):
        self.screen, self.x, self.y = screen, x, y

    def move_towards_pacman(self, labyrinth, pacman):
        """Take at most one step towards *pacman*.

        Candidate steps are tried in a fixed order (right, left, down, up);
        the first one that reduces the distance on its axis and does not lead
        into a wall is taken.
        """
        candidates = (
            (self.x < pacman.x, 1, 0),
            (self.x > pacman.x, -1, 0),
            (self.y < pacman.y, 0, 1),
            (self.y > pacman.y, 0, -1),
        )
        for wanted, step_x, step_y in candidates:
            if wanted and labyrinth[self.y + step_y][self.x + step_x] != "#":
                self.x += step_x
                self.y += step_y
                return

    def draw(self):
        """Paint one red square per filled template pixel at the ghost's cell."""
        # Edge length of one template pixel on screen.
        pixel_size = CELL_SIZE // len(self.ghost_pixels)
        for template_row, line in enumerate(self.ghost_pixels):
            for template_col, mark in enumerate(line):
                if mark != "#":
                    continue
                left = self.x * CELL_SIZE + template_col * pixel_size
                top = self.y * CELL_SIZE + template_row * pixel_size
                pygame.draw.rect(self.screen, RED, (left, top, pixel_size, pixel_size))


def start_try(EPSILON, ALPHA, GAMMA):
    """Run ``conf.AMOUNT_RUNS`` games on a fresh Q-table and collect statistics.

    Args:
        EPSILON: Forwarded unchanged to every ``run_game`` call.
        ALPHA: Forwarded unchanged to every ``run_game`` call.
        GAMMA: Forwarded unchanged to every ``run_game`` call.

    Returns:
        A ``(cookies_per_run, iterations)`` pair of lists with one entry per
        run. When ``conf.show_trained`` is set, the function replays games
        in an endless loop afterwards and this return is never reached.
    """
    # Learning setup
    q_values = initial_q_fill()

    # Game setup: a window is only opened when the games should be shown
    pygame.init()
    screen = None
    if conf.show_game:
        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
        pygame.display.set_caption("Micro-Pacman")

    # Progress messages, printed when the run index hits the given mark
    quarter = conf.AMOUNT_RUNS / 4
    half = conf.AMOUNT_RUNS / 2
    milestones = (
        (quarter, "1 / 4 done"),
        (half, "2 / 4 done"),
        (half + quarter, "3 / 4 done"),
    )

    cookies_per_run, iterations = [], []
    for run_index in range(conf.AMOUNT_RUNS):
        if conf.show_game:
            for mark, message in milestones:
                if run_index == mark:
                    print(message)

        eaten, steps = run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
        cookies_per_run.append(eaten)
        iterations.append(steps)

        print(f"Run {run_index+1}: {steps} iterations")

    if conf.show_trained:
        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
        pygame.display.set_caption("Micro-Pacman")

        # Replays forever; nothing in this loop breaks out of it.
        while True:
            print("After game")
            run_game(q_values, EPSILON, ALPHA, GAMMA, screen)

    pygame.quit()

    return cookies_per_run, iterations


def _count_cookies(labyrinth):
    """Return how many cookie cells (".") are left in *labyrinth*."""
    return sum(row.count(".") for row in labyrinth)


def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
    """Play one game and update *q_values* in place after every step.

    Args:
        q_values: Mapping from ``(state, action)`` tuples to learned values;
            it is read and written with those keys.
        EPSILON: Passed through to ``epsilon_greedy`` for action selection.
        ALPHA: Learning rate applied to the temporal-difference error.
        GAMMA: Discount factor applied to the best value of the next state.
        screen: Surface handed to the sprites; it is only drawn to when
            ``conf.show_game`` is set.

    Returns:
        A ``(cookies_eaten, iterations)`` tuple for the finished game.
    """
    clock = pygame.time.Clock()
    # Rows are immutable strings that get replaced (never mutated) when a
    # cookie is eaten, so a shallow copy keeps LABYRINTH_INIT intact.
    labyrinth = LABYRINTH_INIT.copy()
    total_cookies = _count_cookies(labyrinth)

    # Pacman starts top-left, the ghost bottom-right (both inside the walls)
    pacman = Pacman(screen, 1, 1)
    ghost = Ghost(screen, COLS - 2, ROWS - 2)

    state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)

    running = True
    iteration = 0
    while running:
        reward = 0

        # Closing the window ends the game after this step
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        if conf.show_game:
            screen.fill(BLACK)

        iteration += 1

        # Manual arrow-key input; applied in addition to the agent's action
        # below. move() needs the labyrinth for its wall check.
        keys = pygame.key.get_pressed()
        if keys[pygame.K_LEFT]:
            pacman.move(labyrinth, -1, 0)
        if keys[pygame.K_RIGHT]:
            pacman.move(labyrinth, 1, 0)
        if keys[pygame.K_UP]:
            pacman.move(labyrinth, 0, -1)
        if keys[pygame.K_DOWN]:
            pacman.move(labyrinth, 0, 1)

        # Agent move
        action = epsilon_greedy(q_values, state, EPSILON)
        if action == Direction.LEFT:
            pacman.move(labyrinth, -1, 0)
        elif action == Direction.RIGHT:
            pacman.move(labyrinth, 1, 0)
        elif action == Direction.UP:
            pacman.move(labyrinth, 0, -1)
        elif action == Direction.DOWN:
            pacman.move(labyrinth, 0, 1)

        # The ghost only moves on every third iteration
        if iteration % 3 == 0:
            ghost.move_towards_pacman(labyrinth, pacman)

        if pacman.x == ghost.x and pacman.y == ghost.y:
            if conf.show_game:
                print("Game Over! The ghost caught Pacman.")
            running = False
            reward = conf.REWARD_ON_LOSE

        # Eat the cookie under Pacman by blanking that cell
        if labyrinth[pacman.y][pacman.x] == ".":
            row = labyrinth[pacman.y]
            labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]

        # Winning overrides any reward set above
        remaining_cookies = _count_cookies(labyrinth)
        if remaining_cookies == 0:
            reward = conf.REWARD_ON_WIN
            running = False

            if conf.show_game:
                print(f"You Win! Took {iteration} iterations")

        # Q-learning update for the (state, action) pair just played.
        # NOTE(review): on the final step the previous state is reused as the
        # successor, so the target still bootstraps from it; textbook
        # Q-learning uses 0 for terminal states. Confirm this is intended.
        if not running:
            new_state = state
        else:
            new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)

        best_action_new_state, _ = get_best_q_action(q_values, new_state)
        best_value_new_state = q_values[(new_state, best_action_new_state)]

        current_value = q_values[(state, action)]
        td_error = reward + GAMMA * best_value_new_state - current_value
        q_values[(state, action)] = current_value + ALPHA * td_error

        state = new_state

        if not running:
            return total_cookies - remaining_cookies, iteration

        # Draw the labyrinth, pacman, and ghost
        if conf.show_game:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
            ghost.draw()

            # Update display
            pygame.display.flip()

            # Cap the frame rate
            clock.tick(40)


def draw_labyrinth(screen, labyrinth):
    """Draw every wall ("#") as a blue square and every cookie (".") as a white dot."""
    for grid_y, line in enumerate(labyrinth):
        for grid_x, symbol in enumerate(line):
            if symbol == "#":
                wall = (grid_x * CELL_SIZE, grid_y * CELL_SIZE, CELL_SIZE, CELL_SIZE)
                pygame.draw.rect(screen, BLUE, wall)
            elif symbol == ".":
                # Cookies sit in the middle of their cell
                centre = (grid_x * CELL_SIZE + CELL_SIZE // 2,
                          grid_y * CELL_SIZE + CELL_SIZE // 2)
                pygame.draw.circle(screen, WHITE, centre, 5)


if __name__ == "__main__":
    # run_game() cannot be called bare: it needs a Q-table, the learning
    # hyperparameters and a screen. start_try() sets all of that up.
    # NOTE(review): the values below are generic placeholders, not tuned
    # settings for this project; replace them as needed.
    start_try(EPSILON=0.1, ALPHA=0.1, GAMMA=0.9)