# MLE/04_pacman_rl/pacman.py
# Micro-Pacman with tabular Q-learning (pygame visualization).
import ast
import json
import math
import os

import pygame

import reinforcement_learning as rl
# Initialize pygame
pygame.init()
# Define constants
SCREEN_WIDTH = 400   # not referenced in this file; window is sized from the maze below
SCREEN_HEIGHT = 400  # not referenced in this file; window is sized from the maze below
CELL_SIZE = 40       # pixel size of one maze cell
# Define colors
YELLOW = (255, 255, 0)   # Pacman
RED = (255, 0, 0)        # ghost
WHITE = (255, 255, 255)  # cookies
BLUE = (0, 0, 255)       # walls
BLACK = (0, 0, 0)        # background and Pacman's mouth wedge
# Labyrinth as a string grid: '#' = wall, '.' = cookie, ' ' = eaten/empty floor.
# Rebound (global) by train()/visualize() to reset cookies between episodes.
labyrinth = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########"
]
# Get labyrinth dimensions (assumes all rows have equal length)
ROWS = len(labyrinth)
COLS = len(labyrinth[0])
# Q-Learning Constants
GAMMA = 0.90  # discount factor
ALPHA = 0.2   # learning rate
# Initialize game screen, sized exactly to the maze grid
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman")
# Pacman class
class Pacman:
    """Player avatar: a grid position plus an animated-mouth sprite."""

    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.count = 0  # frame counter driving the mouth open/close animation

    def move(self, dx, dy):
        """Step one cell in direction (dx, dy) unless the target cell is a wall."""
        target_x = self.x + dx
        target_y = self.y + dy
        if labyrinth[target_y][target_x] != "#":
            self.x, self.y = target_x, target_y

    def draw(self):
        """Render Pacman as a yellow disc; the mouth wedge shows every other frame."""
        radius = CELL_SIZE // 2 - 4
        cx = self.x * CELL_SIZE + CELL_SIZE // 2
        cy = self.y * CELL_SIZE + CELL_SIZE // 2
        pygame.draw.circle(screen, YELLOW, (cx, cy), radius)
        # Mouth wedge corners at +/-30 degrees, pushed slightly past the rim
        # (factor 1.3) so the cut-out fully covers the disc edge.
        upper = (cx + int(radius * 1.3 * math.cos(math.pi / 6)),
                 cy - int(radius * 1.3 * math.sin(math.pi / 6)))
        lower = (cx + int(radius * 1.3 * math.cos(-math.pi / 6)),
                 cy - int(radius * 1.3 * math.sin(-math.pi / 6)))
        self.count += 1
        if self.count % 2 == 0:
            # "Open" the mouth by painting a background-colored wedge over the disc.
            pygame.draw.polygon(screen, BLACK, [(cx, cy), upper, lower])
# Ghost class with pixel art
class Ghost:
    """Chaser enemy rendered from a small ASCII pixel-art bitmap."""

    # 6x6 bitmap: '#' cells are filled red when drawn, spaces are skipped.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # "
    ]

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def move_towards_pacman(self, pacman):
        """Greedy one-cell chase: prefer closing the x-gap, then the y-gap.

        Takes the first candidate step that is not blocked by a wall;
        may stand still if both preferred directions are blocked.
        """
        candidates = []
        dx = pacman.x - self.x
        dy = pacman.y - self.y
        if dx > 0:
            candidates.append((1, 0))
        elif dx < 0:
            candidates.append((-1, 0))
        if dy > 0:
            candidates.append((0, 1))
        elif dy < 0:
            candidates.append((0, -1))
        for step_x, step_y in candidates:
            if labyrinth[self.y + step_y][self.x + step_x] != "#":
                self.x += step_x
                self.y += step_y
                break

    def draw(self):
        """Blit the bitmap into the ghost's grid cell, one rect per '#' pixel."""
        pixel = CELL_SIZE // len(self.ghost_pixels)
        for row_idx, row in enumerate(self.ghost_pixels):
            for col_idx, ch in enumerate(row):
                if ch != "#":
                    continue
                rect = (self.x * CELL_SIZE + col_idx * pixel,
                        self.y * CELL_SIZE + row_idx * pixel,
                        pixel, pixel)
                pygame.draw.rect(screen, RED, rect)
# Draw walls and cookies
def draw_labyrinth():
    """Paint wall cells as blue squares and cookie cells as small white dots."""
    for row_idx, row in enumerate(labyrinth):
        for col_idx, cell in enumerate(row):
            left = col_idx * CELL_SIZE
            top = row_idx * CELL_SIZE
            if cell == "#":
                pygame.draw.rect(screen, BLUE, (left, top, CELL_SIZE, CELL_SIZE))
            elif cell == ".":
                center = (left + CELL_SIZE // 2, top + CELL_SIZE // 2)
                pygame.draw.circle(screen, WHITE, center, 5)
def move_pacman(pacman, a):
    """Translate action index a into a grid step on pacman.

    Action encoding: 0 = left, 1 = right, 2 = up, 3 = down.
    Unknown action values are ignored (no move).
    """
    deltas = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
    if a in deltas:
        pacman.move(*deltas[a])
def save_q_table(q, filename="q_table.json"):
    """Save the Q-table to a JSON file.

    JSON object keys must be strings, so each tuple state key is
    serialized with str(); load_q_table() parses them back into tuples.
    """
    # Convert tuple keys to strings for JSON serialization
    q_json = {str(k): v for k, v in q.items()}
    with open(filename, 'w') as f:
        json.dump(q_json, f)
    # Fix: the original f-string printed a literal "(unknown)" placeholder
    # instead of interpolating the filename.
    print(f"Q-table saved to {filename}")
def load_q_table(filename="q_table.json"):
    """Load a Q-table from a JSON file, or return None if it doesn't exist.

    Keys were serialized as str(tuple) by save_q_table(); they are parsed
    back with ast.literal_eval, which (unlike eval) cannot execute
    arbitrary code from a tampered file.
    """
    if not os.path.exists(filename):
        # Fix: the original f-strings printed a literal "(unknown)" instead
        # of interpolating the filename.
        print(f"No saved Q-table found at {filename}. Starting fresh.")
        return None
    with open(filename, 'r') as f:
        q_json = json.load(f)
    # Convert string keys back to tuples (safe literal parsing, not eval).
    q = {ast.literal_eval(k): v for k, v in q_json.items()}
    print(f"Q-table loaded from {filename}")
    return q
# Training function (without visualization)
def train(q, num_iterations=10000):
    """Train the agent for num_iterations completed games, without pygame.

    Mutates q in place and returns it. The counter only advances when a
    game is *won* (all cookies eaten); a game lost to the ghost is simply
    replayed (see the commented-out increment in the collision branch).
    """
    global labyrinth
    total_iterations = 0
    while total_iterations < num_iterations:
        # Reset the maze (restore all cookies) for each new episode.
        labyrinth = [
            "##########",
            "#........#",
            "#.##..##.#",
            "#........#",
            "##########"
        ]
        running = True
        iter = 0  # per-episode step counter (drives the ghost's move cadence)
        # Initialize Pacman and Ghost positions (no visual objects needed)
        pacman_x, pacman_y = 1, 1
        ghost_x, ghost_y = COLS - 2, ROWS - 2
        # State = (pacman position, ghost position)
        s = (pacman_x, pacman_y, ghost_x, ghost_y)
        while running:
            iter = iter + 1
            # Check for collisions (terminal: Pacman caught)
            if pacman_x == ghost_x and pacman_y == ghost_y:
                running = False
                # total_iterations += 1
            # Eat cookies: replace the '.' under Pacman with a space
            if labyrinth[pacman_y][pacman_x] == ".":
                labyrinth[pacman_y] = labyrinth[pacman_y][:pacman_x] + " " + labyrinth[pacman_y][pacman_x+1:]
            # Check if all cookies are eaten (terminal: win — counted)
            if all("." not in row for row in labyrinth):
                running = False
                total_iterations += 1
            # Q-Learning: epsilon-greedy action, environment step, TD update.
            # NOTE(review): this still runs once more after a terminal flag
            # is set above; the episode only ends when the loop re-checks.
            a = rl.epsilon_greedy(q, s, 0.025)
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new
            # Update Pacman position (same encoding as move_pacman, on ints)
            if a == 0: # left
                pacman_x = max(1, pacman_x - 1) if labyrinth[pacman_y][pacman_x - 1] != "#" else pacman_x
            elif a == 1: # right
                pacman_x = min(COLS - 2, pacman_x + 1) if labyrinth[pacman_y][pacman_x + 1] != "#" else pacman_x
            elif a == 2: # up
                pacman_y = max(1, pacman_y - 1) if labyrinth[pacman_y - 1][pacman_x] != "#" else pacman_y
            elif a == 3: # down
                pacman_y = min(ROWS - 2, pacman_y + 1) if labyrinth[pacman_y + 1][pacman_x] != "#" else pacman_y
            # Ghost movement: greedy chase, but only every third step
            if iter % 3 == 0:
                if ghost_x < pacman_x and labyrinth[ghost_y][ghost_x + 1] != "#":
                    ghost_x += 1
                elif ghost_x > pacman_x and labyrinth[ghost_y][ghost_x - 1] != "#":
                    ghost_x -= 1
                elif ghost_y < pacman_y and labyrinth[ghost_y + 1][ghost_x] != "#":
                    ghost_y += 1
                elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
                    ghost_y -= 1
            # Re-pack the state from the freshly moved positions
            s = (pacman_x, pacman_y, ghost_x, ghost_y)
        # Progress report every 500 completed (won) games
        if total_iterations % 500 == 0:
            print(f"Training iteration {total_iterations}")
    return q
# Visualization function (with pygame)
def visualize(q, num_games=10):
    """Visualize the trained agent playing num_games games with pygame.

    Q-learning updates keep running during play (on-line learning), so q
    is mutated. Prints per-game outcomes and the final win rate.
    """
    global labyrinth
    games_won = 0
    games_lost = 0
    clock = pygame.time.Clock()
    for game_num in range(num_games):
        # Reset the maze (restore all cookies) for each game.
        labyrinth = [
            "##########",
            "#........#",
            "#.##..##.#",
            "#........#",
            "##########"
        ]
        running = True
        iter = 0
        # Initialize Pacman and Ghost positions
        pacman = Pacman(1, 1)
        ghost = Ghost(COLS - 2, ROWS - 2)
        s = (pacman.x, pacman.y, ghost.x, ghost.y)
        print(f"Game {game_num + 1}/{num_games}")
        # NOTE(review): every `running = False` below is followed by `break`,
        # so the `or iter < 300` clause never takes effect; `and` was likely
        # intended as a 300-step cap. Left unchanged to preserve behavior.
        while running or iter < 300:
            # Fix: drain the OS event queue each frame; without this the
            # window manager marks the pygame window as unresponsive.
            pygame.event.pump()
            screen.fill(BLACK)
            iter = iter + 1
            # Check for collisions (loss)
            if pacman.x == ghost.x and pacman.y == ghost.y:
                print("Game Over! The ghost caught Pacman.")
                running = False
                games_lost += 1
                break
            # Eat cookies: replace the '.' under Pacman with a space
            if labyrinth[pacman.y][pacman.x] == ".":
                labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
            # Check if all cookies are eaten (win)
            if all("." not in row for row in labyrinth):
                print("You Win! Pacman ate all the cookies.")
                running = False
                games_won += 1
                break
            # Q-Learning step (continues learning during visualization)
            a = rl.epsilon_greedy(q, s, 0.025)
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new
            move_pacman(pacman, a)
            # Ghost moves only every third frame
            if iter % 3 == 0:
                ghost.move_towards_pacman(pacman)
            s = (pacman.x, pacman.y, ghost.x, ghost.y)
            # Draw the frame
            draw_labyrinth()
            pacman.draw()
            ghost.draw()
            pygame.display.flip()
            tick_speed = 200  # frame-rate cap
            clock.tick(tick_speed)
    print("winrate: " + str(games_won / num_games))
# Main function
def main():
    """Entry point: load or train a Q-table, visualize the agent, persist it."""
    global labyrinth
    # Load existing Q-table or, if none is saved, train a fresh one.
    q = load_q_table("q_table.json")
    if q is None:
        q = rl.q_init()
        print("Training for 10000 iterations...")
        q = train(q, num_iterations=10000)
    try:
        print("\nTraining complete! Starting visualization...")
        visualize(q, num_games=100)
    finally:
        pygame.quit()
        # Fix: save inside `finally` so learning progress is persisted even
        # if visualization is interrupted (e.g. KeyboardInterrupt / crash);
        # previously an exception skipped the save entirely.
        save_q_table(q, "q_table.json")


if __name__ == "__main__":
    main()