# MLE/04_pacman_rl/pacman.py

import pygame
import random
import math
import reinforcement_learning as rl
import time

# Initialize pygame (must happen before the display is created below)
pygame.init()

# Define constants
# NOTE(review): SCREEN_WIDTH / SCREEN_HEIGHT are not referenced anywhere in the
# visible code; the window size is derived from the labyrinth dimensions instead.
SCREEN_WIDTH = 400
SCREEN_HEIGHT = 400
CELL_SIZE = 40  # edge length of one labyrinth cell, in pixels

# Define colors (RGB tuples)
YELLOW = (255, 255, 0)  # Pacman's body
RED = (255, 0, 0)  # ghost pixels
WHITE = (255, 255, 255)  # cookies
BLUE = (0, 0, 255)  # walls
BLACK = (0, 0, 0)  # background and Pacman's mouth

# Labyrinth as a list of equally long strings, one string per grid row.
# "#" is a wall, "." is a cookie; main() replaces an eaten cookie with " ".
# Cells are addressed as labyrinth[y][x].
labyrinth = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########"
]

# Get labyrinth dimensions (in cells)
ROWS = len(labyrinth)
COLS = len(labyrinth[0])

# Initialize game screen: exactly one CELL_SIZE square per labyrinth cell
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman")

# Pacman class
class Pacman:
    """Player character that lives on the labyrinth grid.

    Attributes:
        x: Current grid column (index into a labyrinth row).
        y: Current grid row (index into ``labyrinth``).
        count: Number of ``draw()`` calls so far; the mouth is drawn on every
            second call, which produces the chomping animation.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.count = 0

    def move(self, dx, dy):
        """Step by ``(dx, dy)`` cells if the target cell is enterable.

        The move is ignored when the target is a wall (``"#"``) or lies
        outside the labyrinth.
        """
        new_x, new_y = self.x + dx, self.y + dy
        # Explicit bounds check: without it a negative index silently wraps to
        # the opposite edge and an index past the end raises IndexError.
        if not 0 <= new_y < len(labyrinth):
            return
        if not 0 <= new_x < len(labyrinth[new_y]):
            return
        if labyrinth[new_y][new_x] != "#":
            self.x = new_x
            self.y = new_y

    def draw(self):
        """Draw Pacman as a yellow disc, with an open mouth on every second call."""
        radius = CELL_SIZE // 2 - 4
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2
        pygame.draw.circle(screen, YELLOW, (center_x, center_y), radius)

        self.count += 1
        if self.count % 2 != 0:
            return  # mouth closed on odd frames

        # The mouth is a black wedge from the centre to two points slightly
        # outside the disc (factor 1.3), 30 degrees above and below the x axis.
        start_angle = math.pi / 6
        end_angle = -math.pi / 6
        start_pos = (center_x + int(radius * 1.3 * math.cos(start_angle)),
                     center_y - int(radius * 1.3 * math.sin(start_angle)))
        end_pos = (center_x + int(radius * 1.3 * math.cos(end_angle)),
                   center_y - int(radius * 1.3 * math.sin(end_angle)))
        pygame.draw.polygon(screen, BLACK, [(center_x, center_y), start_pos, end_pos])

# Ghost class with pixel art
class Ghost:
    """Enemy that chases Pacman one cell at a time and is drawn as pixel art."""

    # 6x6 sprite: every "#" becomes one filled square, a space stays empty.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # "
    ]

    def __init__(self, x, y):
        self.x, self.y = x, y

    def move_towards_pacman(self, pacman):
        """Take at most one step that reduces the distance to ``pacman``.

        Directions are tried in the fixed order right, left, down, up; the
        first one that points towards Pacman and is not a wall is taken.
        If none qualifies the ghost stays where it is.
        """
        def passable(col, row):
            return labyrinth[row][col] != "#"

        gx, gy = self.x, self.y
        if gx < pacman.x and passable(gx + 1, gy):
            self.x = gx + 1
            return
        if gx > pacman.x and passable(gx - 1, gy):
            self.x = gx - 1
            return
        if gy < pacman.y and passable(gx, gy + 1):
            self.y = gy + 1
            return
        if gy > pacman.y and passable(gx, gy - 1):
            self.y = gy - 1

    def draw(self):
        """Paint the sprite in red inside the ghost's current cell."""
        # Side length of one sprite pixel so the rows fit into a cell.
        side = CELL_SIZE // len(self.ghost_pixels)
        left = self.x * CELL_SIZE
        top = self.y * CELL_SIZE
        for r, line in enumerate(self.ghost_pixels):
            for c, mark in enumerate(line):
                if mark != "#":
                    continue
                pygame.draw.rect(screen, RED, (left + c * side, top + r * side, side, side))

# Draw walls and cookies
def draw_labyrinth():
    """Render the labyrinth: a blue square per wall, a small white dot per cookie."""
    half_cell = CELL_SIZE // 2
    for row_idx, line in enumerate(labyrinth):
        top = row_idx * CELL_SIZE
        for col_idx, tile in enumerate(line):
            left = col_idx * CELL_SIZE
            if tile == "#":
                pygame.draw.rect(screen, BLUE, (left, top, CELL_SIZE, CELL_SIZE))
                continue
            if tile == ".":
                # Cookie: radius-5 dot centred in the cell.
                pygame.draw.circle(screen, WHITE, (left + half_cell, top + half_cell), 5)

def move_pacman(pacman, a):
    """Apply action code ``a`` to ``pacman`` as a one-cell move.

    Codes: 0 = left, 1 = right, 2 = up, 3 = down. Any other value does nothing.
    Whether the move actually happens is decided by ``pacman.move``.
    """
    # (dx, dy) per action code; the position in the tuple is the code.
    steps = ((-1, 0), (1, 0), (0, -1), (0, 1))
    for code, (dx, dy) in enumerate(steps):
        if a == code:
            pacman.move(dx, dy)

# Main game function
def main():
    """Run the Micro-Pacman game loop, training a Q-table between frames.

    Per frame, in order:
      1. Handle window events (closing the window ends the loop).
      2. Move the ghost towards Pacman on every third frame.
      3. Detect a collision (game over), eat the cookie under Pacman, and
         detect the win condition (no cookies left).
      4. While the game is still running: simulate one training episode of at
         most 50 steps with the ``rl`` helpers, updating the Q-table ``q``,
         then choose Pacman's real move epsilon-greedily and apply it.
      5. Draw the frame and cap the loop at 5 frames per second.

    The function returns after shutting pygame down.
    """
    # Rows of the module-level labyrinth are replaced in place when cookies
    # are eaten.
    global labyrinth
    clock = pygame.time.Clock()

    # Initialize Pacman and Ghost positions
    pacman = Pacman(1, 1)
    ghost = Ghost(COLS - 2, ROWS - 2)

    q = rl.q_init()
    gamma = 0.90  # discount applied to the best next-state value in the update
    alpha = 0.2  # fraction of the temporal-difference error applied per update
    max_iterations = 50  # upper bound on simulated steps per frame

    # Game loop
    running = True
    frame = 0
    while running:
        screen.fill(BLACK)
        frame += 1

        # Handle events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        if frame % 3 == 0:
            # Ghost moves towards Pacman (a third of Pacman's speed)
            ghost.move_towards_pacman(pacman)

        # Check for collisions (game over if ghost catches pacman)
        if pacman.x == ghost.x and pacman.y == ghost.y:
            print("Game Over! The ghost caught Pacman.")
            running = False

        # Eat cookies: strings are immutable, so rebuild the row without the cookie
        if labyrinth[pacman.y][pacman.x] == ".":
            labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]

        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            print("You Win! Pacman ate all the cookies.")
            running = False

        # Train and act only while the game is live; once it has ended (or the
        # window was closed) the final frame is still drawn below, but no
        # further rollout or Pacman move is performed.
        if running:
            # Start the rollout from the live positions so it matches the
            # labyrinth snapshot taken on the next line.
            current_state = (pacman.x, pacman.y, ghost.x, ghost.y)  # tuple so the state is hashable
            s = current_state
            # Mutable per-row copy so the simulation cannot alter the real board.
            labyrinth_copy = [list(row) for row in labyrinth]
            s_not_terminal = True
            iteration = 0
            while s_not_terminal and iteration < max_iterations:
                iteration += 1
                print("s: " + str(s))
                print("q[s] before action: " + str(q[s]))

                a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down
                s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy)

                # Q-learning style update, rounded to two decimals.
                # NOTE(review): max_q receives the real `labyrinth`, not
                # `labyrinth_copy` -- confirm this is intended.
                q[s][a] += round(alpha * (r + gamma * rl.max_q(q, s_new, labyrinth) - q[s][a]), 2)

                s = s_new

                # Simulated episode ends when every cookie in the copy is gone ...
                if all("." not in row for row in labyrinth_copy):
                    s_not_terminal = False

                # ... or when the simulated ghost catches the simulated Pacman.
                if s[0] == s[2] and s[1] == s[3]:
                    s_not_terminal = False
                    # NOTE(review): `s` is already the collision state here, so
                    # this overwrites the entry of the state that was entered,
                    # not of the state/action that led to it -- confirm.
                    q[s][a] = 0.01
                    print("There was just a collision!!!")
                    print("s: " + str(s))

                # Slows the simulation down; blocks the frame for up to
                # max_iterations * 25 ms.
                time.sleep(0.025)

            if iteration >= max_iterations:
                print(f"Max iterations reached ({max_iterations}), breaking out of loop")

            # Pick the real move from the freshly updated Q-table.
            a = rl.epsilon_greedy(q, current_state)  # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down
            move_pacman(pacman, a)
            print("NEW LOOP")

        # Draw the labyrinth, pacman, and ghost
        draw_labyrinth()
        pacman.draw()
        ghost.draw()

        # Update display
        pygame.display.flip()

        # Cap the frame rate
        clock.tick(5)

    pygame.quit()

# Start the game only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

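"""
Disabled experiment, kept for reference only (this bare string is never executed):
propagate a Q-update to every state in which Pacman stands on the new cell.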
    for state_key in q:
        if state_key[0] == s_new[0] and state_key[1] == s_new[1]:
            # Update this state's Q-values based on the current transition, but only if action is valid
            if q[state_key][a] > 0:  # Only update if action is not blocked
                q[state_key][a] += round(alpha * (r + gamma * max(q[s_new]) - q[state_key][a]), 2)
            if q[state_key][opposite_action[a]] > 0:  # Only update if opposite action is not blocked
                q[state_key][opposite_action[a]] += round(alpha * (r + gamma * max(q[s_new]) - q[state_key][opposite_action[a]]), 2)
    print("s_new: " + str(s_new))
    print("q[s] after action with manipulated a: " + str(q[s]))
    print("q[s_new] after action: " + str(q[s_new]))
    print()
"""