MLE_Assignments/Aufgabe_4.py

import sys
from copy import deepcopy
from random import random

import numpy as np
import pygame
import math


# Initialize all pygame modules; must run before the display is created below.
pygame.init()

# Geometry constants (pixels).
# SCREEN_WIDTH / SCREEN_HEIGHT are not referenced in the visible code: the
# window size is derived from the labyrinth dimensions and CELL_SIZE instead.
SCREEN_WIDTH = 400
SCREEN_HEIGHT = 400
CELL_SIZE = 40  # edge length of one labyrinth cell

# RGB colors
YELLOW = (255, 255, 0)  # Pacman body
RED = (255, 0, 0)  # ghost pixels
WHITE = (255, 255, 255)  # cookies
BLUE = (0, 0, 255)  # walls
BLACK = (0, 0, 0)  # background and Pacman's mouth

# Labyrinth template, one string per row: "#" is a wall, "." is a cell that
# still holds a cookie. Each round works on a copy so this template is never
# modified.
labyrinth_origin = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########"
]

# Labyrinth dimensions in cells (all rows are assumed to be as long as the first).
ROWS = len(labyrinth_origin)
COLS = len(labyrinth_origin[0])

# Create the window, sized to fit the labyrinth exactly.
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman")

# Pacman class
class Pacman:
    """The learning agent: a grid position plus a frame counter for animation."""

    def __init__(self, x, y):
        """Place Pacman on cell (x, y) with the animation counter at zero."""
        self.x = x
        self.y = y
        # Number of draw() calls so far; its parity toggles the mouth.
        self.count = 0

    def move(self, dx, dy):
        """Step by (dx, dy) unless the target cell of the global labyrinth is a wall."""
        target_x = self.x + dx
        target_y = self.y + dy
        if labyrinth[target_y][target_x] == "#":
            return
        self.x, self.y = target_x, target_y

    def draw(self):
        """Draw Pacman on the global screen; the mouth is shown on every second call."""
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2
        radius = CELL_SIZE // 2 - 4
        pygame.draw.circle(screen, YELLOW, (center_x, center_y), radius)

        self.count += 1
        if self.count % 2 != 0:
            return

        # The mouth is a black wedge from the center to two points slightly
        # outside the body, 30 degrees above and below the horizontal axis.
        reach = radius * 1.3
        wedge = [(center_x, center_y)]
        for angle in (math.pi / 6, -math.pi / 6):
            wedge.append((center_x + int(reach * math.cos(angle)),
                          center_y - int(reach * math.sin(angle))))
        pygame.draw.polygon(screen, BLACK, wedge)

    def caught(self, ghost):
        """Return whether Pacman and *ghost* occupy the same cell."""
        same_column = self.x == ghost.x
        same_row = self.y == ghost.y
        return same_column and same_row

# Ghost class with pixel art
class Ghost:
    """The opponent: a grid position, a simple chase rule and a pixel-art sprite."""

    # Sprite bitmap, one string per pixel row; "#" marks a filled pixel.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # "
    ]

    def __init__(self, x, y):
        """Place the ghost on cell (x, y)."""
        self.x = x
        self.y = y

    def move_towards_pacman(self, pacman):
        """Take at most one step towards *pacman*, preferring the horizontal axis.

        A step is only taken if the target cell of the global labyrinth is
        not a wall; if the horizontal step is blocked or unnecessary, the
        vertical one is tried instead.
        """
        # Sign of the horizontal offset: -1, 0 or +1.
        step_x = (pacman.x > self.x) - (pacman.x < self.x)
        if step_x != 0 and labyrinth[self.y][self.x + step_x] != "#":
            self.x += step_x
            return

        # Sign of the vertical offset: -1, 0 or +1.
        step_y = (pacman.y > self.y) - (pacman.y < self.y)
        if step_y != 0 and labyrinth[self.y + step_y][self.x] != "#":
            self.y += step_y

    def draw(self):
        """Paint the sprite into the ghost's cell on the global screen."""
        # Edge length of one sprite pixel, derived from the number of sprite rows.
        pixel_size = CELL_SIZE // len(self.ghost_pixels)
        origin_x = self.x * CELL_SIZE
        origin_y = self.y * CELL_SIZE
        for row_idx, row in enumerate(self.ghost_pixels):
            top = origin_y + row_idx * pixel_size
            for col_idx, pixel in enumerate(row):
                if pixel != "#":
                    continue
                left = origin_x + col_idx * pixel_size
                pygame.draw.rect(screen, RED, (left, top, pixel_size, pixel_size))

# Draw walls and cookies
def draw_labyrinth():
    """Render the global labyrinth: walls as blue squares, cookies as white dots."""
    half_cell = CELL_SIZE // 2
    for y, row in enumerate(labyrinth):
        top = y * CELL_SIZE
        for x, cell in enumerate(row):
            left = x * CELL_SIZE
            if cell == "#":
                pygame.draw.rect(screen, BLUE, (left, top, CELL_SIZE, CELL_SIZE))
            elif cell == ".":
                pygame.draw.circle(screen, WHITE, (left + half_cell, top + half_cell), 5)


def calcState(pacman, ghost, labyrinth):
    """Encode the game situation as a single integer state index.

    The state combines the positions of Pacman and the ghost with four
    "cookie sensor" bits that tell whether the cell directly above, below,
    left of and right of Pacman still holds a cookie.

    Args:
        pacman: object with integer ``x`` (column) and ``y`` (row) attributes.
        ghost: object with integer ``x`` (column) and ``y`` (row) attributes.
        labyrinth: sequence of equally long strings, one per row; ``'.'``
            marks a cookie.

    Returns:
        An integer in ``range((ROW * COL) ** 2 * 16)`` laid out as
        ``(pacman_index * ROW * COL + ghost_index) * 16 + cookie_bits`` with
        ``cookie_bits = up << 3 | down << 2 | left << 1 | right``.
    """
    ROW = len(labyrinth)
    COL = len(labyrinth[0])

    p_x, p_y = pacman.x, pacman.y
    g_x, g_y = ghost.x, ghost.y

    # Flatten each (x, y) position in row-major order, then combine both.
    pacman_index = p_y * COL + p_x
    ghost_index = g_y * COL + g_x
    position_state = pacman_index * (ROW * COL) + ghost_index

    def has_cookie(x, y):
        # Rows are bounded by ROW and columns by COL; cells outside the grid
        # count as "no cookie" instead of raising or wrapping around.
        return 1 if 0 <= y < ROW and 0 <= x < COL and labyrinth[y][x] == '.' else 0

    cookie_up = has_cookie(p_x, p_y - 1)
    cookie_down = has_cookie(p_x, p_y + 1)
    cookie_left = has_cookie(p_x - 1, p_y)
    cookie_right = has_cookie(p_x + 1, p_y)

    cookie_state = (cookie_up << 3) + (cookie_down << 2) + (cookie_left << 1) + cookie_right

    # 16 cookie-sensor combinations per position pair.
    return position_state * 16 + cookie_state


# Use 4 or 5 bits (16 or 32 states) to encode the pellets.
# More than 64000 states are infeasible.
# Idea: give Pacman a sense of whether there are still pellets to its left/right/up/down.

# Clock used to cap the frame rate once the game is rendered.
clock = pygame.time.Clock()
# Q-table q[state][action]: one row per state index produced by calcState
# ((ROWS * COLS)**2 position pairs times 16 cookie-sensor combinations) and one
# column per action, initialised with small random values in [0, 0.1).
q = np.random.rand(((ROWS * COLS)**2) * 16, 4)*0.1

alpha = 0.5  # learning rate
gamma = 0.9  # discount factor
epsilon = 10  # exploration probability in percent for epsilon-greedy action selection

# Bookkeeping for the training loop below.
# NOTE: `iter` and `round` shadow Python builtins of the same name.
max_iter = 0  # longest episode (in steps) seen so far
iter = 0  # step counter within the current episode
round = 0  # number of episodes started
lose = 0  # episodes that ended with Pacman being caught
win = 0  # episodes that ended with all cookies eaten
# Main loop: tabular Q-learning over endless episodes ("rounds"). Each round
# starts from a fresh labyrinth with Pacman and the ghost on random distinct
# free cells and ends when Pacman is caught or has eaten every cookie. After
# RENDER_AFTER_ROUND rounds exploration is switched off and the game is drawn.

# (dx, dy) applied to Pacman per action index: 0 = down, 1 = up, 2 = left, 3 = right.
ACTION_DELTAS = ((0, 1), (0, -1), (-1, 0), (1, 0))
# Number of silent training rounds before playback starts.
RENDER_AFTER_ROUND = 100000


def _handle_quit_events():
    """Drain the pygame event queue and exit cleanly when the window is closed."""
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()


def _random_free_cell(grid, exclude=None):
    """Return a random (x, y) in *grid* that is not a wall and differs from *exclude*."""
    while True:
        x = np.random.randint(COLS)
        y = np.random.randint(ROWS)
        if grid[y][x] != "#" and (x, y) != exclude:
            return x, y


while True:
    round += 1
    if round % 1000 == 0:
        print("Round: ", round)
        print("Won: ", win, " Lose: ", lose)

    # Without regular event processing the OS treats the window as frozen
    # during the long training phase and it cannot be closed.
    _handle_quit_events()

    # Fresh copy so eaten cookies of the previous round are restored.
    labyrinth = deepcopy(labyrinth_origin)
    pacman = Pacman(*_random_free_cell(labyrinth))
    ghost = Ghost(*_random_free_cell(labyrinth, exclude=(pacman.x, pacman.y)))

    # Report whenever the previous episode set a new length record.
    if iter > max_iter:
        max_iter = iter
        print(max_iter)
    iter = 0

    done = False
    while not done:
        # One-dimensional state index before acting.
        s = calcState(pacman, ghost, labyrinth)

        # Epsilon-greedy: explore with probability epsilon percent, otherwise
        # take the action with the highest Q-value in this state.
        if np.random.randint(100) < epsilon:
            a = np.random.randint(4)
        else:
            a = np.argmax(q[s])

        # Pacman.move ignores the step if it would run into a wall.
        dx, dy = ACTION_DELTAS[a]
        pacman.move(dx, dy)

        # The ghost only moves on every third step, so Pacman is faster.
        if iter % 3 == 0:
            ghost.move_towards_pacman(pacman)

        # Small step penalty by default; overridden by the events below.
        reward = -1
        if pacman.caught(ghost):
            reward = -10
            done = True
            lose += 1
        elif labyrinth[pacman.y][pacman.x] == '.':
            # Eat the cookie: rows are immutable strings, so rebuild the row.
            row = labyrinth[pacman.y]
            labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]
            reward = 1

        if not any('.' in row for row in labyrinth):
            done = True
            reward = 10
            win += 1

        # One-dimensional state index after the transition.
        new_s = calcState(pacman, ghost, labyrinth)

        # Q-learning update. A terminal transition has no successor state, so
        # its target is the reward alone; bootstrapping from q[new_s] there
        # would leak unrelated values into the estimate.
        future = 0.0 if done else gamma * np.max(q[new_s])
        q[s][a] += alpha * (reward + future - q[s][a])

        if round > RENDER_AFTER_ROUND:
            # Playback phase: act greedily and show the game.
            epsilon = 0
            _handle_quit_events()

            draw_labyrinth()
            pacman.draw()
            ghost.draw()

            # Show the frame, cap playback at 20 frames per second, then
            # clear the back buffer for the next frame.
            pygame.display.flip()
            clock.tick(20)
            screen.fill(BLACK)

        iter += 1