"""Micro-Pacman: a tabular Q-learning agent on a tiny pygame labyrinth.

Pacman is controlled by an epsilon-greedy Q-learning policy, while a single
ghost chases it greedily. An episode ends when the ghost catches Pacman
(loss) or when every cookie has been eaten (win). Every step is rendered.
"""
import sys
from copy import deepcopy
from random import random

import numpy as np
import pygame
import math

# Initialize pygame
pygame.init()

# Define constants
# NOTE(review): SCREEN_WIDTH / SCREEN_HEIGHT are not referenced anywhere in
# the visible code; the window size is derived from the labyrinth instead.
SCREEN_WIDTH = 400
SCREEN_HEIGHT = 400
CELL_SIZE = 40

# Define colors
YELLOW = (255, 255, 0)
RED = (255, 0, 0)
WHITE = (255, 255, 255)
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)

# Labyrinth as a list of strings: "#" is a wall, "." is a cookie.
labyrinth_origin = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########",
]

# Get labyrinth dimensions
ROWS = len(labyrinth_origin)
COLS = len(labyrinth_origin[0])

# Initialize game screen
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman")


class Pacman:
    """The learning agent: a grid position plus a frame counter for the mouth."""

    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.count = 0  # number of draw() calls; drives the mouth animation

    def move(self, dx, dy):
        """Move by (dx, dy) unless the target cell of the global labyrinth is a wall."""
        new_x, new_y = self.x + dx, self.y + dy
        if labyrinth[new_y][new_x] != "#":
            self.x = new_x
            self.y = new_y

    def draw(self):
        """Draw Pacman as a yellow disc; the mouth is shown on every second call."""
        radius = CELL_SIZE // 2 - 4
        start_angle = math.pi / 6
        end_angle = -math.pi / 6
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2

        pygame.draw.circle(screen, YELLOW, (center_x, center_y), radius)

        # Mouth corners lie slightly outside the disc (factor 1.3) so the
        # black wedge cuts cleanly through its edge.
        start_pos = (
            center_x + int(radius * 1.3 * math.cos(start_angle)),
            center_y - int(radius * 1.3 * math.sin(start_angle)),
        )
        end_pos = (
            center_x + int(radius * 1.3 * math.cos(end_angle)),
            center_y - int(radius * 1.3 * math.sin(end_angle)),
        )

        self.count += 1
        if self.count % 2 == 0:
            # Draw the mouth by filling a polygon
            pygame.draw.polygon(
                screen, BLACK, [(center_x, center_y), start_pos, end_pos]
            )

    def caught(self, ghost):
        """Return True when Pacman and the ghost occupy the same cell."""
        return self.x == ghost.x and self.y == ghost.y


class Ghost:
    """The opponent, rendered as pixel art, that greedily chases Pacman."""

    # Define the pixel art for the ghost using strings ("#" = filled pixel)
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # ",
    ]

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def move_towards_pacman(self, pacman):
        """Take at most one step towards Pacman.

        The first branch that is both needed and not blocked by a wall wins,
        tried in the order right, left, down, up. If none applies the ghost
        stays where it is.
        """
        if self.x < pacman.x and labyrinth[self.y][self.x + 1] != "#":
            self.x += 1
        elif self.x > pacman.x and labyrinth[self.y][self.x - 1] != "#":
            self.x -= 1
        elif self.y < pacman.y and labyrinth[self.y + 1][self.x] != "#":
            self.y += 1
        elif self.y > pacman.y and labyrinth[self.y - 1][self.x] != "#":
            self.y -= 1

    def draw(self):
        """Draw the ghost's pixel art in red inside its current cell."""
        # Size of each pixel in the ghost art
        pixel_size = CELL_SIZE // len(self.ghost_pixels)
        for row_idx, row in enumerate(self.ghost_pixels):
            for col_idx, pixel in enumerate(row):
                if pixel == "#":
                    pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
                    pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
                    pygame.draw.rect(
                        screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size)
                    )


def draw_labyrinth():
    """Draw walls (blue squares) and cookies (white dots) of the global labyrinth."""
    for y, row in enumerate(labyrinth):
        for x, cell in enumerate(row):
            if cell == "#":
                pygame.draw.rect(
                    screen,
                    BLUE,
                    (x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE),
                )
            elif cell == ".":
                pygame.draw.circle(
                    screen,
                    WHITE,
                    (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2),
                    5,
                )


def calcState(pacman, ghost, labyrinth):
    """Encode the situation as a single integer index into the Q-table.

    The state combines Pacman's cell, the ghost's cell and four bits telling
    whether a cookie lies directly above, below, left of and right of Pacman.

    Args:
        pacman: object with integer ``x`` / ``y`` grid coordinates.
        ghost: object with integer ``x`` / ``y`` grid coordinates.
        labyrinth: list of equally long strings; "." marks a cookie.

    Returns:
        An int in ``range((rows * cols) ** 2 * 16)``.
    """
    rows = len(labyrinth)
    cols = len(labyrinth[0])
    p_x, p_y = pacman.x, pacman.y
    g_x, g_y = ghost.x, ghost.y

    pacman_index = p_y * cols + p_x
    ghost_index = g_y * cols + g_x
    position_state = pacman_index * (rows * cols) + ghost_index

    # Check for cookies in the four cells adjacent to Pacman. Vertical
    # neighbours are bounded by the row count and horizontal neighbours by
    # the column count, so every direction is sensed everywhere on the grid.
    cookie_up = int(p_y > 0 and labyrinth[p_y - 1][p_x] == ".")
    cookie_down = int(p_y < rows - 1 and labyrinth[p_y + 1][p_x] == ".")
    cookie_left = int(p_x > 0 and labyrinth[p_y][p_x - 1] == ".")
    cookie_right = int(p_x < cols - 1 and labyrinth[p_y][p_x + 1] == ".")

    # Bit layout: y-1 -> bit 3, y+1 -> bit 2, x-1 -> bit 1, x+1 -> bit 0.
    cookie_state = (
        (cookie_up << 3) | (cookie_down << 2) | (cookie_left << 1) | cookie_right
    )
    return position_state * 16 + cookie_state


def _random_free_cell(grid, blocked=None):
    """Return a random (x, y) that is not a wall and differs from ``blocked``."""
    while True:
        x = np.random.randint(COLS)
        y = np.random.randint(ROWS)
        if grid[y][x] != "#" and (x, y) != blocked:
            return x, y


# Use 4 or 5 bits (16 or 32 states) to encode the pellets.
# More than 64000 states are infeasible.
# Idea: give Pacman a sense of whether there are still pellets to its
# left/right/up/down.
clock = pygame.time.Clock()

# q[s][a] starts in 0..0.1; one row per (pacman cell, ghost cell, cookie bits).
q = np.random.rand(((ROWS * COLS) ** 2) * 16, 4) * 0.1

alpha = 0.5  # learning rate
gamma = 0.9  # discount factor
epsilon = 10  # exploration probability in percent (epsilon-greedy)

# Action index -> (dx, dy): 0 = down, 1 = up, 2 = left, 3 = right.
ACTIONS = ((0, 1), (0, -1), (-1, 0), (1, 0))

max_iter = 0  # longest episode seen so far, in steps
step = 0
episode = 0
lose = 0
win = 0

while True:
    episode += 1
    if episode % 1000 == 0:
        print("Round: ", episode)
        print("Won: ", win, " Lose: ", lose)

    # Initialize Pacman and Ghost positions on a fresh copy of the maze
    labyrinth = deepcopy(labyrinth_origin)
    pacman = Pacman(*_random_free_cell(labyrinth))
    ghost = Ghost(*_random_free_cell(labyrinth, blocked=(pacman.x, pacman.y)))
    done = False

    # `step` still holds the length of the previous episode here.
    if step > max_iter:
        max_iter = step
        print(max_iter)
    step = 0

    while not done:
        # Drain the event queue so the window stays responsive and can be
        # closed by the user.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

        # One-dimensional state
        s = calcState(pacman, ghost, labyrinth)

        if np.random.randint(100) < epsilon:
            # Epsilon-greedy: explore with a random action
            a = np.random.randint(4)
        else:
            # argmax yields the index, i.e. the action, with the largest q
            a = int(np.argmax(q[s]))

        # Pacman.move() itself refuses to walk into a wall
        pacman.move(*ACTIONS[a])

        # Ghost moves towards Pacman only on every third step
        if step % 3 == 0:
            ghost.move_towards_pacman(pacman)

        # New one-dimensional state; default reward is a small step penalty
        reward = -1
        new_s = calcState(pacman, ghost, labyrinth)

        if pacman.caught(ghost):
            reward = -10
            done = True
            lose += 1
        elif labyrinth[pacman.y][pacman.x] == ".":
            # Eat the cookie by blanking the cell in its row string
            row = labyrinth[pacman.y]
            labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]
            reward = 1
            if not any("." in line for line in labyrinth):
                done = True
                reward = 10
                win += 1

        # Q-learning update. A terminal transition has no successor value,
        # so the target is the reward alone.
        target = reward if done else reward + gamma * np.max(q[new_s])
        q[s][a] += alpha * (target - q[s][a])

        # Stop exploring after 100000 episodes and act purely greedily
        if episode > 100000:
            epsilon = 0

        draw_labyrinth()
        pacman.draw()
        ghost.draw()

        # Update display
        pygame.display.flip()

        # Cap the frame rate at 20 frames per second
        clock.tick(20)
        screen.fill(BLACK)
        step += 1