# Listing metadata from the code host: 311 lines, 9.8 KiB, Python
import ast
import json
import math
import os

import pygame

import reinforcement_learning as rl

# Start up the pygame modules before the display is created
pygame.init()

# Nominal window size and the edge length of one grid cell, in pixels
SCREEN_WIDTH = SCREEN_HEIGHT = 400
CELL_SIZE = 40

# RGB colour tuples used by the drawing code
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
YELLOW = (255, 255, 0)
BLUE = (0, 0, 255)

# Maze layout, one string per row: "#" is a wall, "." is a cookie
labyrinth = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########",
]

# Grid size derived from the layout
ROWS, COLS = len(labyrinth), len(labyrinth[0])

# Q-learning parameters: ALPHA scales each update step, GAMMA discounts
# the value of the follow-up state
ALPHA = 0.2
GAMMA = 0.90

# Open a window sized to fit the maze grid exactly and set its title
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman")

# Pacman class
class Pacman:
    """Player sprite on the labyrinth grid.

    ``x`` / ``y`` are grid coordinates (column / row), not pixels.
    ``count`` counts ``draw`` calls and drives the mouth animation.
    """

    def __init__(self, x, y):
        """Place Pacman on cell (x, y) with the animation counter at zero."""
        self.x, self.y = x, y
        self.count = 0

    def move(self, dx, dy):
        """Step by (dx, dy) cells; do nothing if the target cell is a wall."""
        target_x = self.x + dx
        target_y = self.y + dy
        if labyrinth[target_y][target_x] == "#":
            return
        self.x, self.y = target_x, target_y

    def draw(self):
        """Draw the yellow body and, on every second call, the open mouth."""
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2
        radius = CELL_SIZE // 2 - 4
        pygame.draw.circle(screen, YELLOW, (center_x, center_y), radius)

        self.count += 1
        if self.count % 2 != 0:
            return

        # The mouth is a black triangle from the centre to two points that
        # lie just outside the body at +30 and -30 degrees.
        reach = radius * 1.3
        corners = [(center_x, center_y)]
        for angle in (math.pi / 6, -math.pi / 6):
            corners.append((center_x + int(reach * math.cos(angle)),
                            center_y - int(reach * math.sin(angle))))
        pygame.draw.polygon(screen, BLACK, corners)

# Ghost class with pixel art
class Ghost:
    """Enemy sprite that walks toward Pacman one cell at a time."""

    # Bitmap of the ghost, one string per pixel row: "#" is a filled pixel
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # ",
    ]

    def __init__(self, x, y):
        """Place the ghost on grid cell (x, y)."""
        self.x, self.y = x, y

    def move_towards_pacman(self, pacman):
        """Take at most one step toward ``pacman``.

        Directions are tried in the order right, left, down, up; the first
        one that reduces the distance and is not blocked by a wall is taken.
        """
        candidates = (
            (self.x < pacman.x, 1, 0),
            (self.x > pacman.x, -1, 0),
            (self.y < pacman.y, 0, 1),
            (self.y > pacman.y, 0, -1),
        )
        for wanted, dx, dy in candidates:
            if wanted and labyrinth[self.y + dy][self.x + dx] != "#":
                self.x += dx
                self.y += dy
                return

    def draw(self):
        """Paint the bitmap in red, anchored at the cell's top-left corner."""
        side = CELL_SIZE // len(self.ghost_pixels)  # edge of one art pixel
        left = self.x * CELL_SIZE
        top = self.y * CELL_SIZE
        for r, line in enumerate(self.ghost_pixels):
            for c, mark in enumerate(line):
                if mark != "#":
                    continue
                pygame.draw.rect(screen, RED, (left + c * side, top + r * side, side, side))

# Draw walls and cookies
def draw_labyrinth():
    """Paint every wall as a blue square and every cookie as a white dot."""
    half = CELL_SIZE // 2
    for row_idx, line in enumerate(labyrinth):
        top = row_idx * CELL_SIZE
        for col_idx, symbol in enumerate(line):
            left = col_idx * CELL_SIZE
            if symbol == "#":
                pygame.draw.rect(screen, BLUE, (left, top, CELL_SIZE, CELL_SIZE))
            elif symbol == ".":
                pygame.draw.circle(screen, WHITE, (left + half, top + half), 5)

def move_pacman(pacman, a):
    """Turn action index ``a`` into a one-cell move of ``pacman``.

    0 = left, 1 = right, 2 = up, 3 = down; any other value is ignored.
    """
    steps = ((-1, 0), (1, 0), (0, -1), (0, 1))
    for action, (dx, dy) in enumerate(steps):
        if a == action:
            pacman.move(dx, dy)

def save_q_table(q, filename="q_table.json"):
    """Write the Q-table ``q`` to ``filename`` as JSON.

    JSON object keys have to be strings, so each key of ``q`` is stored as
    its ``str()`` form.
    """
    serializable = {}
    for state, values in q.items():
        serializable[str(state)] = values

    with open(filename, 'w') as out:
        json.dump(serializable, out)
    print(f"Q-table saved to {filename}")

def load_q_table(filename="q_table.json"):
    """Load a Q-table from the JSON file ``filename``.

    The file is expected to map the string form of each state key (for
    example ``"(1, 1, 8, 3)"``) to that state's stored values.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        A dict whose keys are the parsed state keys, or ``None`` if
        ``filename`` does not exist.

    Raises:
        ValueError: If the file is not valid JSON or a key is not a plain
            Python literal.
    """
    if not os.path.exists(filename):
        print(f"No saved Q-table found at {filename}. Starting fresh.")
        return None

    with open(filename, 'r') as f:
        q_json = json.load(f)

    # SECURITY: the keys come from a file on disk. They used to be rebuilt
    # with eval(), which would execute any expression stored in that file.
    # ast.literal_eval only accepts literals (tuples, numbers, strings, ...).
    q = {}
    for key_text, values in q_json.items():
        try:
            key = ast.literal_eval(key_text)
        except (ValueError, SyntaxError) as err:
            raise ValueError(
                f"Invalid Q-table key {key_text!r} in {filename}"
            ) from err
        q[key] = values

    print(f"Q-table loaded from {filename}")
    return q

# Training function (without visualization)
def train(q, num_iterations=10000):
    """Run Q-learning episodes without drawing until the step budget is used.

    Every episode starts from a fresh labyrinth with Pacman at (1, 1) and the
    ghost at (COLS - 2, ROWS - 2). An episode ends when both share a cell,
    when no cookie is left, or when ``num_iterations`` total steps have been
    taken. ``q`` is updated in place and returned.
    """
    global labyrinth

    outer_iter = 0  # finished episodes
    total_iterations = 0  # steps taken across all episodes

    while total_iterations < num_iterations:
        labyrinth = [
            "##########",
            "#........#",
            "#.##..##.#",
            "#........#",
            "##########"
        ]
        step = 0

        # Positions are tracked as plain integers; no sprite objects needed
        px, py = 1, 1
        gx, gy = COLS - 2, ROWS - 2
        s = (px, py, gx, gy)

        while total_iterations < num_iterations:
            step += 1
            total_iterations += 1

            # The ghost caught Pacman: the episode is over
            if px == gx and py == gy:
                break

            # Eat the cookie on the current cell
            if labyrinth[py][px] == ".":
                row = labyrinth[py]
                labyrinth[py] = row[:px] + " " + row[px + 1:]

            # No cookie left anywhere: the episode is over
            if not any("." in row for row in labyrinth):
                break

            # Q-learning step
            a = rl.epsilon_greedy(q, s)
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new

            # Apply the chosen action to Pacman unless a wall blocks it
            if a == 0 and labyrinth[py][px - 1] != "#":  # left
                px = max(1, px - 1)
            elif a == 1 and labyrinth[py][px + 1] != "#":  # right
                px = min(COLS - 2, px + 1)
            elif a == 2 and labyrinth[py - 1][px] != "#":  # up
                py = max(1, py - 1)
            elif a == 3 and labyrinth[py + 1][px] != "#":  # down
                py = min(ROWS - 2, py + 1)

            # The ghost moves on every third step, horizontal axis first
            if step % 3 == 0:
                chase = (
                    (gx < px, 1, 0),
                    (gx > px, -1, 0),
                    (gy < py, 0, 1),
                    (gy > py, 0, -1),
                )
                for wanted, dx, dy in chase:
                    if wanted and labyrinth[gy + dy][gx + dx] != "#":
                        gx, gy = gx + dx, gy + dy
                        break

            # NOTE(review): placed at loop level; the source's indentation
            # was lost, confirm this is not meant to sit inside the `if`.
            s = (px, py, gx, gy)

        outer_iter += 1
        if outer_iter % 100 == 0:
            print(f"Training iteration {outer_iter}, Total steps: {total_iterations}")

    return q

# Visualization function (with pygame)
def visualize(q, num_games=10):
    """Play ``num_games`` games on screen using the Q-table ``q``.

    Every game starts from a fresh labyrinth with Pacman at (1, 1) and the
    ghost at (COLS - 2, ROWS - 2). It ends when the ghost reaches Pacman or
    no cookie is left. Actions are still picked with ``rl.epsilon_greedy``
    and ``q`` keeps being updated in place while playing.

    Args:
        q: Q-table indexed as ``q[state][action]``.
        num_games: Number of games to play back to back.

    Returns:
        None. Returns early if the user closes the window.
    """
    global labyrinth

    clock = pygame.time.Clock()
    frames_per_second = 20

    for game_num in range(num_games):
        labyrinth = [
            "##########",
            "#........#",
            "#.##..##.#",
            "#........#",
            "##########"
        ]
        step = 0

        # Initialize Pacman and Ghost positions
        pacman = Pacman(1, 1)
        ghost = Ghost(COLS - 2, ROWS - 2)
        s = (pacman.x, pacman.y, ghost.x, ghost.y)

        print(f"Game {game_num + 1}/{num_games}")

        # The old condition `running or iter < 100` never ended the loop on
        # its own; a game only ever finished through one of the breaks below.
        while True:
            # Service the event queue every frame. Without this the window
            # is reported as "not responding" and cannot be closed.
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    return

            screen.fill(BLACK)
            step += 1

            # Check for collisions
            if pacman.x == ghost.x and pacman.y == ghost.y:
                print("Game Over! The ghost caught Pacman.")
                break

            # Eat cookies
            if labyrinth[pacman.y][pacman.x] == ".":
                row = labyrinth[pacman.y]
                labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]

            # Check if all cookies are eaten
            if all("." not in row for row in labyrinth):
                print("You Win! Pacman ate all the cookies.")
                break

            # Q-learning step (the agent keeps learning during playback)
            a = rl.epsilon_greedy(q, s)
            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
            s = s_new

            move_pacman(pacman, a)

            # The ghost moves on every third step
            if step % 3 == 0:
                ghost.move_towards_pacman(pacman)

            # Keep the state in sync with the sprites that are drawn.
            # NOTE(review): placed at loop level to match train(); the
            # source's indentation was lost, confirm against the original.
            s = (pacman.x, pacman.y, ghost.x, ghost.y)

            # Draw
            draw_labyrinth()
            pacman.draw()
            ghost.draw()
            pygame.display.flip()

            clock.tick(frames_per_second)

# Main function
def main():
    """Train the agent, show it playing, and store the Q-table on disk.

    Steps:
      1. Load ``q_table.json`` if it exists, otherwise start from
         ``rl.q_init()``.
      2. Train without drawing and save the result immediately.
      3. Play the games on screen, shut pygame down, and save again
         (the Q-table keeps changing during playback).
    """
    num_iterations = 20000
    q_table_file = "q_table.json"

    # Load existing Q-table or create new one
    q = load_q_table(q_table_file)
    if q is None:
        q = rl.q_init()

    # The announced count is taken from the value actually passed to train()
    print(f"Training for {num_iterations} iterations...")
    q = train(q, num_iterations=num_iterations)

    # Save right away so a crash or a closed window during playback cannot
    # throw the training result away
    save_q_table(q, q_table_file)

    print("\nTraining complete! Starting visualization...")
    try:
        visualize(q, num_games=10)
    finally:
        # Release the display even if playback raised
        pygame.quit()

    # Save Q-table when exiting
    save_q_table(q, q_table_file)

# Start the program only when this file is run as a script
if __name__ == "__main__":
    main()