stuff persistent

master
Ruben-FreddyLoafers 2025-12-08 15:15:42 +01:00
parent c88d8d003e
commit 3fb0afd80e
2 changed files with 156 additions and 39 deletions

View File

@@ -1,6 +1,8 @@
 import pygame
 import math
 import reinforcement_learning as rl
+import json
+import os
 
 # Initialize pygame
 pygame.init()
@@ -120,19 +122,113 @@ def move_pacman(pacman, a):
     if a == 3:  # down
         pacman.move(0, 1)
 
-# Main game function
-def main():
+
+def save_q_table(q, filename="q_table.json"):
+    """Save Q-table to JSON file."""
+    # Convert tuple keys to strings for JSON serialization
+    q_json = {str(k): v for k, v in q.items()}
+    with open(filename, 'w') as f:
+        json.dump(q_json, f)
+    print(f"Q-table saved to {filename}")
+
+
+def load_q_table(filename="q_table.json"):
+    """Load Q-table from JSON file, or return None if file doesn't exist."""
+    if not os.path.exists(filename):
+        print(f"No saved Q-table found at {filename}. Starting fresh.")
+        return None
+    with open(filename, 'r') as f:
+        q_json = json.load(f)
+    # Convert string keys back to tuples
+    q = {eval(k): v for k, v in q_json.items()}
+    print(f"Q-table loaded from {filename}")
+    return q
+
+
+# Training function (without visualization)
+def train(q, num_iterations=10000):
+    """Train the agent for num_iterations without pygame visualization."""
+    global labyrinth
+
+    outer_iter = 0
+    total_iterations = 0
+    while total_iterations < num_iterations:
+        labyrinth = [
+            "##########",
+            "#........#",
+            "#.##..##.#",
+            "#........#",
+            "##########"
+        ]
+        running = True
+        iter = 0
+
+        # Initialize Pacman and Ghost positions (no visual objects needed)
+        pacman_x, pacman_y = 1, 1
+        ghost_x, ghost_y = COLS - 2, ROWS - 2
+        s = (pacman_x, pacman_y, ghost_x, ghost_y)
+
+        while running and total_iterations < num_iterations:
+            iter = iter + 1
+            total_iterations += 1
+
+            # Check for collisions
+            if pacman_x == ghost_x and pacman_y == ghost_y:
+                running = False
+                break
+
+            # Eat cookies
+            if labyrinth[pacman_y][pacman_x] == ".":
+                labyrinth[pacman_y] = labyrinth[pacman_y][:pacman_x] + " " + labyrinth[pacman_y][pacman_x+1:]
+
+            # Check if all cookies are eaten
+            if all("." not in row for row in labyrinth):
+                running = False
+                break
+
+            # Q-Learning
+            a = rl.epsilon_greedy(q, s)
+            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
+            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
+            s = s_new
+
+            # Update Pacman position
+            if a == 0:  # left
+                pacman_x = max(1, pacman_x - 1) if labyrinth[pacman_y][pacman_x - 1] != "#" else pacman_x
+            elif a == 1:  # right
+                pacman_x = min(COLS - 2, pacman_x + 1) if labyrinth[pacman_y][pacman_x + 1] != "#" else pacman_x
+            elif a == 2:  # up
+                pacman_y = max(1, pacman_y - 1) if labyrinth[pacman_y - 1][pacman_x] != "#" else pacman_y
+            elif a == 3:  # down
+                pacman_y = min(ROWS - 2, pacman_y + 1) if labyrinth[pacman_y + 1][pacman_x] != "#" else pacman_y
+
+            # Ghost movement
+            if iter % 3 == 0:
+                if ghost_x < pacman_x and labyrinth[ghost_y][ghost_x + 1] != "#":
+                    ghost_x += 1
+                elif ghost_x > pacman_x and labyrinth[ghost_y][ghost_x - 1] != "#":
+                    ghost_x -= 1
+                elif ghost_y < pacman_y and labyrinth[ghost_y + 1][ghost_x] != "#":
+                    ghost_y += 1
+                elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
+                    ghost_y -= 1
+
+            s = (pacman_x, pacman_y, ghost_x, ghost_y)
+
+        outer_iter += 1
+        if outer_iter % 100 == 0:
+            print(f"Training iteration {outer_iter}, Total steps: {total_iterations}")
+
+    return q
+
+
+# Visualization function (with pygame)
+def visualize(q, num_games=10):
+    """Visualize the trained agent playing the game."""
     global labyrinth
-    q = rl.q_init()
     clock = pygame.time.Clock()
 
-    # Game loop
-    not_won = True
-    outer_iter = 0
-    while not_won:
+    for game_num in range(num_games):
         labyrinth = [
             "##########",
             "#........#",
@@ -146,22 +242,17 @@ def main():
         # Initialize Pacman and Ghost positions
         pacman = Pacman(1, 1)
         ghost = Ghost(COLS - 2, ROWS - 2)
-        s = (pacman.x, pacman.y, ghost.x, ghost.y)  # as a tuple so the state becomes hashable
+        s = (pacman.x, pacman.y, ghost.x, ghost.y)
 
-        # Handle events
-        for event in pygame.event.get():
-            if event.type == pygame.QUIT:
-                not_won = False
-        print(outer_iter)
+        print(f"Game {game_num + 1}/{num_games}")
 
         while running or iter < 100:
             screen.fill(BLACK)
             iter = iter + 1
 
-            # Check for collisions (game over if ghost catches pacman)
+            # Check for collisions
             if pacman.x == ghost.x and pacman.y == ghost.y:
                 print("Game Over! The ghost caught Pacman.")
-                outer_iter = outer_iter + 1
                 running = False
                 break
@@ -169,49 +260,52 @@ def main():
             if labyrinth[pacman.y][pacman.x] == ".":
                 labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
 
-            # Check if all cookies are eaten (game over)
+            # Check if all cookies are eaten
             if all("." not in row for row in labyrinth):
                 print("You Win! Pacman ate all the cookies.")
                 running = False
-                not_won = False
                 break
 
-            # Q-Learning part ############################################################################
-            a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
+            # Q-Learning
+            a = rl.epsilon_greedy(q, s)
             s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
-            # print(s)  # debugging
-            # print(q[s])  # debugging
             q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
             s = s_new
 
             move_pacman(pacman, a)
             if iter % 3 == 0:
-                # Ghost moves towards Pacman
                 ghost.move_towards_pacman(pacman)
-            # Update state
             s = (pacman.x, pacman.y, ghost.x, ghost.y)
 
-            # End of Q-Learning part ######################################################################
-            # Draw the labyrinth, pacman, and ghost
+            # Draw
             draw_labyrinth()
             pacman.draw()
             ghost.draw()
 
-            # Update display
             pygame.display.flip()
 
-            # Cap the frame rate
-            # tick_speed = 100
-            tick_speed = 5 if outer_iter % 20 == 0 else 50
+            tick_speed = 20  # if game_num % 20 == 0 else 100
             clock.tick(tick_speed)
+
+
+# Main function
+def main():
+    global labyrinth
+
+    # Load existing Q-table or create new one
+    q = load_q_table("q_table.json")
+    if q is None:
+        q = rl.q_init()
+
+    print("Training for 20000 iterations...")
+    q = train(q, num_iterations=20000)
+    print("\nTraining complete! Starting visualization...")
+
+    visualize(q, num_games=10)
+
     pygame.quit()
+
+    # Save Q-table when exiting
+    save_q_table(q, "q_table.json")
 
 
 if __name__ == "__main__":
     main()
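
Note on the shared learning step: train() and visualize() apply the same tabular Q-learning update, nudging q[s][a] toward the bootstrapped target r + GAMMA * max over a' of q[s'][a']. One worked step, using ALPHA = 0.1 and GAMMA = 0.9 as illustrative stand-ins for the constants defined elsewhere in this file:

ALPHA, GAMMA = 0.1, 0.9            # illustrative values only
q_sa = 0.5                         # current estimate q[s][a]
r = -1.0                           # example per-step reward
max_q_next = 2.0                   # best action value in the next state

target = r + GAMMA * max_q_next    # -1.0 + 0.9 * 2.0 = 0.8
q_sa += ALPHA * (target - q_sa)    # 0.5 + 0.1 * (0.8 - 0.5) = 0.53 (up to float rounding)

The estimate moves a tenth of the way toward the target on each visit, so repeatedly seeing the same transition converges the entry toward it.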

reinforcement_learning.py (View File)

@@ -136,6 +136,11 @@ def take_action(s, a, labyrinth):
         row_list[s_new[0]] = " "
         labyrinth[s_new[1]] = "".join(row_list)
 
+    # Check if all cookies are eaten
+    if all("." not in row for row in labyrinth):
+        r = 100.0
+        # print("All cookies eaten")
+
     return tuple(s_new), r, labyrinth
 
 
 def max_q(q, s_new, labyrinth):
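
Note on the new reward block: take_action() now pays a one-off r = 100.0 when the move clears the last cookie, a terminal bonus on top of the usual step reward. The win test is the same all() scan the game loops use; checking it in isolation:

cleared = ["####", "#  #", "####"]
in_play = ["####", "#. #", "####"]

all("." not in row for row in cleared)   # True  -> the r = 100.0 branch fires
all("." not in row for row in in_play)   # False -> reward left unchanged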
@@ -149,3 +154,21 @@ def max_q(q, s_new, labyrinth):
             q_max = max(q_max, q[s_new][a])
     return q_max
+
+
+def get_nearest_cookie(pacman_x, pacman_y, labyrinth):
+    cookies = [
+        (x, y)
+        for y, row in enumerate(labyrinth)
+        for x, cell in enumerate(row)
+        if cell == "."
+    ]
+    if cookies:
+        nearest = min(
+            cookies, key=lambda c: abs(c[0] - pacman_x) + abs(c[1] - pacman_y)
+        )
+        cookie_dx = int(np.sign(nearest[0] - pacman_x))
+        cookie_dy = int(np.sign(nearest[1] - pacman_y))
+    else:
+        cookie_dx, cookie_dy = 0, 0
+    return cookie_dx, cookie_dy
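
Note on get_nearest_cookie(): nothing in this commit calls it yet. It condenses the cookie layout into a unit direction (dx, dy) toward the closest cookie by Manhattan distance, presumably groundwork for a richer state. It relies on np.sign, so numpy must already be imported as np near the top of reinforcement_learning.py (not visible in this hunk). A usage sketch on the training labyrinth, with the cookies next to Pacman already eaten:

from reinforcement_learning import get_nearest_cookie  # assumes numpy is available there

labyrinth = [
    "##########",
    "#   .....#",   # cells (1, 1) to (3, 1) already eaten
    "#.##..##.#",
    "#........#",
    "##########",
]

# Nearest cookie to (1, 1) is (1, 2), one row down
dx, dy = get_nearest_cookie(1, 1, labyrinth)
print(dx, dy)  # 0 1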