Finished assignment

main
2wenty1ne 2025-12-01 15:33:28 +01:00
parent 454ac5092d
commit 93076e2426
3 changed files with 114 additions and 58 deletions

53
game.py
View File

@ -1,8 +1,8 @@
import pygame import pygame
import random
import math import math
import os
from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
# Initialize pygame # Initialize pygame
pygame.init() pygame.init()
@ -12,6 +12,8 @@ SCREEN_WIDTH = 400
SCREEN_HEIGHT = 400 SCREEN_HEIGHT = 400
CELL_SIZE = 40 CELL_SIZE = 40
os.environ['SDL_VIDEODRIVER'] = 'dummy'
# Define colors # Define colors
YELLOW = (255, 255, 0) YELLOW = (255, 255, 0)
RED = (255, 0, 0) RED = (255, 0, 0)
@ -20,7 +22,7 @@ BLUE = (0, 0, 255)
BLACK = (0, 0, 0) BLACK = (0, 0, 0)
# Labyrinth as a string # Labyrinth as a string
labyrinth = [ labyrinth_init = [
"##########", "##########",
"#........#", "#........#",
"#.##..##.#", "#.##..##.#",
@ -28,13 +30,16 @@ labyrinth = [
"##########" "##########"
] ]
labyrinth = labyrinth_init.copy()
# Get labyrinth dimensions # Get labyrinth dimensions
ROWS = len(labyrinth) ROWS = len(labyrinth)
COLS = len(labyrinth[0]) COLS = len(labyrinth[0])
# Initialize game screen # Initialize game screen
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) # screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman") # pygame.display.set_caption("Micro-Pacman")
screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE))
# Pacman class # Pacman class
class Pacman: class Pacman:
@ -100,7 +105,7 @@ class Ghost:
pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size)) pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
# Draw walls and cookies # Draw walls and cookies
def draw_labyrinth(): def draw_labyrinth(labyrinth):
for y, row in enumerate(labyrinth): for y, row in enumerate(labyrinth):
for x, cell in enumerate(row): for x, cell in enumerate(row):
if cell == "#": if cell == "#":
@ -110,19 +115,20 @@ def draw_labyrinth():
# Main game function # Main game function
def main(q_values, EPSILON, ALPHA, GAMMA): def run_game(q_values, EPSILON, ALPHA, GAMMA):
clock = pygame.time.Clock() clock = pygame.time.Clock()
labyrinth = labyrinth_init.copy()
# Initialize Pacman and Ghost positions # Initialize Pacman and Ghost positions
pacman = Pacman(1, 1) pacman = Pacman(1, 1)
ghost = Ghost(COLS - 2, ROWS - 2) ghost = Ghost(COLS - 2, ROWS - 2)
# Game loop
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y) state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
reward = 0 reward = 0
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
#? GAME LOOP
running = True running = True
iter = 0 iter = 0
while running: while running:
@ -156,7 +162,6 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
pacman.move(0, 1) pacman.move(0, 1)
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
if iter%3==0: if iter%3==0:
# Ghost moves towards Pacman # Ghost moves towards Pacman
ghost.move_towards_pacman(pacman) ghost.move_towards_pacman(pacman)
@ -165,6 +170,7 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
if pacman.x == ghost.x and pacman.y == ghost.y: if pacman.x == ghost.x and pacman.y == ghost.y:
print("Game Over! The ghost caught Pacman.") print("Game Over! The ghost caught Pacman.")
running = False running = False
reward = -10
# Eat cookies # Eat cookies
if labyrinth[pacman.y][pacman.x] == ".": if labyrinth[pacman.y][pacman.x] == ".":
@ -173,29 +179,44 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
# Check if all cookies are eaten (game over) # Check if all cookies are eaten (game over)
if all("." not in row for row in labyrinth): if all("." not in row for row in labyrinth):
print("You Win! Pacman ate all the cookies.") print("You Win! Pacman ate all the cookies.")
reward = 10
running = False running = False
# Draw the labyrinth, pacman, and ghost # Draw the labyrinth, pacman, and ghost
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y) if not running:
new_state = state
else:
new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
best_value_new_state, _ = get_best_q_value(q_values, new_state) best_action_new_state, _ = get_best_q_action(q_values, new_state)
best_value_new_state = q_values[(new_state, best_action_new_state)]
current_value = q_values.get((state, action), 0) current_value = q_values.get((state, action))
adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value) adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
q_values[(state, action)] = current_value + adjusted_value q_values[(state, action)] = current_value + adjusted_value
state = new_state
if not running:
counter = 0
for y, row in enumerate(labyrinth):
for x, cell in enumerate(row):
if cell == ".":
counter += 1
return 20-counter
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
draw_labyrinth() draw_labyrinth(labyrinth)
pacman.draw() pacman.draw()
ghost.draw() ghost.draw()
# Update display # Update display
pygame.display.flip() # pygame.display.flip()
# Cap the frame rate # Cap the frame rate
clock.tick(1) clock.tick(10000)
pygame.quit() pygame.quit()
if __name__ == "__main__": if __name__ == "__main__":
main() run_game()

20
main.py
View File

@ -1,10 +1,13 @@
from util import epsilon_greedy, get_start_state, test import matplotlib.pyplot as plt
from game import run_game
from util import initial_q_fill
AMOUNT_RUNS = 10 AMOUNT_RUNS = 5000
EPSILON = 0.1 EPSILON = 0.1
ALPHA = 0.1 ALPHA = 0.1
GAMMA = 0.1 GAMMA = 0.9
""" """
@ -13,12 +16,15 @@ action: Direction
q_value: (state, action) q_value: (state, action)
""" """
q_values = {} q_values = {}
initial_q_fill(q_values)
cookies_per_run = []
# Amount of single runs # Amount of single runs
for x in range(AMOUNT_RUNS): for x in range(AMOUNT_RUNS):
state = get_start_state() amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
cookies_per_run.append(amount_cookies_ate)
print(f"Run {x}: {amount_cookies_ate} cookies ate\n")
# Single run, until win or death
while(True):
action = epsilon_greedy(q_values, state, EPSILON)
plt.plot(cookies_per_run)
plt.show()

99
util.py
View File

@ -8,6 +8,17 @@ class Direction(Enum):
LEFT = 3 LEFT = 3
def initial_q_fill(q_values, x_dist_count=8, y_dist_count=3):
    """Seed the Q-table with a small random value for every state/action pair.

    A state is the tuple ``(x, y, cookie_direction)``, the same shape that
    ``calc_current_state`` returns. One entry is written for every
    combination of ``x`` in ``range(x_dist_count)``, ``y`` in
    ``range(y_dist_count)``, every ``Direction`` as cookie direction and
    every ``Direction`` as action.

    Args:
        q_values: dict mapping ``(state, action)`` to a float; filled in place.
        x_dist_count: number of distinct first-component values (default 8).
        y_dist_count: number of distinct second-component values (default 3).
    """
    for x in range(x_dist_count):
        for y in range(y_dist_count):
            for cookie_direction in Direction:
                for action in Direction:
                    state = (x, y, cookie_direction)
                    # Uniform noise in [-0.1, 0.1) so no action starts out
                    # with a systematic advantage.
                    q_values[(state, action)] = random.random() * 0.2 - 0.1
def get_start_state(): def get_start_state():
first_direction_cookie = random.choice([True, False]) first_direction_cookie = random.choice([True, False])
if first_direction_cookie: if first_direction_cookie:
@ -18,68 +29,86 @@ def get_start_state():
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the state tuple used as the Q-table key.

    Args:
        labyrinth: sequence of row strings; ``"."`` marks a cookie.
        pac_x, pac_y: Pacman's column and row.
        ghost_x, ghost_y: the ghost's column and row.

    Returns:
        ``(x_ghost_dist, y_ghost_dist, cookie_direction)`` where the first two
        entries are the absolute column/row distances between Pacman and the
        ghost, and the last is the result of ``get_closest_cookie_direction``.
    """
    x_ghost_dist = abs(pac_x - ghost_x)
    y_ghost_dist = abs(pac_y - ghost_y)

    cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)

    return x_ghost_dist, y_ghost_dist, cookie_direction
def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the Direction Pacman should take towards the nearest cookie.

    The nearest cookie is the ``"."`` cell with the smallest Manhattan
    distance to ``(pac_x, pac_y)``; among equally distant cookies the first
    one in row-major order wins.

    Args:
        labyrinth: sequence of row strings; ``"."`` marks a cookie.
        pac_x, pac_y: Pacman's column and row.

    Returns:
        A ``Direction`` member.

    Raises:
        ValueError: if the labyrinth contains no cookie.
    """
    cookie_distances = [
        (abs(pac_x - x) + abs(pac_y - y), (x, y))
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    ]
    if not cookie_distances:
        # min() would raise ValueError as well, but with an opaque message.
        raise ValueError("labyrinth contains no cookies")

    # Compare on the distance only, so ties keep the first cookie found.
    _, (cookie_x, cookie_y) = min(cookie_distances, key=lambda item: item[0])

    dx = cookie_x - pac_x
    dy = cookie_y - pac_y

    if abs(dx) >= abs(dy):
        # The vertical offset is the smaller one (or equal): close it first,
        # and only move horizontally once the cookie is on Pacman's row.
        if dy > 0:
            return Direction.DOWN
        if dy < 0:
            return Direction.UP
        return Direction.RIGHT if dx > 0 else Direction.LEFT

    # The horizontal offset is strictly smaller: close it first, and only
    # move vertically once the cookie is in Pacman's column.
    if dx > 0:
        return Direction.RIGHT
    if dx < 0:
        return Direction.LEFT
    return Direction.DOWN if dy > 0 else Direction.UP
def epsilon_greedy(q_values, state, epsilon):
    """Pick an action for ``state`` with an epsilon-greedy policy.

    With probability ``epsilon`` an action is drawn at random from the
    actions the Q-table already knows for ``state`` (or from
    ``get_random_direction`` when it knows none); otherwise the best known
    action is returned.
    """
    greedy_action, known_actions = get_best_q_action(q_values, state)

    # Exploit: keep the greedy choice.
    if random.random() >= epsilon:
        return greedy_action

    # Explore: prefer actions already recorded for this state.
    if known_actions:
        return random.choice(known_actions)
    return get_random_direction()
def get_best_q_value(q_values, state): def get_best_q_action(q_values, state):
best_action = None best_action = None
best_value = None best_value = None
states_for_epsilon = [] actions_for_epsilon = []
for (q_state, q_action), value in q_values.items(): for (q_state, q_action), value in q_values.items():
if q_state == state: if q_state == state:
states_for_epsilon.append(q_action) actions_for_epsilon.append(q_action)
if best_value is None: if best_value is None:
best_value = value best_value = value
@ -93,7 +122,7 @@ def get_best_q_value(q_values, state):
if not best_action: if not best_action:
best_action = get_random_direction() best_action = get_random_direction()
return best_action, states_for_epsilon return best_action, actions_for_epsilon
def get_random_direction(): def get_random_direction():