Finished assignment
parent
454ac5092d
commit
93076e2426
51
game.py
51
game.py
|
|
@ -1,8 +1,8 @@
|
||||||
import pygame
|
import pygame
|
||||||
import random
|
|
||||||
import math
|
import math
|
||||||
|
import os
|
||||||
|
|
||||||
from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value
|
from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action
|
||||||
|
|
||||||
# Initialize pygame
|
# Initialize pygame
|
||||||
pygame.init()
|
pygame.init()
|
||||||
|
|
@ -12,6 +12,8 @@ SCREEN_WIDTH = 400
|
||||||
SCREEN_HEIGHT = 400
|
SCREEN_HEIGHT = 400
|
||||||
CELL_SIZE = 40
|
CELL_SIZE = 40
|
||||||
|
|
||||||
|
os.environ['SDL_VIDEODRIVER'] = 'dummy'
|
||||||
|
|
||||||
# Define colors
|
# Define colors
|
||||||
YELLOW = (255, 255, 0)
|
YELLOW = (255, 255, 0)
|
||||||
RED = (255, 0, 0)
|
RED = (255, 0, 0)
|
||||||
|
|
@ -20,7 +22,7 @@ BLUE = (0, 0, 255)
|
||||||
BLACK = (0, 0, 0)
|
BLACK = (0, 0, 0)
|
||||||
|
|
||||||
# Labyrinth as a string
|
# Labyrinth as a string
|
||||||
labyrinth = [
|
labyrinth_init = [
|
||||||
"##########",
|
"##########",
|
||||||
"#........#",
|
"#........#",
|
||||||
"#.##..##.#",
|
"#.##..##.#",
|
||||||
|
|
@ -28,13 +30,16 @@ labyrinth = [
|
||||||
"##########"
|
"##########"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
labyrinth = labyrinth_init.copy()
|
||||||
|
|
||||||
# Get labyrinth dimensions
|
# Get labyrinth dimensions
|
||||||
ROWS = len(labyrinth)
|
ROWS = len(labyrinth)
|
||||||
COLS = len(labyrinth[0])
|
COLS = len(labyrinth[0])
|
||||||
|
|
||||||
# Initialize game screen
|
# Initialize game screen
|
||||||
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
# screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
||||||
pygame.display.set_caption("Micro-Pacman")
|
# pygame.display.set_caption("Micro-Pacman")
|
||||||
|
screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
||||||
|
|
||||||
# Pacman class
|
# Pacman class
|
||||||
class Pacman:
|
class Pacman:
|
||||||
|
|
@ -100,7 +105,7 @@ class Ghost:
|
||||||
pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
|
pygame.draw.rect(screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
|
||||||
|
|
||||||
# Draw walls and cookies
|
# Draw walls and cookies
|
||||||
def draw_labyrinth():
|
def draw_labyrinth(labyrinth):
|
||||||
for y, row in enumerate(labyrinth):
|
for y, row in enumerate(labyrinth):
|
||||||
for x, cell in enumerate(row):
|
for x, cell in enumerate(row):
|
||||||
if cell == "#":
|
if cell == "#":
|
||||||
|
|
@ -110,19 +115,20 @@ def draw_labyrinth():
|
||||||
|
|
||||||
|
|
||||||
# Main game function
|
# Main game function
|
||||||
def main(q_values, EPSILON, ALPHA, GAMMA):
|
def run_game(q_values, EPSILON, ALPHA, GAMMA):
|
||||||
clock = pygame.time.Clock()
|
clock = pygame.time.Clock()
|
||||||
|
labyrinth = labyrinth_init.copy()
|
||||||
|
|
||||||
# Initialize Pacman and Ghost positions
|
# Initialize Pacman and Ghost positions
|
||||||
pacman = Pacman(1, 1)
|
pacman = Pacman(1, 1)
|
||||||
ghost = Ghost(COLS - 2, ROWS - 2)
|
ghost = Ghost(COLS - 2, ROWS - 2)
|
||||||
|
|
||||||
# Game loop
|
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
|
state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
|
||||||
reward = 0
|
reward = 0
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
|
#? GAME LOOP
|
||||||
running = True
|
running = True
|
||||||
iter = 0
|
iter = 0
|
||||||
while running:
|
while running:
|
||||||
|
|
@ -156,7 +162,6 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
|
||||||
pacman.move(0, 1)
|
pacman.move(0, 1)
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
|
|
||||||
if iter%3==0:
|
if iter%3==0:
|
||||||
# Ghost moves towards Pacman
|
# Ghost moves towards Pacman
|
||||||
ghost.move_towards_pacman(pacman)
|
ghost.move_towards_pacman(pacman)
|
||||||
|
|
@ -165,6 +170,7 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
|
||||||
if pacman.x == ghost.x and pacman.y == ghost.y:
|
if pacman.x == ghost.x and pacman.y == ghost.y:
|
||||||
print("Game Over! The ghost caught Pacman.")
|
print("Game Over! The ghost caught Pacman.")
|
||||||
running = False
|
running = False
|
||||||
|
reward = -10
|
||||||
|
|
||||||
# Eat cookies
|
# Eat cookies
|
||||||
if labyrinth[pacman.y][pacman.x] == ".":
|
if labyrinth[pacman.y][pacman.x] == ".":
|
||||||
|
|
@ -173,29 +179,44 @@ def main(q_values, EPSILON, ALPHA, GAMMA):
|
||||||
# Check if all cookies are eaten (game over)
|
# Check if all cookies are eaten (game over)
|
||||||
if all("." not in row for row in labyrinth):
|
if all("." not in row for row in labyrinth):
|
||||||
print("You Win! Pacman ate all the cookies.")
|
print("You Win! Pacman ate all the cookies.")
|
||||||
|
reward = 10
|
||||||
running = False
|
running = False
|
||||||
|
|
||||||
# Draw the labyrinth, pacman, and ghost
|
# Draw the labyrinth, pacman, and ghost
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
if not running:
|
||||||
|
new_state = state
|
||||||
|
else:
|
||||||
new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
|
new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
|
||||||
|
|
||||||
best_value_new_state, _ = get_best_q_value(q_values, new_state)
|
best_action_new_state, _ = get_best_q_action(q_values, new_state)
|
||||||
|
best_value_new_state = q_values[(new_state, best_action_new_state)]
|
||||||
|
|
||||||
current_value = q_values.get((state, action), 0)
|
current_value = q_values.get((state, action))
|
||||||
adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
|
adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
|
||||||
q_values[(state, action)] = current_value + adjusted_value
|
q_values[(state, action)] = current_value + adjusted_value
|
||||||
|
|
||||||
|
state = new_state
|
||||||
|
|
||||||
|
if not running:
|
||||||
|
counter = 0
|
||||||
|
for y, row in enumerate(labyrinth):
|
||||||
|
for x, cell in enumerate(row):
|
||||||
|
if cell == ".":
|
||||||
|
counter += 1
|
||||||
|
return 20-counter
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
draw_labyrinth()
|
draw_labyrinth(labyrinth)
|
||||||
pacman.draw()
|
pacman.draw()
|
||||||
ghost.draw()
|
ghost.draw()
|
||||||
|
|
||||||
# Update display
|
# Update display
|
||||||
pygame.display.flip()
|
# pygame.display.flip()
|
||||||
|
|
||||||
# Cap the frame rate
|
# Cap the frame rate
|
||||||
clock.tick(1)
|
clock.tick(10000)
|
||||||
|
|
||||||
pygame.quit()
|
pygame.quit()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
run_game()
|
||||||
20
main.py
20
main.py
|
|
@ -1,10 +1,13 @@
|
||||||
from util import epsilon_greedy, get_start_state, test
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from game import run_game
|
||||||
|
from util import initial_q_fill
|
||||||
|
|
||||||
|
|
||||||
AMOUNT_RUNS = 10
|
AMOUNT_RUNS = 5000
|
||||||
EPSILON = 0.1
|
EPSILON = 0.1
|
||||||
ALPHA = 0.1
|
ALPHA = 0.1
|
||||||
GAMMA = 0.1
|
GAMMA = 0.9
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
@ -13,12 +16,15 @@ action: Direction
|
||||||
q_value: (state, action)
|
q_value: (state, action)
|
||||||
"""
|
"""
|
||||||
q_values = {}
|
q_values = {}
|
||||||
|
initial_q_fill(q_values)
|
||||||
|
|
||||||
|
cookies_per_run = []
|
||||||
# Amount of single runs
|
# Amount of single runs
|
||||||
for x in range(AMOUNT_RUNS):
|
for x in range(AMOUNT_RUNS):
|
||||||
state = get_start_state()
|
amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA)
|
||||||
|
cookies_per_run.append(amount_cookies_ate)
|
||||||
|
print(f"Run {x}: {amount_cookies_ate} cookies ate\n")
|
||||||
|
|
||||||
# Single run, until win or death
|
|
||||||
while(True):
|
|
||||||
action = epsilon_greedy(q_values, state, EPSILON)
|
|
||||||
|
|
||||||
|
plt.plot(cookies_per_run)
|
||||||
|
plt.show()
|
||||||
|
|
|
||||||
99
util.py
99
util.py
|
|
@ -8,6 +8,17 @@ class Direction(Enum):
|
||||||
LEFT = 3
|
LEFT = 3
|
||||||
|
|
||||||
|
|
||||||
|
def initial_q_fill(q_values):
|
||||||
|
for x in range(8):
|
||||||
|
for y in range(3):
|
||||||
|
for cookie_direction in Direction:
|
||||||
|
for action in Direction:
|
||||||
|
state = (x, y, cookie_direction)
|
||||||
|
q_values[(state, action)] = random.random() * 0.2 - 0.1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_start_state():
|
def get_start_state():
|
||||||
first_direction_cookie = random.choice([True, False])
|
first_direction_cookie = random.choice([True, False])
|
||||||
if first_direction_cookie:
|
if first_direction_cookie:
|
||||||
|
|
@ -18,68 +29,86 @@ def get_start_state():
|
||||||
|
|
||||||
|
|
||||||
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
|
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
|
||||||
# distance pacman - ghost
|
x_ghost_dist = abs(pac_x - ghost_x)
|
||||||
x_dist = abs(pac_x - ghost_x)
|
y_ghost_dist = abs(pac_y - ghost_y)
|
||||||
y_dist = abs(pac_y - ghost_y)
|
|
||||||
|
|
||||||
# closest cookie
|
cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
|
||||||
best_distance = 12
|
|
||||||
best_cords = None
|
|
||||||
|
|
||||||
all_cookie_locations = get_all_cookies_locations(labyrinth)
|
return x_ghost_dist, y_ghost_dist, cookie_direction
|
||||||
for (cookie_x, cookie_y) in all_cookie_locations:
|
|
||||||
dist = abs(pac_x - cookie_x) + abs(pac_y - cookie_y)
|
|
||||||
|
|
||||||
if dist < best_distance:
|
|
||||||
best_distance = dist
|
|
||||||
best_cords = (cookie_x, cookie_y)
|
|
||||||
|
|
||||||
# closest cookie direction
|
|
||||||
cookie_direction = None
|
|
||||||
real_dist_x = pac_x - cookie_x
|
|
||||||
real_dist_y = pac_y - cookie_y
|
|
||||||
|
|
||||||
#TODO
|
|
||||||
if real_dist_x >= 0 & real_dist_y > 0:
|
|
||||||
cookie_direction = Direction
|
|
||||||
|
|
||||||
return x_dist, y_dist, cookie_direction
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_cookies_locations(labyrinth):
|
def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
|
||||||
cookie_locations = []
|
cookie_distances = []
|
||||||
|
|
||||||
for y, row in enumerate(labyrinth):
|
for y, row in enumerate(labyrinth):
|
||||||
for x, cell in enumerate(row):
|
for x, cell in enumerate(row):
|
||||||
if cell == ".":
|
if cell == ".":
|
||||||
cookie_locations.append((x, y))
|
x_dist = abs(pac_x - x)
|
||||||
|
y_dist = abs(pac_y - y)
|
||||||
|
dist = x_dist + y_dist
|
||||||
|
cookie_distances.append((dist, (x, y)))
|
||||||
|
|
||||||
return cookie_locations
|
closest_cookie = min(cookie_distances, key=lambda item: item[0])
|
||||||
|
closest_cookie_cords = closest_cookie[1]
|
||||||
|
cookie_x = closest_cookie_cords[0]
|
||||||
|
cookie_y = closest_cookie_cords[1]
|
||||||
|
|
||||||
|
|
||||||
|
dx = cookie_x - pac_x
|
||||||
|
dy = cookie_y - pac_y
|
||||||
|
|
||||||
|
if abs(dx) >= abs(dy):
|
||||||
|
#? X distance bigger
|
||||||
|
|
||||||
|
if dy > 0:
|
||||||
|
return Direction.DOWN
|
||||||
|
elif dy < 0:
|
||||||
|
return Direction.UP
|
||||||
|
else:
|
||||||
|
#? Cookie on the same Y level
|
||||||
|
if dx > 0:
|
||||||
|
return Direction.RIGHT
|
||||||
|
else:
|
||||||
|
return Direction.LEFT
|
||||||
|
else:
|
||||||
|
#? Y distance bigger
|
||||||
|
|
||||||
|
if dx > 0:
|
||||||
|
return Direction.RIGHT
|
||||||
|
elif dx < 0:
|
||||||
|
return Direction.LEFT
|
||||||
|
else:
|
||||||
|
#? Cookie on the same X level
|
||||||
|
if dy > 0:
|
||||||
|
return Direction.DOWN
|
||||||
|
else:
|
||||||
|
return Direction.UP
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def epsilon_greedy(q_values, state, epsilon):
|
def epsilon_greedy(q_values, state, epsilon):
|
||||||
best_action, states_for_epsilon = get_best_q_value(q_values, state)
|
best_action, actions_for_epsilon = get_best_q_action(q_values, state)
|
||||||
|
|
||||||
if random.random() < epsilon:
|
if random.random() < epsilon:
|
||||||
if not states_for_epsilon:
|
if not actions_for_epsilon:
|
||||||
best_action = get_random_direction()
|
best_action = get_random_direction()
|
||||||
return best_action
|
return best_action
|
||||||
|
|
||||||
random_action = random.choice(states_for_epsilon)
|
random_action = random.choice(actions_for_epsilon)
|
||||||
return random_action
|
return random_action
|
||||||
|
|
||||||
return best_action
|
return best_action
|
||||||
|
|
||||||
|
|
||||||
def get_best_q_value(q_values, state):
|
def get_best_q_action(q_values, state):
|
||||||
best_action = None
|
best_action = None
|
||||||
best_value = None
|
best_value = None
|
||||||
|
|
||||||
states_for_epsilon = []
|
actions_for_epsilon = []
|
||||||
|
|
||||||
for (q_state, q_action), value in q_values.items():
|
for (q_state, q_action), value in q_values.items():
|
||||||
if q_state == state:
|
if q_state == state:
|
||||||
states_for_epsilon.append(q_action)
|
actions_for_epsilon.append(q_action)
|
||||||
|
|
||||||
if best_value is None:
|
if best_value is None:
|
||||||
best_value = value
|
best_value = value
|
||||||
|
|
@ -93,7 +122,7 @@ def get_best_q_value(q_values, state):
|
||||||
if not best_action:
|
if not best_action:
|
||||||
best_action = get_random_direction()
|
best_action = get_random_direction()
|
||||||
|
|
||||||
return best_action, states_for_epsilon
|
return best_action, actions_for_epsilon
|
||||||
|
|
||||||
|
|
||||||
def get_random_direction():
|
def get_random_direction():
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue