Added current state calc partially
parent
32a9b5a670
commit
454ac5092d
34
game.py
34
game.py
|
|
@ -2,6 +2,8 @@ import pygame
|
||||||
import random
|
import random
|
||||||
import math
|
import math
|
||||||
|
|
||||||
|
from util import Direction, calc_current_state, epsilon_greedy, get_best_q_value
|
||||||
|
|
||||||
# Initialize pygame
|
# Initialize pygame
|
||||||
pygame.init()
|
pygame.init()
|
||||||
|
|
||||||
|
|
@ -106,8 +108,9 @@ def draw_labyrinth():
|
||||||
elif cell == ".":
|
elif cell == ".":
|
||||||
pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5)
|
pygame.draw.circle(screen, WHITE, (x * CELL_SIZE + CELL_SIZE // 2, y * CELL_SIZE + CELL_SIZE // 2), 5)
|
||||||
|
|
||||||
|
|
||||||
# Main game function
|
# Main game function
|
||||||
def main():
|
def main(q_values, EPSILON, ALPHA, GAMMA):
|
||||||
clock = pygame.time.Clock()
|
clock = pygame.time.Clock()
|
||||||
|
|
||||||
# Initialize Pacman and Ghost positions
|
# Initialize Pacman and Ghost positions
|
||||||
|
|
@ -115,6 +118,11 @@ def main():
|
||||||
ghost = Ghost(COLS - 2, ROWS - 2)
|
ghost = Ghost(COLS - 2, ROWS - 2)
|
||||||
|
|
||||||
# Game loop
|
# Game loop
|
||||||
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
|
||||||
|
reward = 0
|
||||||
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
running = True
|
running = True
|
||||||
iter = 0
|
iter = 0
|
||||||
while running:
|
while running:
|
||||||
|
|
@ -136,6 +144,19 @@ def main():
|
||||||
if keys[pygame.K_DOWN]:
|
if keys[pygame.K_DOWN]:
|
||||||
pacman.move(0, 1)
|
pacman.move(0, 1)
|
||||||
|
|
||||||
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
action = epsilon_greedy(q_values, state, EPSILON)
|
||||||
|
if action == Direction.LEFT:
|
||||||
|
pacman.move(-1, 0)
|
||||||
|
if action == Direction.RIGHT:
|
||||||
|
pacman.move(1, 0)
|
||||||
|
if action == Direction.UP:
|
||||||
|
pacman.move(0, -1)
|
||||||
|
if action == Direction.DOWN:
|
||||||
|
pacman.move(0, 1)
|
||||||
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
|
|
||||||
if iter%3==0:
|
if iter%3==0:
|
||||||
# Ghost moves towards Pacman
|
# Ghost moves towards Pacman
|
||||||
ghost.move_towards_pacman(pacman)
|
ghost.move_towards_pacman(pacman)
|
||||||
|
|
@ -155,6 +176,15 @@ def main():
|
||||||
running = False
|
running = False
|
||||||
|
|
||||||
# Draw the labyrinth, pacman, and ghost
|
# Draw the labyrinth, pacman, and ghost
|
||||||
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
|
||||||
|
|
||||||
|
best_value_new_state, _ = get_best_q_value(q_values, new_state)
|
||||||
|
|
||||||
|
current_value = q_values.get((state, action), 0)
|
||||||
|
adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
|
||||||
|
q_values[(state, action)] = current_value + adjusted_value
|
||||||
|
#? -------------------------MY CODE-----------------------------------
|
||||||
draw_labyrinth()
|
draw_labyrinth()
|
||||||
pacman.draw()
|
pacman.draw()
|
||||||
ghost.draw()
|
ghost.draw()
|
||||||
|
|
@ -163,7 +193,7 @@ def main():
|
||||||
pygame.display.flip()
|
pygame.display.flip()
|
||||||
|
|
||||||
# Cap the frame rate
|
# Cap the frame rate
|
||||||
clock.tick(5)
|
clock.tick(1)
|
||||||
|
|
||||||
pygame.quit()
|
pygame.quit()
|
||||||
|
|
||||||
|
|
|
||||||
5
main.py
5
main.py
|
|
@ -1,8 +1,10 @@
|
||||||
from util import epsilon_greedy, get_start_state
|
from util import epsilon_greedy, get_start_state, test
|
||||||
|
|
||||||
|
|
||||||
AMOUNT_RUNS = 10
|
AMOUNT_RUNS = 10
|
||||||
EPSILON = 0.1
|
EPSILON = 0.1
|
||||||
|
ALPHA = 0.1
|
||||||
|
GAMMA = 0.1
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
@ -12,7 +14,6 @@ q_value: (state, action)
|
||||||
"""
|
"""
|
||||||
q_values = {}
|
q_values = {}
|
||||||
|
|
||||||
|
|
||||||
# Amount of single runs
|
# Amount of single runs
|
||||||
for x in range(AMOUNT_RUNS):
|
for x in range(AMOUNT_RUNS):
|
||||||
state = get_start_state()
|
state = get_start_state()
|
||||||
|
|
|
||||||
68
util.py
68
util.py
|
|
@ -16,7 +16,62 @@ def get_start_state():
|
||||||
return(7, 2, Direction.RIGHT)
|
return(7, 2, Direction.RIGHT)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Build the Q-learning state tuple for the current board position.

    Args:
        labyrinth: Sequence of rows; a cell equal to "." holds a cookie.
        pac_x, pac_y: Pacman's column and row.
        ghost_x, ghost_y: The ghost's column and row.

    Returns:
        A tuple ``(x_dist, y_dist, cookie_direction)`` where ``x_dist`` and
        ``y_dist`` are the absolute per-axis distances between Pacman and the
        ghost, and ``cookie_direction`` is the ``Direction`` member pointing
        from Pacman towards the closest cookie. ``cookie_direction`` is
        ``None`` when no cookie is left or Pacman stands on the closest one.
    """
    # distance pacman - ghost
    x_dist = abs(pac_x - ghost_x)
    y_dist = abs(pac_y - ghost_y)

    # Closest cookie by Manhattan distance. min() keeps the first candidate
    # on ties and the search is not capped at a fixed radius, so it also
    # works on boards larger than the one this was first written for.
    all_cookie_locations = get_all_cookies_locations(labyrinth)
    best_cords = min(
        all_cookie_locations,
        key=lambda cords: abs(pac_x - cords[0]) + abs(pac_y - cords[1]),
        default=None,
    )

    # closest cookie direction
    cookie_direction = None
    if best_cords is not None:
        # Offsets are taken from the *closest* cookie, not from whatever
        # cookie the scan happened to visit last.
        offset_x = best_cords[0] - pac_x
        offset_y = best_cords[1] - pac_y

        # x grows to the right and y grows downward (the game moves LEFT as
        # (-1, 0) and UP as (0, -1)). The dominant axis decides the
        # direction; horizontal wins when both offsets are equally large.
        if offset_x == 0 and offset_y == 0:
            cookie_direction = None
        elif abs(offset_x) >= abs(offset_y):
            cookie_direction = Direction.RIGHT if offset_x > 0 else Direction.LEFT
        else:
            cookie_direction = Direction.DOWN if offset_y > 0 else Direction.UP

    return x_dist, y_dist, cookie_direction
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_cookies_locations(labyrinth):
    """Collect the coordinates of every cookie cell in the labyrinth.

    Args:
        labyrinth: Sequence of rows, each row a sequence of cells.

    Returns:
        A list of ``(x, y)`` tuples, one per cell equal to ".", where ``x``
        is the index inside the row and ``y`` is the row index. Entries are
        ordered row by row, left to right.
    """
    return [
        (column, line_no)
        for line_no, line in enumerate(labyrinth)
        for column, symbol in enumerate(line)
        if symbol == "."
    ]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def epsilon_greedy(q_values, state, epsilon):
|
def epsilon_greedy(q_values, state, epsilon):
    """Choose an action for ``state`` with an epsilon-greedy rule.

    Args:
        q_values: Q-table handed through to ``get_best_q_value``.
        state: The state to choose an action for.
        epsilon: Probability threshold; a draw of ``random.random()`` below
            it triggers the exploratory branch.

    Returns:
        The first element returned by ``get_best_q_value`` when not
        exploring. When exploring, a random pick from the second element
        returned by ``get_best_q_value``, or a random direction if that
        collection is empty.
    """
    greedy_action, candidates = get_best_q_value(q_values, state)

    exploring = random.random() < epsilon
    if not exploring:
        return greedy_action

    # Exploration: prefer the candidates reported for this state and only
    # fall back to an arbitrary direction when there are none.
    if candidates:
        return random.choice(candidates)
    return get_random_direction()
|
||||||
|
|
||||||
|
|
||||||
|
def get_best_q_value(q_values, state):
|
||||||
best_action = None
|
best_action = None
|
||||||
best_value = None
|
best_value = None
|
||||||
|
|
||||||
|
|
@ -35,12 +90,11 @@ def epsilon_greedy(q_values, state, epsilon):
|
||||||
best_value = value
|
best_value = value
|
||||||
best_action = q_action
|
best_action = q_action
|
||||||
|
|
||||||
if random.random() < epsilon:
|
if not best_action:
|
||||||
random_action = random.choice(states_for_epsilon)
|
best_action = get_random_direction()
|
||||||
return random_action
|
|
||||||
|
return best_action, states_for_epsilon
|
||||||
return best_action
|
|
||||||
|
|
||||||
|
|
||||||
def take_action(state, action):
|
def get_random_direction():
    """Return one randomly chosen element of ``Direction``."""
    directions = list(Direction)
    return random.choice(directions)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue