commencing with actual reinforcement learning
parent 48a351518d
commit a891d51ca9
@@ -135,77 +135,72 @@ def main():
    q = rl.q_init()

    # Game loop
-   not_won = True
+   running = True
    iter = 0
+   while running:
+       screen.fill(BLACK)
+       iter = iter + 1
-   while not_won:

        labyrinth = [
            "##########",
            "#........#",
            "#.##..##.#",
            "#........#",
            "##########"
        ]

        # Handle events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                not_won = False

-       while running:
-           screen.fill(BLACK)
-           iter = iter + 1

        # Check for collisions (game over if ghost catches pacman)
        if pacman.x == ghost.x and pacman.y == ghost.y:
            print("Game Over! The ghost caught Pacman.")
            running = False

        # Eat cookies
        if labyrinth[pacman.y][pacman.x] == ".":
            labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]

        # Check if all cookies are eaten (game over)
        if all("." not in row for row in labyrinth):
            print("You Win! Pacman ate all the cookies.")
            running = False
-           not_won = False

-       # Start of my code ######################################################################
+       # Start of my code
+       labyrinth_copy = [list(row) for row in labyrinth]  # Create proper deep copy
+       s_not_terminal = True
+       a = None
+       iteration = 0
+       max_iterations = 50  # Prevent infinite loops
+
+       while s_not_terminal and iteration < max_iterations:
+           iteration += 1
+           # print("s: " + str(s))  # debugging
+           # print("q[s] before action: " + str(q[s]))  # debugging
+           labyrinth_copy = [list(row) for row in labyrinth]  # Create proper deep copy
+
+           a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
+           s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy)
+
+           q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a])
+
+           if all("." not in row for row in labyrinth_copy):
+               s_not_terminal = False
+               q[s][a] = 10.0
+               print("There is a parallel universe with victory")
+
+           s = s_new
+           time.sleep(0.025)
+
+       if iteration >= max_iterations:
+           print("Max iterations reached for this loop")
+       # at least indicate where the next cookie is, ignoring the ghost, in state s
+       # After everything was calculated, just move Pacman according to the highest action a in the Q-table q.
+       move_pacman(pacman, a)

        s = (pacman.x, pacman.y, ghost.x, ghost.y)  # as a tuple so the state becomes hashable
        a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
        move_pacman(pacman, a)

        if iter%3==0:
            # Ghost moves towards Pacman
            ghost.move_towards_pacman(pacman)

        # Draw the labyrinth, pacman, and ghost
        draw_labyrinth()
        pacman.draw()
        ghost.draw()

        # Update display
        pygame.display.flip()

        # Cap the frame rate
        clock.tick(5)

    pygame.quit()
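The heart of the added inner loop is the tabular Q-learning update, q[s][a] += ALPHA * (r + GAMMA * max over a' of q[s_new][a'] - q[s][a]), run against throwaway copies of the maze. Below is a minimal sketch of just that update, assuming a defaultdict-backed Q-table keyed by (pacman.x, pacman.y, ghost.x, ghost.y) tuples; the ALPHA and GAMMA values and the q_init/q_update helpers are illustrative stand-ins for the repo's rl module, not its actual API.

    from collections import defaultdict

    ALPHA = 0.1  # learning rate (value assumed; the diff only shows the name)
    GAMMA = 0.9  # discount factor (value assumed)

    def q_init():
        # one Q-value per action (0=Left, 1=Right, 2=Up, 3=Down), defaulting to 0.0
        return defaultdict(lambda: [0.0, 0.0, 0.0, 0.0])

    def q_update(q, s, a, r, s_new):
        # move q[s][a] a fraction ALPHA toward the bootstrapped target
        target = r + GAMMA * max(q[s_new])
        q[s][a] += ALPHA * (target - q[s][a])

    q = q_init()
    s, s_new = (1, 1, 5, 3), (2, 1, 5, 3)    # (pacman.x, pacman.y, ghost.x, ghost.y)
    q_update(q, s, a=1, r=1.0, s_new=s_new)  # moved right onto a cookie
    print(q[s])                              # [0.0, 0.1, 0.0, 0.0]

Note that the commit's q[s][a] = 10.0 on a simulated win pins the terminal transition to a large fixed value instead of letting repeated updates converge to it.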
@@ -108,7 +108,7 @@ def epsilon_greedy(q, s, epsilon=0.025):
    return a

-def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
+def max_q(q, s_new, labyrinth, depth=0, max_depth=1):
    """Calculate Q-values for all possible actions in state s_new and return the maximum"""
    q_max = 0
    for a in range(4):
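Only the head of max_q appears in the diff; the change drops its default lookahead depth from 2 to 1, which makes the bootstrap target cheaper (fewer simulated moves per update) but more myopic. Here is a self-contained sketch of a depth-limited lookahead consistent with that signature; simulate_action is a hypothetical stand-in for rl.take_action (its shape inferred from the first hunk), and GAMMA's value is assumed.

    from collections import defaultdict

    GAMMA = 0.9  # discount factor; value assumed (the diff only shows the name)

    def simulate_action(s, a, labyrinth):
        # Hypothetical stand-in for rl.take_action: apply action a to state s
        # on a throwaway maze and return (s_new, r, maze).
        dx, dy = [(-1, 0), (1, 0), (0, -1), (0, 1)][a]  # 0=Left 1=Right 2=Up 3=Down
        x, y = s[0] + dx, s[1] + dy
        if labyrinth[y][x] == "#":  # bumped a wall: stay in place
            x, y = s[0], s[1]
        r = 1.0 if labyrinth[y][x] == "." else -1.0  # mirrors calc_reward below
        labyrinth[y] = labyrinth[y][:x] + " " + labyrinth[y][x + 1:]
        return (x, y) + s[2:], r, labyrinth

    def max_q_sketch(q, s, labyrinth, depth=0, max_depth=1):
        """Depth-limited estimate of max over a of Q(s, a)."""
        if depth >= max_depth:
            return max(q[s])  # base case: plain one-step maximum
        best = float("-inf")
        for a in range(4):
            s2, r, lab2 = simulate_action(s, a, list(labyrinth))
            best = max(best, r + GAMMA * max_q_sketch(q, s2, lab2, depth + 1, max_depth))
        return best

    q = defaultdict(lambda: [0.0] * 4)
    maze = ["#####", "#...#", "#####"]
    print(max_q_sketch(q, (1, 1, 3, 1), maze))  # 1.0: one simulated step finds a cookie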
@@ -133,7 +133,7 @@ def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
 def calc_reward(s_new, labyrinth):

-    # Reward for cookies
+    # Reward for cookies; punish for not eating cookies
     r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0

     return r
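calc_reward indexes the maze as labyrinth[s_new[1]][s_new[0]], i.e. row y first, then column x. A quick usage check, with the function copied from the hunk above and a made-up three-row maze (not the game's labyrinth):

    def calc_reward(s_new, labyrinth):
        # Reward for cookies; punish for not eating cookies
        return 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0

    maze = [
        "#####",
        "#.. #",
        "#####",
    ]
    print(calc_reward((1, 1, 3, 1), maze))  # 1.0  -> cookie at x=1, y=1
    print(calc_reward((3, 1, 1, 1), maze))  # -1.0 -> empty square costs a step

The -1.0 on empty squares is what pushes the greedy policy toward the nearest remaining cookie rather than idling in place.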