commencing with actual reinforcement learning
parent
48a351518d
commit
a891d51ca9
|
|
@ -135,15 +135,27 @@ def main():
|
||||||
q = rl.q_init()
|
q = rl.q_init()
|
||||||
|
|
||||||
# Game loop
|
# Game loop
|
||||||
|
not_won = True
|
||||||
running = True
|
running = True
|
||||||
iter = 0
|
iter = 0
|
||||||
while running:
|
while not_won:
|
||||||
screen.fill(BLACK)
|
|
||||||
iter = iter + 1
|
labyrinth = [
|
||||||
|
"##########",
|
||||||
|
"#........#",
|
||||||
|
"#.##..##.#",
|
||||||
|
"#........#",
|
||||||
|
"##########"
|
||||||
|
]
|
||||||
|
|
||||||
# Handle events
|
# Handle events
|
||||||
for event in pygame.event.get():
|
for event in pygame.event.get():
|
||||||
if event.type == pygame.QUIT:
|
if event.type == pygame.QUIT:
|
||||||
running = False
|
not_won = False
|
||||||
|
|
||||||
|
while running:
|
||||||
|
screen.fill(BLACK)
|
||||||
|
iter = iter + 1
|
||||||
|
|
||||||
# Check for collisions (game over if ghost catches pacman)
|
# Check for collisions (game over if ghost catches pacman)
|
||||||
if pacman.x == ghost.x and pacman.y == ghost.y:
|
if pacman.x == ghost.x and pacman.y == ghost.y:
|
||||||
|
|
@ -158,38 +170,21 @@ def main():
|
||||||
if all("." not in row for row in labyrinth):
|
if all("." not in row for row in labyrinth):
|
||||||
print("You Win! Pacman ate all the cookies.")
|
print("You Win! Pacman ate all the cookies.")
|
||||||
running = False
|
running = False
|
||||||
|
not_won = False
|
||||||
|
|
||||||
|
# Start of my code ######################################################################
|
||||||
|
|
||||||
# Start of my code
|
|
||||||
labyrinth_copy = [list(row) for row in labyrinth] # Create proper deep copy
|
labyrinth_copy = [list(row) for row in labyrinth] # Create proper deep copy
|
||||||
s_not_terminal = True
|
|
||||||
a = None
|
|
||||||
iteration = 0
|
|
||||||
max_iterations = 50 # Prevent infinite loops
|
|
||||||
|
|
||||||
while s_not_terminal and iteration < max_iterations:
|
|
||||||
iteration += 1
|
|
||||||
# print("s: " + str(s)) # debugging
|
|
||||||
# print("q[s] before action: " + str(q[s])) # debugging
|
|
||||||
|
|
||||||
a = rl.epsilon_greedy(q, s) # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down
|
a = rl.epsilon_greedy(q, s) # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down
|
||||||
s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy)
|
s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy)
|
||||||
|
|
||||||
q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a])
|
q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a])
|
||||||
|
|
||||||
if all("." not in row for row in labyrinth_copy):
|
|
||||||
s_not_terminal = False
|
|
||||||
q[s][a] = 10.0
|
|
||||||
print("There is a parallel universe with victory")
|
|
||||||
|
|
||||||
|
|
||||||
s = s_new
|
s = s_new
|
||||||
time.sleep(0.025)
|
|
||||||
|
|
||||||
if iteration >= max_iterations:
|
# At least indicate where the next dot is, without the ghost in state s.
|
||||||
print(f"Max iterations reached for this loop ")
|
# After everything was calculated; just move Pacman according to highest action a in Q-Table q.
|
||||||
|
|
||||||
s = (pacman.x, pacman.y, ghost.x, ghost.y) # as a tuple so the state becomes hashable
|
|
||||||
a = rl.epsilon_greedy(q, s) # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down
|
|
||||||
move_pacman(pacman, a)
|
move_pacman(pacman, a)
|
||||||
|
|
||||||
if iter%3==0:
|
if iter%3==0:
|
||||||
|
|
|
||||||
|
|
@ -108,7 +108,7 @@ def epsilon_greedy(q, s, epsilon=0.025):
|
||||||
|
|
||||||
return a
|
return a
|
||||||
|
|
||||||
def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
|
def max_q(q, s_new, labyrinth, depth=0, max_depth=1):
|
||||||
"""Calculate Q-values for all possible actions in state s_new and return the maximum"""
|
"""Calculate Q-values for all possible actions in state s_new and return the maximum"""
|
||||||
q_max = 0
|
q_max = 0
|
||||||
for a in range(4):
|
for a in range(4):
|
||||||
|
|
@ -133,7 +133,7 @@ def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
|
||||||
|
|
||||||
def calc_reward(s_new, labyrinth):
|
def calc_reward(s_new, labyrinth):
|
||||||
|
|
||||||
# Reward for cookies
|
# Reward for cookies; punish for not eating cookies
|
||||||
r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
|
r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
|
||||||
|
|
||||||
return r
|
return r
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue