diff --git a/04_pacman_rl/pacman.py b/04_pacman_rl/pacman.py index 492c32a..1064c9e 100644 --- a/04_pacman_rl/pacman.py +++ b/04_pacman_rl/pacman.py @@ -135,77 +135,72 @@ def main(): q = rl.q_init() # Game loop + not_won = True running = True iter = 0 - while running: - screen.fill(BLACK) - iter = iter + 1 + while not_won: + + labyrinth = [ + "##########", + "#........#", + "#.##..##.#", + "#........#", + "##########" + ] + # Handle events for event in pygame.event.get(): if event.type == pygame.QUIT: + not_won = False + + while running: + screen.fill(BLACK) + iter = iter + 1 + + # Check for collisions (game over if ghost catches pacman) + if pacman.x == ghost.x and pacman.y == ghost.y: + print("Game Over! The ghost caught Pacman.") running = False - # Check for collisions (game over if ghost catches pacman) - if pacman.x == ghost.x and pacman.y == ghost.y: - print("Game Over! The ghost caught Pacman.") - running = False + # Eat cookies + if labyrinth[pacman.y][pacman.x] == ".": + labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:] - # Eat cookies - if labyrinth[pacman.y][pacman.x] == ".": - labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:] + # Check if all cookies are eaten (game over) + if all("." not in row for row in labyrinth): + print("You Win! Pacman ate all the cookies.") + running = False + not_won = False - # Check if all cookies are eaten (game over) - if all("." not in row for row in labyrinth): - print("You Win! Pacman ate all the cookies.") - running = False - - # Start of my code - labyrinth_copy = [list(row) for row in labyrinth] # Create proper deep copy - s_not_terminal = True - a = None - iteration = 0 - max_iterations = 50 # Prevent infinite loops - - while s_not_terminal and iteration < max_iterations: - iteration += 1 - # print("s: " + str(s)) # debugging - # print("q[s] before action: " + str(q[s])) # debugging + # Start of my code ###################################################################### + + labyrinth_copy = [list(row) for row in labyrinth] # Create proper deep copy a = rl.epsilon_greedy(q, s) # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy) q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a]) - if all("." not in row for row in labyrinth_copy): - s_not_terminal = False - q[s][a] = 10.0 - print("There is a parallel universe with victory") - - s = s_new - time.sleep(0.025) - - if iteration >= max_iterations: - print(f"Max iterations reached for this loop ") - - s = (pacman.x, pacman.y, ghost.x, ghost.y) # as a tuple so the state becomes hashable - a = rl.epsilon_greedy(q, s) # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down - move_pacman(pacman, a) - if iter%3==0: - # Ghost moves towards Pacman - ghost.move_towards_pacman(pacman) + # zumindest angeben wo der nächste punkt ist, ohne geist im zustand s. + # After everything was calculated; just move Pacman according to highest action a in Q-Table q. + move_pacman(pacman, a) - # Draw the labyrinth, pacman, and ghost - draw_labyrinth() - pacman.draw() - ghost.draw() + if iter%3==0: + # Ghost moves towards Pacman + ghost.move_towards_pacman(pacman) - # Update display - pygame.display.flip() + # Draw the labyrinth, pacman, and ghost + draw_labyrinth() + pacman.draw() + ghost.draw() - # Cap the frame rate - clock.tick(5) + # Update display + pygame.display.flip() + + # Cap the frame rate + clock.tick(5) pygame.quit() diff --git a/04_pacman_rl/reinforcement_learning.py b/04_pacman_rl/reinforcement_learning.py index a747b91..ee5c340 100644 --- a/04_pacman_rl/reinforcement_learning.py +++ b/04_pacman_rl/reinforcement_learning.py @@ -108,7 +108,7 @@ def epsilon_greedy(q, s, epsilon=0.025): return a -def max_q(q, s_new, labyrinth, depth=0, max_depth=2): +def max_q(q, s_new, labyrinth, depth=0, max_depth=1): """Calculate Q-values for all possible actions in state s_new and return the maximum""" q_max = 0 for a in range(4): @@ -133,7 +133,7 @@ def max_q(q, s_new, labyrinth, depth=0, max_depth=2): def calc_reward(s_new, labyrinth): - # Reward for cookies + # Reward for cookies; punish for not eating cookies r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0 return r