commencing with actual reinforcement learning

master
Ruben-FreddyLoafers 2025-12-01 15:16:34 +01:00
parent 48a351518d
commit a891d51ca9
2 changed files with 48 additions and 53 deletions

View File

@@ -135,15 +135,27 @@ def main():
     q = rl.q_init()
     # Game loop
+    not_won = True
     running = True
     iter = 0
-    while running:
-        screen.fill(BLACK)
-        iter = iter + 1
+    while not_won:
+        labyrinth = [
+            "##########",
+            "#........#",
+            "#.##..##.#",
+            "#........#",
+            "##########"
+        ]
         # Handle events
         for event in pygame.event.get():
             if event.type == pygame.QUIT:
-                running = False
+                not_won = False
+        while running:
+            screen.fill(BLACK)
+            iter = iter + 1
         # Check for collisions (game over if ghost catches pacman)
         if pacman.x == ghost.x and pacman.y == ghost.y:
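Review note: the hunk above splits the old single "while running:" frame loop into an outer "while not_won:" episode loop that rebuilds the labyrinth on every pass, with the frame loop nested inside it. A minimal self-contained sketch of that episode/frame split, with a cookie-eating stand-in in place of the real event handling and learning (which are not shown here):

def make_labyrinth():
    # Fresh mutable grid each episode, so eaten cookies (".") reappear.
    return [list(row) for row in (
        "##########",
        "#........#",
        "#.##..##.#",
        "#........#",
        "##########",
    )]

not_won = True
episodes = 0
while not_won:                    # outer loop: one episode per pass
    labyrinth = make_labyrinth()  # world is reset here, as in the diff
    episodes += 1
    running = True
    while running:                # inner loop: one frame/step per pass
        # Stand-in for the real step (events, learning, movement):
        # eat the first remaining cookie we can find.
        for row in labyrinth:
            if "." in row:
                row[row.index(".")] = " "
                break
        if all("." not in row for row in labyrinth):
            running = False       # episode over: all cookies eaten
            not_won = False       # stop once a winning episode happens
print("won after", episodes, "episode(s)")

One thing to watch in the real loop: the QUIT handler now sets not_won = False, but the inner "while running:" loop still has to terminate on its own before the outer condition is re-checked.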
@@ -158,38 +170,21 @@ def main():
         if all("." not in row for row in labyrinth):
             print("You Win! Pacman ate all the cookies.")
             running = False
+            not_won = False
-        # Start of my code
+        ######################################################################
+        # Start of my code
         labyrinth_copy = [list(row) for row in labyrinth]  # Create proper deep copy
-        s_not_terminal = True
-        a = None
-        iteration = 0
-        max_iterations = 50  # Prevent infinite loops
-        while s_not_terminal and iteration < max_iterations:
-            iteration += 1
-            # print("s: " + str(s))  # debugging
-            # print("q[s] before action: " + str(q[s]))  # debugging
         a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
         s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy)
         q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a])
-        if all("." not in row for row in labyrinth_copy):
-            s_not_terminal = False
-            q[s][a] = 10.0
-            print("There is a parallel universe with victory")
         s = s_new
-        time.sleep(0.025)
-        if iteration >= max_iterations:
-            print(f"Max iterations reached for this loop ")
+        # At least indicate where the next point is, without the ghost in state s.
+        # After everything was calculated, just move Pacman according to the highest-valued action a in the Q-table q.
+        s = (pacman.x, pacman.y, ghost.x, ghost.y)  # as a tuple so the state becomes hashable
+        a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
         move_pacman(pacman, a)
         if iter%3==0:
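The heart of the commit is the line q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a]): the standard tabular Q-learning update Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)), now run once per frame instead of inside the removed inner simulation loop, with states keyed as hashable tuples. A self-contained toy run of the same update rule on a one-dimensional corridor (the corridor, rewards, and constants are illustrative, not from this repo):

import random

ALPHA, GAMMA, EPSILON = 0.1, 0.9, 0.1
q = {}  # hashable state -> list of 4 action values, as in the diff

def q_values(s):
    # Lazily create the row for an unseen state.
    return q.setdefault(s, [0.0, 0.0, 0.0, 0.0])

def epsilon_greedy(s):
    # Explore with probability EPSILON, otherwise exploit.
    if random.random() < EPSILON:
        return random.randrange(4)
    vals = q_values(s)
    return vals.index(max(vals))

def take_action(s, a):
    # Illustrative corridor: positions 0..4, action 0 = left, 1 = right.
    s_new = min(4, max(0, s + (1 if a == 1 else -1 if a == 0 else 0)))
    r = 1.0 if s_new == 4 else -0.1  # cookie at the right end
    return s_new, r

for _ in range(300):  # episodes
    s = 0
    while s != 4:
        a = epsilon_greedy(s)
        s_new, r = take_action(s, a)
        # Same shape as the update line in the diff:
        q_values(s)[a] += ALPHA * (r + GAMMA * max(q_values(s_new)) - q_values(s)[a])
        s = s_new

print([round(max(q_values(s)), 2) for s in range(5)])

The printed per-state values should rise toward the goal end of the corridor, a quick sanity check that the update propagates reward backwards.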

View File

@@ -108,7 +108,7 @@ def epsilon_greedy(q, s, epsilon=0.025):
     return a
 
-def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
+def max_q(q, s_new, labyrinth, depth=0, max_depth=1):
     """Calculate Q-values for all possible actions in state s_new and return the maximum"""
     q_max = 0
     for a in range(4):
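Only the signature of max_q changes in this hunk (max_depth drops from 2 to 1); its body is not part of the diff. As a hedged sketch of what a depth-limited recursive maximum over actions could look like, with a hypothetical step() helper standing in for the repo's real transition logic:

GAMMA = 0.9

def step(s, a, labyrinth):
    # Hypothetical one-step transition: walls ("#") block movement.
    dx, dy = [(-1, 0), (1, 0), (0, -1), (0, 1)][a]  # 0=L, 1=R, 2=U, 3=D
    x, y = s[0] + dx, s[1] + dy
    return s if labyrinth[y][x] == "#" else (x, y)

def max_q(q, s_new, labyrinth, depth=0, max_depth=1):
    # Best value over the four actions, looking ahead at most max_depth steps.
    q_max = float("-inf")
    for a in range(4):
        value = q.get(s_new, [0.0] * 4)[a]
        if depth < max_depth:
            s_next = step(s_new, a, labyrinth)
            value += GAMMA * max_q(q, s_next, labyrinth, depth + 1, max_depth)
        q_max = max(q_max, value)
    return q_max

print(max_q({}, (1, 1), ["#####", "#...#", "#####"]))  # 0.0 on an empty table

Worth flagging in review: the visible q_max = 0 initializer floors the maximum at zero, which matters once Q-values go negative under the -1.0 step reward; starting from float("-inf") as in the sketch avoids that.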
@@ -133,7 +133,7 @@ def max_q(q, s_new, labyrinth, depth=0, max_depth=2):
 
 def calc_reward(s_new, labyrinth):
-    # Reward for cookies
+    # Reward for cookies; punish for not eating cookies
     r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
     return r
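calc_reward is fully visible: +1.0 for landing on a cookie, -1.0 for any other tile, matching the sharpened comment. A quick standalone check of the indexing convention (labyrinth[y][x], with s_new = (x, y)):

def calc_reward(s_new, labyrinth):
    # Reward for cookies; punish for not eating cookies (as in the diff).
    return 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0

maze = ["#####",
        "#.. #",
        "#####"]
print(calc_reward((1, 1), maze))  # 1.0  -> cookie at x=1, y=1
print(calc_reward((3, 1), maze))  # -1.0 -> empty floor

The flat -1.0 step cost is what makes shorter cookie routes score higher; without it the agent has no pressure to stop wandering.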