Assignment 4 started

master
Ruben-FreddyLoafers 2025-11-13 13:54:32 +01:00
parent 1df2ad190a
commit 469d1d1a47
2 changed files with 94 additions and 24 deletions


@@ -1,6 +1,7 @@
import pygame
import random
import math
import reinforcement_learning as rl
# Initialize pygame
pygame.init()
@@ -154,6 +155,25 @@ def main():
print("You Win! Pacman ate all the cookies.")
running = False
# Start of my code
alpha = 0.8  # learning rate
gamma = 0.9  # discount factor
s = (pacman.x, pacman.y, ghost.x, ghost.y)  # tuple, so it can key the Q-table
s_not_terminal = True
q = rl.q_init()  # note: better initialized once, before the game loop
while s_not_terminal:
    a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
    s_new, r = rl.take_action(s, a)
    # Q-learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    q[s][a] += alpha * (r + gamma * max(q[s_new]) - q[s][a])
    s = s_new
    # TODO: set s_not_terminal = False when the episode ends
    # (ghost catches pacman or all cookies are eaten)
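# A quick worked example of the update above (illustrative numbers,
# not from an actual run): with q[s][a] = 0.0, r = 1 and
# max(q[s_new]) = 0.5, the new estimate is
#   0.0 + 0.8 * (1 + 0.9 * 0.5 - 0.0) = 1.16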
# Draw the labyrinth, pacman, and ghost
draw_labyrinth()
pacman.draw()
@@ -168,27 +188,4 @@ def main():
pygame.quit()
if __name__ == "__main__":
main()
'''
Write a pacman game using pygame. The pacman should be a yellow circle, the ghost is a red square. The labyrinth is written as a string as follows:
"##########
#........#
#.##..##.#
#........#
##########"
The "." are cookies the pacman can eat (as Graphics small white circles). The "#" are walls (as graphics blue squares on a black background ). The ghost always tries to catch the pacman. Pacman as well as the ghost go one step in each game loop iteration. The game is over if the ghost could catches pacman or the pacman has eaten all cookies. Start your answer with "Shure, here is the full pacman code.
Now change the code that the following strings are the pixel of the ghost:
" ####
######
## # #
######
######
# # # "
'''
main()

reinforcement_learning.py (new file)

@@ -0,0 +1,73 @@
"""
Entwickeln Sie einen Reinforcement Learning (RL) Agenten, der in
einem minimalistischen Pacman-Spiel (bereitgestellt auf meiner
Homepage) effektiv Punkte sammelt, während er dem Geist
ausweicht und somit vermeidet gefressen zu werden.
"""
import random

import numpy as np
def q_init():
    """Fill every possible action in every state with an initial value."""
    # Configuration
    NUM_ACTIONS = 4
    INITIAL_Q_VALUE = 0.0  # neutral starting estimate for every action
    # (s1, s2) = pacman's (x, y) grid cell, (s3, s4) = ghost's (x, y)
    s1_range = range(1, 9)
    s2_range = range(1, 4)
    s3_range = range(1, 9)
    s4_range = range(1, 4)
    # in the middle row (y == 2) only columns 1, 4, 5 and 8 are open
    s_constrained_values = {1, 4, 5, 8}

    # The Q-table dictionary
    q_table = {}
    # Iterate through all possible combinations of s1, s2, s3, s4
    for s1 in s1_range:
        for s2 in s2_range:
            for s3 in s3_range:
                for s4 in s4_range:
                    # Skip impossible states (positions inside a wall)
                    if s2 == 2 and s1 not in s_constrained_values:
                        continue
                    if s4 == 2 and s3 not in s_constrained_values:
                        continue
                    # Map every valid state tuple to one Q-value per action
                    state_key = (s1, s2, s3, s4)
                    q_table[state_key] = [INITIAL_Q_VALUE] * NUM_ACTIONS

    print(f"Total number of valid states initialized: {len(q_table)}")  # debugging
    # print(list(q_table.items())[:5])  # Uncomment to see the first 5 entries
    return q_table
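# Minimal usage sketch (assumes this module is imported as rl, as in the
# game code above; the state values are made up for illustration):
#   q = q_init()              # 20 * 20 = 400 valid states for this maze
#   s = (1, 1, 8, 3)          # pacman at (1, 1), ghost at (8, 3)
#   a = epsilon_greedy(q, s)
#   s_new, r = take_action(s, a)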
def epsilon_greedy(q, s, epsilon=0.9):
    """
    Return which direction Pacman should move to.
    Epsilon-greedy: explore a random action with probability epsilon,
    otherwise exploit the best-valued action for state s. (A high
    epsilon like 0.9 means mostly exploration; it is usually decayed
    towards 0 over the course of training.)
    """
    if random.random() < epsilon:
        return random.randrange(len(q[s]))  # explore
    a_val = max(q[s])
    a = q[s].index(a_val)  # exploit: first action with the maximal value
    return a
def take_action(s, a):
    """Apply action a in state s; return the successor state and reward."""
    s_new = list(s)  # copy, so the caller's state tuple is not mutated
    if a == 0:    # left
        s_new[0] -= 1
    elif a == 1:  # right
        s_new[0] += 1
    elif a == 2:  # up (assuming y grows downward, as in pygame)
        s_new[1] -= 1
    elif a == 3:  # down
        s_new[1] += 1
    # Calculate the reward r
    # TODO: include whether there is a cookie on the field
    r = 0
    return tuple(s_new), r
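# One possible reward scheme for the TODO above (a sketch; the constants
# and the cookie_at() helper are illustrative assumptions, not part of
# the original code):
#   if (s_new[0], s_new[1]) == (s_new[2], s_new[3]):
#       r = -100   # caught by the ghost: terminal, large penalty
#   elif cookie_at(s_new[0], s_new[1]):
#       r = 10     # ate a cookie
#   else:
#       r = -1     # small step cost rewards shorter paths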