diff --git a/04_pacman_rl/pacman.py b/04_pacman_rl/pacman.py
index 1064c9e..9e3cf9a 100644
--- a/04_pacman_rl/pacman.py
+++ b/04_pacman_rl/pacman.py
@@ -1,8 +1,6 @@
 import pygame
-import random
 import math
 import reinforcement_learning as rl
-import time
 
 # Initialize pygame
 pygame.init()
@@ -125,19 +123,14 @@ def move_pacman(pacman, a):
 # Main game function
 def main():
     global labyrinth
-    clock = pygame.time.Clock()
-
-    # Initialize Pacman and Ghost positions
-    pacman = Pacman(1, 1)
-    ghost = Ghost(COLS - 2, ROWS - 2)
-
-    s = (pacman.x, pacman.y, ghost.x, ghost.y)  # as a tuple so the state becomes hashable
     q = rl.q_init()
+
+    clock = pygame.time.Clock()
 
     # Game loop
     not_won = True
-    running = True
-    iter = 0
+    outer_iter = 0
+
     while not_won:
         labyrinth = [
@@ -147,20 +140,30 @@ def main():
             "#........#",
             "##########"
         ]
+        running = True
+        iter = 0
+
+        # Initialize Pacman and Ghost positions
+        pacman = Pacman(1, 1)
+        ghost = Ghost(COLS - 2, ROWS - 2)
+        s = (pacman.x, pacman.y, ghost.x, ghost.y)  # as a tuple so the state becomes hashable
 
         # Handle events
         for event in pygame.event.get():
            if event.type == pygame.QUIT:
                 not_won = False
-
-        while running:
+
+        print(outer_iter)
+        while running or iter < 100:
             screen.fill(BLACK)
             iter = iter + 1
 
             # Check for collisions (game over if ghost catches pacman)
             if pacman.x == ghost.x and pacman.y == ghost.y:
                 print("Game Over! The ghost caught Pacman.")
+                outer_iter = outer_iter + 1
                 running = False
+                break
 
             # Eat cookies
             if labyrinth[pacman.y][pacman.x] == ".":
@@ -171,36 +174,42 @@
                 print("You Win! Pacman ate all the cookies.")
                 running = False
                 not_won = False
+                break
 
-            # Start of my code ######################################################################
-
-            labyrinth_copy = [list(row) for row in labyrinth]  # Create proper deep copy
+            # Q-Learning part ############################################################################
 
             a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down
 
-            s_new, r, labyrinth_copy = rl.take_action(s, a, labyrinth_copy)
+            s_new, r, labyrinth = rl.take_action(s, a, labyrinth)
+            # print(s)  # debugging
+            # print(q[s])  # debugging
 
-            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth_copy) - q[s][a])
+            q[s][a] += ALPHA * (r + GAMMA * rl.max_q(q, s_new, labyrinth) - q[s][a])
 
             s = s_new
 
-            # zumindest angeben wo der nächste punkt ist, ohne geist im zustand s.
-            # After everything was calculated; just move Pacman according to highest action a in Q-Table q.
+
             move_pacman(pacman, a)
 
-            if iter%3==0:
+            if iter % 3 == 0:
                 # Ghost moves towards Pacman
                 ghost.move_towards_pacman(pacman)
 
+            # Update state
+            s = (pacman.x, pacman.y, ghost.x, ghost.y)
+
+            # End of Q-Learning part ######################################################################
 
             # Draw the labyrinth, pacman, and ghost
             draw_labyrinth()
             pacman.draw()
             ghost.draw()
-
+
             # Update display
             pygame.display.flip()
-
+
             # Cap the frame rate
-            clock.tick(5)
+            # tick_speed = 100
+            tick_speed = 5 if outer_iter % 20 == 0 else 100
+            clock.tick(tick_speed)
 
     pygame.quit()
diff --git a/04_pacman_rl/reinforcement_learning.py b/04_pacman_rl/reinforcement_learning.py
index ee5c340..8b248a7 100644
--- a/04_pacman_rl/reinforcement_learning.py
+++ b/04_pacman_rl/reinforcement_learning.py
@@ -6,7 +6,7 @@ ausweicht und somit vermeidet gefressen zu werden.
""" import numpy as np -from collections import deque +import random GAMMA = 0.90 ALPHA = 0.2 @@ -16,7 +16,8 @@ def q_init(): # Configuration NUM_ACTIONS = 4 - INITIAL_Q_VALUE = 2.0 # Small value for initialization + RAND_Q_VALUES = [random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)] + # print(RAND_Q_VALUES) # debugging # Labyrinth layout labyrinth = [ @@ -50,7 +51,7 @@ def q_init(): # Assign all possible states a tuple of values state_key = (s0, s1, s2, s3) - q_values = [INITIAL_Q_VALUE] * NUM_ACTIONS + q_values = RAND_Q_VALUES.copy() # Create a copy for each state # Check which actions are blocked by walls # Action 0: move left (s0 - 1) @@ -72,7 +73,7 @@ def q_init(): # print(list(q_table.items())[:5]) # Uncomment to see the first 5 entries return q_table -def epsilon_greedy(q, s, epsilon=0.025): +def epsilon_greedy(q, s, epsilon=0.1): """ Return which direction Pacman should move to using epsilon-greedy algorithm With probability epsilon, choose a random action. Otherwise choose the greedy action. @@ -102,44 +103,15 @@ def epsilon_greedy(q, s, epsilon=0.025): elif a == 3: # down s_test[1] += 1 - # Check if this action would cause collision - if s_test[0] == s[2] and s_test[1] == s[3]: - continue # Skip this action, try next highest Q-value - return a -def max_q(q, s_new, labyrinth, depth=0, max_depth=1): - """Calculate Q-values for all possible actions in state s_new and return the maximum""" - q_max = 0 - for a in range(4): - if q[s_new][a] != None and s_new in q: # Only consider valid (non-blocked) actions - s_test = tuple(list(s_new)[:2] + [s_new[2], s_new[3]]) # Keep ghost position - s_test_list = list(s_test) - if a == 0: # left - s_test_list[0] -= 1 - elif a == 1: # right - s_test_list[0] += 1 - elif a == 2: # up - s_test_list[1] -= 1 - elif a == 3: # down - s_test_list[1] += 1 - s_test = tuple(s_test_list) - - if s_test in q and depth < max_depth: - q[s_new][a] += ALPHA * (calc_reward(s_test, labyrinth) + GAMMA * max_q(q, s_test, labyrinth, depth + 1, max_depth) - q[s_new][a]) - q_max = max(q_max, q[s_new][a]) - - return q_max - def calc_reward(s_new, labyrinth): - # Reward for cookies; punish for not eating cookies r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0 return r def take_action(s, a, labyrinth): - # Use the labyrinth parameter (already updated from previous iterations) s_new = list(s) if a == 0: # left s_new[0] -= 1 @@ -150,10 +122,30 @@ def take_action(s, a, labyrinth): if a == 3: # down s_new[1] += 1 + # Check if action caused gameover (Pacman caught by ghost) + if s_new[0] == s_new[2] and s_new[1] == s_new[3]: + r = -100.0 + print("Invalid action type shit") + else: + r = calc_reward(tuple(s_new), labyrinth) + # Mark new Pacman position as eaten (if it's a cookie) if labyrinth[s_new[1]][s_new[0]] == ".": - labyrinth[s_new[1]][s_new[0]] = " " + # Convert string row to list, modify it, then convert back to string + row_list = list(labyrinth[s_new[1]]) + row_list[s_new[0]] = " " + labyrinth[s_new[1]] = "".join(row_list) - r = calc_reward(tuple(s_new), labyrinth) + return tuple(s_new), r, labyrinth + +def max_q(q, s_new, labyrinth): + """Return the maximum Q-value among valid actions in state s_new""" + if s_new not in q: + return 0 - return tuple(s_new), r, labyrinth \ No newline at end of file + q_max = 0 + for a in range(4): + if q[s_new][a] is not None: # Only consider valid (non-blocked) actions + q_max = max(q_max, q[s_new][a]) + + return q_max \ No newline at end of file