finally fucking did pacman

master
Ruben-FreddyLoafers 2025-12-09 11:05:37 +01:00
parent 3fb0afd80e
commit f623d1375c
3 changed files with 32 additions and 30 deletions

4
.gitignore vendored
View File

@ -2,4 +2,6 @@
.vscode .vscode
__pycache__/ __pycache__/
*.pyc *.pyc
q_table.json

View File

@ -213,7 +213,7 @@ def train(q, num_iterations=10000):
elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#": elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
ghost_y -= 1 ghost_y -= 1
s = (pacman_x, pacman_y, ghost_x, ghost_y) s = (pacman_x, pacman_y, ghost_x, ghost_y)
outer_iter += 1 outer_iter += 1
if outer_iter % 100 == 0: if outer_iter % 100 == 0:
@ -276,7 +276,8 @@ def visualize(q, num_games=10):
if iter % 3 == 0: if iter % 3 == 0:
ghost.move_towards_pacman(pacman) ghost.move_towards_pacman(pacman)
s = (pacman.x, pacman.y, ghost.x, ghost.y)
s = (pacman.x, pacman.y, ghost.x, ghost.y)
# Draw # Draw
draw_labyrinth() draw_labyrinth()
@ -284,7 +285,7 @@ def visualize(q, num_games=10):
ghost.draw() ghost.draw()
pygame.display.flip() pygame.display.flip()
tick_speed = 20 # if game_num % 20 == 0 else 100 tick_speed = 10 # if game_num % 20 == 0 else 100
clock.tick(tick_speed) clock.tick(tick_speed)
# Main function # Main function
@ -297,7 +298,7 @@ def main():
q = rl.q_init() q = rl.q_init()
print("Training for 10000 iterations...") print("Training for 10000 iterations...")
q = train(q, num_iterations=20000) q = train(q, num_iterations=10000)
print("\nTraining complete! Starting visualization...") print("\nTraining complete! Starting visualization...")
visualize(q, num_games=10) visualize(q, num_games=10)

View File

@ -15,7 +15,6 @@ def q_init():
""" Fill every possible action in every state with a small value for initialization""" """ Fill every possible action in every state with a small value for initialization"""
# Configuration # Configuration
NUM_ACTIONS = 4
RAND_Q_VALUES = [random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)] RAND_Q_VALUES = [random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
# print(RAND_Q_VALUES) # debugging # print(RAND_Q_VALUES) # debugging
@ -79,35 +78,35 @@ def epsilon_greedy(q, s, epsilon=0.1):
With probability epsilon, choose a random action. Otherwise choose the greedy action. With probability epsilon, choose a random action. Otherwise choose the greedy action.
Avoids actions that would result in collision with ghost. Avoids actions that would result in collision with ghost.
""" """
if np.random.random() < epsilon: # if np.random.random() < epsilon:
# Explore: choose random action (excluding blocked actions with Q=0) # # Explore: choose random action (excluding blocked actions with Q=0)
valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None] # valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None]
return np.random.choice(valid_actions) # return np.random.choice(valid_actions)
else: # else:
# Get all valid (non-blocked) actions with their Q-values # Get all valid (non-blocked) actions with their Q-values
valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None] valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None]
# Sort by Q-value in descending order
valid_actions.sort(key=lambda x: x[1], reverse=True)
# Try each action starting from highest Q-value
for a, q_val in valid_actions:
s_test = list(s)
if a == 0: # left
s_test[0] -= 1
elif a == 1: # right
s_test[0] += 1
elif a == 2: # up
s_test[1] -= 1
elif a == 3: # down
s_test[1] += 1
# Sort by Q-value in descending order return a
valid_actions.sort(key=lambda x: x[1], reverse=True)
# Try each action starting from highest Q-value
for a, q_val in valid_actions:
s_test = list(s)
if a == 0: # left
s_test[0] -= 1
elif a == 1: # right
s_test[0] += 1
elif a == 2: # up
s_test[1] -= 1
elif a == 3: # down
s_test[1] += 1
return a
def calc_reward(s_new, labyrinth): def calc_reward(s_new, labyrinth):
# Reward for cookies; punish for not eating cookies # Reward for cookies; punish for not eating cookies
r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0 r = 2.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
return r return r