Finally finished implementing Pac-Man

master
Ruben-FreddyLoafers 2025-12-09 11:05:37 +01:00
parent 3fb0afd80e
commit f623d1375c
3 changed files with 32 additions and 30 deletions

4
.gitignore vendored
View File

@ -2,4 +2,6 @@
.vscode
__pycache__/
*.pyc
*.pyc
q_table.json

View File

@ -213,7 +213,7 @@ def train(q, num_iterations=10000):
elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
ghost_y -= 1
s = (pacman_x, pacman_y, ghost_x, ghost_y)
s = (pacman_x, pacman_y, ghost_x, ghost_y)
outer_iter += 1
if outer_iter % 100 == 0:
@ -276,7 +276,8 @@ def visualize(q, num_games=10):
if iter % 3 == 0:
ghost.move_towards_pacman(pacman)
s = (pacman.x, pacman.y, ghost.x, ghost.y)
s = (pacman.x, pacman.y, ghost.x, ghost.y)
# Draw
draw_labyrinth()
@ -284,7 +285,7 @@ def visualize(q, num_games=10):
ghost.draw()
pygame.display.flip()
tick_speed = 20 # if game_num % 20 == 0 else 100
tick_speed = 10 # if game_num % 20 == 0 else 100
clock.tick(tick_speed)
# Main function
@ -297,7 +298,7 @@ def main():
q = rl.q_init()
print("Training for 10000 iterations...")
q = train(q, num_iterations=20000)
q = train(q, num_iterations=10000)
print("\nTraining complete! Starting visualization...")
visualize(q, num_games=10)

View File

@ -15,7 +15,6 @@ def q_init():
""" Fill every possible action in every state with a small value for initialization"""
# Configuration
NUM_ACTIONS = 4
RAND_Q_VALUES = [random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
# print(RAND_Q_VALUES) # debugging
@ -79,35 +78,35 @@ def epsilon_greedy(q, s, epsilon=0.1):
With probability epsilon, choose a random action. Otherwise choose the greedy action.
Avoids actions that would result in collision with ghost.
"""
if np.random.random() < epsilon:
# Explore: choose random action (excluding blocked actions with Q=0)
valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None]
return np.random.choice(valid_actions)
# if np.random.random() < epsilon:
# # Explore: choose random action (excluding blocked actions with Q=0)
# valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None]
# return np.random.choice(valid_actions)
else:
# Get all valid (non-blocked) actions with their Q-values
valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None]
# else:
# Get all valid (non-blocked) actions with their Q-values
valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None]
# Sort by Q-value in descending order
valid_actions.sort(key=lambda x: x[1], reverse=True)
# Try each action starting from highest Q-value
for a, q_val in valid_actions:
s_test = list(s)
if a == 0: # left
s_test[0] -= 1
elif a == 1: # right
s_test[0] += 1
elif a == 2: # up
s_test[1] -= 1
elif a == 3: # down
s_test[1] += 1
# Sort by Q-value in descending order
valid_actions.sort(key=lambda x: x[1], reverse=True)
# Try each action starting from highest Q-value
for a, q_val in valid_actions:
s_test = list(s)
if a == 0: # left
s_test[0] -= 1
elif a == 1: # right
s_test[0] += 1
elif a == 2: # up
s_test[1] -= 1
elif a == 3: # down
s_test[1] += 1
return a
return a
def calc_reward(s_new, labyrinth):
# Reward for cookies; punish for not eating cookies
r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
r = 2.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
return r