finally fucking did pacman
parent
3fb0afd80e
commit
f623d1375c
|
|
@ -3,3 +3,5 @@
|
||||||
|
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
|
|
||||||
|
q_table.json
|
||||||
|
|
@ -213,7 +213,7 @@ def train(q, num_iterations=10000):
|
||||||
elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
|
elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
|
||||||
ghost_y -= 1
|
ghost_y -= 1
|
||||||
|
|
||||||
s = (pacman_x, pacman_y, ghost_x, ghost_y)
|
s = (pacman_x, pacman_y, ghost_x, ghost_y)
|
||||||
|
|
||||||
outer_iter += 1
|
outer_iter += 1
|
||||||
if outer_iter % 100 == 0:
|
if outer_iter % 100 == 0:
|
||||||
|
|
@ -276,7 +276,8 @@ def visualize(q, num_games=10):
|
||||||
|
|
||||||
if iter % 3 == 0:
|
if iter % 3 == 0:
|
||||||
ghost.move_towards_pacman(pacman)
|
ghost.move_towards_pacman(pacman)
|
||||||
s = (pacman.x, pacman.y, ghost.x, ghost.y)
|
|
||||||
|
s = (pacman.x, pacman.y, ghost.x, ghost.y)
|
||||||
|
|
||||||
# Draw
|
# Draw
|
||||||
draw_labyrinth()
|
draw_labyrinth()
|
||||||
|
|
@ -284,7 +285,7 @@ def visualize(q, num_games=10):
|
||||||
ghost.draw()
|
ghost.draw()
|
||||||
pygame.display.flip()
|
pygame.display.flip()
|
||||||
|
|
||||||
tick_speed = 20 # if game_num % 20 == 0 else 100
|
tick_speed = 10 # if game_num % 20 == 0 else 100
|
||||||
clock.tick(tick_speed)
|
clock.tick(tick_speed)
|
||||||
|
|
||||||
# Main function
|
# Main function
|
||||||
|
|
@ -297,7 +298,7 @@ def main():
|
||||||
q = rl.q_init()
|
q = rl.q_init()
|
||||||
|
|
||||||
print("Training for 10000 iterations...")
|
print("Training for 10000 iterations...")
|
||||||
q = train(q, num_iterations=20000)
|
q = train(q, num_iterations=10000)
|
||||||
|
|
||||||
print("\nTraining complete! Starting visualization...")
|
print("\nTraining complete! Starting visualization...")
|
||||||
visualize(q, num_games=10)
|
visualize(q, num_games=10)
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,6 @@ def q_init():
|
||||||
""" Fill every possible action in every state with a small value for initialization"""
|
""" Fill every possible action in every state with a small value for initialization"""
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
NUM_ACTIONS = 4
|
|
||||||
RAND_Q_VALUES = [random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
|
RAND_Q_VALUES = [random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
|
||||||
# print(RAND_Q_VALUES) # debugging
|
# print(RAND_Q_VALUES) # debugging
|
||||||
|
|
||||||
|
|
@ -79,35 +78,35 @@ def epsilon_greedy(q, s, epsilon=0.1):
|
||||||
With probability epsilon, choose a random action. Otherwise choose the greedy action.
|
With probability epsilon, choose a random action. Otherwise choose the greedy action.
|
||||||
Avoids actions that would result in collision with ghost.
|
Avoids actions that would result in collision with ghost.
|
||||||
"""
|
"""
|
||||||
if np.random.random() < epsilon:
|
# if np.random.random() < epsilon:
|
||||||
# Explore: choose random action (excluding blocked actions with Q=0)
|
# # Explore: choose random action (excluding blocked actions with Q=0)
|
||||||
valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None]
|
# valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None]
|
||||||
return np.random.choice(valid_actions)
|
# return np.random.choice(valid_actions)
|
||||||
|
|
||||||
else:
|
# else:
|
||||||
# Get all valid (non-blocked) actions with their Q-values
|
# Get all valid (non-blocked) actions with their Q-values
|
||||||
valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None]
|
valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None]
|
||||||
|
|
||||||
# Sort by Q-value in descending order
|
# Sort by Q-value in descending order
|
||||||
valid_actions.sort(key=lambda x: x[1], reverse=True)
|
valid_actions.sort(key=lambda x: x[1], reverse=True)
|
||||||
|
|
||||||
# Try each action starting from highest Q-value
|
# Try each action starting from highest Q-value
|
||||||
for a, q_val in valid_actions:
|
for a, q_val in valid_actions:
|
||||||
s_test = list(s)
|
s_test = list(s)
|
||||||
if a == 0: # left
|
if a == 0: # left
|
||||||
s_test[0] -= 1
|
s_test[0] -= 1
|
||||||
elif a == 1: # right
|
elif a == 1: # right
|
||||||
s_test[0] += 1
|
s_test[0] += 1
|
||||||
elif a == 2: # up
|
elif a == 2: # up
|
||||||
s_test[1] -= 1
|
s_test[1] -= 1
|
||||||
elif a == 3: # down
|
elif a == 3: # down
|
||||||
s_test[1] += 1
|
s_test[1] += 1
|
||||||
|
|
||||||
return a
|
return a
|
||||||
|
|
||||||
def calc_reward(s_new, labyrinth):
|
def calc_reward(s_new, labyrinth):
|
||||||
# Reward for cookies; punish for not eating cookies
|
# Reward for cookies; punish for not eating cookies
|
||||||
r = 1.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
|
r = 2.0 if labyrinth[s_new[1]][s_new[0]] == "." else -1.0
|
||||||
|
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue