finally fucking did pacman
parent
3fb0afd80e
commit
f623d1375c
|
|
@ -2,4 +2,6 @@
|
|||
.vscode
|
||||
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyc
|
||||
|
||||
q_table.json
|
||||
|
|
@ -213,7 +213,7 @@ def train(q, num_iterations=10000):
|
|||
elif ghost_y > pacman_y and labyrinth[ghost_y - 1][ghost_x] != "#":
|
||||
ghost_y -= 1
|
||||
|
||||
s = (pacman_x, pacman_y, ghost_x, ghost_y)
|
||||
s = (pacman_x, pacman_y, ghost_x, ghost_y)
|
||||
|
||||
outer_iter += 1
|
||||
if outer_iter % 100 == 0:
|
||||
|
|
@ -276,7 +276,8 @@ def visualize(q, num_games=10):
|
|||
|
||||
if iter % 3 == 0:
|
||||
ghost.move_towards_pacman(pacman)
|
||||
s = (pacman.x, pacman.y, ghost.x, ghost.y)
|
||||
|
||||
s = (pacman.x, pacman.y, ghost.x, ghost.y)
|
||||
|
||||
# Draw
|
||||
draw_labyrinth()
|
||||
|
|
@ -284,7 +285,7 @@ def visualize(q, num_games=10):
|
|||
ghost.draw()
|
||||
pygame.display.flip()
|
||||
|
||||
tick_speed = 20 # if game_num % 20 == 0 else 100
|
||||
tick_speed = 10 # if game_num % 20 == 0 else 100
|
||||
clock.tick(tick_speed)
|
||||
|
||||
# Main function
|
||||
|
|
@ -297,7 +298,7 @@ def main():
|
|||
q = rl.q_init()
|
||||
|
||||
print("Training for 10000 iterations...")
|
||||
q = train(q, num_iterations=20000)
|
||||
q = train(q, num_iterations=10000)
|
||||
|
||||
print("\nTraining complete! Starting visualization...")
|
||||
visualize(q, num_games=10)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ def q_init():
|
|||
""" Fill every possible action in every state with a small value for initialization"""
|
||||
|
||||
# Configuration
|
||||
NUM_ACTIONS = 4
|
||||
RAND_Q_VALUES = [random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
|
||||
# print(RAND_Q_VALUES) # debugging
|
||||
|
||||
|
|
@ -79,35 +78,35 @@ def epsilon_greedy(q, s, epsilon=0.1):
|
|||
With probability epsilon, choose a random action. Otherwise choose the greedy action.
|
||||
Avoids actions that would result in collision with ghost.
|
||||
"""
|
||||
if np.random.random() < epsilon:
|
||||
# Explore: choose a random action (excluding blocked actions, whose Q-value is None)
|
||||
valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None]
|
||||
return np.random.choice(valid_actions)
|
||||
# if np.random.random() < epsilon:
|
||||
# # Explore: choose a random action (excluding blocked actions, whose Q-value is None)
|
||||
# valid_actions = [i for i in range(len(q[s])) if q[s][i] is not None]
|
||||
# return np.random.choice(valid_actions)
|
||||
|
||||
else:
|
||||
# Get all valid (non-blocked) actions with their Q-values
|
||||
valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None]
|
||||
# else:
|
||||
# Get all valid (non-blocked) actions with their Q-values
|
||||
valid_actions = [(i, q[s][i]) for i in range(len(q[s])) if q[s][i] is not None]
|
||||
|
||||
# Sort by Q-value in descending order
|
||||
valid_actions.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
# Try each action starting from highest Q-value
|
||||
for a, q_val in valid_actions:
|
||||
s_test = list(s)
|
||||
if a == 0: # left
|
||||
s_test[0] -= 1
|
||||
elif a == 1: # right
|
||||
s_test[0] += 1
|
||||
elif a == 2: # up
|
||||
s_test[1] -= 1
|
||||
elif a == 3: # down
|
||||
s_test[1] += 1
|
||||
|
||||
# Sort by Q-value in descending order
|
||||
valid_actions.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
# Try each action starting from highest Q-value
|
||||
for a, q_val in valid_actions:
|
||||
s_test = list(s)
|
||||
if a == 0: # left
|
||||
s_test[0] -= 1
|
||||
elif a == 1: # right
|
||||
s_test[0] += 1
|
||||
elif a == 2: # up
|
||||
s_test[1] -= 1
|
||||
elif a == 3: # down
|
||||
s_test[1] += 1
|
||||
|
||||
return a
|
||||
return a
|
||||
|
||||
def calc_reward(s_new, labyrinth):
    """Return the reward for entering the cell described by ``s_new``.

    Args:
        s_new: Indexable state; ``s_new[0]`` is used as the column (x) and
            ``s_new[1]`` as the row (y) of the cell to inspect. Any further
            items are ignored here.
        labyrinth: Grid indexed as ``labyrinth[y][x]``; a ``"."`` cell is
            treated as holding a cookie.

    Returns:
        ``2.0`` when the target cell holds a cookie, otherwise ``-1.0``.
    """
    x, y = s_new[0], s_new[1]
    # Reward for cookies; punish for not eating cookies.
    return 2.0 if labyrinth[y][x] == "." else -1.0
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue