Assignment 4 started

master
Ruben-FreddyLoafers 2025-11-13 13:54:32 +01:00
parent 1df2ad190a
commit 469d1d1a47
2 changed files with 94 additions and 24 deletions


@@ -1,6 +1,7 @@
import pygame
import random
import math
import reinforcement_learning as rl
# Initialize pygame
pygame.init()
@@ -154,6 +155,25 @@ def main():
print("You Win! Pacman ate all the cookies.")
running = False
# Start of my code
alpha = 0.8  # learning rate
gamma = 0.9  # discount factor
s = (pacman.x, pacman.y, ghost.x, ghost.y)  # tuple, so it can key the Q-table
s_not_terminal = True
q = rl.q_init()  # note: better initialized once, before the game loop
while s_not_terminal:
    a = rl.epsilon_greedy(q, s)  # 0 = Left; 1 = Right; 2 = Up; 3 = Down
    s_new, r = rl.take_action(s, a)
    # Q-learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    q[s][a] += alpha * (r + gamma * max(q[s_new]) - q[s][a])
    s = s_new
    # TODO: set s_not_terminal = False when the episode ends
    # (ghost catches pacman or all cookies are eaten)
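# A quick worked example of the update above (illustrative numbers,
# not from an actual run): with q[s][a] = 0.0, r = 1 and
# max(q[s_new]) = 0.5, the new estimate is
#   0.0 + 0.8 * (1 + 0.9 * 0.5 - 0.0) = 1.16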
# Draw the labyrinth, pacman, and ghost
draw_labyrinth()
pacman.draw()
@@ -168,27 +188,4 @@ def main():
pygame.quit()
if __name__ == "__main__":
main()
'''
Write a pacman game using pygame. The pacman should be a yellow circle, the ghost is a red square. The labyrinth is written as a string as follows:
"##########
#........#
#.##..##.#
#........#
##########"
The "." are cookies the pacman can eat (as Graphics small white circles). The "#" are walls (as graphics blue squares on a black background ). The ghost always tries to catch the pacman. Pacman as well as the ghost go one step in each game loop iteration. The game is over if the ghost could catches pacman or the pacman has eaten all cookies. Start your answer with "Shure, here is the full pacman code.
Now change the code that the following strings are the pixel of the ghost:
" ####
######
## # #
######
######
# # # "
'''
main()

reinforcement_learning.py (new file)

@@ -0,0 +1,73 @@
"""
Entwickeln Sie einen Reinforcement Learning (RL) Agenten, der in
einem minimalistischen Pacman-Spiel (bereitgestellt auf meiner
Homepage) effektiv Punkte sammelt, während er dem Geist
ausweicht und somit vermeidet gefressen zu werden.
"""
import random

import numpy as np
def q_init():
    """Fill every possible action in every state with an initial value."""
    # Configuration
    NUM_ACTIONS = 4
    INITIAL_Q_VALUE = 0.0  # neutral starting estimate for every action
    # (s1, s2) = pacman's (x, y) grid cell, (s3, s4) = ghost's (x, y)
    s1_range = range(1, 9)
    s2_range = range(1, 4)
    s3_range = range(1, 9)
    s4_range = range(1, 4)
    # in the middle row (y == 2) only columns 1, 4, 5 and 8 are open
    s_constrained_values = {1, 4, 5, 8}

    # The Q-table dictionary
    q_table = {}
    # Iterate through all possible combinations of s1, s2, s3, s4
    for s1 in s1_range:
        for s2 in s2_range:
            for s3 in s3_range:
                for s4 in s4_range:
                    # Skip impossible states (positions inside a wall)
                    if s2 == 2 and s1 not in s_constrained_values:
                        continue
                    if s4 == 2 and s3 not in s_constrained_values:
                        continue
                    # Map every valid state tuple to one Q-value per action
                    state_key = (s1, s2, s3, s4)
                    q_table[state_key] = [INITIAL_Q_VALUE] * NUM_ACTIONS

    print(f"Total number of valid states initialized: {len(q_table)}")  # debugging
    # print(list(q_table.items())[:5])  # Uncomment to see the first 5 entries
    return q_table
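# Minimal usage sketch (assumes this module is imported as rl, as in the
# game code above; the state values are made up for illustration):
#   q = q_init()              # 20 * 20 = 400 valid states for this maze
#   s = (1, 1, 8, 3)          # pacman at (1, 1), ghost at (8, 3)
#   a = epsilon_greedy(q, s)
#   s_new, r = take_action(s, a)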
def epsilon_greedy(q, s, epsilon=0.9):
    """
    Return which direction Pacman should move to.
    Epsilon-greedy: explore a random action with probability epsilon,
    otherwise exploit the best-valued action for state s. (A high
    epsilon like 0.9 means mostly exploration; it is usually decayed
    towards 0 over the course of training.)
    """
    if random.random() < epsilon:
        return random.randrange(len(q[s]))  # explore
    a_val = max(q[s])
    a = q[s].index(a_val)  # exploit: first action with the maximal value
    return a
def take_action(s, a):
    """Apply action a in state s; return the successor state and reward."""
    s_new = list(s)  # copy, so the caller's state tuple is not mutated
    if a == 0:    # left
        s_new[0] -= 1
    elif a == 1:  # right
        s_new[0] += 1
    elif a == 2:  # up (assuming y grows downward, as in pygame)
        s_new[1] -= 1
    elif a == 3:  # down
        s_new[1] += 1
    # Calculate the reward r
    # TODO: include whether there is a cookie on the field
    r = 0
    return tuple(s_new), r
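# One possible reward scheme for the TODO above (a sketch; the constants
# and the cookie_at() helper are illustrative assumptions, not part of
# the original code):
#   if (s_new[0], s_new[1]) == (s_new[2], s_new[3]):
#       r = -100   # caught by the ghost: terminal, large penalty
#   elif cookie_at(s_new[0], s_new[1]):
#       r = 10     # ate a cookie
#   else:
#       r = -1     # small step cost rewards shorter paths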