diff --git a/04_pacman_rl/pacman.py b/04_pacman_rl/pacman.py index d14f6fe..9bdb4fd 100644 --- a/04_pacman_rl/pacman.py +++ b/04_pacman_rl/pacman.py @@ -1,6 +1,7 @@ import pygame import random import math +import reinforcement_learning as rl # Initialize pygame pygame.init() @@ -154,6 +155,25 @@ def main(): print("You Win! Pacman ate all the cookies.") running = False + # Start of my code + alpha = 0.8 + gamma = 0.9 + + s = [pacman.x, pacman.y, ghost.x, ghost.y] + s_not_terminal = True + + q = rl.q_init() + + while s_not_terminal: + a = rl.epsilon_greedy(q, s) # 0 = Left; 1 = Right ; 2 = Up ; 3 = Down + s_new, r = rl.take_action(s, a) + + q[s][a] += alpha * (r + gamma * max_q(q, s_new) - q[s][a]) + + s = s_new + pass + + # Draw the labyrinth, pacman, and ghost draw_labyrinth() pacman.draw() @@ -168,27 +188,4 @@ def main(): pygame.quit() if __name__ == "__main__": - main() - - -''' -Write a pacman game using pygame. The pacman should be a yellow circle, the ghost is a red square. The labyrinth is written as a string as follows: -"########## -#........# -#.##..##.# -#........# -##########" - -The "." are cookies the pacman can eat (as Graphics small white circles). The "#" are walls (as graphics blue squares on a black background ). The ghost always tries to catch the pacman. Pacman as well as the ghost go one step in each game loop iteration. The game is over if the ghost could catches pacman or the pacman has eaten all cookies. Start your answer with "Shure, here is the full pacman code. 
# NOTE(review): this file was recovered from a mangled git diff.  Non-code text
# that shared this chunk of the diff (the tail of the removed prompt docstring
# in pacman.py, followed by the header for this new file) is preserved here:
#
#     Now change the code that the following strings are the pixel of the ghost:
#     " ####
#     ######
#     ## # #
#     ######
#     ######
#     # # # "
#
#     diff --git a/04_pacman_rl/reinforcement_learning.py
#              b/04_pacman_rl/reinforcement_learning.py   (new file, 73 lines)
"""
Reinforcement-learning helpers for the minimalistic Pacman game.

(Translated from the original German docstring:) Develop a reinforcement
learning (RL) agent that effectively collects points in a minimalistic
Pacman game (provided on my homepage) while evading the ghost and thereby
avoiding being eaten.
"""

import numpy as np


def q_init():
    """Build the Q-table: every reachable state maps to 4 action values.

    State key layout is the 4-tuple ``(s1, s2, s3, s4)`` — presumably
    (Pacman column, Pacman row, ghost column, ghost row); TODO confirm
    against the labyrinth layout in pacman.py.  The middle row (``s2`` /
    ``s4`` == 2) is only open in columns {1, 4, 5, 8}; every other middle-row
    cell is a wall and is skipped as an impossible state.

    Returns:
        dict[tuple[int, int, int, int], list[float]]: the 400 valid states,
        each initialised to four 0.0 values (0=Left, 1=Right, 2=Up, 3=Down).
    """
    NUM_ACTIONS = 4
    INITIAL_Q_VALUE = 0.0  # neutral start value for every action

    corridor_columns = {1, 4, 5, 8}  # middle row is passable only here

    q_table = {}
    for s1 in range(1, 9):
        for s2 in range(1, 4):
            # Hoisted: skip Pacman positions inside middle-row walls early.
            if s2 == 2 and s1 not in corridor_columns:
                continue
            for s3 in range(1, 9):
                for s4 in range(1, 4):
                    # Skip ghost positions inside middle-row walls.
                    if s4 == 2 and s3 not in corridor_columns:
                        continue
                    q_table[(s1, s2, s3, s4)] = [INITIAL_Q_VALUE] * NUM_ACTIONS

    print(f"Total number of valid states initialized: {len(q_table)}")  # debugging
    # print(list(q_table.items())[:5])  # Uncomment to see the first 5 entries
    return q_table


def epsilon_greedy(q, s, epsilon=0.9):
    """Pick an action for state *s* with epsilon-greedy exploration.

    The original version ignored *epsilon* and was purely greedy; this now
    explores (uniform random action) with probability *epsilon* and exploits
    (argmax of the Q-values) otherwise.  Pass ``epsilon=0`` for the old
    deterministic greedy behaviour.

    NOTE(review): the default 0.9 means 90% *random* actions under the
    standard convention used here — if the author intended 90% greedy, the
    default should be 0.1.  TODO confirm.

    Args:
        q: Q-table mapping state tuples to lists of 4 action values.
        s: state key present in *q*.
        epsilon: exploration probability in [0, 1].

    Returns:
        int: action index — 0=Left, 1=Right, 2=Up, 3=Down.
    """
    action_values = q[s]
    if np.random.rand() < epsilon:
        return int(np.random.randint(len(action_values)))  # explore
    return action_values.index(max(action_values))  # exploit (greedy)


def take_action(s, a):
    """Apply action *a* to state *s* and return ``(s_new, r)``.

    Fixes two defects of the original:
      * ``s_new = s`` aliased the caller's state, so the Q-update's ``s`` and
        ``s_new`` were always the same object — the state is now copied.
      * the new state is returned as a tuple so it can key the Q-table
        (q_init uses tuple keys; lists are unhashable).

    Args:
        s: state sequence ``(s1, s2, s3, s4)``; only Pacman's coordinates
           ``s[0]``/``s[1]`` are moved here.
        a: action index — 0=Left, 1=Right, 2=Up, 3=Down.

    Returns:
        tuple: ``(s_new, r)`` where ``s_new`` is the successor state tuple
        and ``r`` the immediate reward (still a placeholder 0).
    """
    s_new = list(s)  # copy — never mutate the caller's state
    if a == 0:
        s_new[0] -= 1
    elif a == 1:
        s_new[0] += 1
    elif a == 2:
        # NOTE(review): "Up" increments s[1] here — confirm the grid's y-axis
        # direction; in screen coordinates "up" usually decreases y.
        s_new[1] += 1
    elif a == 3:
        s_new[1] -= 1

    # Reward calculation is still a placeholder.
    # TODO: reward for a cookie on the new field, penalty for the ghost.
    r = 0

    return tuple(s_new), r