"""
Develop a reinforcement learning (RL) agent that effectively collects points
in a minimalist Pacman game (provided on my homepage) while evading the
ghost, thereby avoiding being eaten.
"""
import numpy as np


def q_init():
    """Create the Q-table and seed every (state, action) pair.

    States are tuples (s0, s1, s2, s3) where (s0, s1) is Pacman's position
    and (s2, s3) is the ghost's position (see take_action). Positions whose
    second coordinate equals 2 only exist when the first coordinate is in
    {1, 4, 5, 8} -- presumably the layout of the labyrinth's corridors;
    TODO confirm against the actual maze.

    Returns:
        dict: state tuple -> list of NUM_ACTIONS Q-values, all set to
        INITIAL_Q_VALUE (optimistic initialization encourages exploration).
    """
    # Configuration
    NUM_ACTIONS = 4
    INITIAL_Q_VALUE = 1.0  # optimistic initial estimate for every action

    s0_range = range(1, 9)
    s1_range = range(1, 4)
    s2_range = range(1, 9)
    s3_range = range(1, 4)
    s_constrained_values = {1, 4, 5, 8}

    # The Q-Table dictionary
    q_table = {}

    # Enumerate every (s0, s1, s2, s3) combination, skipping positions
    # that cannot occur in the maze.
    for s0 in s0_range:
        for s1 in s1_range:
            for s2 in s2_range:
                for s3 in s3_range:
                    # Skip impossible states
                    if s1 == 2 and s0 not in s_constrained_values:
                        continue
                    if s3 == 2 and s2 not in s_constrained_values:
                        continue
                    state_key = (s0, s1, s2, s3)
                    q_table[state_key] = [INITIAL_Q_VALUE] * NUM_ACTIONS

    print(f"Total number of valid states initialized: {len(q_table)}")
    # debugging
    # print(list(q_table.items())[:5])  # Uncomment to see the first 5 entries
    return q_table


def epsilon_greedy(q, s, epsilon=0.9):
    """Select an action for state ``s`` with an epsilon-greedy policy.

    With probability ``epsilon`` a uniformly random action is returned
    (exploration); otherwise the index of the highest Q-value is returned
    (exploitation, ties broken by the lowest index).

    Args:
        q: Q-table mapping state tuples to lists of action values.
        s: current state tuple (a key of ``q``).
        epsilon: exploration probability in [0, 1].

    Returns:
        int: chosen action index in range(len(q[s])).
    """
    # Bug fix: the original ignored epsilon entirely and always acted
    # greedily, so the agent never explored.
    if np.random.random() < epsilon:
        return int(np.random.randint(len(q[s])))
    q_max = max(q[s])
    return q[s].index(q_max)


def take_action(s, a, labyrinth):
    """Apply action ``a`` in state ``s`` and return (new_state, reward).

    Args:
        s: state tuple (s0, s1, s2, s3); (s0, s1) is Pacman's position,
            (s2, s3) the ghost's (the ghost does not move here).
        a: action index -- 0/1 decrement/increment s0, 3/2 decrement/
            increment s1 (axis orientation depends on the maze encoding).
        labyrinth: 2-D indexable grid; a "." cell holds a collectable point.

    Returns:
        tuple: (new state tuple, reward). Reward is +1 for landing on a
        point, plus the change in Manhattan distance to the ghost (moving
        away from the ghost is rewarded, moving toward it is penalized).

    NOTE(review): no wall/bounds check is performed here -- callers are
    assumed to pass only legal actions; TODO confirm.
    """
    s_new = list(s)
    # Actions are mutually exclusive, so an elif chain is sufficient.
    if a == 0:
        s_new[0] -= 1
    elif a == 1:
        s_new[0] += 1
    elif a == 2:
        s_new[1] += 1
    elif a == 3:
        s_new[1] -= 1

    # consider if there is a point on the field
    r = 1 if labyrinth[s_new[0]][s_new[1]] == "." else 0

    # consider new distance between Pacman and Ghost (Manhattan metric)
    distance = abs(s[0] - s[2]) + abs(s[1] - s[3])
    distance_new = abs(s_new[0] - s_new[2]) + abs(s_new[1] - s_new[3])
    r += distance_new - distance  # adjust this value if necessary

    return tuple(s_new), r