"""Reinforcement-learning agent for a minimalist Pacman game.

Task: develop a reinforcement learning (RL) agent that effectively collects
points in a minimalist Pacman game (provided on my homepage) while evading
the ghost, thereby avoiding being eaten.
"""

import itertools

import numpy as np


def q_init():
    """Build the Q-table with one entry per valid state.

    A state is a tuple ``(s1, s2, s3, s4)`` with ``s1, s3`` in 1..8 and
    ``s2, s4`` in 1..3.  Combinations where ``s2 == 2`` (resp. ``s4 == 2``)
    are only kept when ``s1`` (resp. ``s3``) is one of ``{1, 4, 5, 8}``;
    all others are treated as impossible and skipped.

    Returns:
        dict: maps each valid state tuple to a fresh list of
        ``NUM_ACTIONS`` Q-values, all set to ``INITIAL_Q_VALUE``.
    """
    # Configuration
    NUM_ACTIONS = 4
    INITIAL_Q_VALUE = 0.0

    s1_range = range(1, 9)
    s2_range = range(1, 4)
    s3_range = range(1, 9)
    s4_range = range(1, 4)
    s_constrained_values = {1, 4, 5, 8}

    q_table = {}

    # product() walks the combinations in the same order as four nested
    # loops over s1, s2, s3, s4 would.
    for s1, s2, s3, s4 in itertools.product(
        s1_range, s2_range, s3_range, s4_range
    ):
        # Skip impossible states.
        if s2 == 2 and s1 not in s_constrained_values:
            continue
        if s4 == 2 and s3 not in s_constrained_values:
            continue

        # Each state gets its own list so updates never leak between states.
        q_table[(s1, s2, s3, s4)] = [INITIAL_Q_VALUE] * NUM_ACTIONS

    print(f"Total number of valid states initialized: {len(q_table)}")  # debugging
    return q_table


def epsilon_greedy(q, s, epsilon=0.9):
    """Return the index of the action with the highest Q-value in state ``s``.

    Args:
        q: Q-table mapping state keys to lists of action values.
        s: state key to look up in ``q``.
        epsilon: accepted for the planned epsilon-greedy policy but not
            used yet; the choice is currently always greedy.

    Returns:
        int: index of the first maximal entry of ``q[s]``.
    """
    # TODO: implement the exploration branch controlled by ``epsilon``.
    action_values = q[s]
    return action_values.index(max(action_values))


def take_action(s, a):
    """Apply action ``a`` to state ``s`` and return the successor and reward.

    The input state is never modified: the update is made on a copy, so
    tuple states (such as the keys produced by ``q_init``) are accepted and
    list states passed by the caller keep their original contents.

    Action encoding:
        0: decrement ``s[0]``
        1: increment ``s[0]``
        2: increment ``s[1]``
        3: decrement ``s[1]``
    Any other value leaves the state unchanged.

    Args:
        s: current state as an indexable sequence (tuple or list).
        a: action index.

    Returns:
        tuple: ``(s_new, r)`` where ``s_new`` is a tuple if ``s`` was a
        tuple and a list otherwise, and ``r`` is the reward (placeholder,
        always 0 for now).
    """
    # Copy first: ``s_new = s`` would alias the caller's object, mutating
    # lists in place and failing outright on immutable tuples.
    s_new = list(s)

    if a == 0:
        s_new[0] -= 1
    elif a == 1:
        s_new[0] += 1
    elif a == 2:
        s_new[1] += 1
    elif a == 3:
        s_new[1] -= 1

    # TODO: compute the real reward, including whether there is a point on
    # the field that was reached.
    r = 0

    if isinstance(s, tuple):
        return tuple(s_new), r
    return s_new, r