"""
Entwickeln Sie einen Reinforcement Learning (RL) Agenten, der in
einem minimalistischen Pacman-Spiel (bereitgestellt auf meiner
Homepage) effektiv Punkte sammelt, während er dem Geist
ausweicht und somit vermeidet gefressen zu werden.
"""

import numpy as np

def q_init():
    """Build the initial Q-table for every valid state.

    A state is a tuple ``(s1, s2, s3, s4)`` where ``s1`` and ``s3`` run over
    1..8 and ``s2`` and ``s4`` run over 1..3.  A pair ``(s1, s2)`` -- and
    likewise ``(s3, s4)`` -- is treated as impossible when the second
    component is 2 and the first one is not in ``{1, 4, 5, 8}``; such states
    get no entry.

    Returns:
        dict: Maps each valid state tuple to its own list of four action
        values, all starting at 0.0.
    """
    action_count = 4
    start_value = 0.0
    restricted_firsts = {1, 4, 5, 8}

    def pair_is_possible(first, second):
        # A second component of 2 is only allowed with a restricted first one.
        return second != 2 or first in restricted_firsts

    # Iteration order (s1 outermost, s4 innermost) fixes the insertion order
    # of the dictionary; every state gets its own independent value list.
    q_table = {
        (s1, s2, s3, s4): [start_value] * action_count
        for s1 in range(1, 9)
        for s2 in range(1, 4)
        if pair_is_possible(s1, s2)
        for s3 in range(1, 9)
        for s4 in range(1, 4)
        if pair_is_possible(s3, s4)
    }

    # Debug output: report how many states were created.
    print(f"Total number of valid states initialized: {len(q_table)}")
    return q_table

def epsilon_greedy(q, s, epsilon=0.9):
    """Pick the action Pacman should take in state ``s``.

    The exploration part of epsilon-greedy is still to be done: ``epsilon``
    is accepted but not used yet, so the choice is purely greedy.

    Args:
        q: Q-table mapping a state to its sequence of action values.
        s: Key of the current state in ``q``.
        epsilon: Reserved for the epsilon-greedy rule; currently ignored.

    Returns:
        int: Index of the highest action value for ``s``; on ties the lowest
        index wins.
    """
    action_values = q[s]
    # max() keeps the first maximal candidate, so ties resolve to the
    # smallest action index.
    return max(range(len(action_values)), key=lambda action: action_values[action])


def take_action(s, a):
    """Apply action ``a`` to state ``s`` and return the successor and reward.

    The input state is never modified: the update is done on a copy.  Tuple
    states (the key type used by the Q-table) are supported and yield a
    tuple again, so the result can be used directly as a Q-table key.

    Args:
        s: Current state, a tuple or a mutable sequence that provides
            ``.copy()`` (e.g. a list).  Only components 0 and 1 are touched.
        a: Action index.  0 decrements component 0, 1 increments
            component 0, 2 increments component 1, 3 decrements component 1.
            Any other value leaves the state unchanged.

    Returns:
        tuple: ``(s_new, r)`` with the successor state (same container kind
        as ``s``) and the reward ``r``, which is currently always 0.
    """
    state_is_tuple = isinstance(s, tuple)
    # Work on a copy; a plain `s_new = s` would alias the caller's state and
    # item assignment would fail outright on tuples.
    s_new = list(s) if state_is_tuple else s.copy()

    if a == 0:
        s_new[0] -= 1
    elif a == 1:
        s_new[0] += 1
    elif a == 2:
        s_new[1] += 1
    elif a == 3:
        s_new[1] -= 1

    if state_is_tuple:
        s_new = tuple(s_new)

    # TODO: compute the actual reward, e.g. whether the new field holds a
    # point. Until then the reward is a constant placeholder.
    r = 0

    return s_new, r