import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game
from ReinforcmentLearning.util import initial_q_fill


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """Run ``oneTry`` AMOUNT_TRIES times and collect the result of every try.

    All parameters except AMOUNT_TRIES are forwarded unchanged to ``oneTry``.

    Returns:
        A tuple ``(cookies_per_try, wins_per_try)``:
        ``cookies_per_try`` holds one entry per try, each being the per-run
        cookie list returned by ``oneTry``; ``wins_per_try`` holds the win
        count of each try, in the same order.
    """
    cookies_per_try = []
    wins_per_try = []

    for _ in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(
            EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE
        )
        # Collect into the outer list. Appending the per-run list to itself
        # would leave cookies_per_try empty and make the list self-referential.
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)

    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """Play AMOUNT_RUNS games that share one Q-table and report the win rate.

    Q-table layout, as described by the original author:
        state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
        action: Direction
        q_value: (state, action)

    A fresh ``q_values`` dict is created and passed through ``initial_q_fill``,
    then handed to every ``run_game`` call of this try. The win percentage is
    printed before returning.

    Returns:
        A tuple ``(cookies_per_run, wins)``: the value returned by
        ``run_game`` for each run, and the number of runs counted as a win.
    """
    # A run counts as a win when exactly this many cookies were eaten.
    # NOTE(review): presumably the total number of cookies on the board;
    # confirm against the game module.
    cookies_to_win = 20

    q_values = {}
    initial_q_fill(q_values)

    # The same q_values dict is passed to every run of this try.
    cookies_per_run = [
        run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        for _ in range(AMOUNT_RUNS)
    ]

    wins = sum(1 for cookies in cookies_per_run if cookies == cookies_to_win)

    # With no runs there is nothing to divide by; report 0.0% instead of
    # raising (the previous code failed on a leftover loop variable here).
    win_percentage = (wins / AMOUNT_RUNS) * 100 if cookies_per_run else 0.0
    print(f"Win percentage: {win_percentage}%")

    return cookies_per_run, wins


def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    """Print the average win percentage over all tries.

    The average number of wins per try is divided by AMOUNT_RUNS and scaled
    to a percentage. ``cookies_per_try``, ``EPSILON``, ALPHA`` and ``GAMMA``
    are accepted but not used by the current output.

    Raises:
        ZeroDivisionError: if ``wins_per_try`` is empty or AMOUNT_RUNS is 0.
    """
    total_wins = sum(wins_per_try)
    tries = len(wins_per_try)

    # Keep the order of operations (mean, then rate, then scale) so the
    # printed float is identical to the single-expression form.
    mean_wins = total_wins / tries
    win_rate = mean_wins / AMOUNT_RUNS
    win_percent = win_rate * 100

    print(f"Average win percantage: {win_percent}%\n")