import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game
from ReinforcmentLearning.util import initial_q_fill

# A run is counted as a win when run_game reports exactly this many cookies.
_COOKIES_FOR_WIN = 20


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """Run ``oneTry`` ``AMOUNT_TRIES`` times and collect the results of each try.

    Every try is an independent call to ``oneTry`` with the same arguments.

    Args:
        EPSILON, ALPHA, GAMMA: Passed through unchanged to ``oneTry``.
        AMOUNT_TRIES: Number of tries to perform.
        AMOUNT_RUNS: Number of runs per try, passed through to ``oneTry``.
        REWARD_ON_WIN, REWARD_ON_LOSE: Passed through unchanged to ``oneTry``.

    Returns:
        A tuple ``(cookies_per_try, wins_per_try)``. ``cookies_per_try`` holds
        one per-run cookie list for each try, and ``wins_per_try`` holds the
        number of wins for each try, both in try order.
    """
    cookies_per_try = []
    wins_per_try = []

    for _ in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(
            EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE
        )
        # Store this try's results in the per-try list. Appending
        # cookies_per_run to itself would leave cookies_per_try empty.
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)

    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """Play ``AMOUNT_RUNS`` games on one shared Q-table and count the wins.

    A new ``q_values`` dict is created, passed to ``initial_q_fill``, and then
    handed to every ``run_game`` call of this try.

    Notes carried over from the original author:
        state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
        action: Direction
        q_value: (state, action)

    Args:
        EPSILON, ALPHA, GAMMA: Passed through unchanged to ``run_game``.
        AMOUNT_RUNS: Number of games to play. Must be non-zero, because the
            printed win percentage divides by it.
        REWARD_ON_WIN, REWARD_ON_LOSE: Passed through unchanged to ``run_game``.

    Returns:
        A tuple ``(cookies_per_run, wins)``. ``cookies_per_run`` lists the value
        returned by ``run_game`` for each run, and ``wins`` is how many of those
        values equal 20.

    Side effects:
        Prints the win percentage of this try.
    """
    q_values = {}
    initial_q_fill(q_values)

    cookies_per_run = [
        run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        for _ in range(AMOUNT_RUNS)
    ]

    wins = sum(1 for cookies in cookies_per_run if cookies == _COOKIES_FOR_WIN)

    print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")

    return cookies_per_run, wins


def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    """Print the average win percentage over all tries.

    The mean of ``wins_per_try`` is divided by ``AMOUNT_RUNS`` and printed as a
    percentage.

    Args:
        cookies_per_try: Accepted for interface compatibility; not used here.
        wins_per_try: Number of wins for each try. Must be non-empty, because
            the mean divides by its length.
        EPSILON, ALPHA, GAMMA: Accepted for interface compatibility; not used
            here.
        AMOUNT_RUNS: Number of runs per try. Must be non-zero.
    """
    print(f"Average win percantage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")