63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from ReinforcmentLearning.game import run_game
|
|
from ReinforcmentLearning.util import initial_q_fill
|
|
|
|
|
|
|
|
def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """Call ``oneTry`` ``AMOUNT_TRIES`` times and collect what each call returns.

    Every call receives the same ``EPSILON``, ``ALPHA``, ``GAMMA``,
    ``AMOUNT_RUNS``, ``REWARD_ON_WIN`` and ``REWARD_ON_LOSE`` arguments.

    Returns:
        A tuple ``(cookies_per_try, wins_per_try)`` of two lists with one
        entry per try: the first holds the per-run cookie list returned by
        ``oneTry``, the second holds the win count returned by ``oneTry``.
        Both lists are empty when ``AMOUNT_TRIES`` is 0.
    """
    cookies_per_try = []
    wins_per_try = []

    for _ in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(
            EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE
        )
        # Store this try's runs in the per-try collection; appending the list
        # to itself would leave cookies_per_try empty for the caller.
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)

    return cookies_per_try, wins_per_try
|
|
|
|
|
|
def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """Play ``AMOUNT_RUNS`` games on one shared Q-table and report the wins.

    state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
    action: Direction
    q_value: (state, action)

    A fresh ``q_values`` dict is created, passed to ``initial_q_fill`` and
    then handed to every ``run_game`` call of this try, so all runs share
    the same dict. The win percentage is printed before returning.

    Returns:
        A tuple ``(cookies_per_run, wins)``: the list of values returned by
        ``run_game`` (one per run) and the number of runs that count as a win.
    """
    # A run counts as a win when run_game reports exactly this many cookies.
    # NOTE(review): presumably the total number of cookies on the board --
    # confirm against the game module.
    cookies_to_win = 20

    q_values = {}
    initial_q_fill(q_values)

    # Runs execute in order; each one receives the same q_values dict.
    cookies_per_run = [
        run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
        for _ in range(AMOUNT_RUNS)
    ]

    wins = sum(1 for cookies in cookies_per_run if cookies == cookies_to_win)

    # Guard against AMOUNT_RUNS == 0, which would otherwise divide by zero.
    win_percentage = (wins / AMOUNT_RUNS) * 100 if AMOUNT_RUNS > 0 else 0.0
    print(f"Win percentage: {win_percentage}%")

    return cookies_per_run, wins
|
|
|
|
|
|
def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
    """Print the average win percentage across all tries.

    The average is the mean of ``wins_per_try`` divided by ``AMOUNT_RUNS``,
    expressed as a percentage. ``cookies_per_try``, ``EPSILON``, ``ALPHA``
    and ``GAMMA`` are accepted but not used by the current output.

    Raises:
        ZeroDivisionError: if ``wins_per_try`` is empty or ``AMOUNT_RUNS`` is 0.
    """
    mean_wins = sum(wins_per_try) / len(wins_per_try)
    win_ratio = mean_wins / AMOUNT_RUNS
    # NOTE: the message text (including its spelling) is runtime output and is
    # kept exactly as it was.
    print(f"Average win percantage: {win_ratio * 100}%\n")
|
|
|