MLE-Pacman/ReinforcmentLearning/learning.py

85 lines
2.4 KiB
Python

# import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ReinforcmentLearning.game import start_try
import conf
def multipleTries(EPSILON, ALPHA, GAMMA):
    """Run several independent tries and collect each one's late-stage win rate.

    Sets ``conf.show_game`` and ``conf.plot_result`` to ``False`` before
    running (the latter disables the printing/plotting branch in ``oneTry``),
    then calls ``oneTry`` ``conf.AMOUNT_TRIES`` times. These assignments are
    not reverted afterwards.

    Args:
        EPSILON, ALPHA, GAMMA: Forwarded unchanged to ``oneTry``.
            NOTE(review): presumably exploration rate, learning rate and
            discount factor -- confirm against ``start_try``.

    Returns:
        A list with one float per try: the fraction of the last (up to) 700
        runs whose result equals 20, which this module counts as a win.
        A try that produced no runs contributes ``0.0``.
    """
    conf.show_game = False
    conf.plot_result = False

    wins_per_try = []
    for _ in range(conf.AMOUNT_TRIES):
        cookies_per_run = oneTry(EPSILON, ALPHA, GAMMA)

        recent_results = cookies_per_run[-700:]
        recent_count = len(recent_results)
        recent_wins = sum(1 for result in recent_results if result == 20)

        # Divide by the real window length: the slice is shorter than 700
        # when fewer runs were played, and a fixed divisor would understate
        # the win rate in that case.
        win_probability = recent_wins / recent_count if recent_count else 0.0
        wins_per_try.append(win_probability)

    return wins_per_try
def oneTry(EPSILON, ALPHA, GAMMA):
    """Run a single try via ``start_try`` and return its per-run results.

    Layout noted by the original author:
        state:   (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
        action:  Direction
        q_value: (state, action)

    When ``conf.plot_result`` is truthy, the overall win percentage and the
    win percentage over the most recent (up to) 700 runs are printed, and
    ``plot_results`` is called. A run counts as a win when its result is 20.

    Args:
        EPSILON, ALPHA, GAMMA: Forwarded unchanged to ``start_try``.
            NOTE(review): presumably exploration rate, learning rate and
            discount factor -- confirm against ``start_try``.

    Returns:
        ``cookies_per_run`` exactly as returned by ``start_try``.
    """
    cookies_per_run, iterations = start_try(EPSILON, ALPHA, GAMMA)

    if conf.plot_result:
        wins = sum(1 for result in cookies_per_run if result == 20)
        print(f"Win percentage overall: {(wins/conf.AMOUNT_RUNS)*100}%")

        recent_results = cookies_per_run[-700:]
        recent_count = len(recent_results)
        recent_wins = sum(1 for result in recent_results if result == 20)

        # Use the actual window length as divisor (and in the label): with
        # fewer than 700 runs a fixed 700 would understate the percentage.
        win_percentage = (recent_wins / recent_count) * 100 if recent_count else 0.0
        print(f"Win percentage in the last {recent_count}: {win_percentage:.2f}%\n")

        plot_results(cookies_per_run, iterations)

    return cookies_per_run
def plot_results(cookies_per_run, iterations):
    """Show a two-panel figure summarising one try.

    The upper panel plots the running totals of wins and losses against the
    attempt number; a result of 20 is counted as a win, anything else as a
    loss. The lower panel plots ``iterations`` against the run number.

    Args:
        cookies_per_run: Iterable of per-run results.
        iterations: Sized sequence of values plotted as "Iterations" per run.
    """
    cumulative_wins = []
    cumulative_losses = []
    total_wins = 0
    for attempt, cookies in enumerate(cookies_per_run, start=1):
        if cookies == 20:
            total_wins += 1
        cumulative_wins.append(total_wins)
        # Every attempt that was not a win is a loss.
        cumulative_losses.append(attempt - total_wins)

    # Two stacked axes: win/loss totals on top, iterations below.
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

    # Upper panel -- both series have one entry per attempt.
    attempts = range(1, len(cumulative_wins) + 1)
    ax1.plot(attempts, cumulative_wins, 'b-', linewidth=1.5, label='Wins')
    ax1.plot(attempts, cumulative_losses, 'orange', linewidth=1.5, label='Losses')
    ax1.set_xlabel('Attempt')
    ax1.set_ylabel('Count')
    ax1.set_title('Wins vs Losses Over Time')
    ax1.legend()

    # Lower panel -- iterations needed in each run.
    run_numbers = range(1, len(iterations) + 1)
    ax2.plot(run_numbers, iterations, 'g-', linewidth=1.5)
    ax2.set_xlabel('Run Number')
    ax2.set_ylabel('Iterations')
    ax2.set_title('Iterations per Run')

    plt.tight_layout()
    plt.show()