# MLE-Pacman/ReinforcmentLearning/learning.py

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ReinforcmentLearning.game import run_game, wrapper
from ReinforcmentLearning.util import initial_q_fill


def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
    """Call ``oneTry`` AMOUNT_TRIES times and collect the results of every try.

    All hyperparameters are forwarded unchanged to ``oneTry``. Plotting is
    disabled for each try; the game display is disabled as well
    (NOTE(review): the original call omitted ``show_game`` entirely, so
    ``False`` is an assumption for batch runs -- confirm).

    Returns:
        A tuple ``(cookies_per_try, wins_per_try)`` where
        ``cookies_per_try[i]`` is the per-run result list returned by the
        i-th try and ``wins_per_try[i]`` is that try's win count.
    """
    cookies_per_try = []
    wins_per_try = []

    for _ in range(AMOUNT_TRIES):
        cookies_per_run, amount_wins = oneTry(
            EPSILON,
            ALPHA,
            GAMMA,
            AMOUNT_RUNS,
            REWARD_ON_WIN,
            REWARD_ON_LOSE,
            False,  # plot_result: never plot individual tries in a batch
            False,  # show_game
        )
        # Collect into the per-try list (not into the run list itself).
        cookies_per_try.append(cookies_per_run)
        wins_per_try.append(amount_wins)

    return cookies_per_try, wins_per_try


def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game=False):
    """Run one series of games via ``wrapper`` and report the win rate.

    Notes kept from the original author on the learning setup:
        state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
        action: Direction
        q_value: (state, action)

    Args:
        EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE: forwarded
            unchanged to ``wrapper``.
        AMOUNT_RUNS: number of runs requested from ``wrapper``; also the
            denominator of the printed win percentage.
        plot_result: when truthy, the results are passed to ``plot_results``.
        show_game: forwarded to ``wrapper``. Defaults to ``False`` so callers
            that do not care about the display can omit it.

    Returns:
        A tuple ``(cookies_per_run, wins)``: the sequence returned by
        ``wrapper`` and the number of its entries equal to 20.
    """
    cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)

    # A run counts as a win when its result equals 20
    # (presumably the total number of cookies -- confirm against the game).
    wins = sum(1 for result in cookies_per_run if result == 20)

    # Guard the division so a request for zero runs reports 0% instead of
    # raising ZeroDivisionError.
    win_percentage = (wins / AMOUNT_RUNS) * 100 if AMOUNT_RUNS else 0.0
    print(f"Win percentage: {win_percentage}%")

    if plot_result:
        plot_results(cookies_per_run)

    return cookies_per_run, wins


def plot_results(cookies_per_run, *, window=700, win_score=20):
    """Plot cumulative wins and losses over all attempts and over a tail window.

    Two stacked subplots are drawn and shown with ``plt.show()``: the upper
    one covers every attempt, the lower one only the last ``window`` attempts
    (or all of them when fewer are available).

    Args:
        cookies_per_run: iterable with one result per attempt.
        window: number of most recent attempts shown in the second subplot.
        win_score: result value that marks an attempt as a win.

    Raises:
        ValueError: if ``window`` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative")

    wins = []
    losses = []
    win_count = 0

    for attempt, result in enumerate(cookies_per_run, start=1):
        if result == win_score:
            win_count += 1
        wins.append(win_count)
        # Cumulative losses: every attempt so far that was not a win.
        losses.append(attempt - win_count)

    total = len(wins)

    # Clamping the start index at 0 covers the "fewer attempts than window"
    # case without a separate branch.
    tail_start = max(total - window, 0)
    tail_wins = wins[tail_start:]
    tail_losses = losses[tail_start:]
    # Attempt numbers are 1-based so both subplots share the same x scale.
    tail_attempts = list(range(tail_start + 1, total + 1))
    all_attempts = range(1, total + 1)

    # Create figure with 2 subplots
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

    # Plot 1: all attempts
    ax1.plot(all_attempts, wins, 'b-', linewidth=1.5, label='Wins')
    ax1.plot(all_attempts, losses, 'orange', linewidth=1.5, label='Losses')
    ax1.set_xlabel('Attempt')
    ax1.set_ylabel('Count')
    ax1.set_title('All Attempts: Wins vs Losses')
    ax1.legend()

    # Plot 2: only the tail window
    ax2.plot(tail_attempts, tail_wins, 'b-', linewidth=1.5, label='Wins')
    ax2.plot(tail_attempts, tail_losses, 'orange', linewidth=1.5, label='Losses')
    ax2.set_xlabel('Attempt')
    ax2.set_ylabel('Count')
    ax2.set_title(f'Last {len(tail_wins)} Attempts: Wins vs Losses')
    ax2.legend()

    plt.tight_layout()
    plt.show()