working version 1.0

main
2wenty1ne 2025-12-09 18:17:05 +01:00
parent ca843080fe
commit a965dc07ce
4 changed files with 63 additions and 62 deletions

View File

@ -101,10 +101,9 @@ class Ghost:
def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game): def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
#? Learning initial #? Learning initial
q_values = {} q_values = initial_q_fill()
initial_q_fill(q_values)
#? Game initial #? Game initial
pygame.init() pygame.init()
@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, s
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE)) screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman") pygame.display.set_caption("Micro-Pacman")
#? Start try
cookies_per_run = [] cookies_per_run = []
iterations = []
for x in range(AMOUNT_RUNS): for x in range(AMOUNT_RUNS):
amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen) # if show_game:
if x == AMOUNT_RUNS / 4:
print("1 / 4 done")
if x == AMOUNT_RUNS / 2:
print("2 / 4 done")
if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
print("3 / 4 done")
amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
cookies_per_run.append(amount_cookies_ate) cookies_per_run.append(amount_cookies_ate)
iterations.append(iterations_per_run)
pygame.quit() pygame.quit()
return cookies_per_run return cookies_per_run, iterations
# Main game function
def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen): def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
clock = pygame.time.Clock() clock = pygame.time.Clock()
labyrinth = LABYRINTH_INIT.copy() labyrinth = LABYRINTH_INIT.copy()
@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
reward = 0 reward = 0
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
if show_game:
screen.fill(BLACK)
iter = iter + 1
# Handle events # Handle events
for event in pygame.event.get(): for event in pygame.event.get():
if event.type == pygame.QUIT: if event.type == pygame.QUIT:
running = False running = False
if show_game:
screen.fill(BLACK)
iter = iter + 1
# Handle Pacman movement # Handle Pacman movement
keys = pygame.key.get_pressed() keys = pygame.key.get_pressed()
if keys[pygame.K_LEFT]: if keys[pygame.K_LEFT]:
@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
if keys[pygame.K_DOWN]: if keys[pygame.K_DOWN]:
pacman.move(0, 1) pacman.move(0, 1)
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
action = epsilon_greedy(q_values, state, EPSILON) action = epsilon_greedy(q_values, state, EPSILON)
if action == Direction.LEFT: if action == Direction.LEFT:
@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
pacman.move(labyrinth, 0, 1) pacman.move(labyrinth, 0, 1)
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
if iter%3==0: if iter%3==0:
# Ghost moves towards Pacman
ghost.move_towards_pacman(labyrinth, pacman) ghost.move_towards_pacman(labyrinth, pacman)
# Check for collisions (game over if ghost catches pacman)
if pacman.x == ghost.x and pacman.y == ghost.y: if pacman.x == ghost.x and pacman.y == ghost.y:
# print("Game Over! The ghost caught Pacman.") if show_game:
print("Game Over! The ghost caught Pacman.")
running = False running = False
reward = REWARD_ON_LOSE reward = REWARD_ON_LOSE
@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:] labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
cookie_counter = 0 #? half reward
# cookie_counter = 0
for y, row in enumerate(labyrinth): # for y, row in enumerate(labyrinth):
for x, cell in enumerate(row): # for x, cell in enumerate(row):
if cell == ".": # if cell == ".":
cookie_counter += 1 # cookie_counter += 1
if cookie_counter == 10: # if cookie_counter == 10:
reward = REWARD_ON_HALF # # reward = REWARD_ON_HALF
if show_game: # if show_game:
print("Half reward") # print("Got half reward")
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
for x, cell in enumerate(row): for x, cell in enumerate(row):
if cell == ".": if cell == ".":
counter += 1 counter += 1
return 20-counter return 20-counter, iter
#? -------------------------MY CODE----------------------------------- #? -------------------------MY CODE-----------------------------------
@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
pygame.display.flip() pygame.display.flip()
# Cap the frame rate # Cap the frame rate
clock.tick(20) clock.tick(40)
def draw_labyrinth(screen, labyrinth): def draw_labyrinth(screen, labyrinth):

View File

@ -2,8 +2,7 @@ import matplotlib.pyplot as plt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from ReinforcmentLearning.game import run_game, wrapper from ReinforcmentLearning.game import start_try
from ReinforcmentLearning.util import initial_q_fill
@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
q_value: (state, action) q_value: (state, action)
""" """
cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game) cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
# if show_game:
# if x == AMOUNT_RUNS / 4:
# print("1 / 4 done")
# if x == AMOUNT_RUNS / 2:
# print("2 / 4 done")
# if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
# print("3 / 4 done")
wins = sum(1 for result in cookies_per_run if result == 20) wins = sum(1 for result in cookies_per_run if result == 20)
@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%") print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
if plot_result: if plot_result:
plot_results(cookies_per_run) plot_results(cookies_per_run, iterations)
return cookies_per_run, wins return cookies_per_run, wins
def plot_results(cookies_per_run): def plot_results(cookies_per_run, iterations):
wins = [] wins = []
losses = [] losses = []
win_count = 0 win_count = 0
@ -63,29 +51,22 @@ def plot_results(cookies_per_run):
wins.append(win_count) wins.append(win_count)
losses.append((i + 1) - win_count) # Losses count down from top losses.append((i + 1) - win_count) # Losses count down from top
# Last 700 attempts # Create figure with 2 subplots (wins/losses and iterations)
last_700_wins = wins[-700:] if len(wins) >= 700 else wins
last_700_losses = losses[-700:] if len(losses) >= 700 else losses
last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
# Create figure with 2 subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8)) fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
# Plot 1: All attempts (with thicker lines: linewidth=1.5) # Plot 1: Wins vs Losses
ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins') ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses') ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
ax1.set_xlabel('Attempt') ax1.set_xlabel('Attempt')
ax1.set_ylabel('Count') ax1.set_ylabel('Count')
ax1.set_title('All Attempts: Wins vs Losses') ax1.set_title('Wins vs Losses Over Time')
ax1.legend() ax1.legend()
# Plot 2: Last 700 attempts (with thicker lines: linewidth=1.5) # Plot 2: Iterations per run
ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins') ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5)
ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses') ax2.set_xlabel('Run Number')
ax2.set_xlabel('Attempt') ax2.set_ylabel('Iterations')
ax2.set_ylabel('Count') ax2.set_title('Iterations per Run')
ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
ax2.legend()
plt.tight_layout() plt.tight_layout()
plt.show() plt.show()

View File

@ -8,7 +8,9 @@ class Direction(Enum):
LEFT = 3 LEFT = 3
def initial_q_fill(q_values): def initial_q_fill():
q_values = {}
for x in range(-7, 8): for x in range(-7, 8):
for y in range(-2, 3): for y in range(-2, 3):
for cookie_direction in Direction: for cookie_direction in Direction:
@ -16,6 +18,7 @@ def initial_q_fill(q_values):
state = (x, y, cookie_direction) state = (x, y, cookie_direction)
q_values[(state, action)] = random.random() * 0.2 - 0.1 q_values[(state, action)] = random.random() * 0.2 - 0.1
return q_values
@ -109,7 +112,6 @@ def get_best_q_action(q_values, state):
for (q_state, q_action), value in q_values.items(): for (q_state, q_action), value in q_values.items():
if q_state == state: if q_state == state:
actions_for_epsilon.append(q_action) actions_for_epsilon.append(q_action)
if best_value is None: if best_value is None:
best_value = value best_value = value
best_action = q_action best_action = q_action

View File

@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
# EPSILON = 0.1618 # EPSILON = 0.1618
EPSILON = 0.01 EPSILON = 0.01
ALPHA = 0.01 # ALPHA = 0.01
GAMMA = 0.2713 ALPHA = 0.2
# GAMMA = 0.2713
GAMMA = 0.8
AMOUNT_RUNS = 5000 AMOUNT_RUNS = 5000
AMOUNT_TRIES = 5 AMOUNT_TRIES = 5
@ -14,7 +16,7 @@ REWARD_ON_WIN = 400
REWARD_ON_LOSE = -250 REWARD_ON_LOSE = -250
plot_result = True plot_result = True
show_game = True show_game = False
oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game) oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)