working version 1.0

main
2wenty1ne 2025-12-09 18:17:05 +01:00
parent ca843080fe
commit a965dc07ce
4 changed files with 63 additions and 62 deletions

ReinforcmentLearning/game.py

@@ -101,10 +101,9 @@ class Ghost:
-def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
     #? Learning initial
-    q_values = {}
-    initial_q_fill(q_values)
+    q_values = initial_q_fill()
     #? Game initial
     pygame.init()
@@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, s
     screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
     pygame.display.set_caption("Micro-Pacman")
     #? Start try
     cookies_per_run = []
+    iterations = []
     for x in range(AMOUNT_RUNS):
-        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+        # if show_game:
+        if x == AMOUNT_RUNS / 4:
+            print("1 / 4 done")
+        if x == AMOUNT_RUNS / 2:
+            print("2 / 4 done")
+        if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            print("3 / 4 done")
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
         cookies_per_run.append(amount_cookies_ate)
+        iterations.append(iterations_per_run)
     pygame.quit()
-    return cookies_per_run
+    return cookies_per_run, iterations
 # Main game function
 def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
     clock = pygame.time.Clock()
     labyrinth = LABYRINTH_INIT.copy()
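A caveat on the progress prints added above: `AMOUNT_RUNS / 4` is float division in Python 3, so `x == AMOUNT_RUNS / 4` only matches when AMOUNT_RUNS divides evenly (with AMOUNT_RUNS = 5000 it does: 1250 == 1250.0 is True). Floor division would be the more robust spelling, sketched here as a suggestion rather than what the commit does:

# Matches a quarter mark even when AMOUNT_RUNS does not divide evenly by 4.
if x == AMOUNT_RUNS // 4:
    print("1 / 4 done")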
@@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
         reward = 0
         #? -------------------------MY CODE-----------------------------------
-        if show_game:
-            screen.fill(BLACK)
-        iter = iter + 1
         # Handle events
         for event in pygame.event.get():
             if event.type == pygame.QUIT:
                 running = False
+        if show_game:
+            screen.fill(BLACK)
+        iter = iter + 1
         # Handle Pacman movement
         keys = pygame.key.get_pressed()
         if keys[pygame.K_LEFT]:
@@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
         if keys[pygame.K_DOWN]:
             pacman.move(0, 1)
         #? -------------------------MY CODE-----------------------------------
         action = epsilon_greedy(q_values, state, EPSILON)
         if action == Direction.LEFT:
@@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             pacman.move(labyrinth, 0, 1)
         #? -------------------------MY CODE-----------------------------------
         if iter%3==0:
             # Ghost moves towards Pacman
             ghost.move_towards_pacman(labyrinth, pacman)
         # Check for collisions (game over if ghost catches pacman)
         if pacman.x == ghost.x and pacman.y == ghost.y:
-            # print("Game Over! The ghost caught Pacman.")
+            if show_game:
+                print("Game Over! The ghost caught Pacman.")
             running = False
             reward = REWARD_ON_LOSE
@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
#? -------------------------MY CODE-----------------------------------
cookie_counter = 0
#? half reward
# cookie_counter = 0
for y, row in enumerate(labyrinth):
for x, cell in enumerate(row):
if cell == ".":
cookie_counter += 1
# for y, row in enumerate(labyrinth):
# for x, cell in enumerate(row):
# if cell == ".":
# cookie_counter += 1
if cookie_counter == 10:
reward = REWARD_ON_HALF
if show_game:
print("Half reward")
# if cookie_counter == 10:
# # reward = REWARD_ON_HALF
# if show_game:
# print("Got half reward")
#? -------------------------MY CODE-----------------------------------
@@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             for x, cell in enumerate(row):
                 if cell == ".":
                     counter += 1
-        return 20-counter
+        return 20-counter, iter
         #? -------------------------MY CODE-----------------------------------
@@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
         pygame.display.flip()
         # Cap the frame rate
-        clock.tick(20)
+        clock.tick(40)
 def draw_labyrinth(screen, labyrinth):
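For context: `run_game` selects actions through `epsilon_greedy(q_values, state, EPSILON)`, which lives in the util module and is not part of this diff. A minimal sketch of what that step presumably looks like, built only from names visible in this commit (`Direction` and `get_best_q_action` from the util module); the actual body may differ:

import random

def epsilon_greedy(q_values, state, epsilon):
    # Explore with probability epsilon: pick a uniformly random direction.
    if random.random() < epsilon:
        return random.choice(list(Direction))
    # Otherwise exploit: take the action with the highest Q-value for this
    # state, as looked up by get_best_q_action in the util module.
    return get_best_q_action(q_values, state)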

ReinforcmentLearning/learning.py

@@ -2,8 +2,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from ReinforcmentLearning.game import run_game, wrapper
-from ReinforcmentLearning.util import initial_q_fill
+from ReinforcmentLearning.game import start_try
@@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
     q_value: (state, action)
     """
-    cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
-    # if show_game:
-        # if x == AMOUNT_RUNS / 4:
-        #     print("1 / 4 done")
-        # if x == AMOUNT_RUNS / 2:
-        #     print("2 / 4 done")
-        # if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
-        #     print("3 / 4 done")
+    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
     wins = sum(1 for result in cookies_per_run if result == 20)
@@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
     print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
     if plot_result:
-        plot_results(cookies_per_run)
+        plot_results(cookies_per_run, iterations)
     return cookies_per_run, wins
-def plot_results(cookies_per_run):
+def plot_results(cookies_per_run, iterations):
     wins = []
     losses = []
     win_count = 0
@@ -63,29 +51,22 @@ def plot_results(cookies_per_run):
         wins.append(win_count)
         losses.append((i + 1) - win_count) # Losses count down from top
-    # Last 700 attempts
-    last_700_wins = wins[-700:] if len(wins) >= 700 else wins
-    last_700_losses = losses[-700:] if len(losses) >= 700 else losses
-    last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
-    # Create figure with 2 subplots
+    # Create figure with 2 subplots (wins/losses and iterations)
     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
-    # Plot 1: All attempts (with thicker lines: linewidth=1.5)
+    # Plot 1: Wins vs Losses
     ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
     ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
     ax1.set_xlabel('Attempt')
     ax1.set_ylabel('Count')
-    ax1.set_title('All Attempts: Wins vs Losses')
+    ax1.set_title('Wins vs Losses Over Time')
     ax1.legend()
-    # Plot 2: Last 700 attempts (with thicker lines: linewidth=1.5)
-    ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins')
-    ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses')
-    ax2.set_xlabel('Attempt')
-    ax2.set_ylabel('Count')
-    ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
-    ax2.legend()
+    # Plot 2: Iterations per run
+    ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5)
+    ax2.set_xlabel('Run Number')
+    ax2.set_ylabel('Iterations')
+    ax2.set_title('Iterations per Run')
     plt.tight_layout()
-    plt.show()
+    plt.show()
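After this change, `plot_results` expects two parallel lists: cookies eaten per run (20 means every cookie was eaten, i.e. a win) and the number of loop iterations each run lasted. A quick way to smoke-test the figure without training, using purely made-up data (the random ranges here are illustrative, not from the project):

import random
from ReinforcmentLearning.learning import plot_results

fake_cookies = [random.randint(0, 20) for _ in range(5000)]       # 20 == win
fake_iterations = [random.randint(10, 500) for _ in range(5000)]  # steps survived
plot_results(fake_cookies, fake_iterations)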

ReinforcmentLearning/util.py

@@ -8,14 +8,17 @@ class Direction(Enum):
     LEFT = 3
-def initial_q_fill(q_values):
+def initial_q_fill():
+    q_values = {}
     for x in range(-7, 8):
         for y in range(-2, 3):
             for cookie_direction in Direction:
                 for action in Direction:
                     state = (x, y, cookie_direction)
                     q_values[(state, action)] = random.random() * 0.2 - 0.1
+    return q_values
@@ -109,7 +112,6 @@ def get_best_q_action(q_values, state):
     for (q_state, q_action), value in q_values.items():
         if q_state == state:
             actions_for_epsilon.append(q_action)
             if best_value is None:
                 best_value = value
                 best_action = q_action
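The table built by `initial_q_fill` is small enough to enumerate by hand: 15 x offsets (range(-7, 8)) times 5 y offsets (range(-2, 3)) times 4 cookie directions gives 300 states, each paired with 4 actions, for 1200 (state, action) entries, each initialized uniformly in [-0.1, 0.1). A one-line sanity check:

q = initial_q_fill()
assert len(q) == 15 * 5 * 4 * 4  # 300 states x 4 actions = 1200 entries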

View File

@@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 # EPSILON = 0.1618
 EPSILON = 0.01
-ALPHA = 0.01
-GAMMA = 0.2713
+# ALPHA = 0.01
+ALPHA = 0.2
+# GAMMA = 0.2713
+GAMMA = 0.8
 AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5
@@ -14,7 +16,7 @@ REWARD_ON_WIN = 400
 REWARD_ON_LOSE = -250
 plot_result = True
-show_game = True
+show_game = False
 oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
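For reference, with ALPHA = 0.2 and GAMMA = 0.8 the tabular Q-learning update that training presumably applies each step looks like the sketch below. The update itself is not part of this diff, so the variable names (state, action, reward, next_state) are assumptions based on the code above; Direction is the enum from the util module.

# Hypothetical helper showing one tabular Q-learning step with the constants
# above; run_game's actual update code is not shown in this commit.
def q_update(q_values, state, action, reward, next_state):
    # Bootstrapped target: immediate reward plus discounted best next value.
    best_next = max(q_values[(next_state, a)] for a in Direction)
    target = reward + GAMMA * best_next
    # Move the estimate a fraction ALPHA of the way toward the target.
    q_values[(state, action)] += ALPHA * (target - q_values[(state, action)])

Raising ALPHA from 0.01 to 0.2 moves each estimate 20x further per update, and GAMMA = 0.8 weights future reward far more heavily than 0.2713 did, which should help the delayed REWARD_ON_WIN = 400 propagate back through the table.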