commit a965dc07ce
parent ca843080fe

    working version 1.0
ReinforcmentLearning/game.py

@@ -101,10 +101,9 @@ class Ghost:
-def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
     #? Learning initial
-    q_values = {}
-    initial_q_fill(q_values)
+    q_values = initial_q_fill()
 
     #? Game initial
     pygame.init()
@@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
     screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
     pygame.display.set_caption("Micro-Pacman")
 
     #? Start try
     cookies_per_run = []
+    iterations = []
     for x in range(AMOUNT_RUNS):
-        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+        # if show_game:
+        if x == AMOUNT_RUNS / 4:
+            print("1 / 4 done")
+
+        if x == AMOUNT_RUNS / 2:
+            print("2 / 4 done")
+
+        if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            print("3 / 4 done")
+
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
         cookies_per_run.append(amount_cookies_ate)
+        iterations.append(iterations_per_run)
 
     pygame.quit()
 
-    return cookies_per_run
+    return cookies_per_run, iterations
 
 
 # Main game function
 def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
     clock = pygame.time.Clock()
     labyrinth = LABYRINTH_INIT.copy()
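Note on the new progress prints: `AMOUNT_RUNS / 4` is true division in Python 3, so `x == AMOUNT_RUNS / 4` only ever matches when AMOUNT_RUNS is divisible by 4 (it is for the 5000 set in main.py). A minimal sketch of the same milestones with integer division — hypothetical, not part of this commit:

def print_progress(x, amount_runs):
    # Integer division keeps the milestones as ints, so the equality
    # check fires even when amount_runs is not divisible by 4.
    for quarter in (1, 2, 3):
        if x == (amount_runs * quarter) // 4:
            print(f"{quarter} / 4 done")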
@@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         reward = 0
         #? -------------------------MY CODE-----------------------------------
 
-        if show_game:
-            screen.fill(BLACK)
-
-        iter = iter + 1
         # Handle events
         for event in pygame.event.get():
             if event.type == pygame.QUIT:
                 running = False
 
+        if show_game:
+            screen.fill(BLACK)
+
+        iter = iter + 1
+
         # Handle Pacman movement
         keys = pygame.key.get_pressed()
         if keys[pygame.K_LEFT]:
@@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         if keys[pygame.K_DOWN]:
             pacman.move(0, 1)
 
+
         #? -------------------------MY CODE-----------------------------------
         action = epsilon_greedy(q_values, state, EPSILON)
         if action == Direction.LEFT:
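The body of `epsilon_greedy` is outside this diff. It is presumably the standard exploration rule: with probability EPSILON take a random action, otherwise the greedy one. A minimal sketch consistent with the call site here and with `Direction` / `get_best_q_action` from util — an assumption, not the repo's actual implementation:

import random

def epsilon_greedy(q_values, state, epsilon):
    # Explore with probability epsilon: pick a uniformly random direction.
    if random.random() < epsilon:
        return random.choice(list(Direction))
    # Otherwise exploit: the action with the highest Q-value for this state.
    return get_best_q_action(q_values, state)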
@@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
             pacman.move(labyrinth, 0, 1)
         #? -------------------------MY CODE-----------------------------------
 
         if iter%3==0:
             # Ghost moves towards Pacman
             ghost.move_towards_pacman(labyrinth, pacman)
 
         # Check for collisions (game over if ghost catches pacman)
         if pacman.x == ghost.x and pacman.y == ghost.y:
-            # print("Game Over! The ghost caught Pacman.")
+            if show_game:
+                print("Game Over! The ghost caught Pacman.")
             running = False
             reward = REWARD_ON_LOSE
@@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
             labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
 
         #? -------------------------MY CODE-----------------------------------
-        cookie_counter = 0
+        #? half reward
+        # cookie_counter = 0
 
-        for y, row in enumerate(labyrinth):
-            for x, cell in enumerate(row):
-                if cell == ".":
-                    cookie_counter += 1
+        # for y, row in enumerate(labyrinth):
+        #     for x, cell in enumerate(row):
+        #         if cell == ".":
+        #             cookie_counter += 1
 
-        if cookie_counter == 10:
-            reward = REWARD_ON_HALF
-            if show_game:
-                print("Half reward")
+        # if cookie_counter == 10:
+        #     # reward = REWARD_ON_HALF
+        #     if show_game:
+        #         print("Got half reward")
         #? -------------------------MY CODE-----------------------------------
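This hunk disables the intermediate shaping reward that fired once half of the 20 cookies were gone. If it were re-enabled, the remaining-cookie count could be computed in one pass instead of the nested loops; a hypothetical compact version, reusing the REWARD_ON_HALF constant from the old code:

# Count remaining cookies in a single pass over the labyrinth rows.
remaining = sum(row.count(".") for row in labyrinth)
if remaining == 10:  # half of the 20 cookies are left
    reward = REWARD_ON_HALF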
@@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         for x, cell in enumerate(row):
             if cell == ".":
                 counter += 1
-    return 20-counter
+    return 20-counter, iter
     #? -------------------------MY CODE-----------------------------------
@@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
         pygame.display.flip()
 
         # Cap the frame rate
-        clock.tick(20)
+        clock.tick(40)
+
 
 
 def draw_labyrinth(screen, labyrinth):
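Raising the cap from `clock.tick(20)` to `clock.tick(40)` doubles the maximum step rate. Since the cap only matters when someone is watching, a hypothetical variant would skip it entirely during headless training:

# Only throttle when the game is actually rendered, so headless
# training runs as fast as the CPU allows.
if show_game:
    clock.tick(40)  # cap at 40 frames per second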
ReinforcmentLearning/learning.py
@@ -2,8 +2,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
-from ReinforcmentLearning.game import run_game, wrapper
-from ReinforcmentLearning.util import initial_q_fill
+from ReinforcmentLearning.game import start_try
 
 
@@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
     q_value: (state, action)
     """
 
-    cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
-
-    # if show_game:
-    # if x == AMOUNT_RUNS / 4:
-    #     print("1 / 4 done")
-
-    # if x == AMOUNT_RUNS / 2:
-    #     print("2 / 4 done")
-
-    # if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
-    #     print("3 / 4 done")
-
+    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
 
     wins = sum(1 for result in cookies_per_run if result == 20)
@@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
     print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
 
     if plot_result:
-        plot_results(cookies_per_run)
+        plot_results(cookies_per_run, iterations)
 
     return cookies_per_run, wins
 
 
-def plot_results(cookies_per_run):
+def plot_results(cookies_per_run, iterations):
     wins = []
     losses = []
     win_count = 0
@@ -63,29 +51,22 @@ def plot_results(cookies_per_run):
         wins.append(win_count)
         losses.append((i + 1) - win_count)  # Losses count down from top
 
-    # Last 700 attempts
-    last_700_wins = wins[-700:] if len(wins) >= 700 else wins
-    last_700_losses = losses[-700:] if len(losses) >= 700 else losses
-    last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
-
-    # Create figure with 2 subplots
+    # Create figure with 2 subplots (wins/losses and iterations)
     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
 
-    # Plot 1: All attempts (with thicker lines: linewidth=1.5)
+    # Plot 1: Wins vs Losses
     ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
     ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
     ax1.set_xlabel('Attempt')
     ax1.set_ylabel('Count')
-    ax1.set_title('All Attempts: Wins vs Losses')
+    ax1.set_title('Wins vs Losses Over Time')
     ax1.legend()
 
-    # Plot 2: Last 700 attempts (with thicker lines: linewidth=1.5)
-    ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins')
-    ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses')
-    ax2.set_xlabel('Attempt')
-    ax2.set_ylabel('Count')
-    ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
-    ax2.legend()
+    # Plot 2: Iterations per run
+    ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5)
+    ax2.set_xlabel('Run Number')
+    ax2.set_ylabel('Iterations')
+    ax2.set_title('Iterations per Run')
 
     plt.tight_layout()
-    plt.show()
+    plt.show()
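The head of the accumulation loop sits just above this hunk. From the visible lines and the `result == 20` win test in oneTry, it is presumably equivalent to this sketch:

# Reconstruction of the loop head above wins.append (not shown in the hunk):
win_count = 0
for i, result in enumerate(cookies_per_run):
    if result == 20:  # a run that ate all 20 cookies counts as a win
        win_count += 1
    wins.append(win_count)              # cumulative wins after run i+1
    losses.append((i + 1) - win_count)  # everything else so far is a loss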
ReinforcmentLearning/util.py
@@ -8,14 +8,17 @@ class Direction(Enum):
     LEFT = 3
 
 
-def initial_q_fill(q_values):
+def initial_q_fill():
+    q_values = {}
+
     for x in range(-7, 8):
         for y in range(-2, 3):
             for cookie_direction in Direction:
                 for action in Direction:
                     state = (x, y, cookie_direction)
                     q_values[(state, action)] = random.random() * 0.2 - 0.1
 
+    return q_values
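The signature change flips the calling convention: the caller no longer allocates the dict for `initial_q_fill` to mutate; the function now builds and returns the table itself (1200 entries: 15 x-offsets × 5 y-offsets × 4 cookie directions × 4 actions, each initialized uniformly in [-0.1, 0.1)). Side by side, as the matching hunk in game.py shows:

# Before this commit: caller-allocated table, filled by mutation.
q_values = {}
initial_q_fill(q_values)

# After this commit: the function owns and returns the table.
q_values = initial_q_fill()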
@@ -109,7 +112,6 @@ def get_best_q_action(q_values, state):
     for (q_state, q_action), value in q_values.items():
         if q_state == state:
-            actions_for_epsilon.append(q_action)
 
             if best_value is None:
                 best_value = value
                 best_action = q_action
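With the `actions_for_epsilon` bookkeeping removed, `get_best_q_action` reduces to a pure argmax over the table. A plausible complete version consistent with the visible fragments; the comparison branch is assumed, since the hunk only shows the first-match case:

def get_best_q_action(q_values, state):
    # Linear scan over all (state, action) keys, keeping the
    # highest-valued action for the requested state.
    best_value = None
    best_action = None
    for (q_state, q_action), value in q_values.items():
        if q_state == state:
            if best_value is None or value > best_value:
                best_value = value
                best_action = q_action
    return best_action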
main.py
@@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
 
 # EPSILON = 0.1618
 EPSILON = 0.01
-ALPHA = 0.01
-GAMMA = 0.2713
+# ALPHA = 0.01
+ALPHA = 0.2
+# GAMMA = 0.2713
+GAMMA = 0.8
 
 AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5
@@ -14,7 +16,7 @@ REWARD_ON_WIN = 400
 REWARD_ON_LOSE = -250
 
 plot_result = True
-show_game = True
+show_game = False
 
 
 oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
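The new hyperparameters trade the old tiny learning rate and heavy discounting (ALPHA = 0.01, GAMMA = 0.2713) for faster updates and a longer credit horizon (ALPHA = 0.2, GAMMA = 0.8), with rendering off for the full 5000-run training. For orientation, both constants feed the standard tabular Q-learning update, sketched here generically; the repo's actual update function is not part of this diff:

def q_update(q_values, state, action, reward, next_state, alpha, gamma):
    # Move Q(s, a) toward the bootstrapped target r + gamma * max_a' Q(s', a').
    best_next = max(q_values[(next_state, a)] for a in Direction)
    target = reward + gamma * best_next
    q_values[(state, action)] += alpha * (target - q_values[(state, action)])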