working version 1.0
parent
ca843080fe
commit
a965dc07ce
|
|
@ -101,10 +101,9 @@ class Ghost:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
|
def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
|
||||||
#? Learning initial
|
#? Learning initial
|
||||||
q_values = {}
|
q_values = initial_q_fill()
|
||||||
initial_q_fill(q_values)
|
|
||||||
|
|
||||||
#? Game initial
|
#? Game initial
|
||||||
pygame.init()
|
pygame.init()
|
||||||
|
|
@ -114,17 +113,29 @@ def wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, s
|
||||||
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
||||||
pygame.display.set_caption("Micro-Pacman")
|
pygame.display.set_caption("Micro-Pacman")
|
||||||
|
|
||||||
|
#? Start try
|
||||||
cookies_per_run = []
|
cookies_per_run = []
|
||||||
|
iterations = []
|
||||||
for x in range(AMOUNT_RUNS):
|
for x in range(AMOUNT_RUNS):
|
||||||
amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
|
# if show_game:
|
||||||
|
if x == AMOUNT_RUNS / 4:
|
||||||
|
print("1 / 4 done")
|
||||||
|
|
||||||
|
if x == AMOUNT_RUNS / 2:
|
||||||
|
print("2 / 4 done")
|
||||||
|
|
||||||
|
if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
|
||||||
|
print("3 / 4 done")
|
||||||
|
amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
|
||||||
cookies_per_run.append(amount_cookies_ate)
|
cookies_per_run.append(amount_cookies_ate)
|
||||||
|
iterations.append(iterations_per_run)
|
||||||
|
|
||||||
pygame.quit()
|
pygame.quit()
|
||||||
|
|
||||||
return cookies_per_run
|
return cookies_per_run, iterations
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Main game function
|
|
||||||
def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
|
def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
|
||||||
clock = pygame.time.Clock()
|
clock = pygame.time.Clock()
|
||||||
labyrinth = LABYRINTH_INIT.copy()
|
labyrinth = LABYRINTH_INIT.copy()
|
||||||
|
|
@ -146,15 +157,17 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
|
||||||
reward = 0
|
reward = 0
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
if show_game:
|
|
||||||
screen.fill(BLACK)
|
|
||||||
|
|
||||||
iter = iter + 1
|
|
||||||
# Handle events
|
# Handle events
|
||||||
for event in pygame.event.get():
|
for event in pygame.event.get():
|
||||||
if event.type == pygame.QUIT:
|
if event.type == pygame.QUIT:
|
||||||
running = False
|
running = False
|
||||||
|
|
||||||
|
if show_game:
|
||||||
|
screen.fill(BLACK)
|
||||||
|
|
||||||
|
|
||||||
|
iter = iter + 1
|
||||||
|
|
||||||
# Handle Pacman movement
|
# Handle Pacman movement
|
||||||
keys = pygame.key.get_pressed()
|
keys = pygame.key.get_pressed()
|
||||||
if keys[pygame.K_LEFT]:
|
if keys[pygame.K_LEFT]:
|
||||||
|
|
@ -166,6 +179,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
|
||||||
if keys[pygame.K_DOWN]:
|
if keys[pygame.K_DOWN]:
|
||||||
pacman.move(0, 1)
|
pacman.move(0, 1)
|
||||||
|
|
||||||
|
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
action = epsilon_greedy(q_values, state, EPSILON)
|
action = epsilon_greedy(q_values, state, EPSILON)
|
||||||
if action == Direction.LEFT:
|
if action == Direction.LEFT:
|
||||||
|
|
@ -178,13 +192,13 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
|
||||||
pacman.move(labyrinth, 0, 1)
|
pacman.move(labyrinth, 0, 1)
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
|
|
||||||
if iter%3==0:
|
if iter%3==0:
|
||||||
# Ghost moves towards Pacman
|
|
||||||
ghost.move_towards_pacman(labyrinth, pacman)
|
ghost.move_towards_pacman(labyrinth, pacman)
|
||||||
|
|
||||||
# Check for collisions (game over if ghost catches pacman)
|
|
||||||
if pacman.x == ghost.x and pacman.y == ghost.y:
|
if pacman.x == ghost.x and pacman.y == ghost.y:
|
||||||
# print("Game Over! The ghost caught Pacman.")
|
if show_game:
|
||||||
|
print("Game Over! The ghost caught Pacman.")
|
||||||
running = False
|
running = False
|
||||||
reward = REWARD_ON_LOSE
|
reward = REWARD_ON_LOSE
|
||||||
|
|
||||||
|
|
@ -193,17 +207,18 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
|
||||||
labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
|
labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
|
||||||
|
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
cookie_counter = 0
|
#? half reward
|
||||||
|
# cookie_counter = 0
|
||||||
|
|
||||||
for y, row in enumerate(labyrinth):
|
# for y, row in enumerate(labyrinth):
|
||||||
for x, cell in enumerate(row):
|
# for x, cell in enumerate(row):
|
||||||
if cell == ".":
|
# if cell == ".":
|
||||||
cookie_counter += 1
|
# cookie_counter += 1
|
||||||
|
|
||||||
if cookie_counter == 10:
|
# if cookie_counter == 10:
|
||||||
reward = REWARD_ON_HALF
|
# # reward = REWARD_ON_HALF
|
||||||
if show_game:
|
# if show_game:
|
||||||
print("Half reward")
|
# print("Got half reward")
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -236,7 +251,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
|
||||||
for x, cell in enumerate(row):
|
for x, cell in enumerate(row):
|
||||||
if cell == ".":
|
if cell == ".":
|
||||||
counter += 1
|
counter += 1
|
||||||
return 20-counter
|
return 20-counter, iter
|
||||||
#? -------------------------MY CODE-----------------------------------
|
#? -------------------------MY CODE-----------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -250,7 +265,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
|
||||||
pygame.display.flip()
|
pygame.display.flip()
|
||||||
|
|
||||||
# Cap the frame rate
|
# Cap the frame rate
|
||||||
clock.tick(20)
|
clock.tick(40)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def draw_labyrinth(screen, labyrinth):
|
def draw_labyrinth(screen, labyrinth):
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,7 @@ import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from ReinforcmentLearning.game import run_game, wrapper
|
from ReinforcmentLearning.game import start_try
|
||||||
from ReinforcmentLearning.util import initial_q_fill
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -28,18 +27,7 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
|
||||||
q_value: (state, action)
|
q_value: (state, action)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
cookies_per_run = wrapper(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
|
cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
|
||||||
|
|
||||||
# if show_game:
|
|
||||||
# if x == AMOUNT_RUNS / 4:
|
|
||||||
# print("1 / 4 done")
|
|
||||||
|
|
||||||
# if x == AMOUNT_RUNS / 2:
|
|
||||||
# print("2 / 4 done")
|
|
||||||
|
|
||||||
# if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
|
|
||||||
# print("3 / 4 done")
|
|
||||||
|
|
||||||
|
|
||||||
wins = sum(1 for result in cookies_per_run if result == 20)
|
wins = sum(1 for result in cookies_per_run if result == 20)
|
||||||
|
|
||||||
|
|
@ -47,12 +35,12 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
|
||||||
print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
|
print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
|
||||||
|
|
||||||
if plot_result:
|
if plot_result:
|
||||||
plot_results(cookies_per_run)
|
plot_results(cookies_per_run, iterations)
|
||||||
|
|
||||||
return cookies_per_run, wins
|
return cookies_per_run, wins
|
||||||
|
|
||||||
|
|
||||||
def plot_results(cookies_per_run):
|
def plot_results(cookies_per_run, iterations):
|
||||||
wins = []
|
wins = []
|
||||||
losses = []
|
losses = []
|
||||||
win_count = 0
|
win_count = 0
|
||||||
|
|
@ -63,29 +51,22 @@ def plot_results(cookies_per_run):
|
||||||
wins.append(win_count)
|
wins.append(win_count)
|
||||||
losses.append((i + 1) - win_count) # Losses count down from top
|
losses.append((i + 1) - win_count) # Losses count down from top
|
||||||
|
|
||||||
# Last 700 attempts
|
# Create figure with 2 subplots (wins/losses and iterations)
|
||||||
last_700_wins = wins[-700:] if len(wins) >= 700 else wins
|
|
||||||
last_700_losses = losses[-700:] if len(losses) >= 700 else losses
|
|
||||||
last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
|
|
||||||
|
|
||||||
# Create figure with 2 subplots
|
|
||||||
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
|
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
|
||||||
|
|
||||||
# Plot 1: All attempts (with thicker lines: linewidth=1.5)
|
# Plot 1: Wins vs Losses
|
||||||
ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
|
ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
|
||||||
ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
|
ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
|
||||||
ax1.set_xlabel('Attempt')
|
ax1.set_xlabel('Attempt')
|
||||||
ax1.set_ylabel('Count')
|
ax1.set_ylabel('Count')
|
||||||
ax1.set_title('All Attempts: Wins vs Losses')
|
ax1.set_title('Wins vs Losses Over Time')
|
||||||
ax1.legend()
|
ax1.legend()
|
||||||
|
|
||||||
# Plot 2: Last 700 attempts (with thicker lines: linewidth=1.5)
|
# Plot 2: Iterations per run
|
||||||
ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins')
|
ax2.plot(range(1, len(iterations)+1), iterations, 'g-', linewidth=1.5)
|
||||||
ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses')
|
ax2.set_xlabel('Run Number')
|
||||||
ax2.set_xlabel('Attempt')
|
ax2.set_ylabel('Iterations')
|
||||||
ax2.set_ylabel('Count')
|
ax2.set_title('Iterations per Run')
|
||||||
ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
|
|
||||||
ax2.legend()
|
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
@ -8,7 +8,9 @@ class Direction(Enum):
|
||||||
LEFT = 3
|
LEFT = 3
|
||||||
|
|
||||||
|
|
||||||
def initial_q_fill(q_values):
|
def initial_q_fill():
|
||||||
|
q_values = {}
|
||||||
|
|
||||||
for x in range(-7, 8):
|
for x in range(-7, 8):
|
||||||
for y in range(-2, 3):
|
for y in range(-2, 3):
|
||||||
for cookie_direction in Direction:
|
for cookie_direction in Direction:
|
||||||
|
|
@ -16,6 +18,7 @@ def initial_q_fill(q_values):
|
||||||
state = (x, y, cookie_direction)
|
state = (x, y, cookie_direction)
|
||||||
q_values[(state, action)] = random.random() * 0.2 - 0.1
|
q_values[(state, action)] = random.random() * 0.2 - 0.1
|
||||||
|
|
||||||
|
return q_values
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -109,7 +112,6 @@ def get_best_q_action(q_values, state):
|
||||||
for (q_state, q_action), value in q_values.items():
|
for (q_state, q_action), value in q_values.items():
|
||||||
if q_state == state:
|
if q_state == state:
|
||||||
actions_for_epsilon.append(q_action)
|
actions_for_epsilon.append(q_action)
|
||||||
|
|
||||||
if best_value is None:
|
if best_value is None:
|
||||||
best_value = value
|
best_value = value
|
||||||
best_action = q_action
|
best_action = q_action
|
||||||
|
|
|
||||||
8
main.py
8
main.py
|
|
@ -4,8 +4,10 @@ from ReinforcmentLearning.learning import multipleTries, oneTry
|
||||||
|
|
||||||
# EPSILON = 0.1618
|
# EPSILON = 0.1618
|
||||||
EPSILON = 0.01
|
EPSILON = 0.01
|
||||||
ALPHA = 0.01
|
# ALPHA = 0.01
|
||||||
GAMMA = 0.2713
|
ALPHA = 0.2
|
||||||
|
# GAMMA = 0.2713
|
||||||
|
GAMMA = 0.8
|
||||||
|
|
||||||
AMOUNT_RUNS = 5000
|
AMOUNT_RUNS = 5000
|
||||||
AMOUNT_TRIES = 5
|
AMOUNT_TRIES = 5
|
||||||
|
|
@ -14,7 +16,7 @@ REWARD_ON_WIN = 400
|
||||||
REWARD_ON_LOSE = -250
|
REWARD_ON_LOSE = -250
|
||||||
|
|
||||||
plot_result = True
|
plot_result = True
|
||||||
show_game = True
|
show_game = False
|
||||||
|
|
||||||
|
|
||||||
oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
|
oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue