it finally works?

parent cfdef49a73
commit 0f98ce944e
@@ -12,8 +12,6 @@ SCREEN_WIDTH = 400
 SCREEN_HEIGHT = 400
 CELL_SIZE = 40
 
-os.environ['SDL_VIDEODRIVER'] = 'dummy'
-
 # Define colors
 YELLOW = (255, 255, 0)
 RED = (255, 0, 0)
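Note: the os.environ['SDL_VIDEODRIVER'] = 'dummy' line removed above is the usual SDL trick for running pygame with no window at all. A minimal standalone sketch of that pattern, for reference (not code from this repo):

    # Headless pygame via SDL's dummy video driver; the env var must be
    # set before pygame initializes its display module.
    import os
    os.environ['SDL_VIDEODRIVER'] = 'dummy'

    import pygame
    pygame.init()
    surface = pygame.Surface((400, 400))   # off-screen drawing target
    surface.fill((255, 255, 0))            # YELLOW, as defined above
    print(surface.get_at((0, 0)))          # -> (255, 255, 0, 255)
    pygame.quit()

With show_game now deciding per episode whether to open a window (see run_game below), this global headless switch is no longer needed.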
@@ -21,6 +19,8 @@ WHITE = (255, 255, 255)
 BLUE = (0, 0, 255)
 BLACK = (0, 0, 0)
 
+REWARD_ON_HALF = 50
+
 # Labyrinth as a string
 labyrinth_init = [
     "##########",
@@ -37,9 +37,9 @@ ROWS = len(labyrinth)
 COLS = len(labyrinth[0])
 
 # Initialize game screen
-# screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
-# pygame.display.set_caption("Micro-Pacman")
+screen = None
+# screen = pygame.Surface((COLS * CELL_SIZE, ROWS * CELL_SIZE))
 # pygame.display.set_caption("Micro-Pacman")
 
 # Pacman class
 class Pacman:
@@ -115,24 +115,34 @@ def draw_labyrinth(labyrinth):
 
 # Main game function
-def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
     clock = pygame.time.Clock()
     labyrinth = labyrinth_init.copy()
 
+    if show_game:
+        pygame.display.set_caption("Micro-Pacman")
+        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+
 
     # Initialize Pacman and Ghost positions
     pacman = Pacman(1, 1)
     ghost = Ghost(COLS - 2, ROWS - 2)
 
     #? -------------------------MY CODE-----------------------------------
     state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
     reward = 0
     #? -------------------------MY CODE-----------------------------------
 
     #? GAME LOOP
     running = True
     iter = 0
     while running:
-        #screen.fill(BLACK)
+        #? -------------------------MY CODE-----------------------------------
+        reward = 0
+        #? -------------------------MY CODE-----------------------------------
+
+        if show_game:
+            screen.fill(BLACK)
+
         iter = iter + 1
         # Handle events
        for event in pygame.event.get():
@@ -176,9 +186,25 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
         if labyrinth[pacman.y][pacman.x] == ".":
             labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
 
+        #? -------------------------MY CODE-----------------------------------
+        cookie_counter = 0
+
+        for y, row in enumerate(labyrinth):
+            for x, cell in enumerate(row):
+                if cell == ".":
+                    cookie_counter += 1
+
+        if cookie_counter == 10:
+            reward = REWARD_ON_HALF
+            if show_game:
+                print("Half reward")
+        #? -------------------------MY CODE-----------------------------------
+
 
         # Check if all cookies are eaten (game over)
         if all("." not in row for row in labyrinth):
             # print("You Win! Pacman ate all the cookies.")
+            if show_game:
+                print("You Win! Pacman ate all the cookies.")
             reward = REWARD_ON_WIN
             running = False
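The nested loop added above just counts the cookies still left in the maze, so the REWARD_ON_HALF shaping reward fires exactly once, when 10 remain. For reference, an equivalent one-liner (a sketch with a hypothetical maze, not code from this commit):

    # Equivalent remaining-cookie count via a generator expression.
    labyrinth = ["##########",
                 "#........#",
                 "#.######.#",
                 "##########"]      # hypothetical 4-row maze
    cookie_counter = sum(row.count(".") for row in labyrinth)
    print(cookie_counter)           # -> 10, so the half-way reward would fire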
@@ -208,15 +234,16 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE):
             return 20-counter
         #? -------------------------MY CODE-----------------------------------
 
-        #draw_labyrinth(labyrinth)
-        #pacman.draw()
-        #ghost.draw()
+        if show_game:
+            draw_labyrinth(labyrinth)
+            pacman.draw()
+            ghost.draw()
 
-            # Update display
-            # pygame.display.flip()
+            # Update display
+            pygame.display.flip()
 
-            # Cap the frame rate
-            # clock.tick(5)
+            # Cap the frame rate
+            clock.tick(20)
 
     pygame.quit()
 
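Taken together, show_game now decides at run time whether an episode renders: drawing, the display flip and the frame-rate cap only happen when a human is watching, so training episodes run at full speed. A condensed standalone sketch of the pattern (hypothetical window size and loop body, not the repo's code):

    import pygame

    def run_episode(show_game=False):
        pygame.init()
        screen = None
        if show_game:
            pygame.display.set_caption("Micro-Pacman")
            screen = pygame.display.set_mode((400, 400))
        clock = pygame.time.Clock()
        running = True
        steps = 0
        while running and steps < 200:      # stand-in for the real game loop
            steps += 1
            if show_game:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        running = False
                screen.fill((0, 0, 0))      # draw calls would go here
                pygame.display.flip()
                clock.tick(20)              # cap FPS only when rendering
        pygame.quit()

    run_episode(show_game=False)            # headless: no window, no FPS cap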

ReinforcmentLearning/learning.py
@@ -12,7 +12,8 @@ def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WI
     wins_per_try = []
 
     for x in range(AMOUNT_TRIES):
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+        plot_result = False
+        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, False)
         cookies_per_try.append(cookies_per_run)
         wins_per_try.append(amount_wins)
         # print(f"Finished try {x+1}\n")
@@ -20,7 +21,7 @@ def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WI
     return cookies_per_try, wins_per_try
 
 
-def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
+def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
     """
     state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
     action: Direction
@@ -34,29 +35,54 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
     cookies_per_run = []
     # Amount of single runs
     for x in range(AMOUNT_RUNS):
-        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE)
+        amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
         cookies_per_run.append(amount_cookies_ate)
 
-    wins = 0
-    for element in cookies_per_run:
-        if element == 20:
-            wins += 1
-            toAdd = 1 if element == 20 else 0
+    wins = sum(1 for result in cookies_per_run if result == 20)
 
     print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%")
 
+    if plot_result:
+        plot_results(cookies_per_run)
+
     return cookies_per_run, wins
 
 
-def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS):
-    # print("---------DONE---------")
-    # print("Used: ")
-    # print(f"Epsilon: {EPSILON}")
-    # print(f"Gamma: {GAMMA}")
-    # print(f"Alpha: {ALPHA}")
+def plot_results(cookies_per_run):
+    wins = []
+    losses = []
+    win_count = 0
 
-    # print("---------SUMMARY---------")
-    print(f"Average win percantage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n")
-    # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%")
-    # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%")
+    for i, r in enumerate(cookies_per_run):
+        if r == 20:
+            win_count += 1
+        wins.append(win_count)
+        losses.append((i + 1) - win_count)  # cumulative losses so far
+
+    # Last 700 attempts
+    last_700_wins = wins[-700:] if len(wins) >= 700 else wins
+    last_700_losses = losses[-700:] if len(losses) >= 700 else losses
+    last_700_indices = list(range(len(wins)-len(last_700_wins)+1, len(wins)+1))
+
+    # Create figure with 2 subplots
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
+
+    # Plot 1: all attempts (thicker lines: linewidth=1.5)
+    ax1.plot(range(1, len(wins)+1), wins, 'b-', linewidth=1.5, label='Wins')
+    ax1.plot(range(1, len(losses)+1), losses, 'orange', linewidth=1.5, label='Losses')
+    ax1.set_xlabel('Attempt')
+    ax1.set_ylabel('Count')
+    ax1.set_title('All Attempts: Wins vs Losses')
+    ax1.legend()
+
+    # Plot 2: last 700 attempts (thicker lines: linewidth=1.5)
+    ax2.plot(last_700_indices, last_700_wins, 'b-', linewidth=1.5, label='Wins')
+    ax2.plot(last_700_indices, last_700_losses, 'orange', linewidth=1.5, label='Losses')
+    ax2.set_xlabel('Attempt')
+    ax2.set_ylabel('Count')
+    ax2.set_title(f'Last {len(last_700_wins)} Attempts: Wins vs Losses')
+    ax2.legend()
+
+    plt.tight_layout()
+    plt.show()
 
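plot_results turns the per-run results into cumulative win/loss curves (a run counts as a win when all 20 cookies were eaten). The same running totals it builds in its loop can be expressed with itertools.accumulate; a small sketch with made-up results, not code from this commit:

    from itertools import accumulate

    cookies_per_run = [20, 13, 20, 20, 7]               # hypothetical results
    flags = [1 if r == 20 else 0 for r in cookies_per_run]
    wins = list(accumulate(flags))                      # wins so far per attempt
    losses = [(i + 1) - w for i, w in enumerate(wins)]  # attempts so far minus wins
    print(wins)    # [1, 1, 2, 3, 3]
    print(losses)  # [0, 1, 1, 1, 2]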

main.py (17 changed lines)
@@ -1,17 +1,22 @@
 from GenTunic.gen_tuning import gen_tuning_main
-from ReinforcmentLearning.learning import multipleTries
+from ReinforcmentLearning.learning import multipleTries, oneTry
 
 
-EPSILON = 0.1
-ALPHA = 0.4
-GAMMA = 0.8
+# EPSILON = 0.1618
+EPSILON = 0.01
+ALPHA = 0.01
+GAMMA = 0.2713
 
 AMOUNT_RUNS = 5000
 AMOUNT_TRIES = 5
 
-REWARD_ON_WIN = 10
+REWARD_ON_WIN = 100
 REWARD_ON_LOSE = -450
 
+plot_result = True
+show_game = False
+
-multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
 #gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
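For context, EPSILON, ALPHA and GAMMA feed a tabular Q-learning agent; the update itself lives in run_game's loop, which this diff does not show. A generic sketch of the rule these hyperparameters plug into (names and state encoding are illustrative, not the repo's):

    import random

    ALPHA, GAMMA, EPSILON = 0.01, 0.2713, 0.01   # values set above
    ACTIONS = ["up", "down", "left", "right"]
    q_values = {}                                # (state, action) -> value

    def choose_action(state):
        # epsilon-greedy: explore with probability EPSILON, else exploit
        if random.random() < EPSILON:
            return random.choice(ACTIONS)
        return max(ACTIONS, key=lambda a: q_values.get((state, a), 0.0))

    def q_update(state, action, reward, next_state):
        # standard Q-learning: move old estimate toward the bootstrapped target
        best_next = max(q_values.get((next_state, a), 0.0) for a in ACTIONS)
        old = q_values.get((state, action), 0.0)
        q_values[(state, action)] = old + ALPHA * (reward + GAMMA * best_next - old)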