Added global conf file, cleaning up

parent 44b7a4e942
commit a52de42507

@@ -1,8 +1,8 @@
 import math
 import time
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 from matplotlib import pyplot as plt
 import numpy as np
 from GenTunic.gen_math import project_bit
 

@@ -3,6 +3,7 @@ import math
 import os
 
 from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
+import conf
 
 # Initialize pygame
 

@@ -101,7 +102,7 @@ class Ghost:
 
 
-def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game):
+def start_try(EPSILON, ALPHA, GAMMA):
     #? Learning initial
     q_values = initial_q_fill()
 

@@ -109,35 +110,47 @@ def start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE,
     pygame.init()
     screen = None
 
-    if show_game:
+    if conf.show_game:
         screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
         pygame.display.set_caption("Micro-Pacman")
 
     #? Start try
     cookies_per_run = []
     iterations = []
-    for x in range(AMOUNT_RUNS):
-        if show_game:
-            if x == AMOUNT_RUNS / 4:
+    for x in range(conf.AMOUNT_RUNS):
+        if conf.show_game:
+            if x == conf.AMOUNT_RUNS / 4:
                 print("1 / 4 done")
 
-            if x == AMOUNT_RUNS / 2:
+            if x == conf.AMOUNT_RUNS / 2:
                 print("2 / 4 done")
 
-            if x == (AMOUNT_RUNS / 2) + (AMOUNT_RUNS / 4):
+            if x == (conf.AMOUNT_RUNS / 2) + (conf.AMOUNT_RUNS / 4):
                 print("3 / 4 done")
 
-        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen)
+        amount_cookies_ate, iterations_per_run = run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
         cookies_per_run.append(amount_cookies_ate)
         iterations.append(iterations_per_run)
 
+        print(f"Run {x+1}: {iterations_per_run} iterations")
+
+    if conf.show_trained:
+        screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
+        pygame.display.set_caption("Micro-Pacman")
+
+        while True:
+            print("After game")
+            run_game(q_values, EPSILON, ALPHA, GAMMA, screen)
+
     pygame.quit()
 
     return cookies_per_run, iterations
 
 
-def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game, screen):
+def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
     clock = pygame.time.Clock()
     labyrinth = LABYRINTH_INIT.copy()

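Note on the progress checks above: conf.AMOUNT_RUNS / 4 is float division in Python 3, so the milestone messages rely on value-based int/float equality. With the configured AMOUNT_RUNS = 5000 that works (1250 == 1250.0), but a run count not divisible by 4 would silently skip the print. A quick illustration:

    print(1250 == 5000 / 4)   # True  -- ints and floats compare by value
    print(1250 == 5001 / 4)   # False -- 5001 / 4 is 1250.25, so "1 / 4 done" would never fire
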
@@ -163,7 +176,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             if event.type == pygame.QUIT:
                 running = False
 
-        if show_game:
+        if conf.show_game:
            screen.fill(BLACK)
 
 

@@ -198,10 +211,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
             ghost.move_towards_pacman(labyrinth, pacman)
 
         if pacman.x == ghost.x and pacman.y == ghost.y:
-            if show_game:
+            if conf.show_game:
                 print("Game Over! The ghost caught Pacman.")
             running = False
-            reward = REWARD_ON_LOSE
+            reward = conf.REWARD_ON_LOSE
 
         # Eat cookies
         if labyrinth[pacman.y][pacman.x] == ".":

@@ -228,10 +241,10 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
         if all("." not in row for row in labyrinth):
             # time_reward = calc_time_reward(iter)
             # reward = REWARD_ON_WIN * time_reward
-            reward = REWARD_ON_WIN
+            reward = conf.REWARD_ON_WIN
             running = False
 
-            if show_game:
+            if conf.show_game:
                 # print(f"You Win! Took {iter} iterations, reward: {time_reward}")
                 print(f"You Win! Took {iter} iterations")
 

@@ -262,7 +275,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, sho
 
 
         # Draw the labyrinth, pacman, and ghost
-        if show_game:
+        if conf.show_game:
            draw_labyrinth(screen, labyrinth)
            pacman.draw()
            ghost.draw()

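For orientation: the EPSILON, ALPHA, and GAMMA parameters threaded through run_game are the standard tabular Q-learning knobs. The actual helpers (epsilon_greedy, get_best_q_action) live in ReinforcmentLearning.util and are not part of this diff; the sketch below shows what such helpers conventionally compute, with illustrative names and signatures rather than the repository's:

    import random

    def epsilon_greedy_sketch(q_values, state, actions, epsilon):
        # Explore with probability epsilon, otherwise exploit the best known action.
        if random.random() < epsilon:
            return random.choice(actions)
        return max(actions, key=lambda a: q_values.get((state, a), 0.0))

    def q_update_sketch(q_values, state, action, reward, next_state, actions, alpha, gamma):
        # Tabular Q-learning: Q(s,a) += alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        best_next = max(q_values.get((next_state, a), 0.0) for a in actions)
        old = q_values.get((state, action), 0.0)
        q_values[(state, action)] = old + alpha * (reward + gamma * best_next - old)
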
@@ -1,47 +1,46 @@
-import matplotlib
-matplotlib.use('Agg')
+# import matplotlib
+# matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
 from ReinforcmentLearning.game import start_try
+import conf
 
 
-def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE):
-    cookies_per_try = []
+def multipleTries(EPSILON, ALPHA, GAMMA):
+    conf.show_game = False
+    conf.plot_result = False
+
     wins_per_try = []
 
-    for x in range(AMOUNT_TRIES):
-        plot_result = False
-        show_game = False
-        cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
+    for x in range(conf.AMOUNT_TRIES):
+        cookies_per_run = oneTry(EPSILON, ALPHA, GAMMA)
 
-        last_700_results = cookies_per_run[-700:]
-        wins_in_last_700 = sum(1 for result in last_700_results if result == 20)
-        win_probalitiy = (wins_in_last_700 / 700)
+        results_last_700 = cookies_per_run[-700:]
+        wins_in_last_700 = sum(1 for result in results_last_700 if result == 20)
+        win_probalitiy_last_700 = (wins_in_last_700 / 700)
+        wins_per_try.append(win_probalitiy_last_700)
 
-        cookies_per_run.append(cookies_per_run)
-        wins_per_try.append(win_probalitiy)
         # print(f"Finished try {x+1}\n")
 
-    return cookies_per_try, wins_per_try
+    return wins_per_try
 
 
-def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game):
+def oneTry(EPSILON, ALPHA, GAMMA):
     """
     state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction)
     action: Direction
     q_value: (state, action)
     """
-    cookies_per_run, iterations = start_try(AMOUNT_RUNS, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE, show_game)
+    cookies_per_run, iterations = start_try(EPSILON, ALPHA, GAMMA)
 
-    wins = sum(1 for result in cookies_per_run if result == 20)
-
-    if plot_result:
-        print(f"Win percentage overall: {(wins/AMOUNT_RUNS)*100}%")
+    if conf.plot_result:
+        wins = sum(1 for result in cookies_per_run if result == 20)
+        print(f"Win percentage overall: {(wins/conf.AMOUNT_RUNS)*100}%")
 
         last_700_results = cookies_per_run[-700:]
         wins_in_last_700 = sum(1 for result in last_700_results if result == 20)

@@ -50,7 +49,8 @@ def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, pl
 
         plot_results(cookies_per_run, iterations)
 
-    return cookies_per_run, wins
+    return cookies_per_run
+
 
 
 def plot_results(cookies_per_run, iterations):

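The win-rate metric in multipleTries and oneTry counts a run as a win when its cookies_per_run entry equals 20 (all cookies eaten) and averages only the last 700 runs, so early untrained episodes don't drag the estimate down. A worked example on invented data:

    cookies_per_run = [5] * 4300 + [20] * 560 + [7] * 140   # hypothetical results for illustration
    last_700 = cookies_per_run[-700:]
    wins = sum(1 for result in last_700 if result == 20)
    print(wins / 700)   # 0.8 -> 80% win rate over the final 700 runs
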
@@ -0,0 +1,14 @@
+EPSILON = 0.01
+# EPSILON = 0.005
+ALPHA = 0.2
+GAMMA = 0.8
+
+AMOUNT_RUNS = 5000
+AMOUNT_TRIES = 5
+
+REWARD_ON_WIN = 400
+REWARD_ON_LOSE = -250
+
+plot_result = True
+show_game = False
+show_trained = True

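The new conf module acts as a process-wide settings singleton: Python caches imported modules in sys.modules, so every import conf yields the same object, and rebinding an attribute (as multipleTries does with conf.show_game = False) is immediately visible to all other importers. A minimal, self-contained sketch of that behavior, with a hypothetical module name built in-process for the demo:

    import sys
    import types

    # Hypothetical stand-in for conf, registered in the module cache.
    settings = types.ModuleType("settings")
    settings.verbose = True
    sys.modules["settings"] = settings

    def report():
        import settings  # always resolves to the one cached module object
        if settings.verbose:
            print("verbose output")

    report()                   # prints "verbose output"
    settings.verbose = False   # rebind the shared attribute...
    report()                   # ...and now prints nothing

Note the access must stay qualified (conf.show_game); a "from conf import show_game" would snapshot the value at import time and miss later changes.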
main.py

@@ -1,26 +1,11 @@
 from GenTunic.gen_tuning import gen_tuning_main
 from ReinforcmentLearning.learning import multipleTries, oneTry
 from ReinforcmentLearning.util import calc_time_reward
+import conf
 
 
 
 
-EPSILON = 0.01
-# EPSILON = 0.005
-ALPHA = 0.2
-GAMMA = 0.8
-
-AMOUNT_RUNS = 5000
-AMOUNT_TRIES = 5
-
-REWARD_ON_WIN = 400
-REWARD_ON_LOSE = -250
-
-plot_result = False
-show_game = False
-
-
-
-# oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, plot_result, show_game)
-#multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
-gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)
+oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
+# multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
+# gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)

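With the constants centralized in conf, switching experiments no longer means threading new arguments through main.py; overriding a conf attribute before the call is enough. A hypothetical variation, not part of this commit:

    import conf
    from ReinforcmentLearning.learning import oneTry

    conf.EPSILON = 0.005    # the commented-out alternative exploration rate
    conf.show_game = True   # render the runs instead of training headless
    oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)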