refactor
parent
a52de42507
commit
1082c90fea
|
|
@ -2,116 +2,24 @@ import pygame
|
|||
import math
|
||||
import os
|
||||
|
||||
from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, epsilon_greedy, get_best_q_action, initial_q_fill
|
||||
import conf
|
||||
|
||||
# Initialize pygame
|
||||
|
||||
# Window and grid geometry, in pixels.
SCREEN_WIDTH = SCREEN_HEIGHT = 400
CELL_SIZE = 40

# RGB colour triples used by the drawing code.
YELLOW = (255, 255, 0)
RED = (255, 0, 0)
WHITE = (255, 255, 255)
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)

REWARD_ON_HALF = 50

# Maze layout, one string per row: "#" is a wall, "." is a cookie.
LABYRINTH_INIT = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########",
]

# Grid size in cells, derived from the layout above.
ROWS, COLS = len(LABYRINTH_INIT), len(LABYRINTH_INIT[0])
|
||||
|
||||
|
||||
|
||||
class Pacman:
    """Player sprite: a yellow disc whose mouth is drawn on every second frame.

    ``x`` and ``y`` are grid coordinates (cells); ``draw`` converts them to
    pixels with ``CELL_SIZE``.
    """

    def __init__(self, screen, x, y):
        """Remember the drawing surface and the starting cell.

        Args:
            screen: Surface passed to the ``pygame.draw`` calls in ``draw``.
            x: Starting column in the labyrinth grid.
            y: Starting row in the labyrinth grid.
        """
        self.screen = screen
        self.x = x
        self.y = y
        self.count = 0  # number of draw() calls so far; drives the mouth animation

    def move(self, labyrinth, dx, dy):
        """Step by ``(dx, dy)`` cells unless the target is a wall or off the grid.

        Args:
            labyrinth: Sequence of row strings; ``"#"`` marks a wall.
            dx: Column offset to apply.
            dy: Row offset to apply.
        """
        new_x, new_y = self.x + dx, self.y + dy
        # Reject off-grid targets explicitly: a negative index would silently
        # wrap around to the opposite edge and a too-large one would raise
        # IndexError.
        if not 0 <= new_y < len(labyrinth):
            return
        if not 0 <= new_x < len(labyrinth[new_y]):
            return
        if labyrinth[new_y][new_x] != "#":
            self.x = new_x
            self.y = new_y

    def draw(self):
        """Draw the body and, on every second call, a wedge-shaped mouth."""
        radius = CELL_SIZE // 2 - 4
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2
        pygame.draw.circle(self.screen, YELLOW, (center_x, center_y), radius)

        self.count += 1
        if self.count % 2 == 0:
            # The mouth is a black triangle from the centre to two points
            # slightly outside the body (1.3 * radius), at +/- 30 degrees.
            start_angle = math.pi / 6
            end_angle = -math.pi / 6
            start_pos = (center_x + int(radius * 1.3 * math.cos(start_angle)),
                         center_y - int(radius * 1.3 * math.sin(start_angle)))
            end_pos = (center_x + int(radius * 1.3 * math.cos(end_angle)),
                       center_y - int(radius * 1.3 * math.sin(end_angle)))
            pygame.draw.polygon(self.screen, BLACK, [(center_x, center_y), start_pos, end_pos])
|
||||
|
||||
|
||||
class Ghost:
    """Red ghost that chases Pacman one cell per move.

    The position is kept in grid cells; ``draw`` scales it to pixels with
    ``CELL_SIZE``.
    """

    # Sprite bitmap: every "#" becomes one filled square when drawn.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # "
    ]

    def __init__(self, screen, x, y):
        """Keep the drawing surface and the starting cell (column x, row y)."""
        self.screen, self.x, self.y = screen, x, y

    def move_towards_pacman(self, labyrinth, pacman):
        """Take at most one step that reduces the distance to ``pacman``.

        The horizontal direction is tried first; the vertical one is only
        tried when no horizontal step was possible. Cells holding ``"#"``
        block a step.
        """
        candidates = []
        if self.x < pacman.x:
            candidates.append((1, 0))
        elif self.x > pacman.x:
            candidates.append((-1, 0))
        if self.y < pacman.y:
            candidates.append((0, 1))
        elif self.y > pacman.y:
            candidates.append((0, -1))

        for step_x, step_y in candidates:
            if labyrinth[self.y + step_y][self.x + step_x] != "#":
                self.x += step_x
                self.y += step_y
                return

    def draw(self):
        """Paint the sprite bitmap into the ghost's cell, one square per "#"."""
        square = CELL_SIZE // len(self.ghost_pixels)  # edge length of one sprite pixel
        left = self.x * CELL_SIZE
        top = self.y * CELL_SIZE
        for line_no, line in enumerate(self.ghost_pixels):
            for col_no, mark in enumerate(line):
                if mark != "#":
                    continue
                pygame.draw.rect(
                    self.screen,
                    RED,
                    (left + col_no * square, top + line_no * square, square, square),
                )
|
||||
|
||||
from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill
|
||||
import data.classes_consts as consts
|
||||
import data.conf as conf
|
||||
# import data.classes as classes
|
||||
from data.classes import Pacman, Ghost
|
||||
|
||||
|
||||
def start_try(EPSILON, ALPHA, GAMMA):
|
||||
#? Learning initial
|
||||
q_values = initial_q_fill()
|
||||
print(len(q_values))
|
||||
|
||||
#? Game initial
|
||||
pygame.init()
|
||||
screen = None
|
||||
|
||||
if conf.show_game:
|
||||
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
||||
screen = consts.screen
|
||||
pygame.display.set_caption("Micro-Pacman")
|
||||
|
||||
#? Start try
|
||||
|
|
@ -136,7 +44,7 @@ def start_try(EPSILON, ALPHA, GAMMA):
|
|||
print(f"Run {x+1}: {iterations_per_run} iterations")
|
||||
|
||||
if conf.show_trained:
|
||||
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
||||
screen = consts.screen
|
||||
pygame.display.set_caption("Micro-Pacman")
|
||||
|
||||
while True:
|
||||
|
|
@ -152,24 +60,21 @@ def start_try(EPSILON, ALPHA, GAMMA):
|
|||
|
||||
def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
||||
clock = pygame.time.Clock()
|
||||
labyrinth = LABYRINTH_INIT.copy()
|
||||
labyrinth = consts.LABYRINTH_INIT.copy()
|
||||
|
||||
|
||||
# Initialize Pacman and Ghost positions
|
||||
pacman = Pacman(screen, 1, 1)
|
||||
ghost = Ghost(screen, COLS - 2, ROWS - 2)
|
||||
ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)
|
||||
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
|
||||
#? GAME LOOP
|
||||
running = True
|
||||
iter = 0
|
||||
while running:
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
reward = 0
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
iter = iter + 1
|
||||
|
||||
# Handle events
|
||||
for event in pygame.event.get():
|
||||
|
|
@ -177,12 +82,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
running = False
|
||||
|
||||
if conf.show_game:
|
||||
screen.fill(BLACK)
|
||||
screen.fill(consts.BLACK)
|
||||
|
||||
|
||||
iter = iter + 1
|
||||
|
||||
# Handle Pacman movement
|
||||
#? Arrow key movements
|
||||
keys = pygame.key.get_pressed()
|
||||
if keys[pygame.K_LEFT]:
|
||||
pacman.move(-1, 0)
|
||||
|
|
@ -193,8 +95,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
if keys[pygame.K_DOWN]:
|
||||
pacman.move(0, 1)
|
||||
|
||||
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
#? Agent movements
|
||||
action = epsilon_greedy(q_values, state, EPSILON)
|
||||
if action == Direction.LEFT:
|
||||
pacman.move(labyrinth, -1, 0)
|
||||
|
|
@ -204,7 +105,6 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
pacman.move(labyrinth, 0, -1)
|
||||
if action == Direction.DOWN:
|
||||
pacman.move(labyrinth, 0, 1)
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
|
||||
|
||||
if iter%3==0:
|
||||
|
|
@ -220,9 +120,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
if labyrinth[pacman.y][pacman.x] == ".":
|
||||
labyrinth[pacman.y] = labyrinth[pacman.y][:pacman.x] + " " + labyrinth[pacman.y][pacman.x+1:]
|
||||
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
#? half reward
|
||||
|
||||
# cookie_counter = 0
|
||||
|
||||
# for y, row in enumerate(labyrinth):
|
||||
|
|
@ -234,10 +132,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
# # reward = REWARD_ON_HALF
|
||||
# if show_game:
|
||||
# print("Got half reward")
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
|
||||
|
||||
# Check if all cookies are eaten (game over)
|
||||
if all("." not in row for row in labyrinth):
|
||||
# time_reward = calc_time_reward(iter)
|
||||
# reward = REWARD_ON_WIN * time_reward
|
||||
|
|
@ -245,11 +141,9 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
running = False
|
||||
|
||||
if conf.show_game:
|
||||
# print(f"You Win! Took {iter} iterations, reward: {time_reward}")
|
||||
print(f"You Win! Took {iter} iterations")
|
||||
|
||||
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
if not running:
|
||||
new_state = state
|
||||
else:
|
||||
|
|
@ -271,10 +165,8 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
if cell == ".":
|
||||
counter += 1
|
||||
return 20-counter, iter
|
||||
#? -------------------------MY CODE-----------------------------------
|
||||
|
||||
|
||||
# Draw the labyrinth, pacman, and ghost
|
||||
if conf.show_game:
|
||||
draw_labyrinth(screen, labyrinth)
|
||||
pacman.draw()
|
||||
|
|
@ -287,16 +179,5 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
|
|||
clock.tick(40)
|
||||
|
||||
|
||||
|
||||
def draw_labyrinth(screen, labyrinth):
    """Render the maze onto ``screen``: blue squares for "#", white dots for "."."""
    half_cell = CELL_SIZE // 2
    for row_no, line in enumerate(labyrinth):
        top = row_no * CELL_SIZE
        for col_no, symbol in enumerate(line):
            left = col_no * CELL_SIZE
            if symbol == "#":
                # Wall: fill the whole cell.
                pygame.draw.rect(screen, BLUE, (left, top, CELL_SIZE, CELL_SIZE))
            elif symbol == ".":
                # Cookie: small dot centred in the cell.
                pygame.draw.circle(screen, WHITE, (left + half_cell, top + half_cell), 5)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_game()
|
||||
|
|
@ -5,7 +5,7 @@ import numpy as np
|
|||
import pandas as pd
|
||||
|
||||
from ReinforcmentLearning.game import start_try
|
||||
import conf
|
||||
import data.conf as conf
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
from enum import Enum
|
||||
import random
|
||||
import pygame
|
||||
|
||||
import numpy as np
|
||||
|
||||
import data.classes_consts as consts
|
||||
|
||||
class Direction(Enum):
|
||||
UP = 0
|
||||
|
|
@ -17,17 +22,24 @@ def initial_q_fill():
|
|||
for action in Direction:
|
||||
state = (x, y, cookie_direction)
|
||||
q_values[(state, action)] = random.random() * 0.2 - 0.1
|
||||
# q_values[state][action] = random.random() * 0.2 - 0.1
|
||||
|
||||
return q_values
|
||||
|
||||
|
||||
def initial_q_fill2():
|
||||
indexer = consts.indexer
|
||||
|
||||
def get_start_state():
|
||||
first_direction_cookie = random.choice([True, False])
|
||||
if first_direction_cookie:
|
||||
return (7, 2, Direction.DOWN)
|
||||
"""Initialize Q-table using linear indexing"""
|
||||
# Create 2D array: [state_index, action]
|
||||
# 300 states × 4 actions = 1200 entries
|
||||
q_table = np.random.uniform(
|
||||
low=-0.1,
|
||||
high=0.1,
|
||||
size=(indexer.total_states, 4) # 300 × 4
|
||||
)
|
||||
|
||||
return(7, 2, Direction.RIGHT)
|
||||
return q_table, indexer
|
||||
|
||||
|
||||
|
||||
|
|
@ -140,3 +152,17 @@ def calc_time_reward(amount_iterations):
|
|||
return 1
|
||||
|
||||
return - (1 / 1000) * amount_iterations + 11
|
||||
|
||||
|
||||
|
||||
def draw_labyrinth(screen, labyrinth):
    """Render the maze onto ``screen``: blue squares for "#", white dots for "."."""
    cell = consts.CELL_SIZE
    wall_color = consts.BLUE
    cookie_color = consts.WHITE
    half_cell = cell // 2

    for row_no, line in enumerate(labyrinth):
        top = row_no * cell
        for col_no, symbol in enumerate(line):
            left = col_no * cell
            if symbol == "#":
                # Wall: fill the whole cell.
                pygame.draw.rect(screen, wall_color, (left, top, cell, cell))
            elif symbol == ".":
                # Cookie: small dot centred in the cell.
                pygame.draw.circle(screen, cookie_color, (left + half_cell, top + half_cell), 5)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,111 @@
|
|||
import math
|
||||
import pygame
|
||||
|
||||
from data.classes_consts import CELL_SIZE, YELLOW, BLACK, RED
|
||||
|
||||
|
||||
class Pacman:
    """Player sprite: a yellow disc whose mouth is drawn on every second frame.

    ``x`` and ``y`` are grid coordinates (cells); ``draw`` converts them to
    pixels with ``CELL_SIZE``.
    """

    def __init__(self, screen, x, y):
        """Remember the drawing surface and the starting cell.

        Args:
            screen: Surface passed to the ``pygame.draw`` calls in ``draw``.
            x: Starting column in the labyrinth grid.
            y: Starting row in the labyrinth grid.
        """
        self.screen = screen
        self.x = x
        self.y = y
        self.count = 0  # number of draw() calls so far; drives the mouth animation

    def move(self, labyrinth, dx, dy):
        """Step by ``(dx, dy)`` cells unless the target is a wall or off the grid.

        Args:
            labyrinth: Sequence of row strings; ``"#"`` marks a wall.
            dx: Column offset to apply.
            dy: Row offset to apply.
        """
        new_x, new_y = self.x + dx, self.y + dy
        # Reject off-grid targets explicitly: a negative index would silently
        # wrap around to the opposite edge and a too-large one would raise
        # IndexError.
        if not 0 <= new_y < len(labyrinth):
            return
        if not 0 <= new_x < len(labyrinth[new_y]):
            return
        if labyrinth[new_y][new_x] != "#":
            self.x = new_x
            self.y = new_y

    def draw(self):
        """Draw the body and, on every second call, a wedge-shaped mouth."""
        radius = CELL_SIZE // 2 - 4
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2
        pygame.draw.circle(self.screen, YELLOW, (center_x, center_y), radius)

        self.count += 1
        if self.count % 2 == 0:
            # The mouth is a black triangle from the centre to two points
            # slightly outside the body (1.3 * radius), at +/- 30 degrees.
            start_angle = math.pi / 6
            end_angle = -math.pi / 6
            start_pos = (center_x + int(radius * 1.3 * math.cos(start_angle)),
                         center_y - int(radius * 1.3 * math.sin(start_angle)))
            end_pos = (center_x + int(radius * 1.3 * math.cos(end_angle)),
                       center_y - int(radius * 1.3 * math.sin(end_angle)))
            pygame.draw.polygon(self.screen, BLACK, [(center_x, center_y), start_pos, end_pos])
|
||||
|
||||
|
||||
class Ghost:
    """Red ghost that chases Pacman one cell per move.

    The position is kept in grid cells; ``draw`` scales it to pixels with
    ``CELL_SIZE``.
    """

    # Sprite bitmap: every "#" becomes one filled square when drawn.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # "
    ]

    def __init__(self, screen, x, y):
        """Keep the drawing surface and the starting cell (column x, row y)."""
        self.screen, self.x, self.y = screen, x, y

    def move_towards_pacman(self, labyrinth, pacman):
        """Take at most one step that reduces the distance to ``pacman``.

        The horizontal direction is tried first; the vertical one is only
        tried when no horizontal step was possible. Cells holding ``"#"``
        block a step.
        """
        candidates = []
        if self.x < pacman.x:
            candidates.append((1, 0))
        elif self.x > pacman.x:
            candidates.append((-1, 0))
        if self.y < pacman.y:
            candidates.append((0, 1))
        elif self.y > pacman.y:
            candidates.append((0, -1))

        for step_x, step_y in candidates:
            if labyrinth[self.y + step_y][self.x + step_x] != "#":
                self.x += step_x
                self.y += step_y
                return

    def draw(self):
        """Paint the sprite bitmap into the ghost's cell, one square per "#"."""
        square = CELL_SIZE // len(self.ghost_pixels)  # edge length of one sprite pixel
        left = self.x * CELL_SIZE
        top = self.y * CELL_SIZE
        for line_no, line in enumerate(self.ghost_pixels):
            for col_no, mark in enumerate(line):
                if mark != "#":
                    continue
                pygame.draw.rect(
                    self.screen,
                    RED,
                    (left + col_no * square, top + line_no * square, square, square),
                )
|
||||
|
||||
|
||||
|
||||
class StateIndexer:
    """Map ``(x, y, cookie_dir)`` states to unique flat indices and back.

    With the bounds set in ``__init__`` there are 15 * 5 * 4 = 300 states,
    numbered ``0 .. total_states - 1`` with ``x`` as the slowest-varying and
    ``cookie_dir`` as the fastest-varying component.
    """

    def __init__(self):
        """Set the inclusive bounds of each component and derive the strides."""
        # State space boundaries (inclusive on both ends)
        self.x_min, self.x_max = -7, 7      # 15 values
        self.y_min, self.y_max = -2, 2      # 5 values
        self.dir_min, self.dir_max = 0, 3   # 4 directions

        # Number of distinct values per component
        self.x_range = self.x_max - self.x_min + 1        # 15
        self.y_range = self.y_max - self.y_min + 1        # 5
        self.dir_range = self.dir_max - self.dir_min + 1  # 4

        # Multipliers for indexing
        self.y_dir_product = self.y_range * self.dir_range      # 5 * 4 = 20
        self.total_states = self.x_range * self.y_dir_product   # 15 * 20 = 300

    @staticmethod
    def _check_bounds(name, value, low, high):
        """Raise ValueError unless ``low <= value <= high``."""
        if not low <= value <= high:
            raise ValueError(f"{name}={value!r} is outside [{low}, {high}]")

    def to_index(self, x, y, cookie_dir):
        """Convert a state to its unique index in ``0 .. total_states - 1``.

        Args:
            x: First state component, within ``[x_min, x_max]``.
            y: Second state component, within ``[y_min, y_max]``.
            cookie_dir: Direction code within ``[dir_min, dir_max]``, or an
                ``Enum`` member whose ``.value`` is such a code.

        Returns:
            The flat index of the state.

        Raises:
            ValueError: If any component is out of range. Without this check
                an out-of-range component would silently alias another
                state's index or point past the end of the table.
        """
        # Enum members cannot be subtracted from an int, so unwrap them first.
        cookie_dir = getattr(cookie_dir, "value", cookie_dir)

        self._check_bounds("x", x, self.x_min, self.x_max)
        self._check_bounds("y", y, self.y_min, self.y_max)
        self._check_bounds("cookie_dir", cookie_dir, self.dir_min, self.dir_max)

        # Convert to zero-based offsets
        x_idx = x - self.x_min
        y_idx = y - self.y_min
        dir_idx = cookie_dir - self.dir_min

        # Linear mapping: (x * y_range * dir_range) + (y * dir_range) + dir
        return (x_idx * self.y_dir_product) + (y_idx * self.dir_range) + dir_idx

    def from_index(self, idx):
        """Convert a flat index back to its ``(x, y, cookie_dir)`` state.

        Args:
            idx: Index in ``0 .. total_states - 1``.

        Returns:
            Tuple ``(x, y, cookie_dir)`` with ``cookie_dir`` as a plain
            direction code.

        Raises:
            ValueError: If ``idx`` is outside the valid range.
        """
        if not 0 <= idx < self.total_states:
            raise ValueError(
                f"idx={idx!r} is outside [0, {self.total_states - 1}]"
            )

        x_idx, remainder = divmod(idx, self.y_dir_product)
        y_idx, dir_idx = divmod(remainder, self.dir_range)

        return (
            x_idx + self.x_min,
            y_idx + self.y_min,
            dir_idx + self.dir_min
        )
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
import pygame
|
||||
|
||||
|
||||
|
||||
# Maze layout, one string per row: "#" is a wall, "." is a cookie.
LABYRINTH_INIT = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########",
]

# Grid size in cells, derived from the layout above.
ROWS, COLS = len(LABYRINTH_INIT), len(LABYRINTH_INIT[0])

# Window and grid geometry, in pixels.
SCREEN_WIDTH = SCREEN_HEIGHT = 400
CELL_SIZE = 40

# RGB colour triples used by the drawing code.
YELLOW = (255, 255, 0)
RED = (255, 0, 0)
WHITE = (255, 255, 255)
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)

# NOTE: this opens the display as a side effect of importing the module;
# the window is sized to fit the maze exactly.
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
|
||||
|
|
@ -1,3 +1,8 @@
|
|||
from data.classes import StateIndexer
|
||||
|
||||
|
||||
indexer = StateIndexer()
|
||||
|
||||
EPSILON = 0.01
|
||||
# EPSILON = 0.005
|
||||
ALPHA = 0.2
|
||||
|
|
@ -7,8 +12,9 @@ AMOUNT_RUNS = 5000
|
|||
AMOUNT_TRIES = 5
|
||||
|
||||
REWARD_ON_WIN = 400
|
||||
REWARD_ON_HALF = 50
|
||||
REWARD_ON_LOSE = -250
|
||||
|
||||
plot_result = True
|
||||
show_game = False
|
||||
show_game = True
|
||||
show_trained = True
|
||||
Loading…
Reference in New Issue