added numpy for state, refactor

main
2wenty1ne 2025-12-10 18:54:43 +01:00
parent 1082c90fea
commit 8aeb8c1449
8 changed files with 29 additions and 282 deletions

View File

@ -5,14 +5,12 @@ import os
from ReinforcmentLearning.util import Direction, calc_current_state, calc_time_reward, draw_labyrinth, epsilon_greedy, get_best_q_action, initial_q_fill
import data.classes_consts as consts
import data.conf as conf
# import data.classes as classes
from data.classes import Pacman, Ghost
def start_try(EPSILON, ALPHA, GAMMA):
#? Learning initial
q_values = initial_q_fill()
print(len(q_values))
#? Game initial
pygame.init()
@ -41,7 +39,7 @@ def start_try(EPSILON, ALPHA, GAMMA):
cookies_per_run.append(amount_cookies_ate)
iterations.append(iterations_per_run)
print(f"Run {x+1}: {iterations_per_run} iterations")
# print(f"Run {x+1}: {iterations_per_run} iterations")
if conf.show_trained:
screen = consts.screen
@ -62,7 +60,6 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
clock = pygame.time.Clock()
labyrinth = consts.LABYRINTH_INIT.copy()
# Initialize Pacman and Ghost positions
pacman = Pacman(screen, 1, 1)
ghost = Ghost(screen, consts.COLS - 2, consts.ROWS - 2)
@ -149,12 +146,12 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA, screen):
else:
new_state = calc_current_state(labyrinth, pacman.x, pacman.y, ghost.x, ghost.y)
best_action_new_state, _ = get_best_q_action(q_values, new_state)
best_value_new_state = q_values[(new_state, best_action_new_state)]
best_action_new_state = get_best_q_action(q_values, new_state)
best_value_new_state = q_values[new_state][best_action_new_state.value]
current_value = q_values.get((state, action))
current_value = q_values[state][action.value]
adjusted_value = ALPHA * (reward + GAMMA * best_value_new_state - current_value)
q_values[(state, action)] = current_value + adjusted_value
q_values[state][action.value] = current_value + adjusted_value
state = new_state

View File

@ -1,5 +1,3 @@
# import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

View File

@ -19,29 +19,15 @@ def initial_q_fill():
for x in range(-7, 8):
for y in range(-2, 3):
for cookie_direction in Direction:
for action in Direction:
state = (x, y, cookie_direction)
q_values[(state, action)] = random.random() * 0.2 - 0.1
# q_values[state][action] = random.random() * 0.2 - 0.1
state = (x, y, cookie_direction)
q_values[state] = np.zeros(4)
for action_idx in range(len(Direction)):
q_values[state][action_idx] = random.random() * 0.2 - 0.1
return q_values
def initial_q_fill2():
    """Initialize the Q-table as a dense array addressed by linear state index.

    Returns:
        A ``(q_table, indexer)`` tuple. ``q_table`` is a NumPy array of shape
        ``(indexer.total_states, 4)`` whose entries are drawn uniformly from
        [-0.1, 0.1); ``indexer`` is the shared ``consts.indexer`` used to map
        states to row indices.
    """
    indexer = consts.indexer

    # 2D array: one row per state index, one column per action (4 actions).
    q_table = np.random.uniform(
        low=-0.1,
        high=0.1,
        size=(indexer.total_states, 4),
    )
    return q_table, indexer
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
x_ghost_dist = pac_x - ghost_x
@ -70,8 +56,11 @@ def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
dx = cookie_x - pac_x
dy = cookie_y - pac_y
dy = cookie_y - pac_y
return cords_to_direction(dx, dy)
def cords_to_direction(dx, dy):
if abs(dx) >= abs(dy):
#? X distance bigger
@ -102,41 +91,19 @@ def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
def epsilon_greedy(q_values, state, epsilon):
best_action, actions_for_epsilon = get_best_q_action(q_values, state)
if random.random() < epsilon:
if not actions_for_epsilon:
best_action = get_random_direction()
return best_action
random_action = random.choice(actions_for_epsilon)
random_action = get_random_direction()
return random_action
best_action = get_best_q_action(q_values, state)
return best_action
def get_best_q_action(q_values, state):
best_action = None
best_value = None
state_q_values = q_values[state]
best_action_index = np.argmax(state_q_values)
actions_for_epsilon = []
for (q_state, q_action), value in q_values.items():
if q_state == state:
actions_for_epsilon.append(q_action)
if best_value is None:
best_value = value
best_action = q_action
continue
if value > best_value:
best_value = value
best_action = q_action
if not best_action:
best_action = get_random_direction()
return best_action, actions_for_epsilon
return Direction(best_action_index)
def get_random_direction():

View File

@ -1,171 +0,0 @@
import pygame
import random
import math
# Initialize pygame (must run before the display is created below)
pygame.init()
# Define constants
# NOTE: SCREEN_WIDTH / SCREEN_HEIGHT are not referenced by the window setup
# below, which sizes the window from the labyrinth dimensions instead.
SCREEN_WIDTH = 400
SCREEN_HEIGHT = 400
CELL_SIZE = 40  # edge length of one labyrinth cell, in pixels
# Define colors (RGB tuples)
YELLOW = (255, 255, 0)
RED = (255, 0, 0)
WHITE = (255, 255, 255)
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)
# Labyrinth as a list of strings, one string per row:
# "#" is a wall, "." is a cookie; eaten cookies are replaced by " " at runtime.
labyrinth = [
    "##########",
    "#........#",
    "#.##..##.#",
    "#........#",
    "##########"
]
# Get labyrinth dimensions (in cells)
ROWS = len(labyrinth)
COLS = len(labyrinth[0])
# Initialize game screen: one CELL_SIZE x CELL_SIZE square per labyrinth cell
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
pygame.display.set_caption("Micro-Pacman")
# Pacman class
class Pacman:
    """Player-controlled Pacman positioned on the module-level ``labyrinth``.

    Attributes are plain cell coordinates (``x`` = column, ``y`` = row) plus a
    ``count`` of draw calls that drives the open/closed mouth animation.
    """

    def __init__(self, x, y):
        """Place Pacman at cell ``(x, y)`` with the draw counter at zero."""
        self.x = x
        self.y = y
        self.count = 0

    def move(self, dx, dy):
        """Move by ``(dx, dy)`` cells unless the target is a wall or off-grid.

        Off-grid targets are rejected explicitly: a negative index would
        otherwise wrap around to the opposite side of the labyrinth, and a
        too-large one would raise ``IndexError``.
        """
        new_x, new_y = self.x + dx, self.y + dy
        if not 0 <= new_y < len(labyrinth):
            return
        if not 0 <= new_x < len(labyrinth[new_y]):
            return
        if labyrinth[new_y][new_x] != "#":
            self.x = new_x
            self.y = new_y

    def draw(self):
        """Draw Pacman as a yellow disc; every second call also draws the mouth."""
        radius = CELL_SIZE // 2 - 4
        center_x = self.x * CELL_SIZE + CELL_SIZE // 2
        center_y = self.y * CELL_SIZE + CELL_SIZE // 2
        pygame.draw.circle(screen, YELLOW, (center_x, center_y), radius)

        self.count += 1
        if self.count % 2 == 0:
            # The mouth is a black wedge from the center to two points that
            # lie slightly outside the disc (1.3 * radius) at +/- 30 degrees.
            start_angle = math.pi / 6
            end_angle = -math.pi / 6
            start_pos = (center_x + int(radius * 1.3 * math.cos(start_angle)),
                         center_y - int(radius * 1.3 * math.sin(start_angle)))
            end_pos = (center_x + int(radius * 1.3 * math.cos(end_angle)),
                       center_y - int(radius * 1.3 * math.sin(end_angle)))
            pygame.draw.polygon(screen, BLACK, [(center_x, center_y), start_pos, end_pos])
# Ghost class with pixel art
class Ghost:
    """Red pixel-art ghost that steps toward Pacman on the module-level grid."""

    # Sprite bitmap: every "#" becomes one filled square when drawn.
    ghost_pixels = [
        " #### ",
        "######",
        "## # #",
        "######",
        "######",
        "# # # "
    ]

    def __init__(self, x, y):
        """Place the ghost at cell ``(x, y)``."""
        self.x, self.y = x, y

    def move_towards_pacman(self, pacman):
        """Take at most one step toward ``pacman``.

        Horizontal moves are tried before vertical ones; a direction is only
        taken when the neighbouring cell in that direction is not a wall.
        """
        col, row = self.x, self.y
        if col < pacman.x and labyrinth[row][col + 1] != "#":
            self.x = col + 1
            return
        if col > pacman.x and labyrinth[row][col - 1] != "#":
            self.x = col - 1
            return
        if row < pacman.y and labyrinth[row + 1][col] != "#":
            self.y = row + 1
            return
        if row > pacman.y and labyrinth[row - 1][col] != "#":
            self.y = row - 1

    def draw(self):
        """Paint the sprite into the ghost's cell, one rectangle per "#" pixel."""
        # Size of each sprite pixel, derived from the number of sprite rows.
        pixel_size = CELL_SIZE // len(self.ghost_pixels)
        origin_x = self.x * CELL_SIZE
        origin_y = self.y * CELL_SIZE
        for row_idx, sprite_row in enumerate(self.ghost_pixels):
            for col_idx, pixel in enumerate(sprite_row):
                if pixel != "#":
                    continue
                rect = (
                    origin_x + col_idx * pixel_size,
                    origin_y + row_idx * pixel_size,
                    pixel_size,
                    pixel_size,
                )
                pygame.draw.rect(screen, RED, rect)
# Draw walls and cookies
def draw_labyrinth():
    """Render the labyrinth: blue squares for walls, small white dots for cookies."""
    half_cell = CELL_SIZE // 2
    for row_idx, row in enumerate(labyrinth):
        top = row_idx * CELL_SIZE
        for col_idx, cell in enumerate(row):
            left = col_idx * CELL_SIZE
            if cell == "#":
                pygame.draw.rect(screen, BLUE, (left, top, CELL_SIZE, CELL_SIZE))
            elif cell == ".":
                pygame.draw.circle(screen, WHITE, (left + half_cell, top + half_cell), 5)
# Main game function
def main():
    """Run the interactive Micro-Pacman loop until win, loss, or window close.

    Each frame clears the screen, processes window events, moves Pacman
    according to the arrow keys currently held, moves the ghost on every third
    frame, resolves collision / cookie eating / win condition, and redraws.
    pygame is shut down on the way out even if the loop raises.
    """
    clock = pygame.time.Clock()

    # Pacman and the ghost start in opposite corners, just inside the walls.
    pacman = Pacman(1, 1)
    ghost = Ghost(COLS - 2, ROWS - 2)

    running = True
    frame = 0  # frame counter; not named `iter` so the builtin stays usable
    try:
        while running:
            screen.fill(BLACK)
            frame += 1

            # Handle events
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False

            # Handle Pacman movement. These are independent checks, so every
            # arrow key held during this frame applies its own move.
            keys = pygame.key.get_pressed()
            if keys[pygame.K_LEFT]:
                pacman.move(-1, 0)
            if keys[pygame.K_RIGHT]:
                pacman.move(1, 0)
            if keys[pygame.K_UP]:
                pacman.move(0, -1)
            if keys[pygame.K_DOWN]:
                pacman.move(0, 1)

            # The ghost only moves every third frame, so Pacman is faster.
            if frame % 3 == 0:
                ghost.move_towards_pacman(pacman)

            # Check for collisions (game over if ghost catches pacman)
            if pacman.x == ghost.x and pacman.y == ghost.y:
                print("Game Over! The ghost caught Pacman.")
                running = False

            # Eat cookies: rows are immutable strings, so rebuild the row with
            # a blank in place of the cookie.
            if labyrinth[pacman.y][pacman.x] == ".":
                row = labyrinth[pacman.y]
                labyrinth[pacman.y] = row[:pacman.x] + " " + row[pacman.x + 1:]

            # Check if all cookies are eaten (game over)
            if all("." not in row for row in labyrinth):
                print("You Win! Pacman ate all the cookies.")
                running = False

            # Draw the labyrinth, pacman, and ghost
            draw_labyrinth()
            pacman.draw()
            ghost.draw()

            # Update display
            pygame.display.flip()

            # Cap the frame rate
            clock.tick(5)
    finally:
        pygame.quit()
if __name__ == "__main__":
main()

View File

@ -67,45 +67,3 @@ class Ghost:
pixel_x = self.x * CELL_SIZE + col_idx * pixel_size
pixel_y = self.y * CELL_SIZE + row_idx * pixel_size
pygame.draw.rect(self.screen, RED, (pixel_x, pixel_y, pixel_size, pixel_size))
class StateIndexer:
    """Convert ``(x, y, cookie_dir)`` states to unique linear indices and back.

    The state space is the product of x in [-7, 7], y in [-2, 2] and
    cookie_dir in [0, 3], i.e. 15 * 5 * 4 = 300 states. Indices run from 0 to
    ``total_states - 1`` with the direction as the fastest-varying component.
    """

    def __init__(self):
        # State space boundaries (inclusive on both ends)
        self.x_min, self.x_max = -7, 7      # 15 values
        self.y_min, self.y_max = -2, 2      # 5 values
        self.dir_min, self.dir_max = 0, 3   # 4 directions

        # Number of distinct values per component
        self.x_range = self.x_max - self.x_min + 1
        self.y_range = self.y_max - self.y_min + 1
        self.dir_range = self.dir_max - self.dir_min + 1

        # Multipliers for indexing
        self.y_dir_product = self.y_range * self.dir_range        # 5 * 4 = 20
        self.total_states = self.x_range * self.y_dir_product     # 15 * 20 = 300

    @staticmethod
    def _require_in_range(name, value, low, high):
        """Raise ``ValueError`` unless ``low <= value <= high``."""
        if not low <= value <= high:
            raise ValueError(f"{name}={value!r} is outside [{low}, {high}]")

    def to_index(self, x, y, cookie_dir):
        """Convert a state to its unique index in ``0..total_states - 1``.

        ``cookie_dir`` may be a plain integer or any object exposing an
        integer ``.value`` (such as an enum member).

        Raises:
            ValueError: if any component lies outside its declared bounds.
                Without this check an out-of-range component would silently
                produce an index that collides with another state.
        """
        cookie_dir = getattr(cookie_dir, "value", cookie_dir)

        self._require_in_range("x", x, self.x_min, self.x_max)
        self._require_in_range("y", y, self.y_min, self.y_max)
        self._require_in_range("cookie_dir", cookie_dir, self.dir_min, self.dir_max)

        # Convert to zero-based offsets
        x_idx = x - self.x_min
        y_idx = y - self.y_min
        dir_idx = cookie_dir - self.dir_min

        # Linear mapping: (x * y_range * dir_range) + (y * dir_range) + dir
        return (x_idx * self.y_dir_product) + (y_idx * self.dir_range) + dir_idx

    def from_index(self, idx):
        """Convert an index back to its ``(x, y, cookie_dir)`` state.

        Raises:
            ValueError: if ``idx`` is not in ``0..total_states - 1``.
        """
        if not 0 <= idx < self.total_states:
            raise ValueError(
                f"idx={idx!r} is outside [0, {self.total_states - 1}]"
            )

        # Peel off the components in reverse order of significance.
        rest, dir_idx = divmod(idx, self.dir_range)
        x_idx, y_idx = divmod(rest, self.y_range)
        return (
            x_idx + self.x_min,
            y_idx + self.y_min,
            dir_idx + self.dir_min
        )

View File

@ -1,5 +1,7 @@
import pygame
from data import conf
LABYRINTH_INIT = [
@ -26,4 +28,6 @@ ROWS = len(LABYRINTH_INIT)
COLS = len(LABYRINTH_INIT[0])
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))
screen = None
if conf.show_game:
screen = pygame.display.set_mode((COLS * CELL_SIZE, ROWS * CELL_SIZE))

View File

@ -1,10 +1,4 @@
from data.classes import StateIndexer
indexer = StateIndexer()
EPSILON = 0.01
# EPSILON = 0.005
EPSILON = 0.005
ALPHA = 0.2
GAMMA = 0.8
@ -16,5 +10,6 @@ REWARD_ON_HALF = 50
REWARD_ON_LOSE = -250
plot_result = True
show_game = True
show_trained = True
show_game = False
show_trained = False

View File

@ -5,7 +5,6 @@ import data.conf as conf
oneTry(conf.EPSILON, conf.ALPHA, conf.GAMMA)
# multipleTries(EPSILON, ALPHA, GAMMA,AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE)
# gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE, EPSILON)