169 lines
4.2 KiB
Python
169 lines
4.2 KiB
Python
from enum import Enum
|
||
import random
|
||
import pygame
|
||
|
||
import numpy as np
|
||
|
||
import data.classes_consts as consts
|
||
|
||
class Direction(Enum):
    """One of the four axis-aligned directions used as actions and hints.

    Members are declared clockwise starting from UP; iterating the enum
    yields them in that same order.
    """

    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3
|
||
|
||
|
||
def initial_q_fill():
    """Build a dict-based Q-table with small random starting values.

    Keys are ``(state, action)`` pairs where ``state`` is
    ``(x, y, cookie_direction)`` with ``x`` in -7..7, ``y`` in -2..2 and
    ``cookie_direction`` a Direction member; ``action`` is a Direction member.
    Each value is ``random.random() * 0.2 - 0.1``.

    Returns:
        The populated dictionary.
    """
    return {
        ((x, y, cookie_direction), action): random.random() * 0.2 - 0.1
        for x in range(-7, 8)
        for y in range(-2, 3)
        for cookie_direction in Direction
        for action in Direction
    }
|
||
|
||
|
||
def initial_q_fill2():
    """Initialize a Q-table using linear indexing.

    Returns:
        A ``(q_table, indexer)`` tuple. ``q_table`` is a NumPy array of shape
        ``(indexer.total_states, 4)`` whose entries are drawn from
        ``np.random.uniform(low=-0.1, high=0.1)``; ``indexer`` is
        ``consts.indexer``, handed back so callers can map states to rows.
    """
    indexer = consts.indexer

    # 2D layout: [state_index, action].
    # NOTE(review): an earlier comment assumed 300 states x 4 actions = 1200
    # entries; the real row count comes from indexer.total_states -- confirm
    # it matches, and that the 4 columns correspond to the Direction members.
    q_table = np.random.uniform(
        low=-0.1,
        high=0.1,
        size=(indexer.total_states, 4),
    )

    return q_table, indexer
|
||
|
||
|
||
|
||
def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y):
    """Derive the state tuple for the current positions.

    Args:
        labyrinth: Grid passed through to ``get_closest_cookie_direction``.
        pac_x: Pac-Man's x coordinate.
        pac_y: Pac-Man's y coordinate.
        ghost_x: Ghost's x coordinate.
        ghost_y: Ghost's y coordinate.

    Returns:
        ``(pac_x - ghost_x, pac_y - ghost_y, cookie_direction)`` where
        ``cookie_direction`` is the result of
        ``get_closest_cookie_direction(labyrinth, pac_x, pac_y)``.
    """
    # Signed offsets from the ghost to Pac-Man, computed before the cookie scan.
    ghost_offset = (pac_x - ghost_x, pac_y - ghost_y)
    towards_cookie = get_closest_cookie_direction(labyrinth, pac_x, pac_y)
    return (*ghost_offset, towards_cookie)
|
||
|
||
|
||
def get_closest_cookie_direction(labyrinth, pac_x, pac_y):
    """Return the Direction leading from Pac-Man towards the nearest cookie.

    The nearest cookie is the ``"."`` cell with the smallest Manhattan
    distance to ``(pac_x, pac_y)``; on a tie the first such cell in row-major
    scan order wins.

    Args:
        labyrinth: Iterable of rows, each an iterable of cell markers.
        pac_x: Pac-Man's column index.
        pac_y: Pac-Man's row index.

    Returns:
        A Direction member. The axis with the smaller gap is closed first;
        when that gap is zero, movement is along the other axis. If Pac-Man
        stands exactly on the cookie, ``Direction.LEFT`` is returned.

    Raises:
        ValueError: If the labyrinth contains no ``"."`` cell.
    """
    cookies = [
        (x, y)
        for y, row in enumerate(labyrinth)
        for x, cell in enumerate(row)
        if cell == "."
    ]
    if not cookies:
        # Fail with a descriptive message instead of min()'s generic
        # "empty sequence" error; the exception type is unchanged.
        raise ValueError("labyrinth contains no cookie ('.') cells")

    # min() keeps the first minimal element, preserving scan-order tie-breaks.
    cookie_x, cookie_y = min(
        cookies,
        key=lambda pos: abs(pac_x - pos[0]) + abs(pac_y - pos[1]),
    )

    dx = cookie_x - pac_x
    dy = cookie_y - pac_y

    if abs(dx) >= abs(dy):
        # X gap is at least as large: resolve the Y gap first, if any.
        if dy > 0:
            return Direction.DOWN
        if dy < 0:
            return Direction.UP
        # Cookie is on the same Y level.
        return Direction.RIGHT if dx > 0 else Direction.LEFT

    # Y gap is strictly larger: resolve the X gap first, if any.
    if dx > 0:
        return Direction.RIGHT
    if dx < 0:
        return Direction.LEFT
    # Cookie is on the same X level.
    return Direction.DOWN if dy > 0 else Direction.UP
|
||
|
||
|
||
|
||
def epsilon_greedy(q_values, state, epsilon):
    """Choose an action for ``state`` with an epsilon-greedy policy.

    Args:
        q_values: Q-table passed to ``get_best_q_action``.
        state: State to act in.
        epsilon: Threshold compared against ``random.random()``; a draw below
            it selects a random action instead of the best one.

    Returns:
        The best-valued action, or (when exploring) a random choice among the
        actions recorded for ``state``; if none are recorded, the result of
        ``get_random_direction()``.
    """
    greedy_action, known_actions = get_best_q_action(q_values, state)

    exploring = random.random() < epsilon
    if not exploring:
        return greedy_action

    if known_actions:
        return random.choice(known_actions)

    # Nothing recorded for this state: fall back to any direction.
    return get_random_direction()
|
||
|
||
|
||
def get_best_q_action(q_values, state):
    """Find the highest-valued action recorded for ``state``.

    Every entry of ``q_values`` is scanned; entries whose state equals
    ``state`` contribute their action as a candidate. On equal values the
    action encountered first is kept.

    Args:
        q_values: Mapping from ``(state, action)`` pairs to numeric values.
        state: State whose actions should be compared.

    Returns:
        ``(best_action, actions_for_epsilon)``: the best action and the list
        of all actions found for ``state`` in iteration order. When no entry
        matches, ``best_action`` comes from ``get_random_direction()`` and the
        list is empty.
    """
    best_action = None
    best_value = None
    actions_for_epsilon = []

    for (q_state, q_action), value in q_values.items():
        if q_state != state:
            continue

        actions_for_epsilon.append(q_action)
        if best_value is None or value > best_value:
            best_value = value
            best_action = q_action

    # Fall back only when the state is truly unknown. Testing the action's
    # truthiness would wrongly discard a valid falsy action such as 0.
    if not actions_for_epsilon:
        best_action = get_random_direction()

    return best_action, actions_for_epsilon
|
||
|
||
|
||
def get_random_direction():
    """Return a randomly chosen Direction member."""
    all_directions = tuple(Direction)
    return random.choice(all_directions)
|
||
|
||
|
||
|
||
def calc_time_reward(amount_iterations):
    """Map an iteration count to a reward that shrinks as the count grows.

    Args:
        amount_iterations: Number of iterations used.

    Returns:
        ``10`` below 1000 iterations, ``1`` above 10000, and in between a
        linear interpolation ``-(1 / 1000) * amount_iterations + 11`` (a
        float running from 10 down to 1).
    """
    fast_limit, slow_limit = 1000, 10000

    if amount_iterations < fast_limit:
        return 10
    if amount_iterations > slow_limit:
        return 1

    # Straight line through (1000, 10) and (10000, 1).
    slope = -(1 / 1000)
    return slope * amount_iterations + 11
|
||
|
||
|
||
|
||
def draw_labyrinth(screen, labyrinth):
    """Render the labyrinth onto ``screen``.

    ``"#"`` cells are drawn as filled squares in ``consts.BLUE`` and ``"."``
    cells as radius-5 circles in ``consts.WHITE`` centred in their cell; any
    other cell is left untouched. Cell size comes from ``consts.CELL_SIZE``.

    Args:
        screen: Surface handed to the ``pygame.draw`` calls.
        labyrinth: Iterable of rows, each an iterable of cell markers.
    """
    cell_px = consts.CELL_SIZE
    wall_color = consts.BLUE
    cookie_color = consts.WHITE
    half_px = cell_px // 2

    for row_idx, row in enumerate(labyrinth):
        top = row_idx * cell_px
        for col_idx, tile in enumerate(row):
            left = col_idx * cell_px
            if tile == "#":
                pygame.draw.rect(screen, wall_color, (left, top, cell_px, cell_px))
            elif tile == ".":
                pygame.draw.circle(
                    screen, cookie_color, (left + half_px, top + half_px), 5
                )
|