diff --git a/Aufgabe_4.py b/Aufgabe_4.py index 335cf97..d258721 100644 --- a/Aufgabe_4.py +++ b/Aufgabe_4.py @@ -151,9 +151,9 @@ def calcState(pacman, ghost, labyrinth): clock = pygame.time.Clock() q = np.random.rand(((ROWS * COLS)**2) * 16, 4)*0.1 # q[s][a]=0..0.1, q[pac + ghost][4] -alpha = 0.9 # Lernrate +alpha = 0.5 # Lernrate gamma = 0.9 # Discount Faktor -epsilon = 30 # für Epsilon-Greedy Aktionsauswahl +epsilon = 10 # für Epsilon-Greedy Aktionsauswahl max_iter = 0 iter = 0 @@ -167,7 +167,7 @@ while True: print("Won: ", win, " Lose: ", lose) # Initialize Pacman and Ghost positions pacman = Pacman(1, 1) - ghost = Ghost(4, 1) + ghost = Ghost(COLS - 2, ROWS - 2) labyrinth = deepcopy(labyrinth_origin) # Game loop # reward = 1 @@ -209,7 +209,7 @@ while True: ghost.move_towards_pacman(pacman) # neuer eindimensionaler Zustand - reward = -0.1 + reward = -1 new_s = calcState(pacman, ghost, labyrinth) if pacman.caught(ghost): @@ -232,7 +232,7 @@ while True: q[s][a] += alpha * (reward + gamma * np.max(q[new_s]) - q[s][a]) if(round > 100000): - epsilon = 0 + # epsilon = 0 draw_labyrinth() pacman.draw() ghost.draw()