From 738b122f43cfcdbcea5f1bd87cc22ca8de639e32 Mon Sep 17 00:00:00 2001 From: 2wenty1ne Date: Wed, 3 Dec 2025 15:50:46 +0100 Subject: [PATCH] Added GA to tune constants --- GenTunic/__pycache__/gen_math.cpython-313.pyc | Bin 0 -> 1555 bytes .../__pycache__/gen_tuning.cpython-313.pyc | Bin 0 -> 2594 bytes GenTunic/__pycache__/gen_util.cpython-313.pyc | Bin 0 -> 5072 bytes GenTunic/gen_math.py | 37 ++++++ GenTunic/gen_tuning.py | 64 +++++++++++ GenTunic/gen_util.py | 106 ++++++++++++++++++ .../__pycache__/game.cpython-313.pyc | Bin 0 -> 10767 bytes .../__pycache__/learning.cpython-313.pyc | Bin 0 -> 1865 bytes .../__pycache__/util.cpython-313.pyc | Bin 0 -> 4289 bytes game.py => ReinforcmentLearning/game.py | 10 +- ReinforcmentLearning/learning.py | 62 ++++++++++ util.py => ReinforcmentLearning/util.py | 8 +- argument_swapper.py | 11 -- data.txt | 28 +++-- learning.py | 58 ---------- main.py | 17 +++ 16 files changed, 312 insertions(+), 89 deletions(-) create mode 100644 GenTunic/__pycache__/gen_math.cpython-313.pyc create mode 100644 GenTunic/__pycache__/gen_tuning.cpython-313.pyc create mode 100644 GenTunic/__pycache__/gen_util.cpython-313.pyc create mode 100644 GenTunic/gen_math.py create mode 100644 GenTunic/gen_tuning.py create mode 100644 GenTunic/gen_util.py create mode 100644 ReinforcmentLearning/__pycache__/game.cpython-313.pyc create mode 100644 ReinforcmentLearning/__pycache__/learning.cpython-313.pyc create mode 100644 ReinforcmentLearning/__pycache__/util.cpython-313.pyc rename game.py => ReinforcmentLearning/game.py (96%) create mode 100644 ReinforcmentLearning/learning.py rename util.py => ReinforcmentLearning/util.py (96%) delete mode 100644 argument_swapper.py delete mode 100644 learning.py create mode 100644 main.py diff --git a/GenTunic/__pycache__/gen_math.cpython-313.pyc b/GenTunic/__pycache__/gen_math.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..670104dca0ab76750d8d3a56347aaa67faad575f GIT binary patch literal 1555 zcmcgsOK%fb6h3!e88UVtkG2#la3Fvz;igfOHmX`slt3+|iAsi%EZLIfP6o#%zIN`| zQ7u(smPuUIPH9x9t7J!ZZ2K?pCPD)XB-pT{xIduhj2}r6Qa6Yr-E&{(+{gEQ_qM{} zen5K||15V$0(efHu3%x3#K}4&P>K7Q{r;6yBjPi$JXdHFTk-J(*Y7U=~6Ltu`sTnE?fpMAOeYtmJ~xIO~qmqXR6eT832|VL}M23 zv--v$sx4;!+$H<#dbWMEXV(Q?{$=0@n5gQ@s}A=iUi6hatGS}Dm2F+|)!QUa(hOd!@YUq)aF8xFttSr53)sK_&Km#?EJW>RX53F7uq7$~ruq z&y-d^EmnM^C<;G1c8RtYBe);2=-xx;!BgGXn62vG%;V5W??&KW42}G8<+m%1i}$a3 z^H2Ljwdhu~K2yEw&Atc^)hB*UY_P|=@r$TcwknOm>d%c^oA>t4?w@O(9B=C5Tk_vB z41D-XgA)UdOWU!%nZ33Bxd#*b@#bLc;c)ZRWK*B?W}X=#FW$l} z-X&t_4MWZM6gvh7@?xM8LCYPef}X9Qd|%5Hiw<8FebiAiLS!mYS)la5SF*yXh(T<+ zbQx5D7^ZOpNlKRr>0tU39QlAk=Fxe8^winr&5dbqE}-If8%bI@44)@PLN#O4sLwV= zcjfJP!`)qPj$As_FB2P)e>4aU)TXzl>vPp>kCcxEE&85;AA0w|i;wUc;xsx$^(?gs z+P8s9XcDUQ0_e*;kEIK$L7;xQ5$)(3K^?oFI}Aqxnb-O^`|F=K2Djbq>BjPIwmIAd zb@T|-+3Gisl+yvI_&=z;oG(@a&V$z`h{a&n0|;W;MC)JY3(NSJuZdBdr@zIe>%5W~ XV^3h{wH9XK*B3N)rZu9mHe>$+0el}F literal 0 HcmV?d00001 diff --git a/GenTunic/__pycache__/gen_tuning.cpython-313.pyc b/GenTunic/__pycache__/gen_tuning.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a61085623a26b23c67bec3a8800b236b6be1a286 GIT binary patch literal 2594 zcma)8O>7&-6`m!RQDohwuKm-)JIaMybb%xYx zl_G}>Ff;GHc{A_LoAW}nF%_LMnN45J}$t>O}n<)=YmefkAsMqv*MK#McSiDy@ zp=KCZC?QQXwL?qTUqUs*s6EsW=BjnGqmSce9YM9KL9=LR6>=~+WcN#AcKN+`Y5Qby z`EP%vZD(@#2o+P80N9=^v0bV&`pD=eeeYLfdt<;5@}^ z%f|O34?U*=^hyl$9S4fvUR_*RjLGhCPnWL~UH!4_@5~>`-h9B;b7t8$&U85uC?Q*Z zSStIJuq~f>696>`4rxj*XMavpqPAu}W>d~QNHRc4*!(ETE+uJmGH0JNO1!se7;s`B z2j~tMRQl{ai@!aHMo1Yrkp4~%9*Wm~3}Hw)W^2eH<+x2B(%g?b^vr!qJo_mz{kIb_ ziM4$e!QxBw0VEF{VdONByw5W*s))AD{Gd%a^B|BTlFz=+O~G)T28JsRrb+Cfpq#KZ zWx=+y;F=@R4ge9r7|}R+pb@n-$ZnFK^z~sJCLCYoL>S01IZpixgL8*oaIU|5;!n+1 zneDtuk8`%f$vt5p`Omp+FGyYu+rH(~Hs#FZ=H4IGJ>}@wlv5`nDLca@k`9G?AjQuk zS{RtSLTda$87}YAtX@6BU%GTjypoX$;=)vZYA#dA5SF2Sb+ogH2>g5Zy+5-_YMktw|hR$SugTQ-vFHZoVkpxShe= z^-PySG``rY>beH+Vvml>(8T`2bUq_Z=Vq^AwyK&d*mJK^t05e|n$Jm6?q()m%uf|E z*j>>eqB)2tkRXD;0Egz}0R?>*}2{@$(T5|KBdAhy}G;s~^T%14s8o z4myXS_VQuij{U%eGQ6w3g5Ap+#1UOZ1iMWQ6)A=rn0bY~wJkHSr>o1-MI4}aDt3(- zz2s`*k2ZX>G_fQb8Zwq1mLELOjHPR}5(zaljiocK+{ z`{;sNs;Y45x&{k%SS~FQ7mFsL;PROM2uB>YSXIjqohLpBq#GZS^lgB)(DQcR>wRww zG_pIw;O5Y|KQ6X}%Z==I|51xgZp9Og*=;_&b7uO>eyaqZ0Pw79-n9q7*_^A%jal6B z?f&7%ku_$E8~7(T*yIK`E^czqw7Jk)Ws@7&4kX`CwE{!!ps=3a3?ADG3g3Idv8O)B zzL))&`E0&9Jn==zy0X~1{6h2O3$3HKEv~OUFk+2OHU}mx?#NExDQom_{5?If@%)=3@2I~YYo;d}SxdOkPKp~p`zm?nabV4}W|Hf!C)T~|s~abp$ziKE zWrfpUaUoe<@HW-T^++>G#Sl@4^Uz@f< z;wJl4yUYAK```ZvAa>E95b&4&p65o`R1c#60*HS0Fi$F*7GI@&2T}AuB9(ebdFqc4 z{S0Bj&WJIZ0u%_kHT*+)-7Hr+DAK_i2MM~s5T_>}r2|22@_R%F`aSJr(I z1LiJPYj;V4UPjLnmLj8ZfecUF48wd20$+o`H{j{7LF^ll`WHa|0O*MqB$5`0ZbcJ~ b(3a5C2yX|X4Q?wKY54cp(@X?SlKuY!&}nBw literal 0 HcmV?d00001 diff --git a/GenTunic/__pycache__/gen_util.cpython-313.pyc b/GenTunic/__pycache__/gen_util.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..549b9ae7c7a085e0929eb3544bfd4f2c8e060a08 GIT binary patch literal 5072 zcmbVQTWlN072PFw$))(vgNj7GMM;)LTMt{7TuF)I$PQ(_Wpl-ZiIlJ_YAvlzE}2=< zvC9;h?=4iJ>@`-R@`Twc1|&ffjU)BAQwwRS;jh`bhAzzW&vxCKPbok8iVd?Je*AyV_Ea>=ZaN%9Lw;H5-yg_w^6vK{inYEoY< zxrnKHide@LXi;*29I&%C%P6YQW%(Ph_1Q%07%b&bY9nGc);??#?TfBrSCklpRp8_& zCQcB9NLl3QCvNl>%ZRsFEB&(+OGn8;yeCK^9i>@nz!n=g4z1a4f^ZY9goUe5ANE+$ zMnvt!b#xFR$DCG{NoR6NE-R!({b-lDWg#hK*FPpTdL_G_;SoNnZ*UP}G@F>wXhF=L z>DQcBl4&k`c0i-w;89xAXvB%Dyyk$vm2^sT;(r*a(HuhDddQ(slJEwvF&D#gadz_M zFmeM+t)?VC4i}p#NY_X~v#;`^gip6sPufCaWGO1~NLsosT)W0gOV6hhxfCyErKQuY z6Y0d7lvv9p`6ZT5@luvYgG;kBVVrv|afuWsE?wlsg`6lPmVj|Qmlcw|nRTr)fp{*< z$4l!#4Iq<&EM14OKB2l#%GAk+Ael(exGW^BBDykPEhsZwKUSsJK@l8L2W!KF zMW^NZk5F;AekW@mEcCpzs@D}+;uofIjgN3%}i~WnGuyV&QkMEoA^K2=hi*RZ%sjQYoGjH3x@QQ{0VEC91_l&)DpIbZ#NO5Sa|K zn!C`7MCVw|8wtNS7I{8CKNo*-a!&J=nltlk_%V!J5wapLNpUHc(#ixt*mYqg$0bo6 z7D3(sv?ViZoaW>+l8^*Q)2m!6#c5<*t1TMJcotjA`0Ij*5w{FLHh5}P%Yd7BQ35dM zZ)hGXO{xSb1{y6Ts>iZx6I?Q3K~a$pIt{&k$kN}T05ml<14zEE>CTt!`2rh_imx+o z-yeSZkAdF@?zG=|O&y+8Tci4_e?UEb%)v7bQRYOYEkXkj8cOG<{d}mZ)f_qG}!h}9& zn(vnrwLOojiTcLdb+_u|_Til#rF}xJo0O@VJ%32?kM5)t|IE#qgYNTpMwRZ%^1@Z6 z`zkDZV^pC+2kzjGM{WJXZE`P-wygH>uHkfN z@y^Y2iyZwRn)Bd61hiv#BWzWShY#)z9zGrIC*8)d5YZe7C>YE9MdQq2n0EVm3QEt6kRQ+La}vM;EXwaLfM zs%2;Q1I@R+x4a+mpInenjjP=k)y|2}f@)wU?*)C|CY82+#eHUbR&k$~!;A9OYsykm z38!T72TJ%&)&1t4r)AgEsdzf&BNNmU<{fOsB5Y!P>#GD&Sdk#>RCFIP?E{{@-PLEwhi7eaj--Ys&UMA^YgrjiuQl`JJ+wtfOPgpfRwckx$_E zeNkUm;bK=nQ!bLTU>l?KAsT3tMF#jthZ6(=)lppoOuZn1*BQfuiIQM#Tyx`wr2!(b zQ1UYnK3v1kaOog7Oba&Ay2U~GJ=JJFmC3FfZnQg*78BscijdI&^oGiH6NHBF$clm; zN`Rj(rLXe{)nIb9*|r!XtpenF;JQw#rN$cFs@BHJw}J zuJ;7E4B!HUJ_zmBpH%8kZjEi-Q0s^CcaG zep|z!_eY1wP{o*Qcv+zjLvr^y`Gpsiv6$NSlIm%>-$VFL-aAEj zT4bu(u+yeRg{v{`bT?d))lQc};3(MxCJ(}6(*uf_78f`aQpEIViWI<{Y4uCoEJ3yz z+_EHV8@7v7l!EAH4rI~aA?t`Z1T1OkWpTu4-Ad~|h=FT}woI=n>MZ&sEZqav7osVw z&Uz|EQ;+k&7IpSg!5$ELI6e+vG@OXfB)NnUc|W`oOgt=u!e*pZK?4{%y-2+IQ0G$ebK|MGkbyfmZ?FUpMvun)d-llRul>tv;qy zAKT=%0%~=C-nAEKQvw6qOG@DUona*~oA>V5wr!58wWqhol-j|(XaBkB&t}x;Udop# zOvmPu!t`$stIX(q7tz#v&qH`>K!C2$A)n#e(PA!@SvO+6%HsEfUi|$a1%|G4-g*S6 zHAQ$qypl!|BqD48xA?nSLxE6T0BSi9M4_ufVsAXVJ^h zk5yj6KH@z)Ns?a?-mi$Ne-jrJ;=(_O?>%rZWcP!bDsr5B(Cs5b4?2Qm%fl(MjI7Za F{ufp7IL-h7 literal 0 HcmV?d00001 diff --git a/GenTunic/gen_math.py b/GenTunic/gen_math.py new file mode 100644 index 0000000..867f7ed --- /dev/null +++ b/GenTunic/gen_math.py @@ -0,0 +1,37 @@ +import numpy as np + + +def project_bit(bit): + n = len(bit) + q_min = 0.1 + q_max = 0.5 + + reverse_bit = np.flip(bit) + dec = np.uint64(0) + + for i in range(n): + dec += np.uint64(2)**i * reverse_bit[i] + + q = q_min + ((q_max - q_min) / (2**n - 1)) * dec + + return q + + + +def bit_to_grey(bit): + grey = [bit[0]] + + for i in range(1, len(bit)): + grey.append(bit[i-1] ^ bit[i]) + + return np.array(grey) + + + +def grey_to_bit(grey): + bit = [grey[0]] + + for i in range(1, len(grey)): + bit.append(bit[i-1] ^ grey[i]) + + return np.array(bit) \ No newline at end of file diff --git a/GenTunic/gen_tuning.py b/GenTunic/gen_tuning.py new file mode 100644 index 0000000..43036e8 --- /dev/null +++ b/GenTunic/gen_tuning.py @@ -0,0 +1,64 @@ + +import math +import time +from matplotlib import pyplot as plt +import numpy as np +from GenTunic.gen_math import project_bit +from GenTunic.gen_util import calc_population_fitness, create_population, crossover, mutation, turnament_selection + + +POPULATIUON_SIZE = 200 +MUTATION_RATE = 0.05 +CROSSOVER_RATE = 0.65 + +GEN_SIZE = 8 * 3 +THRESHOLD = 0.5 + +def gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE): + start_time = time.time() + + population = create_population(POPULATIUON_SIZE, GEN_SIZE) + + best_fintess_values = [] + best_fitness = 0 + + while True: + #? Calc fitness + population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE) + + _, best_fitness = fintess_values + best_fintess_values.append(best_fitness) + print(best_fitness) + + if best_fitness > THRESHOLD: + print("Breaking") + break + + #? Selection + amount_selections = math.floor((1 - CROSSOVER_RATE) * len(population_propability)) + amount_crossover = POPULATIUON_SIZE - amount_selections + + new_population = turnament_selection(population_propability, amount_selections) + + #? Crossover + new_population = crossover(population_propability, new_population, amount_crossover, GEN_SIZE) + + #? Mutation + population = mutation(new_population, MUTATION_RATE, GEN_SIZE) + + + population_propability, fintess_values = calc_population_fitness(population, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE) + best_fintess_index, best_fitness = fintess_values + + print("\n=== BEST PARAMETERS ===") + gen = population[best_fintess_index]["population"] + parameter_names = ["Alpha: ", "Epsilon: ", "Gamma: "] + parameters = [project_bit(x) for x in np.split(gen, 3)] + for index, name in enumerate(parameter_names): + print(f"{name}{parameters[index]}") + + time_amount = time.time() - start_time + print(f"\nTook {time_amount}s") + + plt.plot(best_fintess_values) + plt.show() \ No newline at end of file diff --git a/GenTunic/gen_util.py b/GenTunic/gen_util.py new file mode 100644 index 0000000..4704184 --- /dev/null +++ b/GenTunic/gen_util.py @@ -0,0 +1,106 @@ +import random +import numpy as np + +from GenTunic.gen_math import bit_to_grey, grey_to_bit, project_bit +from ReinforcmentLearning.learning import multipleTries + + +def create_population(size, GEN_SIZE): + dtype = [("population", np.int32, (GEN_SIZE,)), ("probability", np.float64)] + population_propability = np.zeros(size, dtype=dtype) + + for i in range(size): + gen = np.random.randint(0, 2, GEN_SIZE) + population_propability[i] = (gen, 0) + + return np.array(population_propability) + + +def calc_population_fitness(population_propability, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE): + population_fitness_sum = 0 + + for individual in population_propability: + gen = individual["population"] + alpha, epsilon, gamma = [project_bit(x) for x in np.split(gen, 3)] + _, multiple_tries_win_prob = multipleTries(alpha, epsilon, gamma, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE) + fitness = np.array(multiple_tries_win_prob).mean() + + individual["probability"] = fitness + population_fitness_sum += fitness + + + best_fitness_index = np.argmax(population_propability["probability"]) + best_fitness = population_propability[best_fitness_index]["probability"] + + population_propability["probability"] = population_propability["probability"] / population_fitness_sum + + return population_propability, (best_fitness_index, best_fitness) + + + +def turnament_selection(population_propability, amount_selections): + selected_population = [] + + best_fitness_index = np.argmax(population_propability["probability"]) + selected_population.append(population_propability[best_fitness_index]) + + while len(selected_population) < amount_selections: + pair_indecies = random.sample(range(len(population_propability)), 2) + + if population_propability[pair_indecies[0]]["probability"] > population_propability[pair_indecies[1]]["probability"]: + selected_population.append(population_propability[pair_indecies[0]]) + else: + selected_population.append(population_propability[pair_indecies[1]]) + + return np.array(selected_population) + + + +def crossover(population_propability, selected_population, amount_crossover, GEN_SIZE): + crossover_population = turnament_selection(population_propability, amount_crossover) + + select_one_parent = False + + if amount_crossover % 2 == 1: + amount_crossover -= 1 + select_one_parent = True + + for i in range(0, amount_crossover, 2): + crossover_point = np.random.randint(1, GEN_SIZE) + + mother_a = crossover_population[i]["population"][:crossover_point] + mother_b = crossover_population[i]["population"][crossover_point:] + + father_a = crossover_population[i+1]["population"][:crossover_point] + father_b = crossover_population[i+1]["population"][crossover_point:] + + child_one = np.empty(1, dtype=selected_population.dtype) + child_one["population"] = np.concatenate((mother_a, father_b)) + child_one["probability"] = 0 + + child_two = np.empty(1, dtype=selected_population.dtype) + child_two["population"] = np.concatenate((mother_b, father_a)) + child_two["probability"] = 0 + + selected_population = np.concatenate((selected_population, child_one)) + selected_population = np.concatenate((selected_population, child_two)) + + is_last_iteration = (i >= amount_crossover - 2) + if is_last_iteration and select_one_parent: + selected_population = np.append(selected_population, crossover_population[i]) + + return selected_population + + + +def mutation(population, MUTATION_RATE, GEN_SIZE): + amount_mutation = len(population) * MUTATION_RATE + mutation_indecies = np.random.choice(len(population), int(amount_mutation), replace=False) + + for individual_index in mutation_indecies: + bit_index = np.random.randint(0, GEN_SIZE) + bit_to_mutate = population[individual_index]["population"][bit_index] + mutated_grey = bit_to_grey(bit_to_mutate) ^ 1 + population[individual_index]["population"][bit_index] = grey_to_bit(mutated_grey) + + return population \ No newline at end of file diff --git a/ReinforcmentLearning/__pycache__/game.cpython-313.pyc b/ReinforcmentLearning/__pycache__/game.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..135a7f91195e0109753c456ad573e980949eec8e GIT binary patch literal 10767 zcmc&aTWlLgl0AGsB}yXov?W@!Y%`8uu_Vi0$B80Q59?)fJd$kEo-hf;(#b6m@6?Z}JV7711MS&y1B6+0r8tR8*_QpFlc zwO8S;%)&_}QnM3^q?R?4I<|<^v&E!=wU9=(WFI0;CkeLHTk6Ijc}bduwR&y!$ZKta zJ>bNai**v=?a8ebu-g3sz=jM)R(Oqj0MuJ9J{uMCn5s%tk4SShB8!?7;xM*aXe)*` zi`-U2mQ*8l3t0;JR&Om?c4QbKpAK+t&80?=2og@~I!4s*RPp-u5F7AELt_kI>i32H zw10wS15A{TM19c!UlbURgu-JC9b^N6kr}=u7>Lr(1tL-U673U)h2~((>$?CQuO6AW zcyWd=Cfj@I!R~g~K)btp(B*b2*Z&0FEM@@~@4yL%A(b~Y2(35+2n;A@-)azWsQ+ki z$O+1OGs$ro$_}W)&%v5779(VS_{k}ANa*1XXu1k4gxL&Oj9`_d0?4N#m5{4R733OH z&1!=hrkl7;>?#_zgyqg*dsvzb_?kjt3@fA{aw_^W zyXT2%o|xg){;>%rdR@t@B7yKITLN%wDP$3h+-|q?UjGp<6$!8r?_}u0g+RpHKIWge z2&WbC9#m7tpPXg4o+u3pqq`EjZ8Q_f> zlHkfgXpS6#49HPYGe4P_TvQ}pSZx1Q$2%QM?st0P6Kh869K{)H7F?XMe$k&YI@7l5 z*E%43z2m+bnI2=SV7Omq4XA*iid17^*H@7Ts$wFEJwPK;u?OUUAv`j@+%k0}NPtrA z0cK|u-wpMc1gG?9q_J*1e_nE9!O2S=EKx%86iFpG#R4igy92D)vNdr(nbXJYotLtZ zn~iH!WMOit)0Clz9rM)>T9F5{g^(zMR0;EE>5aJ|FV!PTR@4|;QrbUeci*{&0$10j zYXE*}R4}T)A}y5i{{R1IQFsj^Ez}fxl5kh&9U@do^&ZeC5k(ksF#**I!4Z2vVn~f} zwT=K+f4BUAsgs{_Izo3M_CWCv{tmyx!A9kcp6mZ5dLt>C|X ziqlt5tz$JM-c%M|-{$J=rOECyE>MiV=y_f-9^y6QGx!qWRU@o#ir4rIDTZYH$`yqpZO35)VT|Th;tfVrZ^w;ZmYEu3ys?T`|rYS9R5#uKKO!Yt2jT zH#)9&ynmW|{Mg4;pZGqiym-Y`*lLJ>tHSu=$C&{$5$dd~>4tt;1Z|_G>fCt?#_Z zl{I~+yxno<;0MP)wSHFlyUL{7lj`hvn*;V93_*-?%fT?w(t$tf;0#Zgnq{nJ3p7l?LD(*6C4uMsj2DDZV#BV4y& z3}8;n@?pPIhm9TvSQm^Q15IBX8v*kP$|jn=G~o+pdWva!G{i=t;SdvG#%P+Yz-a1m zCa{)`7OdrvpY}zgZ0Na(Xdpt<*O4%*U_`L^t`ssnAcCd#=jee#sjhfnP^oLaHks7} z3K1Mb`r#)h=>mF)t+0*8D)=S}@J;f}RWMq$q=wZ6wXi>R=lQWnbiEtKIyILyc5E`+ z|ELiy=gaW4!yoH{ZXtsf++Z(k-)<3Bu$g@eN`&aYuE^4twF10kyoz+dl;ewpHObcH zDGY_pimY|{x<)jRq`gC@U>pf%<-b7d=4NFfWAg|Q-1j7?YZ)J?PORwi65Lvwn5$rB zKp*ZW6@vK*#(;2{P&z`o;x+GlB<8&PW;dKj`S#l#d;W&>{d|CCXEQE{Fi22=>rGv_RW7 z&o<9>%y%WaQpPRUs+JmWG+%G#wmq@jc&qtl^X=A~N4Rar?l@B0TxpZ#O2;c5pqhJr z(UCTnTp64loEx1#pE#c~*Iqlkbn3?7^+C?jxP0o?;LX9?zME$_$J2M(QjX3?8VMH~ z3~0G+{Z07?<$vLk_myaS7m+mAiW(%>#ex$RT`V!ulyxFJ8B<7*{sO!to3(Hh;DTuZ zkB|y$5ed2sb^Q%O6EJ4nkuyd%P@(^Prr$-W!L5bU8Ug4&&>n!ffJ2sDF?KeFTXBx{ z5~Bflyt}-V2fs~**JjO7x9rAc2fMM;oR>VHL&fC5YF$Xf!QqldRo0N~dmbSp7?gc4 ziq9ZoYXHz`WS#Kfm0_?3O+kDi(c_`%KsbUeIS62|5NgV~HXa9H@n%)w*c7`@80>bn zJ9VOD7=+PuBy>5zYuT|WIs}fq5(>Q5KNc2hY5@eSN<0tv_L;&=1d~-X4l^<(&SVf} z2wR8G0Yv0A$iO-gxI){qCi-KAd?~%)g z?wmYSrfmaTJE#5UP;n=$9Z} z5UP-6jFKH-;O&3T@OEiVzw9hS|GSy~MoJ~}zR#7bmkZrBZdvkRNtxu4<-{XP0Hw_G zH|BYY*jUfjvZW1td;-R(;HH-(5}sMxq2S7jrS)W=IV-bBt@$g<&JUp*kbQh=o3Azt z)2JX-*>FeSQbz7Rl;qB-?iVf>p4iJa0M2!w0s_W?jUiT+FZLv^3gV@Drk8RFV#;DW zlUEOQb)RxUpiI7&SPBDU&A_Ytfp9oKPF93vhn8b(tPdcu2^3j?3`D84W}*9T>5f@V zoLH-{FP8oC#o5kyYr42>F7oONYnJkPW5T#$fP;rMKR%p1aq^>M+`*Gc^3=-EvmE*C zia(qjzr^`3apa}s6Kpa%an~{lVpX{V#HwtkcuB}!JggM67`_f3ehiG28;2_tsdQtq z=B1`gozdh(2lRPBP?h1d9y!iN5+or7^ckH8E&?%SV{Kv_&?7UHSeM7>J=nz&((trf zR@71(Fqh6~%os=|V0?u8s7}#f7i^y!5@ea_9)MrWGQYN?{Zz zb2l;#w~3A@pk)NCAiJAtn*usW7z4&PmOV<%7(ss(fWn*zSPRUFdq|yt z1uUD*N0Xb3S0Pf5g>Unbh36Hu7Kv2CImFPlz}N3X#Oi|_!5sxS~G>H5=pivz))dInKIl)l{P9cwki@4(Jf;g^VZc8w`BhaS(nFv8Ct$D&TM%&=}~zz?*W4_zCQP5?xh; zt7poXoWvpJC;_1hjIUqtIh+b!v3I{<1=D3whadIsGxH9Ve;oQ@rv*V1_s1?;^(8KqL|v;Wa&UudCyf5FzHZJ+!;K zv+ESE>Y=F>O`g&twSIUQ!{!xN!{FY=8%KLOt`jEFnKjy-%E0kgs_j`Iv$VlDtM*DcK4*mZr^a>uQ%n_ag9 zzX^U2yfg9JsgI^orh%9%Z7zy++}EMvvRG%@Xo|J}hoL0%Lg@|Himn>FLB84PxatdA z&Ad0^U9~lEwuZa5opD#X*m~vi?B%xx-}b!e`S;>o>FTx!vX>?ukVts3j39%A9ZifOCZgWWIVmTb=x z@|ca-#4)htQv0HAxgu3^IIc+>OWrs=e=cz@Wpqdwq*-wfouE_3?NX04EAD~4ri|ON zJ>sn2#wL!ruR$iu906g>1Fs&wXDV4WHE^be#nBbh9^6%WrDe8dZeo5aF_kjbE;j$F z<(-zLi5pYbr&5mQd*wCp&a}-D@4%q8`QgMcXRKYAe0%!MY0kK7sV!yPm#*J^qw9JX zSKqW8TAsW$eRDcl|MZ=MTz$tURjZ_jga2-C@~nULY=Ao(NR9@%v%%!q^U3ZIS09RZ z;{YkVyaL*oin*eLOI4|&gR91aoblj2TYbDEjaTqLaA+0j%B{2AUsOJ}bokc!o9FLn zeyjUP_esNNJO6cOk{n(o&v4|KRHY~0{RM`v}F z&eioKbzAOL+TS+4X<7^{4KC8D%BHwET~)i(l!TDe?oO`0^OHSX{Sa3*94|_5Kd{`- zZGR@NTPUBa<8<5ZS*qgd|12&~msdaxtJ<;HeYbjVLNiCq9bBv1wN&;`FD5#H?W6FK z%R=Pp3+c)_;CRzNY9FeReeah_RI?8_+FCvrOdLs?w!*IU6>~Eyx@~FkP3ddqw9b6R zFl%_tbl-&ZMYuS3Xhm1^%>@NAmd@_{icui7@#UVEdVba?IO6zi+(loAp=oEG7^Q$) z7(Ua1m%CMkK3&>75e6Cxq_x@E_4HU=55B`4TXpFbc3QL^`n5d09- zj2NnG#RKFJ9IrbnzG*zhz5^X_m!ilYAcME01o1gCe~yfwBmIA&(G(g5UL~|I8(%X1 zy%~y@vY06inK^ALis|p!cg7Si8-8j?8nq=tk z2O3SOB35+Yg3LAI3)YlrTTK6@2I-9P!+-Z;O!a%MA%1x7 literal 0 HcmV?d00001 diff --git a/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc b/ReinforcmentLearning/__pycache__/learning.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c41db0bafe0e7e7d2a66c28b0f8f129c0ad65ba2 GIT binary patch literal 1865 zcmZ{l&2JM&6u@WwxmkZCIDDi~$WlxbOC&%;5DK)CrC7~}?L^*eu@qOE#hy5;tY^*a znkG@J`baBP%7u#FIP^f(dzC{E{S$13hG-~KrP2epK>7#t&H5`;=}7bD&CK(gnfHGC zd>DyH2-**sYsQlxLVxi|r+M1S;T$NBkb)Fp9?c2VBe!%f^|@<)8i3I=ADk1Z=Cbc1c^x&88FTWVOtu17=yR%sq?M>4Q zN{aOtEoK}0P8Krh6z^k;da5xb zF-W;!ww3YfCW};Rrejo12n}4Eo`xHQpxa9z>S+J;P<>&4@KinhDiYhivlo#c3V)1@ zJuUCxMo!xusW!#u2KCeevXT@my0p_jrC!9DNf+(TfLR6h7Ao zjW&Fvhu{|#+j-|Y>T>i8Odg?Yhy;!>6zJua6V@Z>9l9<8UE*?=p8QHamQQu)B7JHw zca&WBScKHRp6|@17B`AOBsB=U4uHpy8d5w{d4x*9X9mKeN{V+Z0y|PB*5+)><;=HL zlWa$K@RU5dp_L5KNfB#~rIpt#+qoid9=AyFhNHER(WVWGi;iKDgrvLUl-ymC?rRTp zv&Nmfhg^|cCJBl1a`5^j7yMI8_^1yC%EL?fw?;+28m2tPlou7slS|bI|U&*O%8wo6lE`1+ps(HlRC$_F9}N9xlr20wTh z+z-XJ?|t|2et2Lz`OWON(_nD&QkyWj;gjhZIP)FC_?6sB1*rtWdUPqm5uSt&4Ly$3 z?%Ej7zMo6QGX|VRk>Z``);AfIA@t)k#Ym_=L%8pSqzx;Bl&_5^gBi#qye0Qsdx zIR2yk^v>?ctzRd0zTCZ?ZjPiIi_6Uqmm8n2G%v3-;(9|bH6;AIZ`F-gBFauwbf;=s zj%nP3!oW2aXpi2i=`cU3RiI#aNEOy~Qqpbasg~G#$Ca3I{{P_;_&@tK$1q#0@`}@f zhCk#dncU*j*vEZ$6^I{g#**r z*3>fzuKN(JiHVb$xIZoV^RypLJN@uir|s8jXdHD*6Q<>dKLf8z%UI8RWAd-2$?$*gjxFm~m0+vnu zWz8~v@xss*jUD*rx5qSgWbphI?SLRC$w*R)3Xx9`z;PDkYMxG2eQ zCUBJO84)8`mgK4MTvCd6&MaVJDGJDq6(4|iP`0Er^;*{HN)2Wm&eQ;aI|I1VV2m27 z1p>0eL-exqX8_A+BEQBloT-PX4MqhT;t-OVqE$1;7Oj3AR!tK*ZpwcWDCS+1HlVN} zFK_NP%a#dpS~|w_m_b3{SUtHH&e8!F8xZ-`0Ns!zF-CJ$#>xKlSI3-}VR1ngvJXDUs?>vjQGVOb4x+eO9;; ziO-3O#>{KXg64@P64xbBh#4-XStHcdGz&4ms8~UMfDU{BP_0aorKA*z3$x%y@i=Y* zq5~e~y8u#X-Q`W6NYDN_yvVGV`_g#j_0tPdUSO~SgY*Vv7p97J7#pS6M(m2+l5JHdw)gBWS@jFlpB4wS*Maje^U`s)wvHnvce0!3u(I7KI!&pex=zB_>HoVhSOk z!1E9Z45b$UXuiJXqcEE7eqVYZFUhL=plUz3;czZS z?pHr(TxwKHn^d+**Q${jK@=i5)#?dAmQkpJxT$OjwbI#I2$vAUn~WSO+d@})8@c<( z%oyCLjvO}{Pbd)lx!V-3N0M4RGP!^yIeAU9%tWHXygn^xwyA3g*fMV*3z~CY&}lJA zNovl8qBq^;!z?Nntw!PD>oc|wf(f}IIXg$b@dQ+7=u`qs^=YWvSoCPWQ3ecA#f?fz4Yud2Rt)_ zMeCNL^?rot`V?>Dm|mz!G2kABqBW8KUdTt3ZtRimMJv=C`IVB;-3~aIT*{lri;bB( zb?6r)%XH_RAF|B3-PvV=+=~t#N)Z%5^BI%u;2j)G#K}(H$yq4o0!*isg)-D7+@XKa z*;60_WMDrN5-rpk>83%27I7{z+z#$AK@}V>iH{Ls1bzbjFyw1Ix2m!BqU%6%b z@fLlT1mm9Z)nw7{zI?|0<$i^`7oBn6`v$FJ8q>L}0+Iv>Ne%6g4a?w{97C)JsmU}( zI|y3JNs}Ta#p&WiWABScrYB>Ovs&l>is8<_cmkqKISYu+55-ZB#T5Ev;7R%KF3{M4 zPnfTZ3z#NDaw=$4Ny=`}`P$B(+ zM-}hgx_e>qT-IHYR(|xo_3FA6{{7cKXh>U&(5%0H>2{jUdIFz$+B2T^)yl`mS8uGA zJ@uU4aiFTUFU_c;{kaGEs@3XX=GZqf2L@7O+1lW0Q>ON)%2s9FEsxG++-<3$0`|;` zO1+tqYUrp+%IUmmD_uNs=dJrK>B*n9ui2`B!&#PQ?wwk%sC#gB>8$E(%DUX?rh8Wj zp>L^Abv6Qz$D2NP?*?4u!R@8ns;6xeuv<&FRL{X@_96lOOZ}>|xk!L6ahCvn8xipK z(%Y)1<>lO)FZipV*W$h|#29@W&8Xdl9&P_t!05zyQ!j~E@F|`pwjv`2Av2NBp$4aH z=ApP1$=#A^_+l z+F$vkYOVcZhHXs^Z`xe%-u(W}JGWQ-Yqr4W?&kHH&b6AZ#o=s4gX(P9tZMkk_0XlZ zoms2uU36vLb*jB?!{Jj|pKd)8@y5to?+-vBzhANm9y3%>mPh0H8fvgZ-OHg+OS|=2 zNv(SonvHMe>m_6fo*mCu%))NoYSNe#_&Od25VRL8hev^+l~SwG<{OU@f^AlBOh`|jGp0rF z&F!#kBosZLTmlLvU&R@zum!Gzs%GjGT_S!}N z3ZHd&KXue*osFM4+cM6!NArJic5S$;Gw!BreRH}r+t8MF{MA#n;ja9&K>DfUOFwEq z{jVD2^soC4ulbHHI*3UfM^yHRZdEuyevn9^{m>8~|Gx<@=yD<-L57`-+0ZLX1B8Zx z#>OFs>%{>qg6KJPEK4@HX1XVUjcLW_8ONtLB zC)TBt^V8oL+ELRsQP1BbEU40{a9S@5U)Ah=`XBaL{2kyU&a3nRcxGl8=I`jh-;nD$ w%QCJs`y9cvU1nuQm~EehY2Nl)nOC;G9Zc)?B;#QG+jTzX=*|g-IZUPh7u1M-D*ylh literal 0 HcmV?d00001 diff --git a/game.py b/ReinforcmentLearning/game.py similarity index 96% rename from game.py rename to ReinforcmentLearning/game.py index ada0948..47cc11a 100644 --- a/game.py +++ b/ReinforcmentLearning/game.py @@ -2,7 +2,7 @@ import pygame import math import os -from util import Direction, calc_current_state, epsilon_greedy, get_best_q_action +from ReinforcmentLearning.util import Direction, calc_current_state, epsilon_greedy, get_best_q_action # Initialize pygame pygame.init() @@ -115,7 +115,7 @@ def draw_labyrinth(labyrinth): # Main game function -def run_game(q_values, EPSILON, ALPHA, GAMMA): +def run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE): clock = pygame.time.Clock() labyrinth = labyrinth_init.copy() @@ -170,7 +170,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA): if pacman.x == ghost.x and pacman.y == ghost.y: # print("Game Over! The ghost caught Pacman.") running = False - reward = -10 + reward = REWARD_ON_LOSE # Eat cookies if labyrinth[pacman.y][pacman.x] == ".": @@ -179,10 +179,11 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA): # Check if all cookies are eaten (game over) if all("." not in row for row in labyrinth): # print("You Win! Pacman ate all the cookies.") - reward = 10 + reward = REWARD_ON_WIN running = False # Draw the labyrinth, pacman, and ghost + #? -------------------------MY CODE----------------------------------- if not running: new_state = state @@ -206,6 +207,7 @@ def run_game(q_values, EPSILON, ALPHA, GAMMA): counter += 1 return 20-counter #? -------------------------MY CODE----------------------------------- + draw_labyrinth(labyrinth) pacman.draw() ghost.draw() diff --git a/ReinforcmentLearning/learning.py b/ReinforcmentLearning/learning.py new file mode 100644 index 0000000..44f8d33 --- /dev/null +++ b/ReinforcmentLearning/learning.py @@ -0,0 +1,62 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from ReinforcmentLearning.game import run_game +from ReinforcmentLearning.util import initial_q_fill + + + + +def multipleTries(EPSILON, ALPHA, GAMMA, AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE): + cookies_per_try = [] + wins_per_try = [] + + for x in range(AMOUNT_TRIES): + cookies_per_run, amount_wins = oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE) + cookies_per_run.append(cookies_per_run) + wins_per_try.append(amount_wins) + # print(f"Finished try {x+1}\n") + + return cookies_per_try, wins_per_try + + +def oneTry(EPSILON, ALPHA, GAMMA, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE): + """ + state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction) + action: Direction + q_value: (state, action) + """ + + q_values = {} + + initial_q_fill(q_values) + + cookies_per_run = [] + # Amount of single runs + for x in range(AMOUNT_RUNS): + amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE) + cookies_per_run.append(amount_cookies_ate) + + wins = 0 + for element in cookies_per_run: + if element == 20: + wins += 1 + + # print(f"Win percentage: {(wins/AMOUNT_RUNS)*100}%") + + return cookies_per_run, wins + + +def print_results(cookies_per_try, wins_per_try, EPSILON, ALPHA, GAMMA, AMOUNT_RUNS): + # print("---------DONE---------") + # print("Used: ") + # print(f"Epsilon: {EPSILON}") + # print(f"Gamma: {GAMMA}") + # print(f"Alpha: {ALPHA}") + + # print("---------SUMMARY---------") + print(f"Average win percantage: {((sum(wins_per_try) / len(wins_per_try)) / AMOUNT_RUNS)*100}%\n") + # print(f"Best try: {(max(wins_per_try) / AMOUNT_RUNS)*100}%") + # print(f"Worst try: {(min(wins_per_try) / AMOUNT_RUNS)*100}%") + diff --git a/util.py b/ReinforcmentLearning/util.py similarity index 96% rename from util.py rename to ReinforcmentLearning/util.py index f632371..59cfaee 100644 --- a/util.py +++ b/ReinforcmentLearning/util.py @@ -9,8 +9,8 @@ class Direction(Enum): def initial_q_fill(q_values): - for x in range(8): - for y in range(3): + for x in range(-7, 8): + for y in range(-2, 3): for cookie_direction in Direction: for action in Direction: state = (x, y, cookie_direction) @@ -29,8 +29,8 @@ def get_start_state(): def calc_current_state(labyrinth, pac_x, pac_y, ghost_x, ghost_y): - x_ghost_dist = abs(pac_x - ghost_x) - y_ghost_dist = abs(pac_y - ghost_y) + x_ghost_dist = pac_x - ghost_x + y_ghost_dist = pac_y - ghost_y cookie_direction = get_closest_cookie_direction(labyrinth, pac_x, pac_y) diff --git a/argument_swapper.py b/argument_swapper.py deleted file mode 100644 index 9dbd365..0000000 --- a/argument_swapper.py +++ /dev/null @@ -1,11 +0,0 @@ -import numpy as np -from learning import runTry - -EPSILON = 0.5 -ALPHA = 0.5 -GAMMA = 0.5 - -STEPS = 10 - -for alpha in np.linspace(0.1, 0.5, 10): - runTry(EPSILON, alpha, GAMMA) \ No newline at end of file diff --git a/data.txt b/data.txt index 0116767..520be21 100644 --- a/data.txt +++ b/data.txt @@ -1,16 +1,20 @@ -E: 0,1; A: 0.1; G: 0.9; 200/5000 -E: 0,1; A: 0.1; G: 0.9; 150/5000 +# High Gamma +E: 0.1; A: 0.1; G: 0.9; 0.04% +E: 0.1; A: 0.1; G: 0.9; 0.03% -E: 0,5; A: 0.1; G: 0.9; 0.0034% -E: 0,5; A: 0.1; G: 0.9; 0.002% +E: 0.5; A: 0.1; G: 0.9; 0.0034% +E: 0.5; A: 0.1; G: 0.9; 0.002% -E: 0,5; A: 0.5; G: 0.5; 0.0012% -E: 0,5; A: 0.5; G: 0.5; 0.0002% -E: 0,5; A: 0.5; G: 0.5; 0.001% +E: 0.5; A: 0.5; G: 0.5; 0.0012% +E: 0.5; A: 0.5; G: 0.5; 0.0002% +E: 0.5; A: 0.5; G: 0.5; 0.001% -E: 0,5; A: 0.3; G: 0.5; 0.0018% -E: 0,5; A: 0.3; G: 0.5; 0.0022% -E: 0,5; A: 0.3; G: 0.5; 0.0014% -E: 0,5; A: 0.3; G: 0.5; 0.0016% -E: 0,5; A: 0.3; G: 0.5; 0.0022% +E: 0.5; A: 0.3; G: 0.5; 0.0018% +E: 0.5; A: 0.3; G: 0.5; 0.0022% +E: 0.5; A: 0.3; G: 0.5; 0.0014% +E: 0.5; A: 0.3; G: 0.5; 0.0016% +E: 0.5; A: 0.3; G: 0.5; 0.0022% + + +# AFTER ABSOLUT CHANGE diff --git a/learning.py b/learning.py deleted file mode 100644 index 0719915..0000000 --- a/learning.py +++ /dev/null @@ -1,58 +0,0 @@ -import matplotlib.pyplot as plt -import pandas as pd - -from game import run_game -from util import initial_q_fill - - -EPSILON = 0.5 -ALPHA = 0.3 -GAMMA = 0.8 - -def runTry(EPSILON, ALPHA, GAMMA): - """ - state: (x_distance_to_ghost, y_distance_to_ghost, next_cookie_Direction) - action: Direction - q_value: (state, action) - """ - - AMOUNT_RUNS = 5000 - q_values = {} - - initial_q_fill(q_values) - - cookies_per_run = [] - # Amount of single runs - for x in range(AMOUNT_RUNS): - amount_cookies_ate = run_game(q_values, EPSILON, ALPHA, GAMMA) - cookies_per_run.append(amount_cookies_ate) - # print(f"Run {x}: {amount_cookies_ate} cookies ate\n") - - wins = 0 - for element in cookies_per_run: - if element == 20: - wins += 1 - - print(f"Win percentage: {wins/AMOUNT_RUNS}%") - - return cookies_per_run - - - -cookies_per_run = runTry(EPSILON, ALPHA, GAMMA) - - -window_size = 100 # Adjust based on your needs -rolling_avg = pd.Series(cookies_per_run).rolling(window=window_size, center=True).mean() - -plt.figure(figsize=(12, 6)) -plt.plot(cookies_per_run, alpha=0.2, label='Raw Data', linewidth=0.5, color='gray') -plt.plot(rolling_avg, label=f'{window_size}-point Moving Average', - linewidth=2, color='blue') -plt.title("Data with Rolling Average") -plt.xlabel("Index") -plt.ylabel("Value") -plt.legend() -plt.grid(True, alpha=0.3) -plt.show() - diff --git a/main.py b/main.py new file mode 100644 index 0000000..6d0bca7 --- /dev/null +++ b/main.py @@ -0,0 +1,17 @@ +from GenTunic.gen_tuning import gen_tuning_main +from ReinforcmentLearning.learning import multipleTries + + +EPSILON = 0.3 +ALPHA = 0.3 +GAMMA = 0.8 + +AMOUNT_RUNS = 5000 +AMOUNT_TRIES = 10 + +REWARD_ON_WIN = 10 +REWARD_ON_LOSE = -10 + + +#multipleTries(EPSILON, ALPHA, GAMMA, REWARD_ON_WIN, REWARD_ON_LOSE) +gen_tuning_main(AMOUNT_TRIES, AMOUNT_RUNS, REWARD_ON_WIN, REWARD_ON_LOSE) \ No newline at end of file