# Sarsa-Lambda
# Author: Internet
from maze_env import Maze
from RL_brain import SarsaLambdaTable
def update():
    """Train the Sarsa(lambda) agent on the maze for 100 episodes, then close it.

    Works on the module-level ``env`` and ``RL`` objects created in the
    ``__main__`` section. Each episode resets the environment, clears the
    agent's eligibility trace, and then repeatedly renders, steps, and
    learns until the environment reports that the episode is done. After
    the last episode it prints ``game over`` and destroys the environment.
    """
    for _ in range(100):
        # Begin the episode: fetch the starting observation and pick the
        # first action for it (states are passed to the agent as strings).
        state = env.reset()
        act = RL.choose_action(str(state))

        # Zero the eligibility trace in place so nothing carries over from
        # the previous episode.
        RL.eligibility_trace *= 0

        finished = False
        while not finished:
            # Redraw the environment before taking the step.
            env.render()

            # Apply the current action; get the next observation, the
            # reward, and the end-of-episode flag.
            next_state, reward, finished = env.step(act)

            # Sarsa is on-policy: the next action is chosen before learning
            # and is the action actually executed on the next iteration.
            next_act = RL.choose_action(str(next_state))

            # Learn from the full (s, a, r, s', a') transition.
            RL.learn(str(state), act, reward, str(next_state), next_act)

            # Advance to the next step.
            state, act = next_state, next_act

    # All episodes finished.
    print('game over')
    env.destroy()
if __name__ == "__main__":
    # Create the maze environment and a Sarsa(lambda) agent whose action
    # set is the integer indices 0 .. env.n_actions - 1.
    env = Maze()
    RL = SarsaLambdaTable(
        actions=list(range(env.n_actions)),
    )

    # Schedule the training routine through the environment's `after` hook
    # and hand control to its main loop.
    # NOTE(review): `after`/`mainloop` look like the tkinter API, in which
    # case 100 is a delay in milliseconds -- confirm against maze_env.
    env.after(100, update)
    env.mainloop()

    # Once the main loop has returned, show the agent's eligibility trace.
    print(RL.eligibility_trace)
# Tags: __, observation, RL, Sarsa, choose, env, action, Lambda. Source: https://blog.csdn.net/weixin_40653652/article/details/120243075