强化学习训练循环的基本代码结构

def update():
    """Run one episode of the agent-environment interaction loop.

    Relies on two module-level objects defined outside this block: ``env``
    (the environment) and ``RL`` (the learning agent). Each step renders the
    environment, asks the agent for an action, applies that action, and lets
    the agent learn from the resulting transition. Once the environment
    reports ``done``, the loop stops, "game over" is printed and the
    environment is destroyed.
    """
    # Initial observation. ``observation`` is assigned inside this function,
    # so it is a local name and must be bound before the first read below;
    # otherwise the first iteration raises UnboundLocalError.
    # NOTE(review): assumes ``env`` exposes ``reset()`` returning the initial
    # observation -- confirm against the environment class.
    observation = env.reset()

    while True:
        # Refresh the environment display.
        env.render()

        # The agent chooses an action from the current observation, which is
        # passed in stringified form.
        action = RL.choose_action(str(observation))

        # Apply the action; get the next observation, the reward and the
        # end-of-episode flag.
        observation_, reward, done = env.step(action)

        # Let the agent learn from this transition. The agent object is
        # ``RL`` (same name as used for choose_action above).
        RL.learn(str(observation), action, reward, str(observation_))

        # The next observation becomes the current one.
        observation = observation_

        # Leave the loop once the episode has ended.
        if done:
            break

    # End of game.
    print("game over")
    env.destroy()


版权声明:本文为weixin_45193103原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。