强化学习的代码格式

def update():
    """Run one episode of the agent/environment interaction loop.

    Relies on two names defined outside this function: ``env`` (the
    environment) and ``RL`` (the learning agent). On every step the
    environment is rendered, the agent picks an action from the stringified
    current observation, the environment applies that action, and the agent
    learns from the resulting transition. The loop ends when ``env.step``
    reports ``done``; afterwards "game over" is printed and the environment
    is destroyed.
    """
    # Obtain the initial observation. Previously ``observation`` was read
    # before ever being assigned, which raises UnboundLocalError on the very
    # first step (the name is local because it is assigned below).
    # NOTE(review): assumes env exposes reset() returning the initial
    # observation -- confirm against the environment class.
    observation = env.reset()

    while True:
        # Refresh the environment display.
        env.render()

        # The agent chooses an action based on the current observation.
        action = RL.choose_action(str(observation))

        # Apply the action; get the next observation, the reward and the
        # end-of-episode flag.
        next_observation, reward, done = env.step(action)

        # The agent learns from this transition. (Was ``Rl.learn``: a case
        # typo that raised NameError, since the agent is named ``RL``.)
        RL.learn(str(observation), action, reward, str(next_observation))

        # Advance to the next observation.
        observation = next_observation

        # Leave the loop once the episode has ended.
        if done:
            break

    # End of game.
    print("game over")
    env.destroy()

原文链接：https://blog.csdn.net/weixin_45193103/article/details/124891355