|
|
|
|
|
|
" steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward]) \n", |
|
|
|
" summary_writer = tf.summary.FileWriter(summary_path)\n", |
|
|
|
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n", |
|
|
|
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)\n", |
|
|
|
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states, train_model)\n", |
|
|
|
" new_info = trainer.take_action(info, env, brain_name)\n", |
|
|
|
" new_info = trainer.take_action(info, env, brain_name, steps)\n", |
|
|
|
" info = new_info\n", |
|
|
|
" trainer.process_experiences(info, time_horizon, gamma, lambd)\n", |
|
|
|
" if len(trainer.training_buffer['actions']) > buffer_size and train_model:\n", |
|
|
|