|
|
|
|
|
|
"summary_freq = 10000 # Frequency at which to save training statistics.\n", |
|
|
|
"save_freq = 50000 # Frequency at which to save model.\n", |
|
|
|
"env_name = \"environment\" # Name of the training environment file.\n", |
|
|
|
"curriculum_file = None\n", |
|
|
|
"\n", |
|
|
|
"### Algorithm-specific parameters for tuning\n", |
|
|
|
"gamma = 0.99 # Reward discount rate.\n", |
|
|
|
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"env = UnityEnvironment(file_name=env_name)\n", |
|
|
|
"env = UnityEnvironment(file_name=env_name, curriculum=curriculum_file)\n", |
|
|
|
"print(str(env))\n", |
|
|
|
"brain_name = env.brain_names[0]" |
|
|
|
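To sanity-check the connection, you can inspect the default brain's parameters. A minimal sketch: the attribute names below (`action_space_type`, `action_space_size`, `state_space_size`) match older ML-Agents releases and are assumptions that may differ in your version.

# Sketch: inspect the default brain (attribute names are version-dependent).
brain = env.brains[brain_name]
print(brain.action_space_type)  # "discrete" or "continuous"
print(brain.action_space_size)  # number of discrete actions or action dimensions
print(brain.state_space_size)   # length of the agent state vector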
|
|
|
|
|
|
"source": [ |
|
|
|
"tf.reset_default_graph()\n", |
|
|
|
"\n", |
|
|
|
"if curriculum_file == \"None\":\n", |
|
|
|
" curriculum_file = None\n", |
|
|
|
"\n", |
|
|
|
"\n", |
|
|
|
"def get_progress():\n", |
|
|
|
" if curriculum_file is not None:\n", |
|
|
|
" if env._curriculum.measure_type == \"progress\":\n", |
|
|
|
" return steps / max_steps\n", |
|
|
|
" elif env._curriculum.measure_type == \"reward\":\n", |
|
|
|
" return last_reward\n", |
|
|
|
" else:\n", |
|
|
|
" return None\n", |
|
|
|
" else:\n", |
|
|
|
" return None\n", |
|
|
|
"\n", |
|
|
|
"# Create the Tensorflow model graph\n", |
|
|
|
"ppo_model = create_agent_model(env, lr=learning_rate,\n", |
|
|
|
" h_size=hidden_units, epsilon=epsilon,\n", |
|
|
|
|
|
|
" saver.restore(sess, ckpt.model_checkpoint_path)\n", |
|
|
|
" else:\n", |
|
|
|
" sess.run(init)\n", |
|
|
|
" steps = sess.run(ppo_model.global_step)\n", |
|
|
|
" steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward]) \n", |
|
|
|
" info = env.reset(train_mode=train_model)[brain_name]\n", |
|
|
|
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n", |
|
|
|
" info = env.reset(train_mode=train_model)[brain_name]\n", |
|
|
|
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n", |
|
|
|
" # Decide and take an action\n", |
|
|
|
" new_info = trainer.take_action(info, env, brain_name)\n", |
|
|
|
" info = new_info\n", |
|
|
|
|
|
|
" save_model(sess, model_path=model_path, steps=steps, saver=saver)\n", |
|
|
|
" steps += 1\n", |
|
|
|
" sess.run(ppo_model.increment_step)\n", |
|
|
|
" if len(trainer.stats['cumulative_reward']) > 0:\n", |
|
|
|
" mean_reward = np.mean(trainer.stats['cumulative_reward'])\n", |
|
|
|
" sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})\n", |
|
|
|
" last_reward = sess.run(ppo_model.last_reward)\n", |
|
|
|
" # Final save Tensorflow model\n", |
|
|
|
" if steps != 0 and train_model:\n", |
|
|
|
" save_model(sess, model_path=model_path, steps=steps, saver=saver)\n", |
|
|
|