
Add curriculum code to notebook and simplify

/develop-generalizationTraining-TrainerController
Arthur Juliani, 7 years ago
Commit 4a11c005
3 changed files with 32 additions and 14 deletions
  1. python/PPO.ipynb (28 changes)
  2. python/ppo.py (16 changes)
  3. python/unityagents/environment.py (2 changes)

python/PPO.ipynb (28 changes)


"summary_freq = 10000 # Frequency at which to save training statistics.\n",
"save_freq = 50000 # Frequency at which to save model.\n",
"env_name = \"environment\" # Name of the training environment file.\n",
"curriculum_file = None\n",
"\n",
"### Algorithm-specific parameters for tuning\n",
"gamma = 0.99 # Reward discount rate.\n",

"metadata": {},
"outputs": [],
"source": [
"env = UnityEnvironment(file_name=env_name)\n",
"env = UnityEnvironment(file_name=env_name, curriculum=curriculum_file)\n",
"print(str(env))\n",
"brain_name = env.brain_names[0]"
]
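
For readers unfamiliar with the file being passed here: curriculum_file is expected to point to a json document that drives env._curriculum. Its exact schema is not part of this diff; the sketch below is only an assumption based on the fields the code reads (a measure of "progress" or "reward" plus per-lesson values), with hypothetical key names:

    # Hypothetical curriculum definition, shown as the Python equivalent of the
    # json file. Only the measure type is implied by this diff
    # (env._curriculum.measure_type); "thresholds" and "parameters" are
    # assumed names used for illustration.
    example_curriculum = {
        "measure": "reward",          # or "progress", compared against steps / max_steps
        "thresholds": [10, 20, 50],   # measure values at which the lesson advances
        "parameters": {
            "difficulty": [0.0, 0.3, 0.6, 1.0]  # one value per lesson
        }
    }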

"source": [
"tf.reset_default_graph()\n",
"\n",
"if curriculum_file == \"None\":\n",
" curriculum_file = None\n",
"\n",
"\n",
"def get_progress():\n",
" if curriculum_file is not None:\n",
" if env._curriculum.measure_type == \"progress\":\n",
" return steps / max_steps\n",
" elif env._curriculum.measure_type == \"reward\":\n",
" return last_reward\n",
" else:\n",
" return None\n",
" else:\n",
" return None\n",
"\n",
"# Create the Tensorflow model graph\n",
"ppo_model = create_agent_model(env, lr=learning_rate,\n",
" h_size=hidden_units, epsilon=epsilon,\n",

" saver.restore(sess, ckpt.model_checkpoint_path)\n",
" else:\n",
" sess.run(init)\n",
" steps = sess.run(ppo_model.global_step)\n",
" steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward]) \n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
" # Decide and take an action\n",
" new_info = trainer.take_action(info, env, brain_name)\n",
" info = new_info\n",

" save_model(sess, model_path=model_path, steps=steps, saver=saver)\n",
" steps += 1\n",
" sess.run(ppo_model.increment_step)\n",
" if len(trainer.stats['cumulative_reward']) > 0:\n",
" mean_reward = np.mean(trainer.stats['cumulative_reward'])\n",
" sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})\n",
" last_reward = sess.run(ppo_model.last_reward)\n",
" # Final save Tensorflow model\n",
" if steps != 0 and train_model:\n",
" save_model(sess, model_path=model_path, steps=steps, saver=saver)\n",

python/ppo.py (16 changes)


  Options:
    --help                    Show this message.
-   --curriculum              Whether to use curriculum for training (requires curriculum json) [default: False].
-   --curriculum-path=<path>  Path to curriculum json file for environment [default: curriculum.json].
+   --curriculum=<file>       Curriculum json file for environment [default: None].
    --max-steps=<n>           Maximum number of steps to run environment [default: 1e6].
    --run-path=<path>         The sub-directory name for model and summary statistics [default: ppo].
    --load                    Whether to load the model or randomly initialize [default: False].
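
With this change, curriculum training is selected by a single option, e.g. python ppo.py <env> --curriculum=curriculum.json, rather than the former --curriculum flag plus --curriculum-path pair.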

  env_name = options['<env>']
  keep_checkpoints = int(options['--keep-checkpoints'])
  worker_id = int(options['--worker-id'])
- use_curriculum = options['--curriculum']
- if use_curriculum:
-     curriculum_path = str(options['--curriculum-path'])
- else:
-     curriculum_path = None
+ curriculum_file = str(options['--curriculum'])
+ if curriculum_file == "None":
+     curriculum_file = None
  # Algorithm-specific parameters for tuning
  gamma = float(options['--gamma'])
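
The comparison against the string "None" is needed because docopt substitutes [default: None] as the literal string "None" rather than Python's None. A self-contained illustration (with a hypothetical demo.py usage string):

    from docopt import docopt

    _USAGE = """
    Usage:
      demo.py [options]

    Options:
      --curriculum=<file>  Curriculum json file for environment [default: None].
    """

    options = docopt(_USAGE, argv=[])
    print(repr(options['--curriculum']))  # prints 'None' -- a string, hence the == "None" check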

  hidden_units = int(options['--hidden-units'])
  batch_size = int(options['--batch-size'])
- env = UnityEnvironment(file_name=env_name, worker_id=worker_id, curriculum=curriculum_path)
+ env = UnityEnvironment(file_name=env_name, worker_id=worker_id, curriculum=curriculum_file)
  print(str(env))
  brain_name = env.brain_names[0]

  def get_progress():
-     if use_curriculum:
+     if curriculum_file is not None:
          if env._curriculum.measure_type == "progress":
              return steps / max_steps
          elif env._curriculum.measure_type == "reward":
              return last_reward
      else:
          return None

  with tf.Session() as sess:
      # Instantiate model parameters

python/unityagents/environment.py (2 changes)


        :return: A Data structure corresponding to the initial reset state of the environment.
        """
        old_lesson = self._curriculum.get_lesson_number()
        config = self._curriculum.get_lesson(progress) if config is None else config
        if old_lesson != self._curriculum.get_lesson_number():
            logger.info("\nLesson changed. Now in Lesson {0} : \t{1}"
                        .format(self._curriculum.get_lesson_number(),
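
For context, get_lesson(progress) is expected both to return the reset configuration for the current lesson and to advance the lesson counter when the measure crosses a threshold; that is what makes the old_lesson comparison above meaningful. A minimal sketch of a class with that behavior, using assumed field names (thresholds and parameters are not shown in this diff):

    class Curriculum(object):
        # Minimal sketch of the behavior reset() relies on; the real class is
        # loaded from the curriculum json file and is not part of this diff.
        def __init__(self, measure_type, thresholds, parameters):
            self.measure_type = measure_type  # "progress" or "reward"
            self._thresholds = thresholds     # one threshold per lesson transition
            self._parameters = parameters     # {name: [one value per lesson]}
            self._lesson = 0

        def get_lesson_number(self):
            return self._lesson

        def get_lesson(self, progress):
            # Advance when the training measure crosses the current threshold.
            if (progress is not None and self._lesson < len(self._thresholds)
                    and progress > self._thresholds[self._lesson]):
                self._lesson += 1
            # Reset config for the current lesson.
            return {name: values[self._lesson]
                    for name, values in self._parameters.items()}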
