"buffer_size = 2048 # How large the experience buffer should be before gradient descent.\n",
"learning_rate = 3e-4 # Model learning rate.\n",
"hidden_units = 64 # Number of units in hidden layer.\n",
"batch_size = 64 # How many experiences per gradient descent update step."
"batch_size = 64 # How many experiences per gradient descent update step.\n",
"\n",
"### Logging dictionary for hyperparameters\n",
"hyperparameter_dict = {'max_steps':max_steps, 'run_path':run_path, 'env_name':env_name,\n",
" 'curriculum_file':curriculum_file, 'gamma':gamma, 'lambd':lambd, 'time_horizon':time_horizon,\n",
" 'beta':beta, 'num_epoch':num_epoch, 'epsilon':epsilon, 'buffe_size':buffer_size,\n",
" 'leaning_rate':learning_rate, 'hidden_units':hidden_units, 'batch_size':batch_size}"
]
},
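{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick aside, `epsilon` above is the clipping range of PPO's surrogate objective, while `gamma` and `lambd` drive generalized advantage estimation (GAE). The next cell is a minimal, self-contained NumPy sketch of GAE only, to show how `gamma` and `lambd` interact; it is not part of the `Trainer` used below, and the `rewards`/`values` arrays are hypothetical."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def discounted_suffix_sums(x, discount):\n",
"    # out[t] = x[t] + discount * out[t+1], computed right-to-left.\n",
"    out = np.zeros_like(x)\n",
"    running = 0.0\n",
"    for t in reversed(range(len(x))):\n",
"        running = x[t] + discount * running\n",
"        out[t] = running\n",
"    return out\n",
"\n",
"def gae_advantages(rewards, values, gamma=0.99, lambd=0.95):\n",
"    # values carries one extra entry: the bootstrap value after the final step.\n",
"    deltas = rewards + gamma * values[1:] - values[:-1]\n",
"    return discounted_suffix_sums(deltas, gamma * lambd)\n",
"\n",
"# Hypothetical three-step episode (terminal bootstrap value of 0.0):\n",
"print(gae_advantages(np.array([0.0, 0.0, 1.0]), np.array([0.5, 0.6, 0.7, 0.0])))"
]
},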
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"brain_name = env.brain_names[0]"
"brain_name = env.external_brain_names[0]"
]
},
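{
"cell_type": "markdown",
"metadata": {},
"source": [
"`external_brain_names` lists only the brains set to External in the Unity editor, i.e. the ones Python is allowed to control; this notebook trains just the first one. An optional sanity check before training:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Optional: fail early if the scene does not expose exactly one external brain.\n",
"assert len(env.external_brain_names) == 1, 'Expected exactly one external brain.'\n",
"print('Training brain:', brain_name)"
]
},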
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
" summary_writer = tf.summary.FileWriter(summary_path)\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states, train_model)\n",
" if train_model:\n",
" trainer.write_text(summary_writer, 'Hyperparameters', hyperparameter_dict, steps)\n",
" while steps <= max_steps:\n",
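"        # If the whole environment has finished its episode, reset it (passing curriculum progress) before continuing.\n",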
" if env.global_done:\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3 ",
"display_name": "Python 2 ",
"name": "python3 "
"name": "python2 "
"version": 3
"version": 2
"pygments_lexer": "ipython3 ",
"version": "3.6.2 "
"pygments_lexer": "ipython2 ",
"version": "2.7.10 "
}
},
"nbformat": 4,