
moved posed scene to SharedAssets

/hh-develop-all-posed-characters
HH, 5 years ago
Current commit
27ded81d
496 changed files, with 11961 additions and 0 deletions
1. Project/Assets/ML-Agents/Examples/SharedAssets/Scenes.meta (+8)
2. results/CrawlerDynamic/CrawlerDynamic.nn (+1001)
3. results/CrawlerDynamic/CrawlerDynamic/checkpoint (+2)
4. results/CrawlerDynamic/CrawlerDynamic/events.out.tfevents.1593650175.BrandonHunterHenry.local (+5)
5. results/CrawlerDynamic/CrawlerDynamic/frozen_graph_def.pb (+1001)
6. results/CrawlerDynamic/CrawlerDynamic/model-1429.ckpt.index (+22)
7. results/CrawlerDynamic/CrawlerDynamic/model-1429.ckpt.meta (+1001)
8. results/CrawlerDynamic/CrawlerDynamic/raw_graph_def.pb (+1001)
9. results/CrawlerDynamic/configuration.yaml (+56)
10. results/CrawlerDynamic/run_logs/timers.json (+208)
11. results/CrawlerDynamic/run_logs/training_status.json (+7)
12. results/CrawlerStatic/CrawlerStatic.nn (+1001)
13. results/CrawlerStatic/CrawlerStatic/checkpoint (+2)
14. results/CrawlerStatic/CrawlerStatic/events.out.tfevents.1593650221.BrandonHunterHenry.local (+5)
15. results/CrawlerStatic/CrawlerStatic/frozen_graph_def.pb (+1001)
16. results/CrawlerStatic/CrawlerStatic/model-1548.ckpt.index (+20)
17. results/CrawlerStatic/CrawlerStatic/model-1548.ckpt.meta (+1001)
18. results/CrawlerStatic/CrawlerStatic/raw_graph_def.pb (+1001)
19. results/CrawlerStatic/configuration.yaml (+56)
20. results/CrawlerStatic/run_logs/timers.json (+208)
21. results/CrawlerStatic/run_logs/training_status.json (+7)
22. results/WalkerDy236/wdyclv-ppo/config/imitation/CrawlerStatic.yaml (+36)
23. results/WalkerDy236/wdyclv-ppo/config/imitation/FoodCollector.yaml (+36)
24. results/WalkerDy236/wdyclv-ppo/config/imitation/Hallway.yaml (+37)
25. results/WalkerDy236/wdyclv-ppo/config/imitation/PushBlock.yaml (+31)
26. results/WalkerDy236/wdyclv-ppo/config/imitation/Pyramids.yaml (+34)
27. results/WalkerDy236/wdyclv-ppo/config/ppo/3DBall.yaml (+26)
28. results/WalkerDy236/wdyclv-ppo/config/ppo/3DBallHard.yaml (+26)
29. results/WalkerDy236/wdyclv-ppo/config/ppo/3DBall_randomize.yaml (+38)
30. results/WalkerDy236/wdyclv-ppo/config/ppo/Basic.yaml (+26)
31. results/WalkerDy236/wdyclv-ppo/config/ppo/Bouncer.yaml (+26)
32. results/WalkerDy236/wdyclv-ppo/config/ppo/CrawlerDynamic.yaml (+26)
33. results/WalkerDy236/wdyclv-ppo/config/ppo/CrawlerStatic.yaml (+26)
34. results/WalkerDy236/wdyclv-ppo/config/ppo/FoodCollector.yaml (+26)
35. results/WalkerDy236/wdyclv-ppo/config/ppo/GridWorld.yaml (+26)
36. results/WalkerDy236/wdyclv-ppo/config/ppo/Hallway.yaml (+29)
37. results/WalkerDy236/wdyclv-ppo/config/ppo/PushBlock.yaml (+26)
38. results/WalkerDy236/wdyclv-ppo/config/ppo/Pyramids.yaml (+31)
39. results/WalkerDy236/wdyclv-ppo/config/ppo/Reacher.yaml (+26)
40. results/WalkerDy236/wdyclv-ppo/config/ppo/SoccerTwos.yaml (+33)
41. results/WalkerDy236/wdyclv-ppo/config/ppo/StrikersVsGoalie.yaml (+65)
42. results/WalkerDy236/wdyclv-ppo/config/ppo/Tennis.yaml (+33)
43. results/WalkerDy236/wdyclv-ppo/config/ppo/VisualHallway.yaml (+29)
44. results/WalkerDy236/wdyclv-ppo/config/ppo/VisualPushBlock.yaml (+29)
45. results/WalkerDy236/wdyclv-ppo/config/ppo/VisualPyramids.yaml (+31)
46. results/WalkerDy236/wdyclv-ppo/config/ppo/WalkerDynamic.yaml (+26)
47. results/WalkerDy236/wdyclv-ppo/config/ppo/WalkerStatic.yaml (+26)
48. results/WalkerDy236/wdyclv-ppo/config/ppo/WallJump.yaml (+51)
49. results/WalkerDy236/wdyclv-ppo/config/ppo/WallJump_curriculum.yaml (+68)
50. results/WalkerDy236/wdyclv-ppo/config/ppo/WormDynamic.yaml (+26)
51. results/WalkerDy236/wdyclv-ppo/config/ppo/WormStatic.yaml (+26)
52. results/WalkerDy236/wdyclv-ppo/config/sac/3DBall.yaml (+28)
53. results/WalkerDy236/wdyclv-ppo/config/sac/3DBallHard.yaml (+28)
54. results/WalkerDy236/wdyclv-ppo/config/sac/Basic.yaml (+28)
55. results/WalkerDy236/wdyclv-ppo/config/sac/Bouncer.yaml (+28)
56. results/WalkerDy236/wdyclv-ppo/config/sac/CrawlerDynamic.yaml (+28)
57. results/WalkerDy236/wdyclv-ppo/config/sac/CrawlerStatic.yaml (+28)
58. results/WalkerDy236/wdyclv-ppo/config/sac/FoodCollector.yaml (+28)
59. results/WalkerDy236/wdyclv-ppo/config/sac/GridWorld.yaml (+28)
60. results/WalkerDy236/wdyclv-ppo/config/sac/Hallway.yaml (+31)
61. results/WalkerDy236/wdyclv-ppo/config/sac/PushBlock.yaml (+28)
62. results/WalkerDy236/wdyclv-ppo/config/sac/Pyramids.yaml (+36)
63. results/WalkerDy236/wdyclv-ppo/config/sac/Reacher.yaml (+28)
64. results/WalkerDy236/wdyclv-ppo/config/sac/Tennis.yaml (+35)
65. results/WalkerDy236/wdyclv-ppo/config/sac/VisualHallway.yaml (+31)
66. results/WalkerDy236/wdyclv-ppo/config/sac/VisualPushBlock.yaml (+31)
67. results/WalkerDy236/wdyclv-ppo/config/sac/VisualPyramids.yaml (+36)
68. results/WalkerDy236/wdyclv-ppo/config/sac/WalkerDynamic.yaml (+28)
69. results/WalkerDy236/wdyclv-ppo/config/sac/WalkerStatic.yaml (+28)
70. results/WalkerDy236/wdyclv-ppo/config/sac/WallJump.yaml (+55)
71. results/WalkerDy236/wdyclv-ppo/config/sac/WormDynamic.yaml (+28)
72. results/WalkerDy236/wdyclv-ppo/config/sac/WormStatic.yaml (+28)
73. results/WalkerDy236/wdyclv-ppo/custom_config/cluster_config.ini (+17)
74. results/WalkerDy236/wdyclv-ppo/custom_config/custom_trainer_config.yaml (+33)
75. results/WalkerDy236/wdyclv-ppo/results/inference/WalkerDynamic_timers.json (+1)
76. results/WalkerDy236/wdyclv-ppo/results/wdyclv-ppo/WalkerDynamic.csv (+667)
77. results/WalkerDy236/wdyclv-ppo/results/wdyclv-ppo/WalkerDynamic.nn (+1001)
78. results/WalkerDy236/wdyclv-ppo/results/wdyclv-ppo/WalkerDynamic/checkpoint (+6)

Project/Assets/ML-Agents/Examples/SharedAssets/Scenes.meta (+8)

fileFormatVersion: 2
guid: 60bd92f6edd434a8c8a5089a08ca414b
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

results/CrawlerDynamic/CrawlerDynamic.nn (+1001)
File diff too large to display

results/CrawlerDynamic/CrawlerDynamic/checkpoint (+2)


model_checkpoint_path: "model-1429.ckpt"
all_model_checkpoint_paths: "model-1429.ckpt"

results/CrawlerDynamic/CrawlerDynamic/events.out.tfevents.1593650175.BrandonHunterHenry.local (+5)

(Binary TensorBoard event file; not human-readable. Its one legible record is a "Hyperparameters" text summary mirroring configuration.yaml: trainer_type ppo, batch_size 2024, buffer_size 20240, learning_rate 0.0003, beta 0.005, epsilon 0.2, lambd 0.95, num_epoch 3, and so on.)

results/CrawlerDynamic/CrawlerDynamic/frozen_graph_def.pb (+1001)
File diff too large to display

results/CrawlerDynamic/CrawlerDynamic/model-1429.ckpt.index (+22)

(Binary TensorFlow checkpoint index; not human-readable. The legible fragments are tensor names: action_output_shape, global_step, is_continuous_control, memory_size, normalization_steps, policy/log_std, the main_graph_0/hidden_0..2 weights and biases with their Adam optimizer slots, running_mean/variance, and trainer version numbers.)

results/CrawlerDynamic/CrawlerDynamic/model-1429.ckpt.meta (+1001)
File diff too large to display

results/CrawlerDynamic/CrawlerDynamic/raw_graph_def.pb (+1001)
File diff too large to display

results/CrawlerDynamic/configuration.yaml (+56)

behaviors:
  CrawlerDynamic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
      memory: null
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    init_path: null
    keep_checkpoints: 5
    checkpoint_interval: 500000
    max_steps: 10000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true
    self_play: null
    behavioral_cloning: null
env_settings:
  env_path: null
  env_args: null
  base_port: 5005
  num_envs: 1
  seed: -1
engine_settings:
  width: 84
  height: 84
  quality_level: 5
  time_scale: 20
  target_frame_rate: -1
  capture_frame_rate: 60
  no_graphics: false
parameter_randomization: null
curriculum: null
checkpoint_settings:
  run_id: CrawlerDynamic
  initialize_from: null
  load_model: false
  resume: false
  force: true
  train_model: false
  inference: false
debug: false

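The configuration above is the resolved copy that mlagents-learn writes into the results folder; the run_logs/timers.json that follows records the exact launch command (mlagents-learn config/ppo/CrawlerDynamic.yaml --run-id=CrawlerDynamic --force). A minimal sketch of inspecting such a file with PyYAML — the path and keys mirror the config above, and reading it this way is only a convenience, not part of the ML-Agents API:

# Sketch: load the saved run configuration and print the PPO hyperparameters.
# Assumes PyYAML is installed and the results path above exists.
import yaml

with open("results/CrawlerDynamic/configuration.yaml") as f:
    config = yaml.safe_load(f)

behavior = config["behaviors"]["CrawlerDynamic"]
print(behavior["trainer_type"])            # -> ppo
for name, value in behavior["hyperparameters"].items():
    print(f"{name}: {value}")              # batch_size: 2024, buffer_size: 20240, ...
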
results/CrawlerDynamic/run_logs/timers.json (+208)

{
  "name": "root",
  "metadata": {
    "timer_format_version": "0.1.0",
    "start_time_seconds": "1593650162",
    "python_version": "3.6.4 (default, Mar 1 2018, 18:36:42) \n[GCC 4.2.1 Compatible Apple LLVM 9.0.0 (clang-900.0.39.2)]",
    "command_line_arguments": "/Users/brandonh/unity_projects/ml-agents-master/ml-agents/venv/bin/mlagents-learn config/ppo/CrawlerDynamic.yaml --run-id=CrawlerDynamic --force",
    "mlagents_version": "0.18.0.dev0",
    "mlagents_envs_version": "0.18.0.dev0",
    "communication_protocol_version": "1.0.0",
    "tensorflow_version": "2.2.0",
    "end_time_seconds": "1593650180"
  },
  "total": 18.675483288941905,
  "count": 1,
  "self": 0.008750283974222839,
  "children": {
    "run_training.setup": {
      "total": 0.015203848015516996,
      "count": 1,
      "self": 0.015203848015516996
    },
    "TrainerController.start_learning": {
      "total": 18.651529156952165,
      "count": 1,
      "self": 1.732642306946218,
      "children": {
        "TrainerController._reset_env": {
          "total": 13.572397847892717,
          "count": 1,
          "self": 13.572397847892717
        },
        "TrainerController.advance": {
          "total": 3.0705842591123655,
          "count": 235,
          "self": 0.002972850692458451,
          "children": {
            "env_step": {
              "total": 3.067611408419907,
              "count": 235,
              "self": 2.728578581358306,
              "children": {
                "SubprocessEnvManager._take_step": {
                  "total": 0.33576088724657893,
                  "count": 235,
                  "self": 0.007079660310409963,
                  "children": {
                    "NNPolicy.evaluate": {
                      "total": 0.32868122693616897,
                      "count": 166,
                      "self": 0.32868122693616897
                    }
                  }
                },
                "workers": {
                  "total": 0.0032719398150220513,
                  "count": 234,
                  "self": 0.0,
                  "children": {
                    "worker_root": {
                      "total": 17.95689078711439,
                      "count": 234,
                      "is_parallel": true,
                      "self": 15.592823190963827,
                      "children": {
                        "run_training.setup": {
                          "total": 0.0,
                          "count": 0,
                          "is_parallel": true,
                          "self": 0.0,
                          "children": {
                            "steps_from_proto": {
                              "total": 0.003518798970617354,
                              "count": 1,
                              "is_parallel": true,
                              "self": 0.00029500690288841724,
                              "children": {
                                "_process_vector_observation": {
                                  "total": 0.0032237920677289367,
                                  "count": 2,
                                  "is_parallel": true,
                                  "self": 0.0032237920677289367
                                }
                              }
                            },
                            "UnityEnvironment.step": {
                              "total": 0.0244212350808084,
                              "count": 1,
                              "is_parallel": true,
                              "self": 0.0002589779905974865,
                              "children": {
                                "UnityEnvironment._generate_step_input": {
                                  "total": 0.0002669510431587696,
                                  "count": 1,
                                  "is_parallel": true,
                                  "self": 0.0002669510431587696
                                },
                                "communicator.exchange": {
                                  "total": 0.023314381018280983,
                                  "count": 1,
                                  "is_parallel": true,
                                  "self": 0.023314381018280983
                                },
                                "steps_from_proto": {
                                  "total": 0.000580925028771162,
                                  "count": 1,
                                  "is_parallel": true,
                                  "self": 0.00011981092393398285,
                                  "children": {
                                    "_process_vector_observation": {
                                      "total": 0.0004611141048371792,
                                      "count": 2,
                                      "is_parallel": true,
                                      "self": 0.0004611141048371792
                                    }
                                  }
                                }
                              }
                            }
                          }
                        },
                        "UnityEnvironment.step": {
                          "total": 2.364067596150562,
                          "count": 233,
                          "is_parallel": true,
                          "self": 0.035590133047662675,
                          "children": {
                            "UnityEnvironment._generate_step_input": {
                              "total": 0.03359149140305817,
                              "count": 233,
                              "is_parallel": true,
                              "self": 0.03359149140305817
                            },
                            "communicator.exchange": {
                              "total": 2.196396351675503,
                              "count": 233,
                              "is_parallel": true,
                              "self": 2.196396351675503
                            },
                            "steps_from_proto": {
                              "total": 0.09848962002433836,
                              "count": 233,
                              "is_parallel": true,
                              "self": 0.022750661824829876,
                              "children": {
                                "_process_vector_observation": {
                                  "total": 0.07573895819950849,
                                  "count": 466,
                                  "is_parallel": true,
                                  "self": 0.07573895819950849
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        },
        "trainer_threads": {
          "total": 3.962998744100332e-05,
          "count": 1,
          "self": 3.962998744100332e-05,
          "children": {
            "thread_root": {
              "total": 0.0,
              "count": 0,
              "is_parallel": true,
              "self": 0.0,
              "children": {
                "trainer_advance": {
                  "total": 3.0491049140691757,
                  "count": 12027,
                  "is_parallel": true,
                  "self": 0.19714495609514415,
                  "children": {
                    "process_trajectory": {
                      "total": 2.8519599579740316,
                      "count": 12027,
                      "is_parallel": true,
                      "self": 2.8519599579740316
                    }
                  }
                }
              }
            }
          }
        },
        "TrainerController._save_model": {
          "total": 0.275865113013424,
          "count": 1,
          "self": 0.00018105306662619114,
          "children": {
            "Trainer.save_model": {
              "total": 0.2756840599467978,
              "count": 1,
              "self": 0.2756840599467978
            }
          }
        }
      }
    }
  }
}

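Every node in timers.json above carries "total" (inclusive seconds), "self" (exclusive seconds), "count", and a "children" map keyed by timer name. A small sketch, assuming only that layout, that walks the tree and prints an indented profile:

# Sketch: print the timer hierarchy from a run_logs/timers.json file.
# Keys ("total", "count", "children") match the structure shown above.
import json

def walk(name, node, depth=0):
    print(f"{'  ' * depth}{name}: total={node['total']:.3f}s count={node['count']}")
    for child_name, child in node.get("children", {}).items():
        walk(child_name, child, depth + 1)

with open("results/CrawlerDynamic/run_logs/timers.json") as f:
    root = json.load(f)
walk(root["name"], root)

For this run it would show that most of the 18.7 s went to TrainerController._reset_env (13.6 s), i.e. environment startup rather than training.
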
results/CrawlerDynamic/run_logs/training_status.json (+7)

{
  "metadata": {
    "stats_format_version": "0.1.0",
    "mlagents_version": "0.18.0.dev0",
    "tensorflow_version": "2.2.0"
  }
}

results/CrawlerStatic/CrawlerStatic.nn (+1001)
File diff too large to display

results/CrawlerStatic/CrawlerStatic/checkpoint (+2)


model_checkpoint_path: "model-1548.ckpt"
all_model_checkpoint_paths: "model-1548.ckpt"

results/CrawlerStatic/CrawlerStatic/events.out.tfevents.1593650221.BrandonHunterHenry.local (+5)

(Binary TensorBoard event file; not human-readable. Its one legible record is a "Hyperparameters" text summary mirroring configuration.yaml for this run: trainer_type ppo, batch_size 2024, buffer_size 20240, learning_rate 0.0003, and so on.)

results/CrawlerStatic/CrawlerStatic/frozen_graph_def.pb (+1001)
File diff too large to display

results/CrawlerStatic/CrawlerStatic/model-1548.ckpt.index (+20)

(Binary TensorFlow checkpoint index; not human-readable. The legible fragments are the same tensor names as in the CrawlerDynamic checkpoint above: action_output_shape, global_step, policy/log_std, the main_graph_0 hidden-layer weights and biases with their Adam slots, running_mean/variance, and trainer version numbers.)

results/CrawlerStatic/CrawlerStatic/model-1548.ckpt.meta (+1001)
File diff too large to display

results/CrawlerStatic/CrawlerStatic/raw_graph_def.pb (+1001)
File diff too large to display

results/CrawlerStatic/configuration.yaml (+56)

behaviors:
  CrawlerStatic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
      memory: null
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    init_path: null
    keep_checkpoints: 5
    checkpoint_interval: 500000
    max_steps: 10000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true
    self_play: null
    behavioral_cloning: null
env_settings:
  env_path: null
  env_args: null
  base_port: 5005
  num_envs: 1
  seed: -1
engine_settings:
  width: 84
  height: 84
  quality_level: 5
  time_scale: 20
  target_frame_rate: -1
  capture_frame_rate: 60
  no_graphics: false
parameter_randomization: null
curriculum: null
checkpoint_settings:
  run_id: CrawlerStatic
  initialize_from: null
  load_model: false
  resume: false
  force: true
  train_model: false
  inference: false
debug: false

results/CrawlerStatic/run_logs/timers.json (+208)

{
  "name": "root",
  "metadata": {
    "timer_format_version": "0.1.0",
    "start_time_seconds": "1593650208",
    "python_version": "3.6.4 (default, Mar 1 2018, 18:36:42) \n[GCC 4.2.1 Compatible Apple LLVM 9.0.0 (clang-900.0.39.2)]",
    "command_line_arguments": "/Users/brandonh/unity_projects/ml-agents-master/ml-agents/venv/bin/mlagents-learn config/ppo/CrawlerStatic.yaml --run-id=CrawlerStatic --force",
    "mlagents_version": "0.18.0.dev0",
    "mlagents_envs_version": "0.18.0.dev0",
    "communication_protocol_version": "1.0.0",
    "tensorflow_version": "2.2.0",
    "end_time_seconds": "1593650226"
  },
  "total": 18.12544374493882,
  "count": 1,
  "self": 0.008958819904364645,
  "children": {
    "run_training.setup": {
      "total": 0.010861473041586578,
      "count": 1,
      "self": 0.010861473041586578
    },
    "TrainerController.start_learning": {
      "total": 18.10562345199287,
      "count": 1,
      "self": 1.6754347131354734,
      "children": {
        "TrainerController._reset_env": {
          "total": 12.422339505981654,
          "count": 1,
          "self": 12.422339505981654
        },
        "TrainerController.advance": {
          "total": 3.6985495468834415,
          "count": 251,
          "self": 0.0033409703755751252,
          "children": {
            "env_step": {
              "total": 3.6952085765078664,
              "count": 251,
              "self": 3.3399255577242,
              "children": {
                "SubprocessEnvManager._take_step": {
                  "total": 0.35151520220097154,
                  "count": 251,
                  "self": 0.00889498379547149,
                  "children": {
                    "NNPolicy.evaluate": {
                      "total": 0.34262021840550005,
                      "count": 181,
                      "self": 0.34262021840550005
                    }
                  }
                },
                "workers": {
                  "total": 0.0037678165826946497,
                  "count": 250,
                  "self": 0.0,
                  "children": {
                    "worker_root": {
                      "total": 17.38591544260271,
                      "count": 250,
                      "is_parallel": true,
                      "self": 14.477078061550856,
                      "children": {
                        "run_training.setup": {
                          "total": 0.0,
                          "count": 0,
                          "is_parallel": true,
                          "self": 0.0,
                          "children": {
                            "steps_from_proto": {
                              "total": 0.00139414903242141,
                              "count": 1,
                              "is_parallel": true,
                              "self": 0.0003408790798857808,
                              "children": {
                                "_process_vector_observation": {
                                  "total": 0.0010532699525356293,
                                  "count": 2,
                                  "is_parallel": true,
                                  "self": 0.0010532699525356293
                                }
                              }
                            },
                            "UnityEnvironment.step": {
                              "total": 0.024324635043740273,
                              "count": 1,
                              "is_parallel": true,
                              "self": 0.00019912992138415575,
                              "children": {
                                "UnityEnvironment._generate_step_input": {
                                  "total": 0.00030468206387013197,
                                  "count": 1,
                                  "is_parallel": true,
                                  "self": 0.00030468206387013197
                                },
                                "communicator.exchange": {
                                  "total": 0.023232000996358693,
                                  "count": 1,
                                  "is_parallel": true,
                                  "self": 0.023232000996358693
                                },
                                "steps_from_proto": {
                                  "total": 0.0005888220621272922,
                                  "count": 1,
                                  "is_parallel": true,
                                  "self": 0.0001468671252951026,
                                  "children": {
                                    "_process_vector_observation": {
                                      "total": 0.00044195493683218956,
                                      "count": 2,
                                      "is_parallel": true,
                                      "self": 0.00044195493683218956
                                    }
                                  }
                                }
                              }
                            }
                          }
                        },
                        "UnityEnvironment.step": {
                          "total": 2.9088373810518533,
                          "count": 249,
                          "is_parallel": true,
                          "self": 0.0450834094081074,
                          "children": {
                            "UnityEnvironment._generate_step_input": {
                              "total": 0.0420442724134773,
                              "count": 249,
                              "is_parallel": true,
                              "self": 0.0420442724134773
                            },
                            "communicator.exchange": {
                              "total": 2.6973308051237836,
                              "count": 249,
                              "is_parallel": true,
                              "self": 2.6973308051237836
                            },
                            "steps_from_proto": {
                              "total": 0.12437889410648495,
                              "count": 249,
                              "is_parallel": true,
                              "self": 0.02756049670279026,
                              "children": {
                                "_process_vector_observation": {
                                  "total": 0.09681839740369469,
                                  "count": 498,
                                  "is_parallel": true,
                                  "self": 0.09681839740369469
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        },
        "trainer_threads": {
          "total": 4.0604034438729286e-05,
          "count": 1,
          "self": 4.0604034438729286e-05,
          "children": {
            "thread_root": {
              "total": 0.0,
              "count": 0,
              "is_parallel": true,
              "self": 0.0,
              "children": {
                "trainer_advance": {
                  "total": 3.662667714059353,
                  "count": 12283,
                  "is_parallel": true,
                  "self": 0.25680465332698077,
                  "children": {
                    "process_trajectory": {
                      "total": 3.405863060732372,
                      "count": 12283,
                      "is_parallel": true,
                      "self": 3.405863060732372
                    }
                  }
                }
              }
            }
          }
        },
        "TrainerController._save_model": {
          "total": 0.3092590819578618,
          "count": 1,
          "self": 0.00035183189902454615,
          "children": {
            "Trainer.save_model": {
              "total": 0.30890725005883723,
              "count": 1,
              "self": 0.30890725005883723
            }
          }
        }
      }
    }
  }
}

results/CrawlerStatic/run_logs/training_status.json (+7)

{
  "metadata": {
    "stats_format_version": "0.1.0",
    "mlagents_version": "0.18.0.dev0",
    "tensorflow_version": "2.2.0"
  }
}

results/WalkerDy236/wdyclv-ppo/config/imitation/CrawlerStatic.yaml (+36)

behaviors:
  CrawlerStatic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      gail:
        gamma: 0.99
        strength: 1.0
        encoding_size: 128
        learning_rate: 0.0003
        use_actions: false
        use_vail: false
        demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true
    behavioral_cloning:
      demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
      steps: 50000
      strength: 0.5
      samples_per_update: 0

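Unlike the ppo/CrawlerStatic.yaml further down, this imitation variant defines gail as the only reward signal and adds a behavioral_cloning stage driven by the same .demo file. Each configured reward signal is weighted by its strength; a toy illustration of that weighting (the function name and sample rewards are made up for the example, not ML-Agents internals):

# Toy sketch: weight per-signal rewards by the "strength" values from a
# reward_signals block like the one above. Names and numbers are illustrative.
def combined_reward(signal_rewards, strengths):
    return sum(strengths[name] * r for name, r in signal_rewards.items())

# gail has strength 1.0 in the config above, so its reward passes through:
print(combined_reward({"gail": 0.8}, {"gail": 1.0}))   # -> 0.8
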
results/WalkerDy236/wdyclv-ppo/config/imitation/FoodCollector.yaml (+36)

behaviors:
  FoodCollector:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      gail:
        gamma: 0.99
        strength: 0.1
        encoding_size: 128
        learning_rate: 0.0003
        use_actions: false
        use_vail: false
        demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
    keep_checkpoints: 5
    max_steps: 2000000
    time_horizon: 64
    summary_freq: 10000
    threaded: true
    behavioral_cloning:
      demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
      steps: 0
      strength: 1.0
      samples_per_update: 0

results/WalkerDy236/wdyclv-ppo/config/imitation/Hallway.yaml (+37)

behaviors:
  Hallway:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 1024
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
      memory:
        sequence_length: 64
        memory_size: 256
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
      gail:
        gamma: 0.99
        strength: 0.1
        encoding_size: 128
        learning_rate: 0.0003
        use_actions: false
        use_vail: false
        demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 64
    summary_freq: 10000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/imitation/PushBlock.yaml (+31)

behaviors:
  PushBlock:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      gail:
        gamma: 0.99
        strength: 1.0
        encoding_size: 128
        learning_rate: 0.0003
        use_actions: false
        use_vail: false
        demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
    keep_checkpoints: 5
    max_steps: 15000000
    time_horizon: 64
    summary_freq: 60000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/imitation/Pyramids.yaml (+34)

behaviors:
  Pyramids:
    trainer_type: ppo
    time_horizon: 128
    max_steps: 1.0e7
    hyperparameters:
      batch_size: 128
      beta: 0.01
      buffer_size: 2048
      epsilon: 0.2
      lambd: 0.95
      learning_rate: 0.0003
      num_epoch: 3
    network_settings:
      num_layers: 2
      normalize: false
      hidden_units: 512
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.99
      curiosity:
        strength: 0.02
        gamma: 0.99
        encoding_size: 256
      gail:
        strength: 0.01
        gamma: 0.99
        encoding_size: 128
        demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
    behavioral_cloning:
      demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
      strength: 0.5
      steps: 150000

results/WalkerDy236/wdyclv-ppo/config/ppo/3DBall.yaml (+26)

behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/3DBallHard.yaml (+26)

behaviors:
  3DBallHard:
    trainer_type: ppo
    hyperparameters:
      batch_size: 1200
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 1000
    summary_freq: 12000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/3DBall_randomize.yaml (+38)

behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000
    threaded: true
parameter_randomization:
  mass:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.5
      max_value: 10
  scale:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.75
      max_value: 3

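The parameter_randomization block above resamples the environment's mass and scale during training, each drawn uniformly from [min_value, max_value]. A minimal sketch of that sampling; the dict layout mirrors the YAML, and the code is an illustration rather than the sampler ML-Agents actually uses:

# Sketch: draw one value per randomized parameter, following the
# sampler_type: uniform / sampler_parameters layout in the config above.
import random

randomization = {
    "mass":  {"min_value": 0.5,  "max_value": 10},
    "scale": {"min_value": 0.75, "max_value": 3},
}

sample = {name: random.uniform(p["min_value"], p["max_value"])
          for name, p in randomization.items()}
print(sample)   # e.g. {'mass': 6.13, 'scale': 1.42}
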
results/WalkerDy236/wdyclv-ppo/config/ppo/Basic.yaml (+26)

behaviors:
  Basic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 32
      buffer_size: 256
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 20
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.9
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 3
    summary_freq: 2000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/Bouncer.yaml (+26)

behaviors:
  Bouncer:
    trainer_type: ppo
    hyperparameters:
      batch_size: 1024
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 64
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 4000000
    time_horizon: 64
    summary_freq: 10000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/CrawlerDynamic.yaml (+26)

behaviors:
  CrawlerDynamic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/CrawlerStatic.yaml (+26)

behaviors:
  CrawlerStatic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/FoodCollector.yaml (+26)

behaviors:
  FoodCollector:
    trainer_type: ppo
    hyperparameters:
      batch_size: 1024
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 2000000
    time_horizon: 64
    summary_freq: 10000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/GridWorld.yaml (+26)

behaviors:
  GridWorld:
    trainer_type: ppo
    hyperparameters:
      batch_size: 32
      buffer_size: 256
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.9
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 5
    summary_freq: 20000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/Hallway.yaml (+29)

behaviors:
  Hallway:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 1024
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
      memory:
        sequence_length: 64
        memory_size: 128
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 64
    summary_freq: 10000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/PushBlock.yaml (+26)

behaviors:
  PushBlock:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 2000000
    time_horizon: 64
    summary_freq: 60000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/Pyramids.yaml (+31)

behaviors:
  Pyramids:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
      curiosity:
        gamma: 0.99
        strength: 0.02
        encoding_size: 256
        learning_rate: 0.0003
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 128
    summary_freq: 30000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/Reacher.yaml (+26)

behaviors:
  Reacher:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 1000
    summary_freq: 60000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/SoccerTwos.yaml (+33)

behaviors:
  SoccerTwos:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 50000000
    time_horizon: 1000
    summary_freq: 10000
    threaded: true
    self_play:
      save_steps: 50000
      team_change: 200000
      swap_steps: 2000
      window: 10
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0

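The self_play block starts each team at initial_elo 1200.0 and tracks an Elo rating as a progress measure. A sketch of the standard Elo update such a rating implies; the K-factor of 16 is an assumption for illustration, not a value taken from this diff:

# Sketch: standard Elo update. result is 1.0 for a win, 0.5 for a draw,
# 0.0 for a loss; k controls how fast ratings move.
def elo_update(rating, opponent, result, k=16.0):
    expected = 1.0 / (1.0 + 10.0 ** ((opponent - rating) / 400.0))
    return rating + k * (result - expected)

print(elo_update(1200.0, 1200.0, 1.0))   # winner of an even match -> 1208.0
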
results/WalkerDy236/wdyclv-ppo/config/ppo/StrikersVsGoalie.yaml (+65)

behaviors:
  Goalie:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 50000000
    time_horizon: 1000
    summary_freq: 10000
    threaded: true
    self_play:
      save_steps: 50000
      team_change: 200000
      swap_steps: 1000
      window: 10
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0
  Striker:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 50000000
    time_horizon: 1000
    summary_freq: 10000
    threaded: true
    self_play:
      save_steps: 50000
      team_change: 200000
      swap_steps: 4000
      window: 10
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0

results/WalkerDy236/wdyclv-ppo/config/ppo/Tennis.yaml (+33)

behaviors:
  Tennis:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: true
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 50000000
    time_horizon: 1000
    summary_freq: 10000
    threaded: true
    self_play:
      save_steps: 50000
      team_change: 100000
      swap_steps: 2000
      window: 10
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0

results/WalkerDy236/wdyclv-ppo/config/ppo/VisualHallway.yaml (+29)

behaviors:
  VisualHallway:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 1024
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 1
      vis_encode_type: simple
      memory:
        sequence_length: 64
        memory_size: 128
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 64
    summary_freq: 10000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/VisualPushBlock.yaml (+29)

behaviors:
  VisualPushBlock:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 1024
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 1
      vis_encode_type: simple
      memory:
        sequence_length: 32
        memory_size: 128
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 3000000
    time_horizon: 64
    summary_freq: 60000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/VisualPyramids.yaml (+31)

behaviors:
  VisualPyramids:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 2024
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
      curiosity:
        gamma: 0.99
        strength: 0.01
        encoding_size: 256
        learning_rate: 0.0003
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 128
    summary_freq: 10000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/WalkerDynamic.yaml (+26)

behaviors:
  WalkerDynamic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/WalkerStatic.yaml (+26)

behaviors:
  WalkerStatic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/WallJump.yaml (+51)

behaviors:
  BigWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 128
    summary_freq: 20000
    threaded: true
  SmallWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 128
    summary_freq: 20000
    threaded: true

results/WalkerDy236/wdyclv-ppo/config/ppo/WallJump_curriculum.yaml (+68)

behaviors:
  BigWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2