Chris Elion
5 年前
当前提交
68b68396
共有 107 个文件被更改,包括 1826 次插入 和 293 次删除
-
4.gitignore
-
20com.unity.ml-agents/CHANGELOG.md
-
4com.unity.ml-agents/Editor/BehaviorParametersEditor.cs
-
2com.unity.ml-agents/Runtime/Agent.cs
-
2com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
-
4com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
-
2com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
-
2com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
-
2com.unity.ml-agents/Runtime/Inference/TensorProxy.cs
-
2com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs
-
2com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs
-
4com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs
-
4com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
-
2com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs
-
4com.unity.ml-agents/Tests/Editor/TensorUtilsTest.cs
-
15docs/Getting-Started.md
-
43docs/Learning-Environment-Create-New.md
-
4docs/Learning-Environment-Examples.md
-
13docs/Learning-Environment-Executable.md
-
19docs/Migrating.md
-
2docs/Using-Tensorboard.md
-
3gym-unity/README.md
-
4gym-unity/gym_unity/__init__.py
-
4ml-agents-envs/mlagents_envs/__init__.py
-
30ml-agents-envs/mlagents_envs/environment.py
-
12ml-agents-envs/mlagents_envs/tests/test_envs.py
-
4ml-agents/mlagents/trainers/__init__.py
-
169ml-agents/mlagents/trainers/learn.py
-
5ml-agents/mlagents/trainers/policy/tf_policy.py
-
3ml-agents/mlagents/trainers/ppo/trainer.py
-
7ml-agents/mlagents/trainers/sac/trainer.py
-
6ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
-
2ml-agents/mlagents/trainers/tests/test_bcmodule.py
-
9ml-agents/mlagents/trainers/tests/test_ghost.py
-
133ml-agents/mlagents/trainers/tests/test_learn.py
-
8ml-agents/mlagents/trainers/tests/test_nn_policy.py
-
2ml-agents/mlagents/trainers/tests/test_policy.py
-
9ml-agents/mlagents/trainers/tests/test_ppo.py
-
2ml-agents/mlagents/trainers/tests/test_reward_signals.py
-
2ml-agents/mlagents/trainers/tests/test_rl_trainer.py
-
14ml-agents/mlagents/trainers/tests/test_sac.py
-
6ml-agents/mlagents/trainers/tests/test_simple_rl.py
-
6ml-agents/mlagents/trainers/tests/test_trainer_controller.py
-
101ml-agents/mlagents/trainers/tests/test_trainer_util.py
-
3ml-agents/mlagents/trainers/trainer/trainer.py
-
20ml-agents/mlagents/trainers/trainer_controller.py
-
55ml-agents/mlagents/trainers/trainer_util.py
-
17ml-agents/tests/yamato/scripts/run_llapi.py
-
4ml-agents/tests/yamato/training_int_tests.py
-
3ml-agents/tests/yamato/yamato_utils.py
-
41ml-agents/mlagents/trainers/cli_utils.py
-
29config/imitation/CrawlerStatic.yaml
-
29config/imitation/FoodCollector.yaml
-
28config/imitation/Hallway.yaml
-
25config/imitation/PushBlock.yaml
-
36config/imitation/Pyramids.yaml
-
25config/ppo/3DBall.yaml
-
25config/ppo/3DBallHard.yaml
-
40config/ppo/3DBall_randomize.yaml
-
25config/ppo/Basic.yaml
-
25config/ppo/Bouncer.yaml
-
25config/ppo/CrawlerDynamic.yaml
-
25config/ppo/CrawlerStatic.yaml
-
25config/ppo/FoodCollector.yaml
-
25config/ppo/GridWorld.yaml
-
25config/ppo/Hallway.yaml
-
25config/ppo/PushBlock.yaml
-
29config/ppo/Pyramids.yaml
-
25config/ppo/Reacher.yaml
-
38config/ppo/SoccerTwos.yaml
-
62config/ppo/StrikersVsGoalie.yaml
-
31config/ppo/Tennis.yaml
-
25config/ppo/VisualHallway.yaml
-
25config/ppo/VisualPushBlock.yaml
-
29config/ppo/VisualPyramids.yaml
-
25config/ppo/Walker.yaml
-
50config/ppo/WallJump.yaml
-
65config/ppo/WallJump_curriculum.yaml
-
25config/ppo/WormDynamic.yaml
-
25config/ppo/WormStatic.yaml
-
25config/sac/3DBall.yaml
-
25config/sac/3DBallHard.yaml
-
25config/sac/Basic.yaml
-
25config/sac/Bouncer.yaml
-
25config/sac/CrawlerDynamic.yaml
-
25config/sac/CrawlerStatic.yaml
-
25config/sac/FoodCollector.yaml
-
25config/sac/GridWorld.yaml
-
25config/sac/Hallway.yaml
-
25config/sac/PushBlock.yaml
-
31config/sac/Pyramids.yaml
-
25config/sac/Reacher.yaml
-
30config/sac/Tennis.yaml
-
26config/sac/VisualHallway.yaml
-
26config/sac/VisualPushBlock.yaml
|
|||
# Version of the library that will be used to upload to pypi |
|||
__version__ = "0.16.0" |
|||
__version__ = "0.17.0.dev0" |
|||
__release_tag__ = "release_1" |
|||
__release_tag__ = None |
|
|||
# Version of the library that will be used to upload to pypi |
|||
__version__ = "0.16.0" |
|||
__version__ = "0.17.0.dev0" |
|||
__release_tag__ = "release_1" |
|||
__release_tag__ = None |
|
|||
# Version of the library that will be used to upload to pypi |
|||
__version__ = "0.16.0" |
|||
__version__ = "0.17.0.dev0" |
|||
__release_tag__ = "release_1" |
|||
__release_tag__ = None |
|
|||
from typing import Set |
|||
import argparse |
|||
|
|||
|
|||
class DetectDefault(argparse.Action):
    """
    Internal argparse Action that stores a parsed value and also records
    the destination name, so that arguments which aren't default can be
    detected afterwards.
    """

    # Class-level set shared by every instance: holds the `dest` of each
    # argument for which this action has been invoked.
    non_default_args: Set[str] = set()

    def __call__(self, arg_parser, namespace, values, option_string=None):
        dest = self.dest
        # Write the parsed value onto the namespace under this action's dest.
        setattr(namespace, dest, values)
        # Remember that this destination was set through the action.
        DetectDefault.non_default_args.add(dest)
|||
|
|||
|
|||
class DetectDefaultStoreTrue(DetectDefault):
    """
    Internal variant of DetectDefault for store_true style arguments:
    whenever the action is invoked, True is stored for its destination.
    """

    def __init__(self, nargs=0, **kwargs):
        # nargs defaults to 0 here; everything else is forwarded untouched
        # to the parent constructor.
        super().__init__(nargs=nargs, **kwargs)

    def __call__(self, arg_parser, namespace, values, option_string=None):
        # The incoming `values` is ignored; the parent is asked to store True.
        flag_value = True
        super().__call__(arg_parser, namespace, flag_value, option_string)
|||
|
|||
|
|||
class StoreConfigFile(argparse.Action):
    """
    Custom Action that keeps the config file location out of the parsed
    CLI args and stores it on this class instead.
    The intent is to maintain an equivalence between the config file's
    contents and the args themselves.
    """

    # Declared but not initialised; assigned the first time the action runs.
    trainer_config_path: str

    def __call__(self, arg_parser, namespace, values, option_string=None):
        dest = self.dest
        # Remove the entry for this argument from the namespace so the path
        # does not appear among the parsed args.
        delattr(namespace, dest)
        # Keep the path on the class itself.
        setattr(StoreConfigFile, "trainer_config_path", values)
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1e7 |
|||
memory_size: 256 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
gail: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo |
|||
strength: 0.5 |
|||
steps: 50000 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 2.0e6 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
gail: |
|||
strength: 0.1 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo |
|||
strength: 1.0 |
|||
steps: 0 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.0e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gail: |
|||
strength: 0.1 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.5e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
gail: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.0e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|||
gail: |
|||
strength: 0.01 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|||
strength: 0.5 |
|||
steps: 150000 |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
3DBallHard: |
|||
trainer: ppo |
|||
batch_size: 1200 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 3.0e-4 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
parameter_randomization: |
|||
resampling-interval: 500 |
|||
mass: |
|||
sampler-type: "uniform" |
|||
min_value: 0.5 |
|||
max_value: 10 |
|||
gravity: |
|||
sampler-type: "uniform" |
|||
min_value: 7 |
|||
max_value: 12 |
|||
scale: |
|||
sampler-type: "uniform" |
|||
min_value: 0.75 |
|||
max_value: 3 |
|
|||
behaviors: |
|||
Basic: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 20 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 3 |
|||
sequence_length: 64 |
|||
summary_freq: 2000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|
|||
behaviors: |
|||
Bouncer: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 64 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 4.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
CrawlerDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
GridWorld: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 500000 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 5 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|
|||
behaviors: |
|||
Reacher: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
SoccerTwos: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 200000 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.05, 0.1] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
ball_touch: [1.0, 0.5, 0.0] |
|
|||
behaviors: |
|||
Goalie: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 25000 |
|||
team_change: 200000 |
|||
|
|||
Striker: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 100000 |
|||
team_change: 200000 |
|
|||
behaviors: |
|||
Tennis: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 100000 |
|
|||
behaviors: |
|||
VisualHallway: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
VisualPushBlock: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 60000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
VisualPyramids: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 2024 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.01 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|
|||
behaviors: |
|||
Walker: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
big_wall_min_height: [0.0, 4.0, 6.0, 8.0] |
|||
big_wall_max_height: [4.0, 7.0, 8.0, 8.0] |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
small_wall_height: [1.5, 2.0, 2.5, 4.0] |
|
|||
behaviors: |
|||
WormDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
WormStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 64 |
|||
init_entcoef: 0.5 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
3DBallHard: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Basic: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 20 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 10 |
|||
sequence_length: 64 |
|||
summary_freq: 2000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Bouncer: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 64 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
CrawlerDynamic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 2000 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 3e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.05 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
GridWorld: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 1000 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.5 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 500000 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 5 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.1 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.05 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 100000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 10000 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 16 |
|||
summary_freq: 30000 |
|||
tau: 0.01 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 2.0 |
|||
gamma: 0.99 |
|||
gail: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
use_actions: true |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|
|||
behaviors: |
|||
Reacher: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Tennis: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_current_self_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|
|||
behaviors: |
|||
VisualHallway: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
VisualPushBlock: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 3.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 60000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
部分文件因为文件数量过多而无法显示
撰写
预览
正在加载...
取消
保存
Reference in new issue