
refactor vis_encoder_type and add to doc

/develop-generalizationTraining-TrainerController
GitHub · 5 years ago
Commit 6225317d
9 files changed, with 33 insertions and 18 deletions
  1. docs/Training-PPO.md (13 changes)
  2. ml-agents/mlagents/trainers/models.py (23 changes)
  3. ml-agents/mlagents/trainers/ppo/models.py (4 changes)
  4. ml-agents/mlagents/trainers/ppo/policy.py (5 changes)
  5. ml-agents/mlagents/trainers/ppo/trainer.py (1 change)
  6. ml-agents/mlagents/trainers/tests/test_bcmodule.py (1 change)
  7. ml-agents/mlagents/trainers/tests/test_environments/test_simple.py (1 change)
  8. ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)
  9. ml-agents/mlagents/trainers/tests/test_reward_signals.py (1 change)

docs/Training-PPO.md (13 changes)


Typical Range: `32` - `512`
### (Optional) Visual Encoder Type
`vis_encode_type` corresponds to the encoder type for encoding visual observations.
Valid options include:
* `simple` (default): a simple encoder which consists of two convolutional layers
* `nature_cnn`: CNN implementation proposed by Mnih et al. (https://www.nature.com/articles/nature14236),
consisting of three convolutional layers
* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two residual blocks, making a
much larger network than the other two.
Options: `simple`, `nature_cnn`, `resnet`
## (Optional) Recurrent Neural Network Hyperparameters
The below hyperparameters are only used when `use_recurrent` is set to true.
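As a rough illustration of the `simple` (default) encoder described in the excerpt above, here is a minimal TensorFlow 1.x sketch. The function name `simple_visual_encoder_sketch`, the filter counts, kernel sizes, and ELU activations are illustrative assumptions, not values taken from this commit:

```python
import tensorflow as tf

def simple_visual_encoder_sketch(image_input, h_size, scope, reuse=False):
    """Illustrative two-convolution visual encoder (the `simple` option).
    Layer sizes and activations here are assumptions for demonstration."""
    with tf.variable_scope(scope, reuse=reuse):
        conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8],
                                 strides=[4, 4], activation=tf.nn.elu, name="conv_1")
        conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4],
                                 strides=[2, 2], activation=tf.nn.elu, name="conv_2")
        flat = tf.layers.flatten(conv2)
        # Project to the requested hidden size so the visual stream can be
        # combined with vector observations downstream.
        return tf.layers.dense(flat, h_size, activation=tf.nn.elu)
```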

ml-agents/mlagents/trainers/models.py (23 changes)


class EncoderType(Enum):
    RESNET = "resnet"
    SIMPLE = "simple"
    DEFAULT = "default"
class LearningModel(object):

:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating nature cnn")
with tf.variable_scope(scope):
conv1 = tf.layers.conv2d(
image_input,
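The `nature_cnn` path above builds the three-convolution network of Mnih et al. (2015). Below is a hedged TensorFlow 1.x sketch; the function name `nature_cnn_sketch` and the layer sizes (the standard DQN values) are assumptions for illustration and may differ from the actual ml-agents implementation:

```python
import tensorflow as tf

def nature_cnn_sketch(image_input, h_size, scope, reuse=False):
    """Illustrative three-convolution encoder in the style of Mnih et al. (2015).
    The layer sizes are the usual DQN values and are assumptions here."""
    with tf.variable_scope(scope, reuse=reuse):
        conv1 = tf.layers.conv2d(image_input, 32, kernel_size=[8, 8],
                                 strides=[4, 4], activation=tf.nn.elu, name="conv_1")
        conv2 = tf.layers.conv2d(conv1, 64, kernel_size=[4, 4],
                                 strides=[2, 2], activation=tf.nn.elu, name="conv_2")
        conv3 = tf.layers.conv2d(conv2, 64, kernel_size=[3, 3],
                                 strides=[1, 1], activation=tf.nn.elu, name="conv_3")
        flat = tf.layers.flatten(conv3)
        # Project to the requested hidden size so the stream matches h_size.
        return tf.layers.dense(flat, h_size, activation=tf.nn.elu)
```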

:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating resnet")
n_channels = [16, 32, 32]  # number of channels for each stack
n_blocks = 2 # number of residual blocks
with tf.variable_scope(scope):
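In the hunk above, `n_channels = [16, 32, 32]` and `n_blocks = 2` describe three stacks with two residual blocks each, following the IMPALA architecture. A rough sketch of one such stack is shown below; the function name `impala_stack_sketch` and the conv/pool details are assumptions based on the IMPALA paper rather than code copied from this commit:

```python
import tensorflow as tf

def impala_stack_sketch(x, channels, n_blocks, name):
    """One IMPALA-style stack (https://arxiv.org/abs/1802.01561): a 3x3 conv,
    a strided max-pool, then n_blocks residual blocks. This is a sketch based
    on the paper, not the exact ml-agents implementation."""
    x = tf.layers.conv2d(x, channels, kernel_size=[3, 3], strides=[1, 1],
                         padding="same", name=name + "_conv")
    x = tf.layers.max_pooling2d(x, pool_size=[3, 3], strides=[2, 2],
                                padding="same")
    for i in range(n_blocks):
        shortcut = x
        block = tf.nn.relu(x)
        block = tf.layers.conv2d(block, channels, kernel_size=[3, 3],
                                 strides=[1, 1], padding="same",
                                 name="%s_block%d_conv1" % (name, i))
        block = tf.nn.relu(block)
        block = tf.layers.conv2d(block, channels, kernel_size=[3, 3],
                                 strides=[1, 1], padding="same",
                                 name="%s_block%d_conv2" % (name, i))
        x = block + shortcut
    return x

# The full resnet encoder would apply one such stack per entry in
# n_channels = [16, 32, 32], then flatten and project to the hidden size.
```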

)
def create_observation_streams(
self, num_streams, h_size, num_layers, vis_encode_type="default"
):
self,
num_streams: int,
h_size: int,
num_layers: int,
vis_encode_type: EncoderType = EncoderType.SIMPLE,
) -> tf.Tensor:
"""
Creates encoding stream for observations.
:param num_streams: Number of streams to create.

visual_encoders = []
hidden_state, hidden_visual = None, None
if self.vis_obs_size > 0:
vis_encode_type = EncoderType(vis_encode_type)
if vis_encode_type == EncoderType.RESNET:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_resnet_visual_observation_encoder(
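The hunk above shows `create_observation_streams` switching on the new `EncoderType` to build a ResNet encoder for each visual observation. The self-contained sketch below illustrates that dispatch pattern; `build_simple`, `build_nature_cnn`, `build_resnet`, and `encode_visual_observations` are stand-in names, not the real `LearningModel` methods:

```python
from enum import Enum
from typing import Callable, Dict, List

class EncoderType(Enum):
    SIMPLE = "simple"
    NATURE_CNN = "nature_cnn"
    RESNET = "resnet"

# Stand-ins for the LearningModel methods that build each visual encoder.
def build_simple(obs: str) -> str:
    return "simple(%s)" % obs

def build_nature_cnn(obs: str) -> str:
    return "nature_cnn(%s)" % obs

def build_resnet(obs: str) -> str:
    return "resnet(%s)" % obs

ENCODER_BUILDERS: Dict[EncoderType, Callable[[str], str]] = {
    EncoderType.SIMPLE: build_simple,
    EncoderType.NATURE_CNN: build_nature_cnn,
    EncoderType.RESNET: build_resnet,
}

def encode_visual_observations(observations: List[str],
                               vis_encode_type: EncoderType) -> List[str]:
    """Run every visual observation through the configured encoder, mirroring
    the per-observation loop in create_observation_streams."""
    builder = ENCODER_BUILDERS[vis_encode_type]
    return [builder(obs) for obs in observations]

print(encode_visual_observations(["camera_0", "camera_1"], EncoderType.RESNET))
# ['resnet(camera_0)', 'resnet(camera_1)']
```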

self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
def create_cc_actor_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType
) -> None:
"""
Creates Continuous control actor-critic model.
:param h_size: Size of hidden linear layers.

(tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
)
def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
def create_dc_actor_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType
) -> None:
"""
Creates Discrete control actor-critic model.
:param h_size: Size of hidden linear layers.

ml-agents/mlagents/trainers/ppo/models.py (4 changes)


import numpy as np
import tensorflow as tf
from mlagents.trainers.models import LearningModel
from mlagents.trainers.models import LearningModel, EncoderType
logger = logging.getLogger("mlagents.trainers")

m_size=None,
seed=0,
stream_names=None,
vis_encode_type="default",
vis_encode_type=EncoderType.SIMPLE,
):
"""
Takes a Unity environment and model-specific hyper-parameters and returns the

ml-agents/mlagents/trainers/ppo/policy.py (5 changes)


from mlagents.envs.timers import timed
from mlagents.trainers import BrainInfo, ActionInfo
from mlagents.trainers.models import EncoderType
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.components.reward_signals.reward_signal_factory import (

m_size=self.m_size,
seed=seed,
stream_names=list(reward_signal_configs.keys()),
vis_encode_type=trainer_params["vis_encode_type"],
vis_encode_type=EncoderType(
trainer_params.get("vis_encode_type", "simple")
),
)
self.model.create_ppo_optimizer()
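The policy now resolves the setting with `trainer_params.get("vis_encode_type", "simple")`, so configs that omit the key fall back to the simple encoder, while an unrecognized string fails fast when converted to the enum. A small self-contained sketch of that behaviour (`resolve_encoder_type` is an illustrative helper, and the `EncoderType` members here mirror the documented options):

```python
from enum import Enum

class EncoderType(Enum):
    SIMPLE = "simple"
    NATURE_CNN = "nature_cnn"
    RESNET = "resnet"

def resolve_encoder_type(trainer_params: dict) -> EncoderType:
    """Mirrors the lookup in policy.py: a missing key falls back to the simple
    encoder; an unknown value raises ValueError from the enum conversion."""
    return EncoderType(trainer_params.get("vis_encode_type", "simple"))

print(resolve_encoder_type({}))                             # EncoderType.SIMPLE
print(resolve_encoder_type({"vis_encode_type": "resnet"}))  # EncoderType.RESNET
try:
    resolve_encoder_type({"vis_encode_type": "resnext"})
except ValueError as err:
    print(err)  # 'resnext' is not a valid EncoderType
```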

ml-agents/mlagents/trainers/ppo/trainer.py (1 change)


"memory_size",
"model_path",
"reward_signals",
"vis_encode_type",
]
self.check_param_keys()

ml-agents/mlagents/trainers/tests/test_bcmodule.py (1 change)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
pretraining:
demo_path: ./demos/ExpertPyramid.demo

ml-agents/mlagents/trainers/tests/test_environments/test_simple.py (1 change)


extrinsic:
strength: 1.0
gamma: 0.99
vis_encode_type: default
"""
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:

ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1

"sequence_length": 64,
"summary_freq": 3000,
"use_recurrent": False,
"vis_encode_type": "default",
"use_curiosity": False,
"curiosity_strength": 0.01,
"curiosity_enc_size": 128,

ml-agents/mlagents/trainers/tests/test_reward_signals.py (1 change)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
