
Rename LearningModel to ModelUtils

/develop/nopreviousactions
Ervin Teng, 5 years ago
Current commit: 14f2a7f2
10 files changed, 112 insertions(+), 114 deletions(-)
1. ml-agents/mlagents/trainers/common/nn_policy.py (18 changed lines)
2. ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (24 changed lines)
3. ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (20 changed lines)
4. ml-agents/mlagents/trainers/models.py (30 changed lines)
5. ml-agents/mlagents/trainers/ppo/optimizer.py (26 changed lines)
6. ml-agents/mlagents/trainers/sac/network.py (36 changed lines)
7. ml-agents/mlagents/trainers/sac/optimizer.py (30 changed lines)
8. ml-agents/mlagents/trainers/tests/test_nn_policy.py (26 changed lines)
9. ml-agents/mlagents/trainers/tests/test_ppo.py (4 changed lines)
10. ml-agents/mlagents/trainers/tf_policy.py (12 changed lines)
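The rename is mechanical: every helper that previously lived on LearningModel is reached through ModelUtils with the same signature, and a handful of optimizer/network methods also gain a leading underscore to mark them as internal. As an illustrative before/after for a typical call site (not a line taken from this commit):

# Before:
# from mlagents.trainers.models import LearningModel
# activation_fn = LearningModel.swish

# After: only the owning class name changes.
from mlagents.trainers.models import ModelUtils

activation_fn = ModelUtils.swish  # same static helper, new namespace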

ml-agents/mlagents/trainers/common/nn_policy.py (18 changed lines)


from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.models import EncoderType
- from mlagents.trainers.models import LearningModel
+ from mlagents.trainers.models import ModelUtils
from mlagents.trainers.tf_policy import TFPolicy
logger = logging.getLogger("mlagents.trainers")

:param resample: Whether we are using the resampling trick to update the policy.
"""
with tf.variable_scope("policy"):
- hidden_stream = LearningModel.create_observation_streams(
+ hidden_stream = ModelUtils.create_observation_streams(
self.visual_in,
self.processed_vector_in,
1,

self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
- hidden_policy, memory_policy_out = LearningModel.create_recurrent_encoder(
+ hidden_policy, memory_policy_out = ModelUtils.create_recurrent_encoder(
hidden_stream,
self.memory_in,
self.sequence_length_ph,

self.act_size[0],
activation=None,
name="mu",
- kernel_initializer=LearningModel.scaled_init(0.01),
+ kernel_initializer=ModelUtils.scaled_init(0.01),
reuse=tf.AUTO_REUSE,
)

self.act_size[0],
activation=None,
name="log_sigma",
- kernel_initializer=LearningModel.scaled_init(0.01),
+ kernel_initializer=ModelUtils.scaled_init(0.01),
)
else:
log_sigma = tf.get_variable(

:param vis_encode_type: Type of visual encoder to use if visual input.
"""
with tf.variable_scope("policy"):
- hidden_stream = LearningModel.create_observation_streams(
+ hidden_stream = ModelUtils.create_observation_streams(
self.visual_in,
self.processed_vector_in,
1,

self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
- hidden_policy, memory_policy_out = LearningModel.create_recurrent_encoder(
+ hidden_policy, memory_policy_out = ModelUtils.create_recurrent_encoder(
hidden_policy,
self.memory_in,
self.sequence_length_ph,

size,
activation=None,
use_bias=False,
- kernel_initializer=LearningModel.scaled_init(0.01),
+ kernel_initializer=ModelUtils.scaled_init(0.01),
)
)

shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
)
- output, self.action_probs, normalized_logits = LearningModel.create_discrete_action_masking_layer(
+ output, self.action_probs, normalized_logits = ModelUtils.create_discrete_action_masking_layer(
raw_log_probs, self.action_masks, self.act_size
)

ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (24 changed lines)


from typing import List, Tuple
from mlagents.tf_utils import tf
- from mlagents.trainers.models import LearningModel
+ from mlagents.trainers.models import ModelUtils
from mlagents.trainers.tf_policy import TFPolicy

next_visual_encoders = []
for i in range(self.policy.vis_obs_size):
# Create input ops for next (t+1) visual observations.
- next_visual_input = LearningModel.create_visual_input(
+ next_visual_input = ModelUtils.create_visual_input(
self.policy.brain.camera_resolutions[i],
name="curiosity_next_visual_observation_" + str(i),
)

# Note that these encoders are siamese.
- encoded_visual = LearningModel.create_visual_observation_encoder(
+ encoded_visual = ModelUtils.create_visual_observation_encoder(
- LearningModel.swish,
+ ModelUtils.swish,
- encoded_next_visual = LearningModel.create_visual_observation_encoder(
+ encoded_next_visual = ModelUtils.create_visual_observation_encoder(
- LearningModel.swish,
+ ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
True,

name="curiosity_next_vector_observation",
)
- encoded_vector_obs = LearningModel.create_vector_observation_encoder(
+ encoded_vector_obs = ModelUtils.create_vector_observation_encoder(
- LearningModel.swish,
+ ModelUtils.swish,
- encoded_next_vector_obs = LearningModel.create_vector_observation_encoder(
+ encoded_next_vector_obs = ModelUtils.create_vector_observation_encoder(
- LearningModel.swish,
+ ModelUtils.swish,
2,
"curiosity_vector_obs_encoder",
True,

:param encoded_next_state: Tensor corresponding to encoded next state.
"""
combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
- hidden = tf.layers.dense(combined_input, 256, activation=LearningModel.swish)
+ hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
if self.policy.brain.vector_action_space_type == "continuous":
pred_action = tf.layers.dense(
hidden, self.policy.act_size[0], activation=None

combined_input = tf.concat(
[encoded_state, self.policy.selected_actions], axis=1
)
- hidden = tf.layers.dense(combined_input, 256, activation=LearningModel.swish)
+ hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
pred_next_state = tf.layers.dense(
hidden,
self.encoding_size

ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (20 changed lines)


from mlagents.tf_utils import tf
from mlagents.trainers.tf_policy import TFPolicy
- from mlagents.trainers.models import LearningModel
+ from mlagents.trainers.models import ModelUtils
EPSILON = 1e-7

)
if self.policy.normalize:
encoded_expert_list.append(
- LearningModel.normalize_vector_obs(
+ ModelUtils.normalize_vector_obs(
self.obs_in_expert,
self.policy.running_mean,
self.policy.running_variance,

visual_expert_encoders = []
for i in range(self.policy.vis_obs_size):
# Create input ops for next (t+1) visual observations.
- visual_input = LearningModel.create_visual_input(
+ visual_input = ModelUtils.create_visual_input(
- encoded_policy_visual = LearningModel.create_visual_observation_encoder(
+ encoded_policy_visual = ModelUtils.create_visual_observation_encoder(
- LearningModel.swish,
+ ModelUtils.swish,
- encoded_expert_visual = LearningModel.create_visual_observation_encoder(
+ encoded_expert_visual = ModelUtils.create_visual_observation_encoder(
- LearningModel.swish,
+ ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
True,

hidden_1 = tf.layers.dense(
concat_input,
self.h_size,
- activation=LearningModel.swish,
+ activation=ModelUtils.swish,
name="gail_d_hidden_1",
reuse=reuse,
)

self.h_size,
- activation=LearningModel.swish,
+ activation=ModelUtils.swish,
name="gail_d_hidden_2",
reuse=reuse,
)

self.z_size,
reuse=reuse,
name="gail_z_mean",
- kernel_initializer=LearningModel.scaled_init(0.01),
+ kernel_initializer=ModelUtils.scaled_init(0.01),
)
self.noise = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)

ml-agents/mlagents/trainers/models.py (30 changed lines)


running_variance: tf.Tensor
- class LearningModel:
+ class ModelUtils:
# Minimum supported side for each encoder type. If refactoring an encoder, please
# adjust these also.
MIN_RESOLUTION_FOR_ENCODER = {

"""
visual_in: List[tf.Tensor] = []
for i, camera_resolution in enumerate(camera_resolutions):
- visual_input = LearningModel.create_visual_input(
+ visual_input = ModelUtils.create_visual_input(
camera_resolution, name="visual_observation_" + str(i)
)
visual_in.append(visual_input)

dtype=tf.float32,
initializer=tf.ones_initializer(),
)
- update_normalization = LearningModel.create_normalizer_update(
+ update_normalization = ModelUtils.create_normalizer_update(
vector_obs, steps, running_mean, running_variance
)
return NormalizerTensors(

hidden = tf.layers.flatten(conv2)
with tf.variable_scope(scope + "/" + "flat_encoding"):
- hidden_flat = LearningModel.create_vector_observation_encoder(
+ hidden_flat = ModelUtils.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat

hidden = tf.layers.flatten(conv3)
with tf.variable_scope(scope + "/" + "flat_encoding"):
- hidden_flat = LearningModel.create_vector_observation_encoder(
+ hidden_flat = ModelUtils.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat

hidden = tf.layers.flatten(hidden)
with tf.variable_scope(scope + "/" + "flat_encoding"):
- hidden_flat = LearningModel.create_vector_observation_encoder(
+ hidden_flat = ModelUtils.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat

ENCODER_FUNCTION_BY_TYPE = {
- EncoderType.SIMPLE: LearningModel.create_visual_observation_encoder,
- EncoderType.NATURE_CNN: LearningModel.create_nature_cnn_visual_observation_encoder,
- EncoderType.RESNET: LearningModel.create_resnet_visual_observation_encoder,
+ EncoderType.SIMPLE: ModelUtils.create_visual_observation_encoder,
+ EncoderType.NATURE_CNN: ModelUtils.create_nature_cnn_visual_observation_encoder,
+ EncoderType.RESNET: ModelUtils.create_resnet_visual_observation_encoder,
- encoder_type, LearningModel.create_visual_observation_encoder
+ encoder_type, ModelUtils.create_visual_observation_encoder
)
@staticmethod

def _check_resolution_for_encoder(
vis_in: tf.Tensor, vis_encoder_type: EncoderType
) -> None:
- min_res = LearningModel.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
+ min_res = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
height = vis_in.shape[1]
width = vis_in.shape[2]
if height < min_res or width < min_res:

the scopes for each of the streams. None if all under the same TF scope.
:return: List of encoded streams.
"""
- activation_fn = LearningModel.swish
+ activation_fn = ModelUtils.swish
- create_encoder_func = LearningModel.get_encoder_for_type(vis_encode_type)
+ create_encoder_func = ModelUtils.get_encoder_for_type(vis_encode_type)
visual_encoders = []
hidden_state, hidden_visual = None, None

- LearningModel._check_resolution_for_encoder(vis_in, vis_encode_type)
+ ModelUtils._check_resolution_for_encoder(vis_in, vis_encode_type)
encoded_visual = create_encoder_func(
vis_in,
h_size,

visual_encoders.append(encoded_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
if vector_in.get_shape()[-1] > 0: # Don't encode 0-shape inputs
- hidden_state = LearningModel.create_vector_observation_encoder(
+ hidden_state = ModelUtils.create_vector_observation_encoder(
vector_observation_input,
h_size,
activation_fn,
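Since ModelUtils is a pure namespace of static graph-building helpers, the rename touches only class-level references and no instance state. A condensed, illustrative sketch of that pattern (not the full class; the swish body shown is the standard x * sigmoid(x) definition and may differ in detail from the repository):

import tensorflow as tf

class ModelUtils:
    # Per-EncoderType minimum input resolution lives here as a class attribute
    # (values elided; see MIN_RESOLUTION_FOR_ENCODER in the hunk above).
    MIN_RESOLUTION_FOR_ENCODER: dict = {}

    @staticmethod
    def swish(input_activation: tf.Tensor) -> tf.Tensor:
        # Swish activation, x * sigmoid(x), used as the default activation_fn.
        return tf.multiply(input_activation, tf.nn.sigmoid(input_activation))

Because every member is a @staticmethod or a class attribute, call sites only needed the LearningModel-to-ModelUtils token swap.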

ml-agents/mlagents/trainers/ppo/optimizer.py (26 changed lines)


import numpy as np
from mlagents.tf_utils import tf
from mlagents_envs.timers import timed
- from mlagents.trainers.models import LearningModel, EncoderType, LearningRateSchedule
+ from mlagents.trainers.models import ModelUtils, EncoderType, LearningRateSchedule
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.common.tf_optimizer import TFOptimizer
from mlagents.trainers.buffer import AgentBuffer

else:
self._create_dc_critic(h_size, num_layers, vis_encode_type)
- self.learning_rate = LearningModel.create_learning_rate(
+ self.learning_rate = ModelUtils.create_learning_rate(
lr_schedule, lr, self.policy.global_step, int(max_step)
)
self._create_losses(

lr,
max_step,
)
- self.create_ppo_optimizer()
+ self._create_ppo_optimizer_ops()
self.update_dict.update(
{

:param num_layers: Number of hidden linear layers.
:param vis_encode_type: The type of visual encoder to use.
"""
- hidden_stream = LearningModel.create_observation_streams(
+ hidden_stream = ModelUtils.create_observation_streams(
self.policy.visual_in,
self.policy.processed_vector_in,
1,

)[0]
if self.policy.use_recurrent:
- hidden_value, memory_value_out = LearningModel.create_recurrent_encoder(
+ hidden_value, memory_value_out = ModelUtils.create_recurrent_encoder(
hidden_stream,
self.memory_in,
self.policy.sequence_length_ph,

else:
hidden_value = hidden_stream
- self.value_heads, self.value = LearningModel.create_value_heads(
+ self.value_heads, self.value = ModelUtils.create_value_heads(
self.stream_names, hidden_value
)
self.all_old_log_probs = tf.placeholder(

:param num_layers: Number of hidden linear layers.
:param vis_encode_type: The type of visual encoder to use.
"""
- hidden_stream = LearningModel.create_observation_streams(
+ hidden_stream = ModelUtils.create_observation_streams(
self.policy.visual_in,
self.policy.processed_vector_in,
1,

)[0]
if self.policy.use_recurrent:
- hidden_value, memory_value_out = LearningModel.create_recurrent_encoder(
+ hidden_value, memory_value_out = ModelUtils.create_recurrent_encoder(
hidden_stream,
self.memory_in,
self.policy.sequence_length_ph,

else:
hidden_value = hidden_stream
- self.value_heads, self.value = LearningModel.create_value_heads(
+ self.value_heads, self.value = ModelUtils.create_value_heads(
self.stream_names, hidden_value
)

name="old_probabilities",
)
- _, _, old_normalized_logits = LearningModel.create_discrete_action_masking_layer(
+ _, _, old_normalized_logits = ModelUtils.create_discrete_action_masking_layer(
self.all_old_log_probs, self.policy.action_masks, self.policy.act_size
)

* tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
)
- def create_ppo_optimizer(self):
+ def _create_ppo_optimizer_ops(self):
self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer.minimize(self.loss)

:param num_sequences: Number of sequences to process.
:return: Results of update.
"""
- feed_dict = self.construct_feed_dict(batch, num_sequences)
+ feed_dict = self._construct_feed_dict(batch, num_sequences)
stats_needed = self.stats_name_to_update_name
update_stats = {}
# Collect feed dicts for all reward signals.

update_stats[stat_name] = update_vals[update_name]
return update_stats
- def construct_feed_dict(
+ def _construct_feed_dict(
self, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
# Do an optional burn-in for memories
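Alongside the class rename, this file also renames the optimizer's internal helpers with a leading underscore (create_ppo_optimizer to _create_ppo_optimizer_ops, construct_feed_dict to _construct_feed_dict). A simplified, illustrative shape of the resulting call path (a hypothetical skeleton, not the repository class verbatim):

class PPOOptimizer:
    def update(self, batch, num_sequences):
        # Public entry point is unchanged...
        feed_dict = self._construct_feed_dict(batch, num_sequences)
        return self._execute_model(feed_dict, self.update_dict)

    def _construct_feed_dict(self, mini_batch, num_sequences):
        # ...while feed-dict and graph-building helpers are underscore-prefixed
        # to mark them as internal to the optimizer.
        ...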

ml-agents/mlagents/trainers/sac/network.py (36 changed lines)


from mlagents.tf_utils import tf
- from mlagents.trainers.models import LearningModel, EncoderType
+ from mlagents.trainers.models import ModelUtils, EncoderType
LOG_STD_MAX = 2
LOG_STD_MIN = -20

self.num_layers = num_layers
self.stream_names = stream_names
self.h_size = h_size
- self.activ_fn = LearningModel.swish
+ self.activ_fn = ModelUtils.swish
self.sequence_length_ph = tf.placeholder(
shape=None, dtype=tf.int32, name="sac_sequence_length"

:param scope: TF scope for value network.
"""
with tf.variable_scope(scope):
- value_hidden = LearningModel.create_vector_observation_encoder(
+ value_hidden = ModelUtils.create_vector_observation_encoder(
- value_hidden, memory_out = LearningModel.create_recurrent_encoder(
+ value_hidden, memory_out = ModelUtils.create_recurrent_encoder(
value_hidden,
self.value_memory_in,
self.sequence_length_ph,

:param num_outputs: Number of outputs of each Q function. If discrete, equal to number of actions.
"""
with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
- q1_hidden = LearningModel.create_vector_observation_encoder(
+ q1_hidden = ModelUtils.create_vector_observation_encoder(
- q1_hidden, memory_out = LearningModel.create_recurrent_encoder(
+ q1_hidden, memory_out = ModelUtils.create_recurrent_encoder(
q1_hidden,
self.q1_memory_in,
self.sequence_length_ph,

q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
- q2_hidden = LearningModel.create_vector_observation_encoder(
+ q2_hidden = ModelUtils.create_vector_observation_encoder(
- q2_hidden, memory_out = LearningModel.create_recurrent_encoder(
+ q2_hidden, memory_out = ModelUtils.create_recurrent_encoder(
q2_hidden,
self.q2_memory_in,
self.sequence_length_ph,

vis_encode_type,
)
with tf.variable_scope(TARGET_SCOPE):
- self.visual_in = LearningModel.create_visual_input_placeholders(
+ self.visual_in = ModelUtils.create_visual_input_placeholders(
- self.vector_in = LearningModel.create_vector_input(policy.vec_obs_size)
+ self.vector_in = ModelUtils.create_vector_input(policy.vec_obs_size)
- normalization_tensors = LearningModel.create_normalizer(self.vector_in)
+ normalization_tensors = ModelUtils.create_normalizer(self.vector_in)
- self.processed_vector_in = LearningModel.normalize_vector_obs(
+ self.processed_vector_in = ModelUtils.normalize_vector_obs(
self.vector_in,
self.running_mean,
self.running_variance,

shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
)
self.value_memory_in = self.memory_in
- hidden_streams = LearningModel.create_observation_streams(
+ hidden_streams = ModelUtils.create_observation_streams(
self.visual_in,
self.processed_vector_in,
1,

vis_encode_type,
)
if self.policy.use_recurrent:
- self.create_memory_ins(m_size)
+ self._create_memory_ins(m_size)
- hidden_critic = self.create_observation_in(vis_encode_type)
+ hidden_critic = self._create_observation_in(vis_encode_type)
self.policy.output = self.policy.output
# Use the sequence length of the policy
self.sequence_length_ph = self.policy.sequence_length_ph

mem_outs = [self.value_memory_out, self.q1_memory_out, self.q2_memory_out]
self.memory_out = tf.concat(mem_outs, axis=1)
- def create_memory_ins(self, m_size):
+ def _create_memory_ins(self, m_size):
"""
Creates the memory input placeholders for LSTM.
:param m_size: the total size of the memory.

self.q1_memory_in = mem_ins[1]
self.q2_memory_in = mem_ins[2]
- def create_observation_in(self, vis_encode_type):
+ def _create_observation_in(self, vis_encode_type):
"""
Creates the observation inputs, and a CNN if needed,
:param vis_encode_type: Type of CNN encoder.

"""
with tf.variable_scope(POLICY_SCOPE):
- hidden_streams = LearningModel.create_observation_streams(
+ hidden_streams = ModelUtils.create_observation_streams(
self.policy.visual_in,
self.policy.processed_vector_in,
1,
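The underscore renames here (_create_memory_ins, _create_observation_in) mark the LSTM and observation plumbing as internal to the SAC networks. Reading the surrounding hunks (the combined memory is three times the policy's m_size, and memory_out concatenates the value, Q1, and Q2 memories), _create_memory_ins can be sketched roughly as follows; a hedged sketch inferred from context, not the repository's exact code:

import tensorflow as tf

def _create_memory_ins(self, m_size):
    # One placeholder holding the value, Q1, and Q2 memories concatenated
    # (placeholder name is illustrative).
    self.memory_in = tf.placeholder(
        shape=[None, m_size], dtype=tf.float32, name="recurrent_in"
    )
    # Split into three equal chunks, mirroring memory_out = tf.concat(mem_outs, axis=1).
    mem_ins = tf.split(self.memory_in, num_or_size_splits=3, axis=1)
    self.value_memory_in, self.q1_memory_in, self.q2_memory_in = mem_ins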

ml-agents/mlagents/trainers/sac/optimizer.py (30 changed lines)


from mlagents.tf_utils import tf
from mlagents.trainers.sac.network import SACPolicyNetwork, SACTargetNetwork
- from mlagents.trainers.models import LearningRateSchedule, EncoderType, LearningModel
+ from mlagents.trainers.models import LearningRateSchedule, EncoderType, ModelUtils
from mlagents.trainers.common.tf_optimizer import TFOptimizer
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer

)
# The optimizer's m_size is 3 times the policy (Q1, Q2, and Value)
self.m_size = 3 * self.policy.m_size
- self.create_inputs_and_outputs()
- self.learning_rate = LearningModel.create_learning_rate(
+ self._create_inputs_and_outputs()
+ self.learning_rate = ModelUtils.create_learning_rate(
lr_schedule, lr, self.policy.global_step, int(max_step)
)
self._create_losses(

stream_names,
discrete=not self.policy.use_continuous_act,
)
- self.create_sac_optimizers()
+ self._create_sac_optimizer_ops()
self.selected_actions = (
self.policy.selected_actions

"learning_rate": self.learning_rate,
}
- def create_inputs_and_outputs(self) -> None:
+ def _create_inputs_and_outputs(self) -> None:
"""
Assign the higher-level SACModel's inputs and outputs to those of its policy or
target network.

for name in stream_names:
if discrete:
- _branched_mpq1 = self.apply_as_branches(
+ _branched_mpq1 = self._apply_as_branches(
self.policy_network.q1_pheads[name] * discrete_action_probs
)
branched_mpq1 = tf.stack(

)
_q1_p_mean = tf.reduce_mean(branched_mpq1, axis=0)
- _branched_mpq2 = self.apply_as_branches(
+ _branched_mpq2 = self._apply_as_branches(
self.policy_network.q2_pheads[name] * discrete_action_probs
)
branched_mpq2 = tf.stack(

if discrete:
# We need to break up the Q functions by branch, and update them individually.
- branched_q1_stream = self.apply_as_branches(
+ branched_q1_stream = self._apply_as_branches(
- branched_q2_stream = self.apply_as_branches(
+ branched_q2_stream = self._apply_as_branches(
self.policy.action_oh * q2_streams[name]
)

self.ent_coef = tf.exp(self.log_ent_coef)
if discrete:
# We also have to do a different entropy and target_entropy per branch.
- branched_per_action_ent = self.apply_as_branches(per_action_entropy)
+ branched_per_action_ent = self._apply_as_branches(per_action_entropy)
branched_ent_sums = tf.stack(
[
tf.reduce_sum(_lp, axis=1, keep_dims=True) + _te

# Same with policy loss, we have to do the loss per branch and average them,
# so that larger branches don't get more weight.
# The equivalent KL divergence from Eq 10 of Haarnoja et al. is also pi*log(pi) - Q
- branched_q_term = self.apply_as_branches(
+ branched_q_term = self._apply_as_branches(
discrete_action_probs * self.policy_network.q1_p
)

self.entropy = self.policy_network.entropy
- def apply_as_branches(self, concat_logits: tf.Tensor) -> List[tf.Tensor]:
+ def _apply_as_branches(self, concat_logits: tf.Tensor) -> List[tf.Tensor]:
"""
Takes in a concatenated set of logits and breaks it up into a list of non-concatenated logits, one per
action branch

]
return branches_logits
- def create_sac_optimizers(self) -> None:
+ def _create_sac_optimizer_ops(self) -> None:
"""
Creates the Adam optimizers and update ops for SAC, including
the policy, value, and entropy updates, as well as the target network update.

indexed by name. If none, don't update the reward signals.
:return: Output from update process.
"""
- feed_dict = self.construct_feed_dict(self.policy, batch, num_sequences)
+ feed_dict = self._construct_feed_dict(self.policy, batch, num_sequences)
stats_needed = self.stats_name_to_update_name
update_stats: Dict[str, float] = {}
update_vals = self._execute_model(feed_dict, self.update_dict)

update_dict.update(self.reward_signals[name].update_dict)
stats_needed.update(self.reward_signals[name].stats_name_to_update_name)
- def construct_feed_dict(
+ def _construct_feed_dict(
self, policy: TFPolicy, batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""

ml-agents/mlagents/trainers/tests/test_nn_policy.py (26 changed lines)


import yaml
from mlagents.trainers.common.nn_policy import NNPolicy
- from mlagents.trainers.models import EncoderType, LearningModel
+ from mlagents.trainers.models import EncoderType, ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.brain import BrainParameters, CameraResolution
from mlagents.trainers.tests import mock_brain as mb

def test_min_visual_size():
# Make sure each EncoderType has an entry in MIS_RESOLUTION_FOR_ENCODER
- assert set(LearningModel.MIN_RESOLUTION_FOR_ENCODER.keys()) == set(EncoderType)
+ assert set(ModelUtils.MIN_RESOLUTION_FOR_ENCODER.keys()) == set(EncoderType)
- good_size = LearningModel.MIN_RESOLUTION_FOR_ENCODER[encoder_type]
+ good_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type]
- vis_input = LearningModel.create_visual_input(
- good_res, "test_min_visual_size"
- )
- LearningModel._check_resolution_for_encoder(vis_input, encoder_type)
- enc_func = LearningModel.get_encoder_for_type(encoder_type)
- enc_func(vis_input, 32, LearningModel.swish, 1, "test", False)
+ vis_input = ModelUtils.create_visual_input(good_res, "test_min_visual_size")
+ ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)
+ enc_func = ModelUtils.get_encoder_for_type(encoder_type)
+ enc_func(vis_input, 32, ModelUtils.swish, 1, "test", False)
- bad_size = LearningModel.MIN_RESOLUTION_FOR_ENCODER[encoder_type] - 1
+ bad_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type] - 1
- vis_input = LearningModel.create_visual_input(
+ vis_input = ModelUtils.create_visual_input(
- LearningModel._check_resolution_for_encoder(vis_input, encoder_type)
+ ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)
- enc_func = LearningModel.get_encoder_for_type(encoder_type)
- enc_func(vis_input, 32, LearningModel.swish, 1, "test", False)
+ enc_func = ModelUtils.get_encoder_for_type(encoder_type)
+ enc_func(vis_input, 32, ModelUtils.swish, 1, "test", False)
if __name__ == "__main__":

ml-agents/mlagents/trainers/tests/test_ppo.py (4 changed lines)


NUM_AGENTS = 12
- def create_ppo_optimizer_mock(dummy_config, use_rnn, use_discrete, use_visual):
+ def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visual):
mock_brain = mb.setup_mock_brain(
use_discrete,
use_visual,

def test_ppo_optimizer_update(dummy_config, rnn, visual, discrete):
# Test evaluate
tf.reset_default_graph()
- optimizer = create_ppo_optimizer_mock(
+ optimizer = _create_ppo_optimizer_ops_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
# Test update

ml-agents/mlagents/trainers/tf_policy.py (12 changed lines)


from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents_envs.base_env import BatchedStepResult
- from mlagents.trainers.models import LearningModel
+ from mlagents.trainers.models import ModelUtils
logger = logging.getLogger("mlagents.trainers")

def create_input_placeholders(self):
with self.graph.as_default():
self.global_step, self.increment_step_op, self.steps_to_increment = (
- LearningModel.create_global_steps()
+ ModelUtils.create_global_steps()
- self.visual_in = LearningModel.create_visual_input_placeholders(
+ self.visual_in = ModelUtils.create_visual_input_placeholders(
- self.vector_in = LearningModel.create_vector_input(self.vec_obs_size)
+ self.vector_in = ModelUtils.create_vector_input(self.vec_obs_size)
- normalization_tensors = LearningModel.create_normalizer(self.vector_in)
+ normalization_tensors = ModelUtils.create_normalizer(self.vector_in)
- self.processed_vector_in = LearningModel.normalize_vector_obs(
+ self.processed_vector_in = ModelUtils.normalize_vector_obs(
self.vector_in,
self.running_mean,
self.running_variance,
