
[refactor] Remove BrainParameters from Python code (#4138)

/MLA-1734-demo-provider
GitHub · 4 years ago
Current commit
3bcb029b
38 files changed: 466 insertions, 819 deletions
  1. ml-agents/mlagents/trainers/agent_processor.py (2 changes)
  2. ml-agents/mlagents/trainers/behavior_id_utils.py (7 changes)
  3. ml-agents/mlagents/trainers/components/bc/model.py (2 changes)
  4. ml-agents/mlagents/trainers/components/bc/module.py (8 changes)
  5. ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (35 changes)
  6. ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (32 changes)
  7. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (2 changes)
  8. ml-agents/mlagents/trainers/demo_loader.py (74 changes)
  9. ml-agents/mlagents/trainers/env_manager.py (5 changes)
  10. ml-agents/mlagents/trainers/ghost/trainer.py (8 changes)
  11. ml-agents/mlagents/trainers/models.py (54 changes)
  12. ml-agents/mlagents/trainers/policy/nn_policy.py (7 changes)
  13. ml-agents/mlagents/trainers/policy/tf_policy.py (42 changes)
  14. ml-agents/mlagents/trainers/ppo/trainer.py (8 changes)
  15. ml-agents/mlagents/trainers/sac/network.py (5 changes)
  16. ml-agents/mlagents/trainers/sac/trainer.py (6 changes)
  17. ml-agents/mlagents/trainers/simple_env_manager.py (13 changes)
  18. ml-agents/mlagents/trainers/subprocess_env_manager.py (22 changes)
  19. ml-agents/mlagents/trainers/tests/mock_brain.py (187 changes)
  20. ml-agents/mlagents/trainers/tests/test_agent_processor.py (38 changes)
  21. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (4 changes)
  22. ml-agents/mlagents/trainers/tests/test_bcmodule.py (37 changes)
  23. ml-agents/mlagents/trainers/tests/test_demo_loader.py (70 changes)
  24. ml-agents/mlagents/trainers/tests/test_ghost.py (100 changes)
  25. ml-agents/mlagents/trainers/tests/test_nn_policy.py (49 changes)
  26. ml-agents/mlagents/trainers/tests/test_ppo.py (138 changes)
  27. ml-agents/mlagents/trainers/tests/test_reward_signals.py (17 changes)
  28. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (19 changes)
  29. ml-agents/mlagents/trainers/tests/test_sac.py (86 changes)
  30. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (6 changes)
  31. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (4 changes)
  32. ml-agents/mlagents/trainers/tests/test_trainer_util.py (29 changes)
  33. ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)
  34. ml-agents/mlagents/trainers/trainer/trainer.py (6 changes)
  35. ml-agents/mlagents/trainers/trainer_controller.py (4 changes)
  36. ml-agents/mlagents/trainers/tests/test_models.py (36 changes)
  37. ml-agents/mlagents/trainers/brain.py (88 changes)
  38. ml-agents/mlagents/trainers/brain_conversion_utils.py (31 changes)

2
ml-agents/mlagents/trainers/agent_processor.py


from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents.trainers.behavior_id_utils import get_global_agent_id
T = TypeVar("T")

7
ml-agents/mlagents/trainers/behavior_id_utils.py


:return: name_behavior_id
"""
return name + "?team=" + str(team_id)
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""
return f"${worker_id}-{agent_id}"
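Note: the global agent id helper moves from brain_conversion_utils into behavior_id_utils, next to the other behavior-name utilities. A minimal usage sketch, with illustrative worker and agent ids (not taken from the diff):

from mlagents.trainers.behavior_id_utils import get_global_agent_id

# Combines the environment worker id with the per-environment agent id so that
# ids stay unique across workers; with the format string above this yields "$1-42".
global_id = get_global_agent_id(worker_id=1, agent_id=42)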

2
ml-agents/mlagents/trainers/components/bc/model.py


self.done_expert = tf.placeholder(shape=[None, 1], dtype=tf.float32)
self.done_policy = tf.placeholder(shape=[None, 1], dtype=tf.float32)
if self.policy.brain.vector_action_space_type == "continuous":
if self.policy.behavior_spec.is_action_continuous():
action_length = self.policy.act_size[0]
self.action_in_expert = tf.placeholder(
shape=[None, action_length], dtype=tf.float32

8
ml-agents/mlagents/trainers/components/bc/module.py


self.current_lr = policy_learning_rate * settings.strength
self.model = BCModel(policy, self.current_lr, settings.steps)
_, self.demonstration_buffer = demo_to_buffer(
settings.demo_path, policy.sequence_length, policy.brain
settings.demo_path, policy.sequence_length, policy.behavior_spec
)
self.batch_size = (

self.policy.sequence_length_ph: self.policy.sequence_length,
}
feed_dict[self.model.action_in_expert] = mini_batch_demo["actions"]
if not self.policy.use_continuous_act:
if self.policy.behavior_spec.is_action_discrete():
sum(self.policy.brain.vector_action_space_size),
sum(self.policy.behavior_spec.discrete_action_branches),
if self.policy.brain.vector_observation_space_size > 0:
if self.policy.vec_obs_size > 0:
feed_dict[self.policy.vector_in] = mini_batch_demo["vector_obs"]
for i, _ in enumerate(self.policy.visual_in):
feed_dict[self.policy.visual_in[i]] = mini_batch_demo["visual_obs%d" % i]

35
ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py


encoded_state_list = []
encoded_next_state_list = []
if self.policy.vis_obs_size > 0:
self.next_visual_in = []
# Create input ops for next (t+1) visual observations.
self.next_vector_in, self.next_visual_in = ModelUtils.create_input_placeholders(
self.policy.behavior_spec.observation_shapes, name_prefix="curiosity_next_"
)
if self.next_visual_in:
for i in range(self.policy.vis_obs_size):
# Create input ops for next (t+1) visual observations.
next_visual_input = ModelUtils.create_visual_input(
self.policy.brain.camera_resolutions[i],
name="curiosity_next_visual_observation_" + str(i),
)
self.next_visual_in.append(next_visual_input)
for i, (vis_in, next_vis_in) in enumerate(
zip(self.policy.visual_in, self.next_visual_in)
):
self.policy.visual_in[i],
vis_in,
self.encoding_size,
ModelUtils.swish,
1,

encoded_next_visual = ModelUtils.create_visual_observation_encoder(
self.next_visual_in[i],
next_vis_in,
self.encoding_size,
ModelUtils.swish,
1,

encoded_next_state_list.append(hidden_next_visual)
if self.policy.vec_obs_size > 0:
# Create the encoder ops for current and next vector input.
# Note that these encoders are siamese.
# Create input op for next (t+1) vector observation.
self.next_vector_in = tf.placeholder(
shape=[None, self.policy.vec_obs_size],
dtype=tf.float32,
name="curiosity_next_vector_observation",
)
encoded_vector_obs = ModelUtils.create_vector_observation_encoder(
self.policy.vector_in,
self.encoding_size,

)
encoded_state_list.append(encoded_vector_obs)
encoded_next_state_list.append(encoded_next_vector_obs)
encoded_state = tf.concat(encoded_state_list, axis=1)
encoded_next_state = tf.concat(encoded_next_state_list, axis=1)
return encoded_state, encoded_next_state

"""
combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
if self.policy.brain.vector_action_space_type == "continuous":
if self.policy.behavior_spec.is_action_continuous():
pred_action = tf.layers.dense(
hidden, self.policy.act_size[0], activation=None
)

32
ml-agents/mlagents/trainers/components/reward_signals/gail/model.py


from typing import List, Optional, Tuple
from typing import Optional, Tuple
from mlagents.tf_utils import tf

self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
self.done_policy = tf.expand_dims(self.done_policy_holder, -1)
if self.policy.brain.vector_action_space_type == "continuous":
if self.policy.behavior_spec.is_action_continuous():
action_length = self.policy.act_size[0]
self.action_in_expert = tf.placeholder(
shape=[None, action_length], dtype=tf.float32

encoded_policy_list = []
encoded_expert_list = []
(
self.obs_in_expert,
self.expert_visual_in,
) = ModelUtils.create_input_placeholders(
self.policy.behavior_spec.observation_shapes, "gail_"
)
self.obs_in_expert = tf.placeholder(
shape=[None, self.policy.vec_obs_size], dtype=tf.float32
)
if self.policy.normalize:
encoded_expert_list.append(
ModelUtils.normalize_vector_obs(

encoded_expert_list.append(self.obs_in_expert)
encoded_policy_list.append(self.policy.vector_in)
if self.policy.vis_obs_size > 0:
self.expert_visual_in: List[tf.Tensor] = []
if self.expert_visual_in:
for i in range(self.policy.vis_obs_size):
# Create input ops for next (t+1) visual observations.
visual_input = ModelUtils.create_visual_input(
self.policy.brain.camera_resolutions[i],
name="gail_visual_observation_" + str(i),
)
self.expert_visual_in.append(visual_input)
for i, (vis_in, exp_vis_in) in enumerate(
zip(self.policy.visual_in, self.expert_visual_in)
):
self.policy.visual_in[i],
vis_in,
self.encoding_size,
ModelUtils.swish,
1,

encoded_expert_visual = ModelUtils.create_visual_observation_encoder(
self.expert_visual_in[i],
exp_vis_in,
self.encoding_size,
ModelUtils.swish,
1,

2
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


settings.use_vail,
)
_, self.demonstration_buffer = demo_to_buffer(
settings.demo_path, policy.sequence_length, policy.brain
settings.demo_path, policy.sequence_length, policy.behavior_spec
)
self.has_updated = False
self.update_dict: Dict[str, tf.Tensor] = {

74
ml-agents/mlagents/trainers/demo_loader.py


from typing import List, Tuple
import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.brain_conversion_utils import behavior_spec_to_brain_parameters
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)

@timed
def demo_to_buffer(
file_path: str, sequence_length: int, expected_brain_params: BrainParameters = None
) -> Tuple[BrainParameters, AgentBuffer]:
file_path: str, sequence_length: int, expected_behavior_spec: BehaviorSpec = None
) -> Tuple[BehaviorSpec, AgentBuffer]:
"""
Loads demonstration file and uses it to fill training buffer.
:param file_path: Location of demonstration file (.demo).

behavior_spec, info_action_pair, _ = load_demonstration(file_path)
demo_buffer = make_demo_buffer(info_action_pair, behavior_spec, sequence_length)
brain_params = behavior_spec_to_brain_parameters("DemoBrain", behavior_spec)
if expected_brain_params:
if expected_behavior_spec:
if (
brain_params.vector_action_space_size
!= expected_brain_params.vector_action_space_size
):
if behavior_spec.action_shape != expected_behavior_spec.action_shape:
brain_params.vector_action_space_size,
expected_brain_params.vector_action_space_size,
behavior_spec.action_shape, expected_behavior_spec.action_shape
if (
brain_params.vector_action_space_type
!= expected_brain_params.vector_action_space_type
):
if behavior_spec.action_type != expected_behavior_spec.action_type:
brain_params.vector_action_space_type,
expected_brain_params.vector_action_space_type,
behavior_spec.action_type, expected_behavior_spec.action_type
# check number of vector observations in demonstration match
if (
brain_params.vector_observation_space_size
!= expected_brain_params.vector_observation_space_size
# check observations match
if len(behavior_spec.observation_shapes) != len(
expected_behavior_spec.observation_shapes
"The vector observation dimensions of {} in demonstration do not match the policy's {}.".format(
brain_params.vector_observation_space_size,
expected_brain_params.vector_observation_space_size,
)
"The demonstrations do not have the same number of observations as the policy."
# check number of visual observations/resolutions in demonstration match
if (
brain_params.number_visual_observations
!= expected_brain_params.number_visual_observations
):
raise RuntimeError(
"Number of visual observations {} in demonstrations do not match the policy's {}.".format(
brain_params.number_visual_observations,
expected_brain_params.number_visual_observations,
else:
for i, (demo_obs, policy_obs) in enumerate(
zip(
behavior_spec.observation_shapes,
expected_behavior_spec.observation_shapes,
)
for i, (resolution, expected_resolution) in enumerate(
zip(
brain_params.camera_resolutions,
expected_brain_params.camera_resolutions,
)
):
if resolution != expected_resolution:
raise RuntimeError(
"The resolution of visual observation {} in demonstrations do not match the policy's.".format(
i
):
if demo_obs != policy_obs:
raise RuntimeError(
f"The shape {demo_obs} for observation {i} in demonstration \
do not match the policy's {policy_obs}."
)
return brain_params, demo_buffer
return behavior_spec, demo_buffer
def get_demo_files(path: str) -> List[str]:

@timed
def load_demonstration(
file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
file_path: str,
) -> Tuple[BehaviorSpec, List[AgentInfoActionPairProto], int]:
"""
Loads and parses a demonstration file.
:param file_path: Location of demonstration file (.demo).
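Note: demo_to_buffer now takes and returns a BehaviorSpec instead of BrainParameters, and validates the demonstration against the expected spec when one is provided. A hedged sketch of a call site; the .demo path below is a placeholder:

from mlagents.trainers.demo_loader import demo_to_buffer

# Parse the demonstration and fill an AgentBuffer with its experiences.
# Passing expected_behavior_spec (e.g. policy.behavior_spec) would make the
# loader raise RuntimeError on an action or observation mismatch.
behavior_spec, demo_buffer = demo_to_buffer("Demos/3DBall.demo", sequence_length=1)
print(behavior_spec.observation_shapes, len(demo_buffer["actions"]))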

5
ml-agents/mlagents/trainers/env_manager.py


)
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
from mlagents.trainers.action_info import ActionInfo

@property
@abstractmethod
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
pass
@abstractmethod

self._process_step_infos(self.first_step_infos)
self.first_step_infos = None
# Get new policies if found. Always get the latest policy.
for brain_name in self.external_brains:
for brain_name in self.training_behaviors:
_policy = None
try:
# We make sure to empty the policy queue before continuing to produce steps.

8
ml-agents/mlagents/trainers/ghost/trainer.py


import numpy as np
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.brain import BrainParameters
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy

self.trainer.export_model(brain_name)
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TFPolicy:
"""
Creates policy with the wrapped trainer's create_policy function

wrapped trainer to be trained.
"""
policy = self.trainer.create_policy(parsed_behavior_id, brain_parameters)
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy.initialize_or_load()
policy.init_load_weights()

# First policy or a new agent on the same team encountered
if self.wrapped_trainer_team is None or team_id == self.wrapped_trainer_team:
internal_trainer_policy = self.trainer.create_policy(
parsed_behavior_id, brain_parameters
parsed_behavior_id, behavior_spec
)
self.trainer.add_policy(parsed_behavior_id, internal_trainer_policy)
internal_trainer_policy.init_load_weights()

54
ml-agents/mlagents/trainers/models.py


from mlagents.tf_utils import tf
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.brain import CameraResolution
ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
EncoderFunction = Callable[

EPSILON = 1e-7
class Tensor3DShape(NamedTuple):
height: int
width: int
num_channels: int
class EncoderType(Enum):

return tf.multiply(input_activation, tf.nn.sigmoid(input_activation))
@staticmethod
def create_visual_input(
camera_parameters: CameraResolution, name: str
) -> tf.Tensor:
def create_visual_input(camera_parameters: Tensor3DShape, name: str) -> tf.Tensor:
"""
Creates image input op.
:param camera_parameters: Parameters for visual observation.

return visual_in
@staticmethod
def create_visual_input_placeholders(
camera_resolutions: List[CameraResolution]
) -> List[tf.Tensor]:
def create_input_placeholders(
observation_shapes: List[Tuple], name_prefix: str = ""
) -> Tuple[tf.Tensor, List[tf.Tensor]]:
:param camera_resolutions: A List of CameraResolutions that specify the resolutions
of the input visual observations.
:param observation_shapes: A List of tuples that specify the resolutions
of the input observations. Tuples for now are restricted to 1D (vector) or 3D (Tensor)
:param name_prefix: A name prefix to add to the placeholder names. This is used so that there
is no conflict when creating multiple placeholder sets.
for i, camera_resolution in enumerate(camera_resolutions):
visual_input = ModelUtils.create_visual_input(
camera_resolution, name="visual_observation_" + str(i)
)
visual_in.append(visual_input)
return visual_in
vector_in_size = 0
for i, dimension in enumerate(observation_shapes):
if len(dimension) == 3:
_res = Tensor3DShape(
height=dimension[0], width=dimension[1], num_channels=dimension[2]
)
visual_input = ModelUtils.create_visual_input(
_res, name=name_prefix + "visual_observation_" + str(i)
)
visual_in.append(visual_input)
elif len(dimension) == 1:
vector_in_size += dimension[0]
else:
raise UnityTrainerException(
f"Unsupported shape of {dimension} for observation {i}"
)
vector_in = tf.placeholder(
shape=[None, vector_in_size],
dtype=tf.float32,
name=name_prefix + "vector_observation",
)
return vector_in, visual_in
@staticmethod
def create_vector_input(

)
visual_encoders.append(encoded_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
if vector_in.get_shape()[-1] > 0: # Don't encode 0-shape inputs
if vector_in.get_shape()[-1] > 0:
# Don't encode non-existant or 0-shape inputs
hidden_state = ModelUtils.create_vector_observation_encoder(
vector_observation_input,
h_size,
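Note: create_visual_input_placeholders is folded into a single create_input_placeholders that builds both the vector placeholder and the visual placeholders from a list of observation shapes. A short sketch under assumed shapes (one 8-dimensional vector observation plus one 84x84x3 visual observation):

from mlagents.trainers.models import ModelUtils

# Returns a [None, 8] float32 vector placeholder and a list with one
# [None, 84, 84, 3] visual placeholder; the prefix keeps placeholder names
# unique when several sets are created in the same graph.
vector_in, visual_in = ModelUtils.create_input_placeholders(
    [(8,), (84, 84, 3)], name_prefix="curiosity_next_"
)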

7
ml-agents/mlagents/trainers/policy/nn_policy.py


from typing import Any, Dict, Optional, List
from mlagents.tf_utils import tf
from mlagents_envs.timers import timed
from mlagents_envs.base_env import DecisionSteps
from mlagents.trainers.brain import BrainParameters
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents.trainers.models import EncoderType
from mlagents.trainers.models import ModelUtils
from mlagents.trainers.policy.tf_policy import TFPolicy

def __init__(
self,
seed: int,
brain: BrainParameters,
behavior_spec: BehaviorSpec,
trainer_params: TrainerSettings,
is_training: bool,
model_path: str,

:param tanh_squash: Whether to use a tanh function on the continuous output, or a clipped output.
:param reparameterize: Whether we are using the resampling trick to update the policy in continuous output.
"""
super().__init__(seed, brain, trainer_params, model_path, load)
super().__init__(seed, behavior_spec, trainer_params, model_path, load)
self.grads = None
self.update_batch: Optional[tf.Operation] = None
num_layers = self.network_settings.num_layers

42
ml-agents/mlagents/trainers/policy/tf_policy.py


from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers import __version__

def __init__(
self,
seed: int,
brain: BrainParameters,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,

self.update_dict: Dict[str, tf.Tensor] = {}
self.sequence_length = 1
self.seed = seed
self.brain = brain
self.behavior_spec = behavior_spec
self.act_size = brain.vector_action_space_size
self.vec_obs_size = brain.vector_observation_space_size
self.vis_obs_size = brain.number_visual_observations
self.act_size = (
list(behavior_spec.discrete_action_branches)
if behavior_spec.is_action_discrete()
else [behavior_spec.action_size]
)
self.vec_obs_size = sum(
shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
)
self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.num_branches = len(self.brain.vector_action_space_size)
self.num_branches = self.behavior_spec.action_size
self.use_continuous_act = brain.vector_action_space_type == "continuous"
if self.use_continuous_act:
self.num_branches = self.brain.vector_action_space_size[0]
self.use_continuous_act = behavior_spec.is_action_continuous()
self.model_path = model_path
self.initialize_path = self.trainer_settings.init_path
self.keep_checkpoints = self.trainer_settings.keep_checkpoints

feed_dict[self.vector_in] = vec_vis_obs.vector_observations
if not self.use_continuous_act:
mask = np.ones(
(len(batched_step_result), np.sum(self.brain.vector_action_space_size)),
(
len(batched_step_result),
sum(self.behavior_spec.discrete_action_branches),
),
dtype=np.float32,
)
if batched_step_result.action_mask is not None:

self.increment_step_op,
self.steps_to_increment,
) = ModelUtils.create_global_steps()
self.visual_in = ModelUtils.create_visual_input_placeholders(
self.brain.camera_resolutions
self.vector_in, self.visual_in = ModelUtils.create_input_placeholders(
self.behavior_spec.observation_shapes
self.vector_in = ModelUtils.create_vector_input(self.vec_obs_size)
if self.normalize:
normalization_tensors = ModelUtils.create_normalizer(self.vector_in)
self.update_normalization_op = normalization_tensors.update_op

self.mask = tf.cast(self.mask_input, tf.int32)
tf.Variable(
int(self.brain.vector_action_space_type == "continuous"),
int(self.behavior_spec.is_action_continuous()),
name="is_continuous_control",
trainable=False,
dtype=tf.int32,

tf.Variable(
self.m_size, name="memory_size", trainable=False, dtype=tf.int32
)
if self.brain.vector_action_space_type == "continuous":
if self.behavior_spec.is_action_continuous():
tf.Variable(
self.act_size[0],
name="action_output_shape",
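Note: TFPolicy now derives act_size, vec_obs_size and vis_obs_size from the BehaviorSpec rather than reading them off BrainParameters. An illustrative walk-through of that derivation on an assumed spec (two discrete branches of sizes 3 and 2, one vector and one visual observation); this is a sketch, not code from the diff:

from mlagents_envs.base_env import ActionType, BehaviorSpec

spec = BehaviorSpec([(8,), (84, 84, 3)], ActionType.DISCRETE, (3, 2))
act_size = (
    list(spec.discrete_action_branches)  # -> [3, 2]
    if spec.is_action_discrete()
    else [spec.action_size]
)
vec_obs_size = sum(s[0] for s in spec.observation_shapes if len(s) == 1)  # -> 8
vis_obs_size = sum(1 for s in spec.observation_shapes if len(s) == 3)     # -> 1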

8
ml-agents/mlagents/trainers/ppo/trainer.py


import numpy as np
from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory

return True
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
:param brain_parameters: specifications for policy construction
:param behavior_spec: specifications for policy construction
brain_parameters,
behavior_spec,
self.trainer_settings,
self.is_training,
self.artifact_path,

5
ml-agents/mlagents/trainers/sac/network.py


vis_encode_type,
)
with tf.variable_scope(TARGET_SCOPE):
self.visual_in = ModelUtils.create_visual_input_placeholders(
policy.brain.camera_resolutions
self.vector_in, self.visual_in = ModelUtils.create_input_placeholders(
self.policy.behavior_spec.observation_shapes
self.vector_in = ModelUtils.create_vector_input(policy.vec_obs_size)
if self.policy.normalize:
normalization_tensors = ModelUtils.create_normalizer(self.vector_in)
self.update_normalization_op = normalization_tensors.update_op

6
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import timed
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, SACSettings

return policy_was_updated
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
brain_parameters,
behavior_spec,
self.trainer_settings,
self.is_training,
self.artifact_path,

13
ml-agents/mlagents/trainers/simple_env_manager.py


from typing import Dict, List
from mlagents_envs.base_env import BaseEnv, BehaviorName
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.brain_conversion_utils import behavior_spec_to_brain_parameters
class SimpleEnvManager(EnvManager):

v.apply(k, self.env_params)
@property
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
result = {}
for behavior_name, behavior_spec in self.env.behavior_specs.items():
result[behavior_name] = behavior_spec_to_brain_parameters(
behavior_name, behavior_spec
)
return result
def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
return self.env.behavior_specs
def close(self):
self.env.close()

22
ml-agents/mlagents/trainers/subprocess_env_manager.py


from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from queue import Empty as EmptyQueueException
from mlagents_envs.base_env import BaseEnv, BehaviorName
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs import logging_util
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents_envs.timers import (

reset_timers,
get_timer_root,
)
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.settings import ParameterRandomizationSettings
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.side_channel.environment_parameters_channel import (

StatsAggregationMethod,
)
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents.trainers.brain_conversion_utils import behavior_spec_to_brain_parameters
logger = logging_util.get_logger(__name__)

STEP = 1
EXTERNAL_BRAINS = 2
BEHAVIOR_SPECS = 2
ENVIRONMENT_PARAMETERS = 3
RESET = 4
CLOSE = 5

all_step_result[brain_name] = env.get_steps(brain_name)
return all_step_result
def external_brains():
result = {}
for behavior_name, behavior_specs in env.behavior_specs.items():
result[behavior_name] = behavior_spec_to_brain_parameters(
behavior_name, behavior_specs
)
return result
try:
env = env_factory(
worker_id, [env_parameters, engine_configuration_channel, stats_channel]

)
)
reset_timers()
elif req.cmd == EnvironmentCommand.EXTERNAL_BRAINS:
_send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains())
elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
_send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
for k, v in req.payload.items():
if isinstance(v, float):

ew.send(EnvironmentCommand.ENVIRONMENT_PARAMETERS, config)
@property
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
self.env_workers[0].send(EnvironmentCommand.EXTERNAL_BRAINS)
def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
self.env_workers[0].send(EnvironmentCommand.BEHAVIOR_SPECS)
return self.env_workers[0].recv().payload
def close(self) -> None:

187
ml-agents/mlagents/trainers/tests/mock_brain.py


from unittest import mock
from typing import List, Tuple
from typing import List, Tuple, Union
from collections.abc import Iterable
from mlagents.trainers.brain import CameraResolution, BrainParameters
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents_envs.base_env import (

)
def create_mock_brainparams(
number_visual_observations=0,
vector_action_space_type="continuous",
vector_observation_space_size=3,
vector_action_space_size=None,
):
"""
Creates a mock BrainParameters object with parameters.
"""
# Avoid using mutable object as default param
if vector_action_space_size is None:
vector_action_space_size = [2]
mock_brain = mock.Mock()
mock_brain.return_value.number_visual_observations = number_visual_observations
mock_brain.return_value.vector_action_space_type = vector_action_space_type
mock_brain.return_value.vector_observation_space_size = (
vector_observation_space_size
)
camrez = CameraResolution(height=84, width=84, num_channels=3)
mock_brain.return_value.camera_resolutions = [camrez] * number_visual_observations
mock_brain.return_value.vector_action_space_size = vector_action_space_size
mock_brain.return_value.brain_name = "MockBrain"
return mock_brain()
num_agents: int = 1,
num_vector_observations: int = 0,
num_vis_observations: int = 0,
action_shape: List[int] = None,
num_agents: int,
observation_shapes: List[Tuple],
action_shape: Union[int, Tuple[int]] = None,
discrete: bool = False,
done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:

:int num_agents: Number of "agents" to imitate.
:int num_vector_observations: Number of "observations" in your observation space
:int num_vis_observations: Number of "observations" in your observation space
:List observation_shapes: A List of the observation spaces in your steps
action_shape = [2]
action_shape = 2
for _ in range(num_vis_observations):
obs_list.append(np.ones((num_agents, 84, 84, 3), dtype=np.float32))
if num_vector_observations > 1:
obs_list.append(
np.array(num_agents * [num_vector_observations * [1]], dtype=np.float32)
)
for _shape in observation_shapes:
obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
if discrete:
if discrete and isinstance(action_shape, Iterable):
for action_size in action_shape
]
for action_size in action_shape # type: ignore
] # type: ignore
[(84, 84, 3)] * num_vis_observations + [(num_vector_observations, 0, 0)],
observation_shapes,
action_shape if discrete else action_shape[0],
action_shape,
)
if done:
return (

)
def create_steps_from_brainparams(
brain_params: BrainParameters, num_agents: int = 1
def create_steps_from_behavior_spec(
behavior_spec: BehaviorSpec, num_agents: int = 1
num_vector_observations=brain_params.vector_observation_space_size,
num_vis_observations=brain_params.number_visual_observations,
action_shape=brain_params.vector_action_space_size,
discrete=brain_params.vector_action_space_type == "discrete",
observation_shapes=behavior_spec.observation_shapes,
action_shape=behavior_spec.action_shape,
discrete=behavior_spec.is_action_discrete(),
observation_shapes: List[Tuple],
vec_obs_size: int = 1,
num_vis_obs: int = 1,
action_space: List[int] = None,
action_space: Union[int, Tuple[int]] = 2,
memory_size: int = 10,
is_discrete: bool = True,
) -> Trajectory:

"""
if action_space is None:
action_space = [2]
for _j in range(num_vis_obs):
obs.append(np.ones((84, 84, 3), dtype=np.float32))
obs.append(np.ones(vec_obs_size, dtype=np.float32))
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
action_size = len(action_space)
action_size = len(action_space) # type: ignore
action_size = action_space[0]
action_size = int(action_space) # type: ignore
[[False for _ in range(branch)] for branch in action_space]
[[False for _ in range(branch)] for branch in action_space] # type: ignore
if is_discrete
else None
)

def simulate_rollout(
length: int,
brain_params: BrainParameters,
behavior_spec: BehaviorSpec,
vec_obs_size = brain_params.vector_observation_space_size
num_vis_obs = brain_params.number_visual_observations
action_space = brain_params.vector_action_space_size
is_discrete = brain_params.vector_action_space_type == "discrete"
action_space = behavior_spec.action_shape
is_discrete = behavior_spec.is_action_discrete()
vec_obs_size=vec_obs_size,
num_vis_obs=num_vis_obs,
behavior_spec.observation_shapes,
action_space=action_space,
memory_size=memory_size,
is_discrete=is_discrete,

return buffer
def setup_mock_brain(
use_discrete,
use_visual,
discrete_action_space=None,
vector_action_space=None,
vector_obs_space=8,
def setup_test_behavior_specs(
use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
# defaults
discrete_action_space = (
[3, 3, 3, 2] if discrete_action_space is None else discrete_action_space
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)],
ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS,
tuple(vector_action_space) if use_discrete else vector_action_space,
vector_action_space = [2] if vector_action_space is None else vector_action_space
return behavior_spec
if not use_visual:
mock_brain = create_mock_brainparams(
vector_action_space_type="discrete" if use_discrete else "continuous",
vector_action_space_size=discrete_action_space
if use_discrete
else vector_action_space,
vector_observation_space_size=vector_obs_space,
)
else:
mock_brain = create_mock_brainparams(
vector_action_space_type="discrete" if use_discrete else "continuous",
vector_action_space_size=discrete_action_space
if use_discrete
else vector_action_space,
vector_observation_space_size=0,
number_visual_observations=1,
)
return mock_brain
def create_mock_3dball_brain():
mock_brain = create_mock_brainparams(
vector_action_space_type="continuous",
vector_action_space_size=[2],
vector_observation_space_size=8,
def create_mock_3dball_behavior_specs():
return setup_test_behavior_specs(
False, False, vector_action_space=2, vector_obs_space=8
mock_brain.brain_name = "Ball3DBrain"
return mock_brain
def create_mock_pushblock_brain():
mock_brain = create_mock_brainparams(
vector_action_space_type="discrete",
vector_action_space_size=[7],
vector_observation_space_size=70,
)
mock_brain.brain_name = "PushblockLearning"
return mock_brain
def create_mock_banana_brain():
mock_brain = create_mock_brainparams(
number_visual_observations=1,
vector_action_space_type="discrete",
vector_action_space_size=[3, 3, 3, 2],
vector_observation_space_size=0,
def create_mock_pushblock_behavior_specs():
return setup_test_behavior_specs(
True, False, vector_action_space=7, vector_obs_space=70
return mock_brain
def make_brain_parameters(
discrete_action: bool = False,
visual_inputs: int = 0,
brain_name: str = "RealFakeBrain",
vec_obs_size: int = 6,
) -> BrainParameters:
resolutions = [
CameraResolution(width=30, height=40, num_channels=3)
for _ in range(visual_inputs)
]
return BrainParameters(
vector_observation_space_size=vec_obs_size,
camera_resolutions=resolutions,
vector_action_space_size=[2],
vector_action_descriptions=["", ""],
vector_action_space_type=int(not discrete_action),
brain_name=brain_name,
def create_mock_banana_behavior_specs():
return setup_test_behavior_specs(
True, True, vector_action_space=[3, 3, 3, 2], vector_obs_space=0
)
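Note: the mock-brain helpers are rebuilt around real BehaviorSpec objects instead of mocked BrainParameters. A hedged sketch of building a continuous-control spec with the new helper, using the 3DBall-style values that appear elsewhere in this diff:

from mlagents.trainers.tests import mock_brain as mb

specs = mb.setup_test_behavior_specs(
    use_discrete=False, use_visual=False, vector_action_space=2, vector_obs_space=8
)
# With use_visual=False the spec carries a single (8,)-shaped vector observation
# and a continuous action of size 2.
assert specs.is_action_continuous()
assert specs.observation_shapes == [(8,)]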

38
ml-agents/mlagents/trainers/tests/test_agent_processor.py


from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.stats import StatsReporter, StatsSummary
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents.trainers.behavior_id_utils import get_global_agent_id
def create_mock_brain():
mock_brain = mb.create_mock_brainparams(
vector_action_space_type="continuous",
vector_action_space_size=[2],
vector_observation_space_size=8,
number_visual_observations=1,
)
return mock_brain
def create_mock_policy():
mock_policy = mock.Mock()
mock_policy.reward_signals = {}

}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
num_vector_observations=8,
action_shape=[2],
num_vis_observations=num_vis_obs,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
action_shape=2,
)
fake_action_info = ActionInfo(
action=[0.1, 0.1],

# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
num_vector_observations=8,
action_shape=[2],
num_vis_observations=num_vis_obs,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
action_shape=2,
)
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])

"log_probs": [0.1],
}
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
num_vector_observations=8,
action_shape=[2],
num_vis_observations=0,
num_agents=1, observation_shapes=[(8,)], action_shape=2
num_agents=1,
num_vector_observations=8,
action_shape=[2],
num_vis_observations=0,
done=True,
num_agents=1, observation_shapes=[(8,)], action_shape=2, done=True
)
fake_action_info = ActionInfo(
action=[0.1],

"log_probs": [0.1],
}
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
num_vector_observations=8,
action_shape=[2],
num_vis_observations=0,
num_agents=1, observation_shapes=[(8,)], action_shape=2
)
fake_action_info = ActionInfo(
action=[0.1],

4
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


use_visual=visual,
)
policy.save_model(1000)
settings = SerializationSettings(
policy.model_path, os.path.join(tmpdir, policy.brain.brain_name)
)
settings = SerializationSettings(policy.model_path, os.path.join(tmpdir, "test"))
export_policy_model(settings, policy.graph, policy.sess)
# These checks taken from test_barracuda_converter

37
ml-agents/mlagents/trainers/tests/test_bcmodule.py


)
def create_bc_module(mock_brain, bc_settings, use_rnn, tanhresample):
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
# model_path = env.external_brain_names[0]
trainer_config = TrainerSettings()
trainer_config.network_settings.memory = (

0, mock_brain, trainer_config, False, "test", False, tanhresample, tanhresample
0,
mock_behavior_specs,
trainer_config,
False,
"test",
False,
tanhresample,
tanhresample,
)
with policy.graph.as_default():
bc_module = BCModule(

# Test default values
def test_bcmodule_defaults():
# See if default values match
mock_brain = mb.create_mock_3dball_brain()
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_module = create_bc_module(mock_brain, bc_settings, False, False)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 3
assert bc_module.batch_size == TrainerSettings().hyperparameters.batch_size
# Assign strange values and see if it overrides properly

batch_size=10000,
)
bc_module = create_bc_module(mock_brain, bc_settings, False, False)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 100
assert bc_module.batch_size == 10000

def test_bcmodule_update(is_sac):
mock_brain = mb.create_mock_3dball_brain()
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_module = create_bc_module(mock_brain, bc_settings, False, is_sac)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)

@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_constant_lr_update(is_sac):
mock_brain = mb.create_mock_3dball_brain()
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_module = create_bc_module(mock_brain, bc_settings, False, is_sac)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)

# Test with RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_update(is_sac):
mock_brain = mb.create_mock_3dball_brain()
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_module = create_bc_module(mock_brain, bc_settings, True, is_sac)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)

@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_dc_visual_update(is_sac):
mock_brain = mb.create_mock_banana_brain()
mock_specs = mb.create_mock_banana_behavior_specs()
bc_module = create_bc_module(mock_brain, bc_settings, False, is_sac)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)

@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_dc_update(is_sac):
mock_brain = mb.create_mock_banana_brain()
mock_specs = mb.create_mock_banana_behavior_specs()
bc_module = create_bc_module(mock_brain, bc_settings, True, is_sac)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)

70
ml-agents/mlagents/trainers/tests/test_demo_loader.py


from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.tests.mock_brain import (
create_mock_3dball_behavior_specs,
setup_test_behavior_specs,
)
from mlagents.trainers.demo_loader import (
load_demonstration,
demo_to_buffer,

BRAIN_PARAMS = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=1,
)
BEHAVIOR_SPEC = create_mock_3dball_behavior_specs()
def test_load_demo():

assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BRAIN_PARAMS)
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)
assert len(demo_buffer["actions"]) == total_expected - 1

assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BRAIN_PARAMS)
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)
# observation mismatch
# observation size mismatch
brain_params_obs = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=9,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=1,
mismatch_obs = setup_test_behavior_specs(
False, False, vector_action_space=2, vector_obs_space=9
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_obs)
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, mismatch_obs)
brain_params_act = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[],
vector_action_space_size=[3],
vector_action_descriptions=[],
vector_action_space_type=1,
mismatch_act = setup_test_behavior_specs(
False, False, vector_action_space=3, vector_obs_space=9
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_act)
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, mismatch_act)
brain_params_type = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=0,
mismatch_act_type = setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=9
path_prefix + "/test.demo", 1, brain_params_type
path_prefix + "/test.demo", 1, mismatch_act_type
# vis obs mismatch
# number obs mismatch
brain_params_vis = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[[30, 40]],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=1,
mismatch_obs_number = setup_test_behavior_specs(
False, True, vector_action_space=2, vector_obs_space=9
)
_, demo_buffer = demo_to_buffer(
path_prefix + "/test.demo", 1, mismatch_obs_number
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_vis)
def test_edge_cases():

100
ml-agents/mlagents/trainers/tests/test_ghost.py


from mlagents.trainers.ghost.controller import GhostController
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory

return TrainerSettings(self_play=SelfPlaySettings())
VECTOR_ACTION_SPACE = [1]
VECTOR_ACTION_SPACE = 1
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 513

@pytest.mark.parametrize("use_discrete", [True, False])
def test_load_and_set(dummy_config, use_discrete):
mock_brain = mb.setup_mock_brain(
mock_specs = mb.setup_test_behavior_specs(
vector_action_space=VECTOR_ACTION_SPACE,
vector_action_space=DISCRETE_ACTION_SPACE
if use_discrete
else VECTOR_ACTION_SPACE,
discrete_action_space=DISCRETE_ACTION_SPACE,
trainer = PPOTrainer(mock_brain.brain_name, 0, trainer_params, True, False, 0, "0")
trainer = PPOTrainer("test", 0, trainer_params, True, False, 0, "0")
policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
policy = trainer.create_policy("test", mock_specs)
to_load_policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
to_load_policy = trainer.create_policy("test", mock_specs)
to_load_policy.create_tf_graph()
to_load_policy.init_load_weights()

def test_process_trajectory(dummy_config):
brain_params_team0 = BrainParameters(
brain_name="test_brain?team=0",
vector_observation_space_size=1,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=0,
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
brain_name = BehaviorIdentifiers.from_name_behavior_id(
brain_params_team0.brain_name
).brain_name
behavior_id_team0 = "test_brain?team=0"
behavior_id_team1 = "test_brain?team=1"
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
brain_params_team1 = BrainParameters(
brain_name="test_brain?team=1",
vector_observation_space_size=1,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=0,
)
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(

# first policy encountered becomes policy trained by wrapped PPO
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(
brain_params_team0.brain_name