Compare commits

...
This merge request has changes that conflict with the target branch.
/docs/Training-ML-Agents.md
/docs/Learning-Environment-Examples.md
/ml-agents/mlagents/trainers/cli_utils.py
/ml-agents/mlagents/trainers/settings.py
/ml-agents/mlagents/trainers/saver/saver.py
/ml-agents/mlagents/trainers/learn.py
/ml-agents/mlagents/trainers/ghost/trainer.py
/ml-agents/mlagents/trainers/policy/policy.py
/ml-agents/mlagents/trainers/ppo/trainer.py
/ml-agents/mlagents/trainers/sac/trainer.py
/ml-agents/mlagents/trainers/trainer/trainer.py
/ml-agents/mlagents/trainers/trainer/rl_trainer.py
/ml-agents/mlagents/trainers/tests/torch/test_utils.py
/ml-agents/mlagents/trainers/tests/test_rl_trainer.py
/ml-agents/mlagents/trainers/buffer.py
/ml-agents/mlagents/trainers/torch/utils.py
/ml-agents/mlagents/trainers/torch/networks.py
/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
/ml-agents/mlagents/trainers/sac/optimizer_torch.py
/ml-agents/mlagents/trainers/policy/torch_policy.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers
/ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py
/ml-agents/mlagents/trainers/torch/components
/ml-agents/mlagents/trainers/saver/torch_saver.py
/ml-agents/mlagents/trainers/ppo/optimizer_tf.py
/ml-agents/mlagents/trainers/tests/test_ppo.py
/ml-agents/mlagents/trainers/tests/test_reward_signals.py
/ml-agents/mlagents/trainers/tests/test_sac.py
/ml-agents/mlagents/trainers/tests/test_ghost.py
/ml-agents/mlagents/trainers/policy/tf_policy.py
/com.unity.ml-agents.extensions/Tests/Editor/Sensors
/utils/validate_release_links.py
/com.unity.ml-agents.extensions/Runtime/Sensors
/ml-agents/mlagents/trainers/saver
/ml-agents/mlagents/trainers/tests/torch
/ml-agents/mlagents/trainers/torch
/ml-agents/mlagents/trainers/tf/distributions.py
/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py
/ml-agents/mlagents/trainers/tf/models.py

9 commits

| Author | SHA1 | Message | Date |
| :----------- | :------- | :------------------------ | :---------- |
| Andrew Cohen | effdec13 | return copy of state_dict | 4 years ago |
| Andrew Cohen | fcec6734 | added comments | 4 years ago |
| Andrew Cohen | 0053713a | fix sac precommit | 4 years ago |
| Andrew Cohen | 20083987 | move tf policy comment | 4 years ago |
| Andrew Cohen | 5f7a7e44 | revert tennis config | 4 years ago |
| Andrew Cohen | 39bca7d2 | fix tf ghost tests | 4 years ago |
| Andrew Cohen | 71f9c241 | fix tf policy for ghosts | 4 years ago |
| Andrew Cohen | a65d08c7 | ghost trainer tests | 4 years ago |
| Andrew Cohen | b822283f | merge add fire | 4 years ago |
Showing 134 changed files with 7,563 additions and 421 deletions.
  1. 0
      config/sac/3DBall.yaml
  2. 0
      config/sac/3DBallHard.yaml
  3. 0
      config/ppo/Tennis.yaml
  4. 79
      docs/Training-ML-Agents.md
  5. 2
      docs/Learning-Environment-Examples.md
  6. 4
      ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2_grpc.py
  7. 2
      ml-agents/mlagents/trainers/buffer.py
  8. 12
      ml-agents/mlagents/trainers/ghost/trainer.py
  9. 7
      ml-agents/mlagents/trainers/cli_utils.py
  10. 35
      ml-agents/mlagents/trainers/learn.py
  11. 21
      ml-agents/mlagents/trainers/settings.py
  12. 105
      ml-agents/mlagents/trainers/trainer/rl_trainer.py
  13. 5
      ml-agents/mlagents/trainers/trainer/trainer.py
  14. 2
      ml-agents/mlagents/trainers/ppo/optimizer_tf.py
  15. 82
      ml-agents/mlagents/trainers/ppo/trainer.py
  16. 2
      ml-agents/mlagents/trainers/tests/test_ppo.py
  17. 2
      ml-agents/mlagents/trainers/tests/test_reward_signals.py
  18. 5
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  19. 3
      ml-agents/mlagents/trainers/tests/test_sac.py
  20. 7
      ml-agents/mlagents/trainers/tests/test_ghost.py
  21. 113
      ml-agents/mlagents/trainers/sac/trainer.py
  22. 5
      ml-agents/mlagents/trainers/policy/policy.py
  23. 2
      ml-agents/mlagents/trainers/policy/tf_policy.py
  24. 2
      com.unity.ml-agents/Runtime/SensorHelper.cs.meta
  25. 2
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/OrientationCubeController.cs.meta
  26. 6
      ml-agents/mlagents/trainers/tests/torch/test_utils.py
  27. 4
      ml-agents/mlagents/trainers/tf/models.py
  28. 10
      ml-agents/mlagents/trainers/saver/saver.py
  29. 43
      ml-agents/mlagents/trainers/torch/networks.py
  30. 4
      ml-agents/mlagents/trainers/torch/utils.py
  31. 2
      Project/Assets/csc.rsp
  32. 132
      utils/validate_release_links.py
  33. 248
      experiment_torch.py
  34. 94
      ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
  35. 203
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  36. 36
      ml-agents/mlagents/trainers/tests/test_models.py
  37. 113
      ml-agents/mlagents/trainers/tests/test_saver.py
  38. 561
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  39. 262
      ml-agents/mlagents/trainers/policy/torch_policy.py
  40. 11
      com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs.meta
  41. 11
      com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs.meta
  42. 152
      com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs
  43. 107
      com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs
  44. 47
      com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs
  45. 122
      com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
  46. 473
      com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs
  47. 209
      com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs
  48. 116
      com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs
  49. 140
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/ArticulationBodySensorTests.cs
  50. 136
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodySensorTests.cs
  51. 249
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs
  52. 186
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs
  53. 0
      ml-agents/mlagents/trainers/tf/__init__.py
  54. 221
      ml-agents/mlagents/trainers/tf/model_serialization.py
  55. 111
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  56. 56
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  57. 138
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  58. 32
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py
  59. 1001
      ml-agents/mlagents/trainers/tests/torch/test.demo
  60. 144
      ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py
  61. 446
      ml-agents/mlagents/trainers/tests/torch/testdcvis.demo
  62. 177
      ml-agents/mlagents/trainers/tests/torch/test_ghost.py
  63. 0
      ml-agents/mlagents/trainers/torch/__init__.py
  64. 0
      ml-agents/mlagents/trainers/torch/components/__init__.py
  65. 15
      ml-agents/mlagents/trainers/torch/components/reward_providers/__init__.py
  66. 72
      ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py
  67. 15
      ml-agents/mlagents/trainers/torch/components/reward_providers/extrinsic_reward_provider.py
  68. 43
      ml-agents/mlagents/trainers/torch/components/reward_providers/reward_provider_factory.py
  69. 225
      ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
  70. 256
      ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
  71. 0
      ml-agents/mlagents/trainers/torch/components/bc/__init__.py
  72. 185
      ml-agents/mlagents/trainers/torch/components/bc/module.py
  73. 77
      ml-agents/mlagents/trainers/torch/model_serialization.py
  74. 0
      ml-agents/mlagents/trainers/saver/__init__.py
  75. 171
      ml-agents/mlagents/trainers/saver/tf_saver.py
  76. 118
      ml-agents/mlagents/trainers/saver/torch_saver.py
  77. 11
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/OrientationCubeController.cs.meta
  78. 2
      Project/Assets/csc.rsp
  79. 11
      com.unity.ml-agents/Runtime/SensorHelper.cs.meta
  80. 132
      utils/validate_release_links.py
  81. 36
      ml-agents/mlagents/trainers/tests/test_models.py
  82. 113
      ml-agents/mlagents/trainers/tests/test_saver.py
  83. 0
      /ml-agents/mlagents/trainers/ppo/optimizer_tf.py
  84. 0
      /com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs.meta

0
config/sac/3DBall.yaml

0
config/sac/3DBallHard.yaml

0
config/ppo/Tennis.yaml

79
docs/Training-ML-Agents.md


ended.
#### Curriculum
To enable curriculum learning, you need to add a `curriculum` sub-section to your environment
parameter. Here is one example with the environment parameter `my_environment_parameter` :
```yml
behaviors:
  BehaviorY:
    # < Same as above >

# Add this section
environment_parameters:
  my_environment_parameter:
    curriculum:
      - name: MyFirstLesson # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: my_behavior
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.2
        value: 0.0
      - name: MySecondLesson # This is the start of the second lesson
        completion_criteria:
          measure: progress
          behavior: my_behavior
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.6
          require_reset: true
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 4.0
            max_value: 7.0
      - name: MyLastLesson
        value: 8.0
```
Note that this curriculum __only__ applies to `my_environment_parameter`. The `curriculum` section
contains a list of `Lessons`. In the example, the lessons are named `MyFirstLesson`, `MySecondLesson`
and `MyLastLesson`.
Each `Lesson` has 3 fields:
- `name` which is a user defined name for the lesson (The name of the lesson will be displayed in
the console when the lesson changes)
- `completion_criteria` which determines what needs to happen in the simulation before the lesson
can be considered complete. When that condition is met, the curriculum moves on to the next
`Lesson`. Note that you do not need to specify a `completion_criteria` for the last `Lesson`
- `value` which is the value the environment parameter will take during the lesson. Note that this
can be a float or a sampler.
Here are the different settings of `completion_criteria`:
| **Setting** | **Description** |
| :------------------ | :--------------- |
| `measure` | The metric used to measure learning progress and advancement through lessons.<br><br> `reward` uses a measure of the received reward, while `progress` uses the ratio of steps/max_steps. |
| `behavior` | Specifies which behavior is being tracked. There can be multiple behaviors with different names, each at different points of training. This setting allows the curriculum to track only one of them. |
| `threshold` | Determines the value of `measure` at which the lesson should advance. |
| `min_lesson_length` | The minimum number of episodes that should be completed before the lesson can change. If `measure` is set to `reward`, the average cumulative reward of the last `min_lesson_length` episodes will be used to determine if the lesson should change. Must be nonnegative. <br><br> **Important**: the average reward that is compared to the thresholds is different than the mean reward that is logged to the console. For example, if `min_lesson_length` is `100`, the lesson will increment after the average cumulative reward of the last `100` episodes exceeds the current threshold. The mean reward logged to the console is dictated by the `summary_freq` parameter defined above. |
| `signal_smoothing` | Whether to weight the current progress measure by previous values. |
| `require_reset` | Whether changing the lesson requires the environment to reset (default: false). |
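To make the interplay of these settings concrete, here is a minimal, illustrative sketch of how such a completion check could work. It is not the actual ML-Agents implementation; the `CompletionCriteria` dataclass, the `lesson_complete` function, and the smoothing weights are all hypothetical names and values chosen for the example.

```python
from dataclasses import dataclass
from typing import List


@dataclass
class CompletionCriteria:
    measure: str             # "progress" or "reward"
    threshold: float         # value the measure must reach
    min_lesson_length: int   # minimum completed episodes before advancing
    signal_smoothing: bool   # blend the current measure with the previous one


def lesson_complete(
    criteria: CompletionCriteria,
    episode_rewards: List[float],  # cumulative reward of each completed episode
    progress: float,               # ratio of steps / max_steps
    smoothed_value: float,         # previous (smoothed) measurement
) -> bool:
    """Return True once the lesson's completion criteria are met."""
    if len(episode_rewards) < criteria.min_lesson_length:
        return False
    if criteria.measure == "progress":
        measurement = progress
    else:  # "reward": average cumulative reward of the most recent episodes
        recent = episode_rewards[-criteria.min_lesson_length:]
        measurement = sum(recent) / len(recent)
    if criteria.signal_smoothing:
        # Smoothing weights are illustrative, not the trainer's actual values.
        measurement = 0.25 * smoothed_value + 0.75 * measurement
    return measurement > criteria.threshold
```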
##### Training with a Curriculum
Once we have specified our curricula, we can launch `mlagents-learn`, pointing it to the
config file that contains them, and PPO will train using curriculum learning. For example, to
train agents in the Wall Jump environment with curriculum learning, we can run:
```sh
mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum
```
We can then keep track of the current lessons and their progress via TensorBoard. If you've terminated
the run, you can resume it using `--resume` and lesson progress will start off where it
ended.
### Training Using Concurrent Unity Instances
In order to run concurrent Unity instances during training, set the number of

2
docs/Learning-Environment-Examples.md


head, thighs, shins, feet, arms, forearms and hands.
- Goal: The agent must move its body toward the goal direction without falling.
- `WalkerDynamic` - Goal direction is randomized.
- `WalkerDynamicVariableSpeed` - Goal direction and walking speed are randomized.
- `WalkerStatic` - Goal direction is always forward.
- `WalkerStaticVariableSpeed` - Goal direction is always forward. Walking speed is randomized.

4
ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2_grpc.py


from mlagents_envs.communicator_objects import unity_message_pb2 as mlagents__envs_dot_communicator__objects_dot_unity__message__pb2
class UnityToExternalProtoStub(object):
class UnityToExternalProtoStub:
# missing associated documentation comment in .proto file
pass

)
class UnityToExternalProtoServicer(object):
class UnityToExternalProtoServicer:
# missing associated documentation comment in .proto file
pass

2
ml-agents/mlagents/trainers/buffer.py


Adds a list of np.arrays to the end of the list of np.arrays.
:param data: The np.array list to append.
"""
self += list(np.array(data))
self += list(np.array(data, dtype=np.float32))
def set(self, data):
"""

12
ml-agents/mlagents/trainers/ghost/trainer.py


self.trainer.save_model()
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
"""
Creates policy with the wrapped trainer's create_policy function

wrapped trainer to be trained.
"""
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy = self.trainer.create_policy(
parsed_behavior_id, behavior_spec, create_graph=True
)
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

parsed_behavior_id, behavior_spec
)
self.trainer.add_policy(parsed_behavior_id, internal_trainer_policy)
internal_trainer_policy.init_load_weights()
self.current_policy_snapshot[
parsed_behavior_id.brain_name
] = internal_trainer_policy.get_weights()

7
ml-agents/mlagents/trainers/cli_utils.py


action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)
argparser.add_argument(
"--torch",
default=False,
action=DetectDefaultStoreTrue,
help="(Experimental) Use the PyTorch framework instead of TensorFlow. Install PyTorch "
"before using this option",
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(
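Together with the `RunOptions` handling added in `settings.py` below, this means an ordinary command line such as `mlagents-learn config/ppo/3DBall.yaml --run-id=ball` can be switched to the experimental backend simply by appending `--torch`, which sets each behavior's `framework` setting to `FrameworkType.PYTORCH`.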

35
ml-agents/mlagents/trainers/learn.py


)
from mlagents.trainers.cli_utils import parser
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.settings import RunOptions
from mlagents.trainers.settings import RunOptions, TestingConfiguration
from mlagents.trainers.training_status import GlobalTrainingStatus
from mlagents_envs.base_env import BaseEnv

add_metadata as add_timer_metadata,
)
from mlagents_envs import logging_util
from mlagents_envs.registry import default_registry
logger = logging_util.get_logger(__name__)

) -> UnityEnvironment:
# Make sure that each environment gets a different seed
env_seed = seed + worker_id
return UnityEnvironment(
file_name=env_path,
worker_id=worker_id,
seed=env_seed,
no_graphics=no_graphics,
base_port=start_port,
additional_args=env_args,
side_channels=side_channels,
log_folder=log_folder,
)
if TestingConfiguration.env_name == "":
return UnityEnvironment(
file_name=env_path,
worker_id=worker_id,
seed=env_seed,
no_graphics=no_graphics,
base_port=start_port,
additional_args=env_args,
side_channels=side_channels,
log_folder=log_folder,
)
else:
return default_registry[TestingConfiguration.env_name].make(
seed=env_seed,
no_graphics=no_graphics,
base_port=start_port,
worker_id=worker_id,
additional_args=env_args,
side_channels=side_channels,
log_folder=log_folder,
)
return create_unity_environment

21
ml-agents/mlagents/trainers/settings.py


return {key: cattr.unstructure(val) for key, val in d.items()}
class TestingConfiguration:
use_torch = True
max_steps = 0
env_name = ""
device = "cpu"
class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True

return _mapping[self]
class FrameworkType(Enum):
TENSORFLOW: str = "tensorflow"
PYTORCH: str = "pytorch"
@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
trainer_type: TrainerType = TrainerType.PPO

threaded: bool = True
self_play: Optional[SelfPlaySettings] = None
behavioral_cloning: Optional[BehavioralCloningSettings] = None
framework: FrameworkType = FrameworkType.TENSORFLOW
cattr.register_structure_hook(
Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure

configured_dict["engine_settings"][key] = val
else: # Base options
configured_dict[key] = val
return RunOptions.from_dict(configured_dict)
# Apply --torch retroactively
final_runoptions = RunOptions.from_dict(configured_dict)
if "torch" in DetectDefault.non_default_args:
for trainer_set in final_runoptions.behaviors.values():
trainer_set.framework = FrameworkType.PYTORCH
return final_runoptions
@staticmethod
def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":

105
ml-agents/mlagents/trainers/trainer/rl_trainer.py


from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.components.reward_signals import RewardSignalResult
from mlagents.trainers.components.reward_signals import RewardSignalResult, RewardSignal
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.policy.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.settings import (
TestingConfiguration,
TrainerSettings,
FrameworkType,
)
from mlagents.trainers.saver.torch_saver import TorchSaver
from mlagents.trainers.exception import UnityTrainerException
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
RewardSignalResults = Dict[str, RewardSignalResult]

self._stats_reporter.add_property(
StatsPropertyType.HYPERPARAMETERS, self.trainer_settings.as_dict()
)
self.framework = self.trainer_settings.framework
logger.debug(f"Using framework {self.framework.value}")
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self.trainer_settings, self.artifact_path, self.load
self.framework, self.trainer_settings, self.artifact_path, self.load
)
def end_episode(self) -> None:

for agent_id in rewards:
rewards[agent_id] = 0
@staticmethod
def create_saver(
trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
saver = TFSaver(trainer_settings, model_path, load)
return saver
def _update_end_episode_stats(self, agent_id: str, optimizer: Optimizer) -> None:
for name, rewards in self.collected_rewards.items():
if name == "environment":

self.reward_buffer.appendleft(rewards.get(agent_id, 0))
rewards[agent_id] = 0
else:
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name, rewards.get(agent_id, 0)
)
if isinstance(optimizer.reward_signals[name], RewardSignal):
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name,
rewards.get(agent_id, 0),
)
else:
self.stats_reporter.add_stat(
f"Policy/{optimizer.reward_signals[name].name.capitalize()} Reward",
rewards.get(agent_id, 0),
)
rewards[agent_id] = 0
def _clear_update_buffer(self) -> None:

"""
return False
def create_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
if self.framework == FrameworkType.PYTORCH and TorchPolicy is None:
raise UnityTrainerException(
"To use the experimental PyTorch backend, install the PyTorch Python package first."
)
elif self.framework == FrameworkType.PYTORCH:
return self.create_torch_policy(parsed_behavior_id, behavior_spec)
else:
return self.create_tf_policy(
parsed_behavior_id, behavior_spec, create_graph=create_graph
)
@abc.abstractmethod
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Create a Policy object that uses the PyTorch backend.
"""
pass
@abc.abstractmethod
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> TFPolicy:
"""
Create a Policy object that uses the TensorFlow backend.
"""
pass
@staticmethod
def create_saver(
framework: str, trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
if framework == FrameworkType.PYTORCH:
saver = TorchSaver( # type: ignore
trainer_settings, model_path, load
)
else:
saver = TFSaver( # type: ignore
trainer_settings, model_path, load
)
return saver
def _policy_mean_reward(self) -> Optional[float]:
""" Returns the mean episode reward for the current policy. """
rewards = self.cumulative_returns_since_policy_update

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
# Copy the checkpointed model files to the final output location
final_checkpoint = attr.evolve(
model_checkpoint, file_path=f"{self.saver.model_path}.nn"
)

5
ml-agents/mlagents/trainers/trainer/trainer.py


@abc.abstractmethod
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
"""
Creates policy

2
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


name="old_probabilities",
)
# Break old log probs into separate branches
# Break old log_probs into separate branches
old_log_prob_branches = ModelUtils.break_into_branches(
self.all_old_log_probs, self.policy.act_size
)

82
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.settings import (
TrainerSettings,
PPOSettings,
TestingConfiguration,
FrameworkType,
)
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchPPOOptimizer = None # type: ignore
logger = get_logger(__name__)

PPOSettings, self.trainer_settings.hyperparameters
)
self.seed = seed
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self.policy: Policy = None # type: ignore
def _process_trajectory(self, trajectory: Trajectory) -> None:

trajectory.next_obs,
trajectory.done_reached and not trajectory.interrupted,
)
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),
)
# Evaluate all reward functions
self.collected_rewards["environment"][agent_id] += np.sum(

evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

local_value_estimates = agent_buffer_trajectory[
f"{name}_value_estimates"
].get_batch()
local_advantage = get_gae(
rewards=local_rewards,
value_estimates=local_value_estimates,

self._clear_update_buffer()
return True
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
:param create_graph: whether to create the graph when policy is constructed
:return policy
"""
policy = TFPolicy(

condition_sigma_on_obs=False, # Faster training for PPO
create_tf_graph=False, # We will create the TF graph in the Optimizer
create_tf_graph=create_graph,
return policy
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Creates a PPO policy and adds it to the trainer's list of policies.
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:return policy
"""
policy = TorchPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=False, # Faster training for PPO
separate_critic=behavior_spec.is_action_continuous(),
)
return PPOOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
if self.framework == FrameworkType.PYTORCH:
return TorchPPOOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return PPOOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
self.optimizer = self.create_ppo_optimizer()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)

2
ml-agents/mlagents/trainers/tests/test_ppo.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
from mlagents.trainers.settings import (
GAILSettings,

5
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_policy(self):
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def _process_trajectory(self, trajectory):

3
ml-agents/mlagents/trainers/tests/test_sac.py


0, mock_brain, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
policy.initialize()
optimizer.policy.initialize()
return optimizer

trainer.add_policy(behavior_id, policy)
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update = mock.Mock()
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update_reward_signals = mock.Mock()
trainer.optimizer.update_reward_signals.return_value = {}
trainer.optimizer.update.return_value = {}

7
ml-agents/mlagents/trainers/tests/test_ghost.py


trainer_params = dummy_config
trainer = PPOTrainer("test", 0, trainer_params, True, False, 0, "0")
trainer.seed = 1
policy = trainer.create_policy("test", mock_specs)
policy.create_tf_graph()
policy = trainer.create_policy("test", mock_specs, create_graph=True)
to_load_policy = trainer.create_policy("test", mock_specs)
to_load_policy.create_tf_graph()
to_load_policy.init_load_weights()
to_load_policy = trainer.create_policy("test", mock_specs, create_graph=True)
weights = policy.get_weights()
load_weights = to_load_policy.get_weights()

113
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, SACSettings
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchSACOptimizer = None # type: ignore
logger = get_logger(__name__)

agent_buffer_trajectory["environment_rewards"]
)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

)
for name, v in value_estimates.items():
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
np.mean(v),
)
# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.

self._update_reward_signals()
return policy_was_updated
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TFPolicy:
policy = TFPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,
)
def maybe_load_replay_buffer(self):
# Load the replay buffer if load
if self.load and self.checkpoint_replay_buffer:
try:

)
)
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> TFPolicy:
policy = TFPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
tanh_squash=True,
reparameterize=True,
create_tf_graph=create_graph,
)
self.maybe_load_replay_buffer()
return policy
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Creates a SAC policy and adds it to the trainer's list of policies.
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:return policy
"""
policy = TorchPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=True,
tanh_squash=True,
separate_critic=True,
)
self.maybe_load_replay_buffer()
return policy
def _update_sac_policy(self) -> bool:

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
if isinstance(signal, RewardSignal):
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
else:
sampled_minibatch[f"{name}_rewards"] = (
signal.evaluate(sampled_minibatch) * signal.strength
)
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():

reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug(f"Updating {name} at step {self.step}")
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
if isinstance(signal, RewardSignal):
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
update_stats = self.optimizer.update_reward_signals(
reward_signal_minibatches, n_sequences
)

self._stats_reporter.add_stat(stat, np.mean(stat_list))
def create_sac_optimizer(self) -> SACOptimizer:
return SACOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
if self.framework == FrameworkType.PYTORCH:
return TorchSACOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return SACOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

5
ml-agents/mlagents/trainers/policy/policy.py


self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.vis_obs_shape = (
[shape for shape in behavior_spec.observation_shapes if len(shape) == 3][0]
if self.vis_obs_size > 0
else None
)
self.use_continuous_act = behavior_spec.is_action_continuous()
self.num_branches = self.behavior_spec.action_size
self.previous_action_dict: Dict[str, np.array] = {}

2
ml-agents/mlagents/trainers/policy/tf_policy.py


# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self.initialize()
# Create assignment ops for Ghost Trainer
self.init_load_weights()
def _create_encoder(
self,

2
com.unity.ml-agents/Runtime/SensorHelper.cs.meta


fileFormatVersion: 2
guid: 11fe037a02b4a483cb9342c3454232cd
guid: 7c1189c0af42c46f7b533350d49ad3e7
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/OrientationCubeController.cs.meta


fileFormatVersion: 2
guid: fcb7a51f0d5f8404db7b85bd35ecc1fb
guid: 771e78c5e980e440e8cd19716b55075f
MonoImporter:
externalObjects: {}
serializedVersion: 2

6
ml-agents/mlagents/trainers/tests/torch/test_utils.py


masks = torch.tensor([False, False, False, False, False])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 0.0
# Make sure it works with 2d arrays of shape (mask_length, N)
test_input = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T
masks = torch.tensor([False, False, True, True, True])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 4.0

4
ml-agents/mlagents/trainers/tf/models.py


:param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
:param action_size: A list containing the number of possible actions for each branch
:return: The action output dimension [batch_size, num_branches], the concatenated
normalized probs (after softmax)
and the concatenated normalized log probs
normalized log_probs (after softmax)
and the concatenated normalized log log_probs
"""
branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
raw_probs = [

10
ml-agents/mlagents/trainers/saver/saver.py


pass
@abc.abstractmethod
def save_checkpoint(self, brain_name: str, step: int) -> str:
def save_checkpoint(self, behavior_name: str, step: int) -> str:
:param brain_name: Brain name of brain to be trained
:param behavior_name: Behavior name of behavior to be trained
def export(self, output_filepath: str, brain_name: str) -> None:
def export(self, output_filepath: str, behavior_name: str) -> None:
Saves the serialized model, given a path and brain name.
Saves the serialized model, given a path and behavior name.
:param brain_name: Brain name of brain to be trained.
:param behavior_name: Behavior name of behavior to be trained.
"""
pass

43
ml-agents/mlagents/trainers/torch/networks.py


memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
vec_encodes = []
encodes = []
for idx, encoder in enumerate(self.vector_encoders):
vec_input = vec_inputs[idx]
if actions is not None:

vec_encodes.append(hidden)
encodes.append(hidden)
vis_encodes = []
vis_encodes.append(hidden)
encodes.append(hidden)
if len(vec_encodes) > 0 and len(vis_encodes) > 0:
vec_encodes_tensor = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
vis_encodes_tensor = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
encoding = torch.stack(
[vec_encodes_tensor, vis_encodes_tensor], dim=-1
).sum(dim=-1)
elif len(vec_encodes) > 0:
encoding = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
elif len(vis_encodes) > 0:
encoding = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
else:
if len(encodes) == 0:
# Constants don't work in Barracuda
encoding = encodes[0]
if len(encodes) > 1:
for _enc in encodes[1:]:
encoding += _enc
if self.use_lstm:
# Resize to (batch, sequence length, encoding size)

)
action_list = self.sample_action(dists)
sampled_actions = torch.stack(action_list, dim=-1)
if self.act_type == ActionType.CONTINUOUS:
log_probs = dists[0].log_prob(sampled_actions)
else:
log_probs = dists[0].all_log_prob()
dists[0].pdf(sampled_actions),
log_probs,
self.version_number,
self.memory_size,
self.is_continuous_int,

class GlobalSteps(nn.Module):
def __init__(self):
super().__init__()
self.global_step = torch.Tensor([0])
self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
@property
def current_step(self):
return int(self.__global_step.item())
@current_step.setter
def current_step(self, value):
self.__global_step[:] = value
self.global_step += value
self.__global_step += value
class LearningRate(nn.Module):

4
ml-agents/mlagents/trainers/torch/utils.py


:param tensor: Tensor which needs mean computation.
:param masks: Boolean tensor of masks with same dimension as tensor.
"""
return (tensor * masks).sum() / torch.clamp(masks.float().sum(), min=1.0)
return (tensor.T * masks).sum() / torch.clamp(
(torch.ones_like(tensor.T) * masks).float().sum(), min=1.0
)
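For reference, here is a self-contained illustration of the updated two-argument masked mean, with example values matching the expectations in `test_utils.py` shown earlier. This is a sketch for clarity, not a drop-in replacement for `ModelUtils.masked_mean`.

```python
import torch


def masked_mean(tensor: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
    # Average only the masked-in entries; broadcasting over tensor.T lets the
    # same 1-D mask apply to 2-D inputs of shape (mask_length, N). The clamp
    # avoids division by zero when every element is masked out.
    return (tensor.T * masks).sum() / torch.clamp(
        (torch.ones_like(tensor.T) * masks).float().sum(), min=1.0
    )


values = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
masks = torch.tensor([False, False, True, True, True])
print(masked_mean(values, masks))  # tensor(4.)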

2
Project/Assets/csc.rsp


-warnaserror+
-warnaserror-:618

132
utils/validate_release_links.py


#!/usr/bin/env python3
import ast
import sys
import os
import re
import subprocess
from typing import List, Optional, Pattern
RELEASE_PATTERN = re.compile(r"release_[0-9]+(_docs)*")
TRAINER_INIT_FILE = "ml-agents/mlagents/trainers/__init__.py"
# Filename -> regex list to allow specific lines.
# To allow everything in the file, use None for the value
ALLOW_LIST = {
# Previous release table
"README.md": re.compile(r"\*\*Release [0-9]+\*\*"),
"docs/Versioning.md": None,
"com.unity.ml-agents/CHANGELOG.md": None,
"utils/make_readme_table.py": None,
"utils/validate_release_links.py": None,
}
def test_pattern():
# Just some sanity check that the regex works as expected.
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/Food.md"
)
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_4/Foo.md"
)
assert RELEASE_PATTERN.search(
"git clone --branch release_4 https://github.com/Unity-Technologies/ml-agents.git"
)
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_123_docs/Foo.md"
)
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_123/Foo.md"
)
assert not RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/latest_release/docs/Foo.md"
)
print("tests OK!")
def git_ls_files() -> List[str]:
"""
Run "git ls-files" and return a list with one entry per line.
This returns the list of all files tracked by git.
"""
return subprocess.check_output(["git", "ls-files"], universal_newlines=True).split(
"\n"
)
def get_release_tag() -> Optional[str]:
"""
Returns the release tag for the mlagents python package.
This will be None on the master branch.
:return:
"""
with open(TRAINER_INIT_FILE) as f:
for line in f:
if "__release_tag__" in line:
lhs, equals_string, rhs = line.strip().partition(" = ")
# Evaluate the right hand side of the expression
return ast.literal_eval(rhs)
# If we couldn't find the release tag, raise an exception
# (since we can't return None here)
raise RuntimeError("Can't determine release tag")
def check_file(filename: str, global_allow_pattern: Pattern) -> List[str]:
"""
Validate a single file and return any offending lines.
"""
bad_lines = []
with open(filename) as f:
for line in f:
if not RELEASE_PATTERN.search(line):
continue
if global_allow_pattern.search(line):
continue
if filename in ALLOW_LIST:
if ALLOW_LIST[filename] is None or ALLOW_LIST[filename].search(line):
continue
bad_lines.append(f"{filename}: {line.strip()}")
return bad_lines
def check_all_files(allow_pattern: Pattern) -> List[str]:
"""
Validate all files tracked by git.
:param allow_pattern:
"""
bad_lines = []
file_types = {".py", ".md", ".cs"}
for file_name in git_ls_files():
if "localized" in file_name or os.path.splitext(file_name)[1] not in file_types:
continue
bad_lines += check_file(file_name, allow_pattern)
return bad_lines
def main():
release_tag = get_release_tag()
if not release_tag:
print("Release tag is None, exiting")
sys.exit(0)
print(f"Release tag: {release_tag}")
allow_pattern = re.compile(f"{release_tag}(_docs)*")
bad_lines = check_all_files(allow_pattern)
if bad_lines:
print(
f"Found lines referring to previous release. Either update the files, or add an exclusion to {__file__}"
)
for line in bad_lines:
print(line)
sys.exit(1 if bad_lines else 0)
if __name__ == "__main__":
if "--test" in sys.argv:
test_pattern()
main()
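Note that passing `--test` only adds the regex sanity checks in `test_pattern()`; the script still proceeds to `main()`, which exits with a nonzero status if any tracked `.py`, `.md`, or `.cs` file outside the allow list references a previous release.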

248
experiment_torch.py


import json
import os
import torch
from mlagents.tf_utils import tf
import argparse
from mlagents.trainers.learn import run_cli, parse_command_line
from mlagents.trainers.settings import TestingConfiguration
from mlagents.trainers.stats import StatsReporter
from mlagents_envs.timers import _thread_timer_stacks
def run_experiment(
name: str,
steps: int,
use_torch: bool,
algo: str,
num_torch_threads: int,
use_gpu: bool,
num_envs: int = 1,
config_name=None,
):
TestingConfiguration.env_name = name
TestingConfiguration.max_steps = steps
TestingConfiguration.use_torch = use_torch
TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
if use_gpu:
tf.device("/GPU:0")
else:
tf.device("/device:CPU:0")
if not torch.cuda.is_available() and use_gpu:
return (
name,
str(steps),
str(use_torch),
algo,
str(num_torch_threads),
str(num_envs),
str(use_gpu),
"na",
"na",
"na",
"na",
"na",
"na",
"na",
)
if config_name is None:
config_name = name
run_options = parse_command_line(
[f"config/{algo}/{config_name}.yaml", "--num-envs", f"{num_envs}"]
)
run_options.checkpoint_settings.run_id = (
f"{name}_test_" + str(steps) + "_" + ("torch" if use_torch else "tf")
)
run_options.checkpoint_settings.force = True
# run_options.env_settings.num_envs = num_envs
for trainer_settings in run_options.behaviors.values():
trainer_settings.threaded = False
timers_path = os.path.join(
"results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json"
)
if use_torch:
torch.set_num_threads(num_torch_threads)
run_cli(run_options)
StatsReporter.writers.clear()
StatsReporter.stats_dict.clear()
_thread_timer_stacks.clear()
with open(timers_path) as timers_json_file:
timers_json = json.load(timers_json_file)
total = timers_json["total"]
tc_advance = timers_json["children"]["TrainerController.start_learning"][
"children"
]["TrainerController.advance"]
evaluate = timers_json["children"]["TrainerController.start_learning"][
"children"
]["TrainerController.advance"]["children"]["env_step"]["children"][
"SubprocessEnvManager._take_step"
][
"children"
]
update = timers_json["children"]["TrainerController.start_learning"][
"children"
]["TrainerController.advance"]["children"]["trainer_advance"]["children"][
"_update_policy"
][
"children"
]
tc_advance_total = tc_advance["total"]
tc_advance_count = tc_advance["count"]
if use_torch:
if algo == "ppo":
update_total = update["TorchPPOOptimizer.update"]["total"]
update_count = update["TorchPPOOptimizer.update"]["count"]
else:
update_total = update["SACTrainer._update_policy"]["total"]
update_count = update["SACTrainer._update_policy"]["count"]
evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
else:
if algo == "ppo":
update_total = update["PPOOptimizer.update"]["total"]
update_count = update["PPOOptimizer.update"]["count"]
else:
update_total = update["SACTrainer._update_policy"]["total"]
update_count = update["SACTrainer._update_policy"]["count"]
evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
evaluate_count = evaluate["NNPolicy.evaluate"]["count"]
# todo: do total / count
return (
name,
str(steps),
str(use_torch),
algo,
str(num_torch_threads),
str(num_envs),
str(use_gpu),
str(total),
str(tc_advance_total),
str(tc_advance_count),
str(update_total),
str(update_count),
str(evaluate_total),
str(evaluate_count),
)
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
parser.add_argument(
"--gpu", default=False, action="store_true", help="If true, will use the GPU"
)
parser.add_argument(
"--threads",
default=False,
action="store_true",
help="If true, will try both 1 and 8 threads for torch",
)
parser.add_argument(
"--ball",
default=False,
action="store_true",
help="If true, will only do 3dball",
)
parser.add_argument(
"--sac",
default=False,
action="store_true",
help="If true, will run sac instead of ppo",
)
args = parser.parse_args()
if args.gpu:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
else:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
algo = "ppo"
if args.sac:
algo = "sac"
envs_config_tuples = [
("3DBall", "3DBall"),
("GridWorld", "GridWorld"),
("PushBlock", "PushBlock"),
("CrawlerStaticTarget", "CrawlerStatic"),
]
if algo == "ppo":
envs_config_tuples += [
("Hallway", "Hallway"),
("VisualHallway", "VisualHallway"),
]
if args.ball:
envs_config_tuples = [("3DBall", "3DBall")]
labels = (
"name",
"steps",
"use_torch",
"algorithm",
"num_torch_threads",
"num_envs",
"use_gpu",
"total",
"tc_advance_total",
"tc_advance_count",
"update_total",
"update_count",
"evaluate_total",
"evaluate_count",
)
results = []
results.append(labels)
f = open(
f"result_data_steps_{args.steps}_algo_{algo}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt",
"w",
)
f.write(" ".join(labels) + "\n")
for env_config in envs_config_tuples:
data = run_experiment(
name=env_config[0],
steps=args.steps,
use_torch=True,
algo=algo,
num_torch_threads=1,
use_gpu=args.gpu,
num_envs=args.num_envs,
config_name=env_config[1],
)
results.append(data)
f.write(" ".join(data) + "\n")
if args.threads:
data = run_experiment(
name=env_config[0],
steps=args.steps,
use_torch=True,
algo=algo,
num_torch_threads=8,
use_gpu=args.gpu,
num_envs=args.num_envs,
config_name=env_config[1],
)
results.append(data)
f.write(" ".join(data) + "\n")
data = run_experiment(
name=env_config[0],
steps=args.steps,
use_torch=False,
algo=algo,
num_torch_threads=1,
use_gpu=args.gpu,
num_envs=args.num_envs,
config_name=env_config[1],
)
results.append(data)
f.write(" ".join(data) + "\n")
for r in results:
print(*r)
f.close()
if __name__ == "__main__":
main()
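As written, a command like `python experiment_torch.py --steps 25000 --sac --threads` runs each environment with PyTorch at 1 and 8 threads plus a TensorFlow baseline, and writes the timing columns listed in `labels` to a `result_data_*.txt` file.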

94
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


from typing import Dict, Optional, Tuple, List
import torch
import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.components.bc.module import BCModule
from mlagents.trainers.torch.components.reward_providers import create_reward_provider
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.torch.utils import ModelUtils
class TorchOptimizer(Optimizer): # pylint: disable=W0223
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
super().__init__()
self.policy = policy
self.trainer_settings = trainer_settings
self.update_dict: Dict[str, torch.Tensor] = {}
self.value_heads: Dict[str, torch.Tensor] = {}
self.memory_in: torch.Tensor = None
self.memory_out: torch.Tensor = None
self.m_size: int = 0
self.global_step = torch.tensor(0)
self.bc_module: Optional[BCModule] = None
self.create_reward_signals(trainer_settings.reward_signals)
if trainer_settings.behavioral_cloning is not None:
self.bc_module = BCModule(
self.policy,
trainer_settings.behavioral_cloning,
policy_learning_rate=trainer_settings.hyperparameters.learning_rate,
default_batch_size=trainer_settings.hyperparameters.batch_size,
default_num_epoch=3,
)
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
pass
def create_reward_signals(self, reward_signal_configs):
"""
Create reward signals
:param reward_signal_configs: Reward signal config.
"""
for reward_signal, settings in reward_signal_configs.items():
# Name reward signals by string in case we have duplicates later
self.reward_signals[reward_signal.value] = create_reward_provider(
reward_signal, self.policy.behavior_spec, settings
)
def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
if self.policy.use_vis_obs:
visual_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
visual_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
visual_obs.append(visual_ob)
else:
visual_obs = []
memory = torch.zeros([1, 1, self.policy.m_size])
vec_vis_obs = SplitObservations.from_observations(next_obs)
next_vec_obs = [
ModelUtils.list_to_tensor(vec_vis_obs.vector_observations).unsqueeze(0)
]
next_vis_obs = [
ModelUtils.list_to_tensor(_vis_ob).unsqueeze(0)
for _vis_ob in vec_vis_obs.visual_observations
]
value_estimates, next_memory = self.policy.actor_critic.critic_pass(
vector_obs, visual_obs, memory, sequence_length=batch.num_experiences
)
next_value_estimate, _ = self.policy.actor_critic.critic_pass(
next_vec_obs, next_vis_obs, next_memory, sequence_length=1
)
for name, estimate in value_estimates.items():
value_estimates[name] = estimate.detach().cpu().numpy()
next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()
if done:
for k in next_value_estimate:
if not self.reward_signals[k].ignore_done:
next_value_estimate[k] = 0.0
return value_estimates, next_value_estimate

203
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


from typing import Dict, cast
import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.utils import ModelUtils
class TorchPPOOptimizer(TorchOptimizer):
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.
The PPO optimizer has a value estimator and a loss function.
:param policy: A TorchPolicy object that will be updated by this PPO Optimizer.
:param trainer_params: Trainer parameters dictionary that specifies the
properties of the trainer.
"""
# Create the graph here to give more granular control of the TF graph to the Optimizer.
super().__init__(policy, trainer_settings)
params = list(self.policy.actor_critic.parameters())
self.hyperparameters: PPOSettings = cast(
PPOSettings, trainer_settings.hyperparameters
)
self.decay_learning_rate = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.decay_epsilon = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.epsilon,
0.1,
self.trainer_settings.max_steps,
)
self.decay_beta = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.beta,
1e-5,
self.trainer_settings.max_steps,
)
self.optimizer = torch.optim.Adam(
params, lr=self.trainer_settings.hyperparameters.learning_rate
)
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
}
self.stream_names = list(self.reward_signals.keys())
def ppo_value_loss(
self,
values: Dict[str, torch.Tensor],
old_values: Dict[str, torch.Tensor],
returns: Dict[str, torch.Tensor],
epsilon: float,
loss_masks: torch.Tensor,
) -> torch.Tensor:
"""
Evaluates value loss for PPO.
:param values: Value output of the current network.
:param old_values: Value stored with experiences in buffer.
:param returns: Computed returns.
:param epsilon: Clipping value for value estimate.
:param loss_mask: Mask for losses. Used with LSTM to ignore 0'ed out experiences.
"""
value_losses = []
for name, head in values.items():
old_val_tensor = old_values[name]
returns_tensor = returns[name]
clipped_value_estimate = old_val_tensor + torch.clamp(
head - old_val_tensor, -1 * epsilon, epsilon
)
v_opt_a = (returns_tensor - head) ** 2
v_opt_b = (returns_tensor - clipped_value_estimate) ** 2
value_loss = ModelUtils.masked_mean(torch.max(v_opt_a, v_opt_b), loss_masks)
value_losses.append(value_loss)
value_loss = torch.mean(torch.stack(value_losses))
return value_loss
def ppo_policy_loss(
self,
advantages: torch.Tensor,
log_probs: torch.Tensor,
old_log_probs: torch.Tensor,
loss_masks: torch.Tensor,
) -> torch.Tensor:
"""
Evaluate PPO policy loss.
:param advantages: Computed advantages.
:param log_probs: Log probabilities of the actions under the current policy.
:param old_log_probs: Log probabilities of the actions under the policy that collected the experiences.
:param loss_masks: Mask for losses. Used with LSTM to ignore 0'ed out experiences.
"""
advantage = advantages.unsqueeze(-1)
decay_epsilon = self.hyperparameters.epsilon
r_theta = torch.exp(log_probs - old_log_probs)
p_opt_a = r_theta * advantage
p_opt_b = (
torch.clamp(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * advantage
)
policy_loss = -1 * ModelUtils.masked_mean(
torch.min(p_opt_a, p_opt_b), loss_masks
)
return policy_loss
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
"""
Performs update on model.
:param batch: Batch of experiences.
:param num_sequences: Number of sequences to process.
:return: Results of update.
"""
# Get decayed parameters
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
decay_eps = self.decay_epsilon.get_value(self.policy.get_current_step())
decay_bet = self.decay_beta.get_value(self.policy.get_current_step())
returns = {}
old_values = {}
for name in self.reward_signals:
old_values[name] = ModelUtils.list_to_tensor(
batch[f"{name}_value_estimates"]
)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
vis_obs.append(vis_ob)
else:
vis_obs = []
log_probs, entropy, values = self.policy.evaluate_actions(
vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
memories=memories,
seq_len=self.policy.sequence_length,
)
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks
)
policy_loss = self.ppo_policy_loss(
ModelUtils.list_to_tensor(batch["advantages"]),
log_probs,
ModelUtils.list_to_tensor(batch["action_probs"]),
loss_masks,
)
loss = (
policy_loss
+ 0.5 * value_loss
- decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
update_stats = {
"Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
"Losses/Value Loss": value_loss.detach().cpu().numpy(),
"Policy/Learning Rate": decay_lr,
"Policy/Epsilon": decay_eps,
"Policy/Beta": decay_bet,
}
for reward_provider in self.reward_signals.values():
update_stats.update(reward_provider.update(batch))
return update_stats
def get_modules(self):
return {"Optimizer": self.optimizer}
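For readers following the two loss functions above, here is a minimal standalone sketch (plain PyTorch with illustrative tensors, not the trainer's API) of the clipped value loss and the clipped surrogate policy loss; masked_mean is re-implemented locally under the same assumption as ModelUtils.masked_mean.

import torch

epsilon = 0.2
returns = torch.tensor([1.0, 0.5, -0.2])
old_values = torch.tensor([0.8, 0.7, 0.1])
values = torch.tensor([1.1, 0.4, -0.1])        # current critic output
masks = torch.tensor([1.0, 1.0, 0.0])          # 1 = real step, 0 = LSTM padding

def masked_mean(t, m):
    # Same idea as ModelUtils.masked_mean: average only over unpadded steps.
    return (t * m).sum() / m.sum().clamp(min=1.0)

# Clipped value loss: penalize whichever of the clipped/unclipped errors is worse.
clipped = old_values + torch.clamp(values - old_values, -epsilon, epsilon)
value_loss = masked_mean(
    torch.max((returns - values) ** 2, (returns - clipped) ** 2), masks
)

# Clipped surrogate policy loss: r_theta = pi_new / pi_old via exp(logp - old_logp).
advantages = torch.tensor([0.3, -0.1, 0.2])
log_probs = torch.tensor([-0.9, -1.2, -0.4])
old_log_probs = torch.tensor([-1.0, -1.1, -0.5])
r_theta = torch.exp(log_probs - old_log_probs)
surrogate = torch.min(
    r_theta * advantages,
    torch.clamp(r_theta, 1.0 - epsilon, 1.0 + epsilon) * advantages,
)
policy_loss = -masked_mean(surrogate, masks)
print(float(value_loss), float(policy_loss))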

36
ml-agents/mlagents/trainers/tests/test_models.py


import pytest
from mlagents.trainers.tf.models import ModelUtils
from mlagents.tf_utils import tf
from mlagents_envs.base_env import BehaviorSpec, ActionType
def create_behavior_spec(num_visual, num_vector, vector_size):
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector),
ActionType.DISCRETE,
(1,),
)
return behavior_spec
@pytest.mark.parametrize("num_visual", [1, 2, 4])
@pytest.mark.parametrize("num_vector", [1, 2, 4])
def test_create_input_placeholders(num_vector, num_visual):
vec_size = 8
name_prefix = "test123"
bspec = create_behavior_spec(num_visual, num_vector, vec_size)
vec_in, vis_in = ModelUtils.create_input_placeholders(
bspec.observation_shapes, name_prefix=name_prefix
)
assert isinstance(vis_in, list)
assert len(vis_in) == num_visual
assert isinstance(vec_in, tf.Tensor)
assert vec_in.get_shape().as_list()[1] == num_vector * 8
# Check names contain prefix and vis shapes are correct
for _vis in vis_in:
assert _vis.get_shape().as_list() == [None, 84, 84, 3]
assert _vis.name.startswith(name_prefix)
assert vec_in.name.startswith(name_prefix)
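A quick sketch of the shapes this test expects, using the same assumptions the test makes (84x84x3 visual observations, vector observations concatenated into a single placeholder of width num_vector * vector_size); the values below are illustrative only.

num_visual, num_vector, vector_size = 2, 3, 8
observation_shapes = [(84, 84, 3)] * num_visual + [(vector_size,)] * num_vector
vector_width = sum(shape[0] for shape in observation_shapes if len(shape) == 1)
print(len([s for s in observation_shapes if len(s) == 3]))  # 2 visual placeholders
print(vector_width)                                         # 24 == num_vector * vector_size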

113
ml-agents/mlagents/trainers/tests/test_saver.py


import pytest
from unittest import mock
import os
import unittest
import tempfile
import numpy as np
from mlagents.tf_utils import tf
from mlagents.trainers.saver.tf_saver import TFSaver
from mlagents.trainers import __version__
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
def test_register(tmp_path):
trainer_params = TrainerSettings()
saver = TFSaver(trainer_params, tmp_path)
opt = mock.Mock(spec=PPOOptimizer)
saver.register(opt)
assert saver.policy is None
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params)
saver.register(policy)
assert saver.policy is not None
class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test that a warning is logged when the checkpoint version does not match
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
trainer_params = TrainerSettings()
mock_path = tempfile.mkdtemp()
policy = create_policy_mock(trainer_params)
saver = TFSaver(trainer_params, mock_path)
saver.register(policy)
saver._check_model_version(
"0.0.0"
) # This is not the right version for sure
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
saver._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown with the correct version
assert len(cm.output) == 1
def test_load_save(tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params)
saver = TFSaver(trainer_params, path1)
saver.register(policy)
saver.initialize_or_load(policy)
policy.set_step(2000)
mock_brain_name = "MockBrain"
saver.save_checkpoint(mock_brain_name, 2000)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
saver = TFSaver(trainer_params, path1, load=True)
policy2 = create_policy_mock(trainer_params)
saver.register(policy2)
saver.initialize_or_load(policy2)
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params.init_path = path1
saver = TFSaver(trainer_params, path2)
policy3 = create_policy_mock(trainer_params)
saver.register(policy3)
saver.initialize_or_load(policy3)
_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
"""
Make sure two policies have the same output for the same input.
"""
decision_step, _ = mb.create_steps_from_behavior_spec(
policy1.behavior_spec, num_agents=1
)
run_out1 = policy1.evaluate(decision_step, list(decision_step.agent_id))
run_out2 = policy2.evaluate(decision_step, list(decision_step.agent_id))
np.testing.assert_array_equal(run_out2["log_probs"], run_out1["log_probs"])
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_checkpoint_conversion(tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config = TrainerSettings()
model_path = os.path.join(tmpdir, "Mock_Brain")
policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
trainer_params = TrainerSettings()
saver = TFSaver(trainer_params, model_path)
saver.register(policy)
saver.save_checkpoint("Mock_Brain", 100)
assert os.path.isfile(model_path + "/Mock_Brain-100.nn")
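The save/load behaviour these tests assert can be pictured with a plain torch.save/torch.load round trip. This is only an illustrative sketch (generic PyTorch, hypothetical file name), not the TFSaver/TorchSaver API used above: the restored policy produces the same outputs for the same inputs, and the step count travels with the checkpoint.

import torch

model = torch.nn.Linear(4, 2)
torch.save({"policy": model.state_dict(), "step": 2000}, "checkpoint.pt")

restored = torch.nn.Linear(4, 2)
ckpt = torch.load("checkpoint.pt")
restored.load_state_dict(ckpt["policy"])

x = torch.randn(1, 4)
assert torch.allclose(model(x), restored(x))   # same outputs for same inputs
assert ckpt["step"] == 2000                    # step is carried by the checkpoint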

561
ml-agents/mlagents/trainers/sac/optimizer_torch.py


import numpy as np
from typing import Dict, List, Mapping, cast, Tuple, Optional
import torch
from torch import nn
import attr
from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import ActionType
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.networks import ValueNetwork
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
EPSILON = 1e-6 # Small value to avoid divide by zero
logger = get_logger(__name__)
class TorchSACOptimizer(TorchOptimizer):
class PolicyValueNetwork(nn.Module):
def __init__(
self,
stream_names: List[str],
observation_shapes: List[Tuple[int, ...]],
network_settings: NetworkSettings,
act_type: ActionType,
act_size: List[int],
):
super().__init__()
if act_type == ActionType.CONTINUOUS:
num_value_outs = 1
num_action_ins = sum(act_size)
else:
num_value_outs = sum(act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
)
self.q2_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
)
def forward(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
return q1_out, q2_out
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):
super().__init__(policy, trainer_params)
hyperparameters: SACSettings = cast(SACSettings, trainer_params.hyperparameters)
self.tau = hyperparameters.tau
self.init_entcoef = hyperparameters.init_entcoef
self.policy = policy
self.act_size = policy.act_size
policy_network_settings = policy.network_settings
self.tau = hyperparameters.tau
self.burn_in_ratio = 0.0
# Non-exposed SAC parameters
self.discrete_target_entropy_scale = 0.2 # Roughly equal to e-greedy 0.05
self.continuous_target_entropy_scale = 1.0
self.stream_names = list(self.reward_signals.keys())
# Used to reduce "survivor bonus" when using Curiosity or GAIL.
self.gammas = [_val.gamma for _val in trainer_params.reward_signals.values()]
self.use_dones_in_backup = {
name: int(not self.reward_signals[name].ignore_done)
for name in self.stream_names
}
# Critics should have 1/2 of the memory of the policy
critic_memory = policy_network_settings.memory
if critic_memory is not None:
critic_memory = attr.evolve(
critic_memory, memory_size=critic_memory.memory_size // 2
)
value_network_settings = attr.evolve(
policy_network_settings, memory=critic_memory
)
self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
value_network_settings,
self.policy.behavior_spec.action_type,
self.act_size,
)
self.target_network = ValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
value_network_settings,
)
self.soft_update(self.policy.actor_critic.critic, self.target_network, 1.0)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),
requires_grad=True,
)
if self.policy.use_continuous_act:
self.target_entropy = torch.as_tensor(
-1
* self.continuous_target_entropy_scale
* np.prod(self.act_size[0]).astype(np.float32)
)
else:
self.target_entropy = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self.act_size
]
policy_params = list(self.policy.actor_critic.network_body.parameters()) + list(
self.policy.actor_critic.distribution.parameters()
)
value_params = list(self.value_network.parameters()) + list(
self.policy.actor_critic.critic.parameters()
)
logger.debug("value_vars")
for param in value_params:
logger.debug(param.shape)
logger.debug("policy_vars")
for param in policy_params:
logger.debug(param.shape)
self.decay_learning_rate = ModelUtils.DecayedValue(
hyperparameters.learning_rate_schedule,
hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.policy_optimizer = torch.optim.Adam(
policy_params, lr=hyperparameters.learning_rate
)
self.value_optimizer = torch.optim.Adam(
value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
[self._log_ent_coef], lr=hyperparameters.learning_rate
)
def sac_q_loss(
self,
q1_out: Dict[str, torch.Tensor],
q2_out: Dict[str, torch.Tensor],
target_values: Dict[str, torch.Tensor],
dones: torch.Tensor,
rewards: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
q1_losses = []
q2_losses = []
# Multiple q losses per stream
for i, name in enumerate(q1_out.keys()):
q1_stream = q1_out[name].squeeze()
q2_stream = q2_out[name].squeeze()
with torch.no_grad():
q_backup = rewards[name] + (
(1.0 - self.use_dones_in_backup[name] * dones)
* self.gammas[i]
* target_values[name]
)
_q1_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(q_backup, q1_stream), loss_masks
)
_q2_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(q_backup, q2_stream), loss_masks
)
q1_losses.append(_q1_loss)
q2_losses.append(_q2_loss)
q1_loss = torch.mean(torch.stack(q1_losses))
q2_loss = torch.mean(torch.stack(q2_losses))
return q1_loss, q2_loss
def soft_update(self, source: nn.Module, target: nn.Module, tau: float) -> None:
for source_param, target_param in zip(source.parameters(), target.parameters()):
target_param.data.copy_(
target_param.data * (1.0 - tau) + source_param.data * tau
)
def sac_value_loss(
self,
log_probs: torch.Tensor,
values: Dict[str, torch.Tensor],
q1p_out: Dict[str, torch.Tensor],
q2p_out: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
discrete: bool,
) -> torch.Tensor:
min_policy_qs = {}
with torch.no_grad():
_ent_coef = torch.exp(self._log_ent_coef)
for name in values.keys():
if not discrete:
min_policy_qs[name] = torch.min(q1p_out[name], q2p_out[name])
else:
action_probs = log_probs.exp()
_branched_q1p = ModelUtils.break_into_branches(
q1p_out[name] * action_probs, self.act_size
)
_branched_q2p = ModelUtils.break_into_branches(
q2p_out[name] * action_probs, self.act_size
)
_q1p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q1p]
),
dim=0,
)
_q2p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q2p]
),
dim=0,
)
min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:
for name in values.keys():
with torch.no_grad():
v_backup = min_policy_qs[name] - torch.sum(
_ent_coef * log_probs, dim=1
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup), loss_masks
)
value_losses.append(value_loss)
else:
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * log_probs.exp(), self.act_size
)
# We have to do entropy bonus per action branch
branched_ent_bonus = torch.stack(
[
torch.sum(_ent_coef[i] * _lp, dim=1, keepdim=True)
for i, _lp in enumerate(branched_per_action_ent)
]
)
for name in values.keys():
with torch.no_grad():
v_backup = min_policy_qs[name] - torch.mean(
branched_ent_bonus, axis=0
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup.squeeze()),
loss_masks,
)
value_losses.append(value_loss)
value_loss = torch.mean(torch.stack(value_losses))
if torch.isinf(value_loss).any() or torch.isnan(value_loss).any():
raise UnityTrainerException("Inf found")
return value_loss
def sac_policy_loss(
self,
log_probs: torch.Tensor,
q1p_outs: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
discrete: bool,
) -> torch.Tensor:
_ent_coef = torch.exp(self._log_ent_coef)
mean_q1 = torch.mean(torch.stack(list(q1p_outs.values())), axis=0)
if not discrete:
mean_q1 = mean_q1.unsqueeze(1)
batch_policy_loss = torch.mean(_ent_coef * log_probs - mean_q1, dim=1)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
else:
action_probs = log_probs.exp()
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * action_probs, self.act_size
)
branched_q_term = ModelUtils.break_into_branches(
mean_q1 * action_probs, self.act_size
)
branched_policy_loss = torch.stack(
[
torch.sum(_ent_coef[i] * _lp - _qt, dim=1, keepdim=True)
for i, (_lp, _qt) in enumerate(
zip(branched_per_action_ent, branched_q_term)
)
]
)
batch_policy_loss = torch.squeeze(branched_policy_loss)
policy_loss = torch.mean(loss_masks * batch_policy_loss)
return policy_loss
def sac_entropy_loss(
self, log_probs: torch.Tensor, loss_masks: torch.Tensor, discrete: bool
) -> torch.Tensor:
if not discrete:
with torch.no_grad():
target_current_diff = torch.sum(log_probs + self.target_entropy, dim=1)
entropy_loss = -torch.mean(
self._log_ent_coef * loss_masks * target_current_diff
)
else:
with torch.no_grad():
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * log_probs.exp(), self.act_size
)
target_current_diff_branched = torch.stack(
[
torch.sum(_lp, axis=1, keepdim=True) + _te
for _lp, _te in zip(
branched_per_action_ent, self.target_entropy
)
],
axis=1,
)
target_current_diff = torch.squeeze(
target_current_diff_branched, axis=2
)
entropy_loss = -1 * ModelUtils.masked_mean(
torch.mean(self._log_ent_coef * target_current_diff, axis=1), loss_masks
)
return entropy_loss
def _condense_q_streams(
self, q_output: Dict[str, torch.Tensor], discrete_actions: torch.Tensor
) -> Dict[str, torch.Tensor]:
condensed_q_output = {}
onehot_actions = ModelUtils.actions_to_onehot(discrete_actions, self.act_size)
for key, item in q_output.items():
branched_q = ModelUtils.break_into_branches(item, self.act_size)
only_action_qs = torch.stack(
[
torch.sum(_act * _q, dim=1, keepdim=True)
for _act, _q in zip(onehot_actions, branched_q)
]
)
condensed_q_output[key] = torch.mean(only_action_qs, dim=0)
return condensed_q_output
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
"""
Updates model using buffer.
:param batch: Experience mini-batch.
:param num_sequences: Number of sequences (LSTM trajectory segments) in the batch.
:return: Output from update process.
"""
rewards = {}
for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
next_vec_obs = [ModelUtils.list_to_tensor(batch["next_vector_in"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
memories_list = [
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
]
# LSTM shouldn't have sequence length <1, but stop it from going out of the index if true.
offset = 1 if self.policy.sequence_length > 1 else 0
next_memories_list = [
ModelUtils.list_to_tensor(
batch["memory"][i][self.policy.m_size // 2 :]
) # only pass value part of memory to target network
for i in range(offset, len(batch["memory"]), self.policy.sequence_length)
]
if len(memories_list) > 0:
memories = torch.stack(memories_list).unsqueeze(0)
next_memories = torch.stack(next_memories_list).unsqueeze(0)
else:
memories = None
next_memories = None
# Q network memories are 0'ed out, since we don't have them during inference.
q_memories = (
torch.zeros_like(next_memories) if next_memories is not None else None
)
vis_obs: List[torch.Tensor] = []
next_vis_obs: List[torch.Tensor] = []
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
vis_obs.append(vis_ob)
next_vis_ob = ModelUtils.list_to_tensor(
batch["next_visual_obs%d" % idx]
)
next_vis_obs.append(next_vis_ob)
# Copy normalizers from policy
self.value_network.q1_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
self.value_network.q2_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
_,
) = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,
memories=memories,
seq_len=self.policy.sequence_length,
all_log_probs=not self.policy.use_continuous_act,
)
if self.policy.use_continuous_act:
squeezed_actions = actions.squeeze(-1)
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
squeezed_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream, q2_stream = q1_out, q2_out
else:
with torch.no_grad():
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream = self._condense_q_streams(q1_out, actions)
q2_stream = self._condense_q_streams(q2_out, actions)
with torch.no_grad():
target_values, _ = self.target_network(
next_vec_obs,
next_vis_obs,
memories=next_memories,
sequence_length=self.policy.sequence_length,
)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
use_discrete = not self.policy.use_continuous_act
dones = ModelUtils.list_to_tensor(batch["done"])
q1_loss, q2_loss = self.sac_q_loss(
q1_stream, q2_stream, target_values, dones, rewards, masks
)
value_loss = self.sac_value_loss(
log_probs, sampled_values, q1p_out, q2p_out, masks, use_discrete
)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)
total_value_loss = q1_loss + q2_loss + value_loss
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
ModelUtils.update_learning_rate(self.policy_optimizer, decay_lr)
self.policy_optimizer.zero_grad()
policy_loss.backward()
self.policy_optimizer.step()
ModelUtils.update_learning_rate(self.value_optimizer, decay_lr)
self.value_optimizer.zero_grad()
total_value_loss.backward()
self.value_optimizer.step()
ModelUtils.update_learning_rate(self.entropy_optimizer, decay_lr)
self.entropy_optimizer.zero_grad()
entropy_loss.backward()
self.entropy_optimizer.step()
# Update target network
self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
update_stats = {
"Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
"Losses/Value Loss": value_loss.detach().cpu().numpy(),
"Losses/Q1 Loss": q1_loss.detach().cpu().numpy(),
"Losses/Q2 Loss": q2_loss.detach().cpu().numpy(),
"Policy/Entropy Coeff": torch.exp(self._log_ent_coef)
.detach()
.cpu()
.numpy(),
"Policy/Learning Rate": decay_lr,
}
for signal in self.reward_signals.values():
signal.update(batch)
return update_stats
def update_reward_signals(
self, reward_signal_minibatches: Mapping[str, AgentBuffer], num_sequences: int
) -> Dict[str, float]:
return {}
def get_modules(self):
return {
"Optimizer:value_network": self.value_network,
"Optimizer:target_network": self.target_network,
"Optimizer:policy_optimizer": self.policy_optimizer,
"Optimizer:value_optimizer": self.value_optimizer,
"Optimizer:entropy_optimizer": self.entropy_optimizer,
}
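Two pieces of arithmetic above are worth seeing in isolation: the Q backup target used in sac_q_loss and the Polyak ("soft") update of the target network. The sketch below is a standalone illustration with assumed values for gamma and tau; the trainer additionally gates dones per reward signal via use_dones_in_backup.

import torch

# Q backup: y = r + (1 - done) * gamma * V_target(s')
rewards = torch.tensor([1.0, 0.0])
dones = torch.tensor([0.0, 1.0])
target_values = torch.tensor([2.0, 3.0])
gamma = 0.99
q_backup = rewards + (1.0 - dones) * gamma * target_values  # -> [2.98, 0.00]

# Soft update: target <- (1 - tau) * target + tau * source
source = torch.nn.Linear(3, 1)
target = torch.nn.Linear(3, 1)
tau = 0.005
with torch.no_grad():
    for s_param, t_param in zip(source.parameters(), target.parameters()):
        t_param.copy_((1.0 - tau) * t_param + tau * s_param)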

262
ml-agents/mlagents/trainers/policy/torch_policy.py


from typing import Any, Dict, List
import numpy as np
import torch
import copy
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents.trainers.policy import Policy
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents_envs.timers import timed
from mlagents.trainers.settings import TrainerSettings, TestingConfiguration
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.networks import (
SharedActorCritic,
SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero
class TorchPolicy(Policy):
def __init__(
self,
seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
tanh_squash: bool = False,
reparameterize: bool = False,
separate_critic: bool = True,
condition_sigma_on_obs: bool = True,
):
"""
Policy that uses a multilayer perceptron to map the observations to actions. Could
also use a CNN to encode visual input prior to the MLP. Supports discrete and
continuous action spaces, as well as recurrent networks.
:param seed: Random seed.
:param behavior_spec: The BehaviorSpec of the behavior this policy controls.
:param trainer_settings: Defined training parameters.
:param tanh_squash: Whether to use a tanh function on the continuous output,
or a clipped output.
:param reparameterize: Whether we are using the reparameterization (resampling) trick to update
the policy for continuous actions.
:param separate_critic: Whether the critic is a separate network from the actor.
:param condition_sigma_on_obs: Whether the standard deviation of the continuous action
distribution is conditioned on the observations.
"""
super().__init__(
seed,
behavior_spec,
trainer_settings,
tanh_squash,
reparameterize,
condition_sigma_on_obs,
)
self.global_step = (
GlobalSteps()
) # could be much simpler if TorchPolicy were an nn.Module
self.grads = None
if TestingConfiguration.device != "cpu":
torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
torch.set_default_tensor_type(torch.FloatTensor)
reward_signal_configs = trainer_settings.reward_signals
reward_signal_names = [key.value for key, _ in reward_signal_configs.items()]
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
}
if separate_critic:
ac_class = SeparateActorCritic
else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
network_settings=trainer_settings.network_settings,
act_type=behavior_spec.action_type,
act_size=self.act_size,
stream_names=reward_signal_names,
conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,
)
self.actor_critic.to(TestingConfiguration.device)
def split_decision_step(self, decision_requests):
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
mask = None
if not self.use_continuous_act:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)
)
return vec_vis_obs.vector_observations, vec_vis_obs.visual_observations, mask
def update_normalization(self, vector_obs: np.ndarray) -> None:
"""
If this policy normalizes vector observations, this will update the running normalization statistics.
:param vector_obs: The vector observations to add to the running estimate of the distribution.
"""
vector_obs = [torch.as_tensor(vector_obs)]
if self.use_vec_obs and self.normalize:
self.actor_critic.update_normalization(vector_obs)
@timed
def sample_actions(
self,
vec_obs,
vis_obs,
masks=None,
memories=None,
seq_len=1,
all_log_probs=False,
):
"""
:param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
"""
dists, value_heads, memories = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
action_list, dists
)
actions = torch.stack(action_list, dim=-1)
if self.use_continuous_act:
actions = actions[:, :, 0]
else:
actions = actions[:, 0, :]
return (
actions,
all_logs if all_log_probs else log_probs,
entropies,
value_heads,
memories,
)
def evaluate_actions(
self, vec_obs, vis_obs, actions, masks=None, memories=None, seq_len=1
):
dists, value_heads, _ = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
)
if len(actions.shape) <= 2:
actions = actions.unsqueeze(-1)
action_list = [actions[..., i] for i in range(actions.shape[2])]
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
return log_probs, entropies, value_heads
@timed
def evaluate(
self, decision_requests: DecisionSteps, global_agent_ids: List[str]
) -> Dict[str, Any]:
"""
Evaluates policy for the agent experiences provided.
:param global_agent_ids: The global (worker-qualified) ids of the agents being evaluated.
:param decision_requests: DecisionSteps object containing inputs.
:return: Outputs from the network as a dictionary of run_out values.
"""
vec_obs, vis_obs, masks = self.split_decision_step(decision_requests)
vec_obs = [torch.as_tensor(vec_obs)]
vis_obs = [torch.as_tensor(vis_ob) for vis_ob in vis_obs]
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
)
run_out = {}
with torch.no_grad():
action, log_probs, entropy, value_heads, memories = self.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
)
run_out["action"] = action.detach().cpu().numpy()
run_out["pre_action"] = action.detach().cpu().numpy()
# TODO: compute pre_action separately; it is currently identical to action.
run_out["log_probs"] = log_probs.detach().cpu().numpy()
run_out["entropy"] = entropy.detach().cpu().numpy()
run_out["value_heads"] = {
name: t.detach().cpu().numpy() for name, t in value_heads.items()
}
run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
run_out["learning_rate"] = 0.0
if self.use_recurrent:
run_out["memory_out"] = memories.detach().cpu().numpy().squeeze(0)
return run_out
def get_action(
self, decision_requests: DecisionSteps, worker_id: int = 0
) -> ActionInfo:
"""
Decides actions given observation information.
:param worker_id: The id of the worker (environment) that sent the decision requests.
:param decision_requests: A DecisionSteps object containing the observations and action masks from the environment.
:return: an ActionInfo containing action, memories, values and an object
to be passed to add experiences
"""
if len(decision_requests) == 0:
return ActionInfo.empty()
global_agent_ids = [
get_global_agent_id(worker_id, int(agent_id))
for agent_id in decision_requests.agent_id
] # For 1-D array, the iterator order is correct.
run_out = self.evaluate(
decision_requests, global_agent_ids
) # pylint: disable=assignment-from-no-return
self.save_memories(global_agent_ids, run_out.get("memory_out"))
return ActionInfo(
action=run_out.get("action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=list(decision_requests.agent_id),
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0
@property
def use_vec_obs(self):
return self.vec_obs_size > 0
def get_current_step(self):
"""
Gets current model step.
:return: current model step.
"""
return self.global_step.current_step
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.
:return: The step the model was set to.
"""
self.global_step.current_step = step
return step
def increment_step(self, n_steps):
"""
Increments model step.
"""
self.global_step.increment(n_steps)
return self.get_current_step()
def load_weights(self, values: List[np.ndarray]) -> None:
self.actor_critic.load_state_dict(values)
def init_load_weights(self) -> None:
pass
def get_weights(self) -> List[np.ndarray]:
return copy.deepcopy(self.actor_critic.state_dict())
def get_modules(self):
return {"Policy": self.actor_critic, "global_step": self.global_step}
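A small standalone sketch (illustrative arrays, not the trainer's API) of the mask handling in split_decision_step above: the environment marks forbidden actions with 1 per branch, while the policy wants a single tensor over all branches with 1 = allowed.

import numpy as np
import torch

act_size = [3, 2]                       # two discrete action branches
n_agents = 2
action_mask = [
    np.array([[0, 1, 0], [0, 0, 0]]),   # branch 0, one row per agent
    np.array([[0, 0], [1, 0]]),         # branch 1, one row per agent
]

mask = torch.ones([n_agents, int(np.sum(act_size))])        # default: allow everything
if action_mask is not None:
    # 1 -> allowed, 0 -> forbidden, branches concatenated along the last axis
    mask = torch.as_tensor(1 - np.concatenate(action_mask, axis=1))
print(mask)
# tensor([[1, 0, 1, 1, 1],
#         [1, 1, 1, 0, 1]])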

11
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs.meta


fileFormatVersion: 2
guid: 11fe037a02b4a483cb9342c3454232cd
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

11
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs.meta


fileFormatVersion: 2
guid: fcb7a51f0d5f8404db7b85bd35ecc1fb
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

152
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs


using System;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// Settings that define the observations generated for physics-based sensors.
/// </summary>
[Serializable]
public struct PhysicsSensorSettings
{
/// <summary>
/// Whether to use model space (relative to the root body) translations as observations.
/// </summary>
public bool UseModelSpaceTranslations;
/// <summary>
/// Whether to use model space (relative to the root body) rotations as observations.
/// </summary>
public bool UseModelSpaceRotations;
/// <summary>
/// Whether to use local space (relative to the parent body) translations as observations.
/// </summary>
public bool UseLocalSpaceTranslations;
/// <summary>
/// Whether to use local space (relative to the parent body) rotations as observations.
/// </summary>
public bool UseLocalSpaceRotations;
/// <summary>
/// Whether to use model space (relative to the root body) linear velocities as observations.
/// </summary>
public bool UseModelSpaceLinearVelocity;
/// <summary>
/// Whether to use local space (relative to the parent body) linear velocities as observations.
/// </summary>
public bool UseLocalSpaceLinearVelocity;
/// <summary>
/// Whether to use joint-specific positions and angles as observations.
/// </summary>
public bool UseJointPositionsAndAngles;
/// <summary>
/// Whether to use the joint forces and torques that are applied by the solver as observations.
/// </summary>
public bool UseJointForces;
/// <summary>
/// Creates a PhysicsSensorSettings with reasonable default values.
/// </summary>
/// <returns></returns>
public static PhysicsSensorSettings Default()
{
return new PhysicsSensorSettings
{
UseModelSpaceTranslations = true,
UseModelSpaceRotations = true,
};
}
/// <summary>
/// Whether any model space observations are being used.
/// </summary>
public bool UseModelSpace
{
get { return UseModelSpaceTranslations || UseModelSpaceRotations || UseModelSpaceLinearVelocity; }
}
/// <summary>
/// Whether any local space observations are being used.
/// </summary>
public bool UseLocalSpace
{
get { return UseLocalSpaceTranslations || UseLocalSpaceRotations || UseLocalSpaceLinearVelocity; }
}
}
internal static class ObservationWriterPhysicsExtensions
{
/// <summary>
/// Utility method for writing a PoseExtractor to an ObservationWriter.
/// </summary>
/// <param name="writer"></param>
/// <param name="settings"></param>
/// <param name="poseExtractor"></param>
/// <param name="baseOffset">The offset into the ObservationWriter to start writing at.</param>
/// <returns>The number of observations written.</returns>
public static int WritePoses(this ObservationWriter writer, PhysicsSensorSettings settings, PoseExtractor poseExtractor, int baseOffset = 0)
{
var offset = baseOffset;
if (settings.UseModelSpace)
{
foreach (var pose in poseExtractor.GetEnabledModelSpacePoses())
{
if (settings.UseModelSpaceTranslations)
{
writer.Add(pose.position, offset);
offset += 3;
}
if (settings.UseModelSpaceRotations)
{
writer.Add(pose.rotation, offset);
offset += 4;
}
}
foreach(var vel in poseExtractor.GetEnabledModelSpaceVelocities())
{
if (settings.UseModelSpaceLinearVelocity)
{
writer.Add(vel, offset);
offset += 3;
}
}
}
if (settings.UseLocalSpace)
{
foreach (var pose in poseExtractor.GetEnabledLocalSpacePoses())
{
if (settings.UseLocalSpaceTranslations)
{
writer.Add(pose.position, offset);
offset += 3;
}
if (settings.UseLocalSpaceRotations)
{
writer.Add(pose.rotation, offset);
offset += 4;
}
}
foreach(var vel in poseExtractor.GetEnabledLocalSpaceVelocities())
{
if (settings.UseLocalSpaceLinearVelocity)
{
writer.Add(vel, offset);
offset += 3;
}
}
}
return offset - baseOffset;
}
}
}
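As a rough check of the offsets WritePoses advances (3 floats per translation, 4 per rotation since rotations are quaternions, 3 per linear velocity), here is a back-of-the-envelope sketch in Python with an assumed set of flags and pose count; GetNumPoseObservations in PoseExtractor performs the equivalent computation.

settings = {
    "UseModelSpaceTranslations": True,
    "UseModelSpaceRotations": True,
    "UseModelSpaceLinearVelocity": False,
    "UseLocalSpaceTranslations": False,
    "UseLocalSpaceRotations": False,
    "UseLocalSpaceLinearVelocity": False,
}
obs_per_pose = (
    3 * settings["UseModelSpaceTranslations"]
    + 4 * settings["UseModelSpaceRotations"]
    + 3 * settings["UseModelSpaceLinearVelocity"]
    + 3 * settings["UseLocalSpaceTranslations"]
    + 4 * settings["UseLocalSpaceRotations"]
    + 3 * settings["UseLocalSpaceLinearVelocity"]
)
num_enabled_poses = 5
print(num_enabled_poses * obs_per_pose)  # 5 * 7 = 35 floats for the default settings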

107
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs


#if UNITY_2020_1_OR_NEWER
using System.Collections.Generic;
using UnityEngine;
namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// Utility class to track a hierarchy of ArticulationBodies.
/// </summary>
public class ArticulationBodyPoseExtractor : PoseExtractor
{
ArticulationBody[] m_Bodies;
public ArticulationBodyPoseExtractor(ArticulationBody rootBody)
{
if (rootBody == null)
{
return;
}
if (!rootBody.isRoot)
{
Debug.Log("Must pass ArticulationBody.isRoot");
return;
}
var bodies = rootBody.GetComponentsInChildren<ArticulationBody>();
if (bodies[0] != rootBody)
{
Debug.Log("Expected root body at index 0");
return;
}
var numBodies = bodies.Length;
m_Bodies = bodies;
int[] parentIndices = new int[numBodies];
parentIndices[0] = -1;
var bodyToIndex = new Dictionary<ArticulationBody, int>();
for (var i = 0; i < numBodies; i++)
{
bodyToIndex[m_Bodies[i]] = i;
}
for (var i = 1; i < numBodies; i++)
{
var currentArticBody = m_Bodies[i];
// Component.GetComponentInParent will consider the provided object as well.
// So start looking from the parent.
var currentGameObject = currentArticBody.gameObject;
var parentGameObject = currentGameObject.transform.parent;
var parentArticBody = parentGameObject.GetComponentInParent<ArticulationBody>();
parentIndices[i] = bodyToIndex[parentArticBody];
}
Setup(parentIndices);
}
/// <inheritdoc/>
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return m_Bodies[index].velocity;
}
/// <inheritdoc/>
protected internal override Pose GetPoseAt(int index)
{
var body = m_Bodies[index];
var go = body.gameObject;
var t = go.transform;
return new Pose { rotation = t.rotation, position = t.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
return m_Bodies[index];
}
internal ArticulationBody[] Bodies => m_Bodies;
internal IEnumerable<ArticulationBody> GetEnabledArticulationBodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var articBody = m_Bodies[i];
if (articBody == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return articBody;
}
}
}
}
}
#endif // UNITY_2020_1_OR_NEWER

47
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs


#if UNITY_2020_1_OR_NEWER
using UnityEngine;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Extensions.Sensors
{
public class ArticulationBodySensorComponent : SensorComponent
{
public ArticulationBody RootBody;
[SerializeField]
public PhysicsSensorSettings Settings = PhysicsSensorSettings.Default();
public string sensorName;
/// <summary>
/// Creates a PhysicsBodySensor.
/// </summary>
/// <returns></returns>
public override ISensor CreateSensor()
{
return new PhysicsBodySensor(RootBody, Settings, sensorName);
}
/// <inheritdoc/>
public override int[] GetObservationShape()
{
if (RootBody == null)
{
return new[] { 0 };
}
// TODO static method in PhysicsBodySensor?
// TODO only update PoseExtractor when body changes?
var poseExtractor = new ArticulationBodyPoseExtractor(RootBody);
var numPoseObservations = poseExtractor.GetNumPoseObservations(Settings);
var numJointObservations = 0;
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
{
numJointObservations += ArticulationBodyJointExtractor.NumObservations(articBody, Settings);
}
return new[] { numPoseObservations + numJointObservations };
}
}
}
#endif // UNITY_2020_1_OR_NEWER

122
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// ISensor implementation that generates observations for a group of Rigidbodies or ArticulationBodies.
/// </summary>
public class PhysicsBodySensor : ISensor
{
int[] m_Shape;
string m_SensorName;
PoseExtractor m_PoseExtractor;
List<IJointExtractor> m_JointExtractors;
PhysicsSensorSettings m_Settings;
/// <summary>
/// Construct a new PhysicsBodySensor
/// </summary>
/// <param name="poseExtractor"></param>
/// <param name="settings"></param>
/// <param name="sensorName"></param>
public PhysicsBodySensor(
RigidBodyPoseExtractor poseExtractor,
PhysicsSensorSettings settings,
string sensorName
)
{
m_PoseExtractor = poseExtractor;
m_SensorName = sensorName;
m_Settings = settings;
var numJointExtractorObservations = 0;
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
{
var jointExtractor = new RigidBodyJointExtractor(rb);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);
m_Shape = new[] { numTransformObservations + numJointExtractorObservations };
}
#if UNITY_2020_1_OR_NEWER
public PhysicsBodySensor(ArticulationBody rootBody, PhysicsSensorSettings settings, string sensorName=null)
{
var poseExtractor = new ArticulationBodyPoseExtractor(rootBody);
m_PoseExtractor = poseExtractor;
m_SensorName = string.IsNullOrEmpty(sensorName) ? $"ArticulationBodySensor:{rootBody?.name}" : sensorName;
m_Settings = settings;
var numJointExtractorObservations = 0;
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
{
var jointExtractor = new ArticulationBodyJointExtractor(articBody);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);
m_Shape = new[] { numTransformObservations + numJointExtractorObservations };
}
#endif
/// <inheritdoc/>
public int[] GetObservationShape()
{
return m_Shape;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
var numWritten = writer.WritePoses(m_Settings, m_PoseExtractor);
foreach (var jointExtractor in m_JointExtractors)
{
numWritten += jointExtractor.Write(m_Settings, writer, numWritten);
}
return numWritten;
}
/// <inheritdoc/>
public byte[] GetCompressedObservation()
{
return null;
}
/// <inheritdoc/>
public void Update()
{
if (m_Settings.UseModelSpace)
{
m_PoseExtractor.UpdateModelSpacePoses();
}
if (m_Settings.UseLocalSpace)
{
m_PoseExtractor.UpdateLocalSpacePoses();
}
}
/// <inheritdoc/>
public void Reset() {}
/// <inheritdoc/>
public SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;
}
/// <inheritdoc/>
public string GetName()
{
return m_SensorName;
}
}
}

473
com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs


using System;
using System.Collections.Generic;
using UnityEngine;
using Object = UnityEngine.Object;
namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// Abstract class for managing the transforms of a hierarchy of objects.
/// This could be GameObjects or MonoBehaviours in the scene graph, but this is
/// not a requirement; for example, the objects could be rigid bodies whose hierarchy
/// is defined by Joint configurations.
///
/// Poses are either considered in model space, which is relative to a root body,
/// or in local space, which is relative to their parent.
/// </summary>
public abstract class PoseExtractor
{
int[] m_ParentIndices;
Pose[] m_ModelSpacePoses;
Pose[] m_LocalSpacePoses;
Vector3[] m_ModelSpaceLinearVelocities;
Vector3[] m_LocalSpaceLinearVelocities;
bool[] m_PoseEnabled;
/// <summary>
/// Read iterator for the enabled model space transforms.
/// </summary>
public IEnumerable<Pose> GetEnabledModelSpacePoses()
{
if (m_ModelSpacePoses == null)
{
yield break;
}
for (var i = 0; i < m_ModelSpacePoses.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_ModelSpacePoses[i];
}
}
}
/// <summary>
/// Read iterator for the enabled local space transforms.
/// </summary>
public IEnumerable<Pose> GetEnabledLocalSpacePoses()
{
if (m_LocalSpacePoses == null)
{
yield break;
}
for (var i = 0; i < m_LocalSpacePoses.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_LocalSpacePoses[i];
}
}
}
/// <summary>
/// Read iterator for the enabled model space linear velocities.
/// </summary>
public IEnumerable<Vector3> GetEnabledModelSpaceVelocities()
{
if (m_ModelSpaceLinearVelocities == null)
{
yield break;
}
for (var i = 0; i < m_ModelSpaceLinearVelocities.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_ModelSpaceLinearVelocities[i];
}
}
}
/// <summary>
/// Read iterator for the enabled local space linear velocities.
/// </summary>
public IEnumerable<Vector3> GetEnabledLocalSpaceVelocities()
{
if (m_LocalSpaceLinearVelocities == null)
{
yield break;
}
for (var i = 0; i < m_LocalSpaceLinearVelocities.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_LocalSpaceLinearVelocities[i];
}
}
}
/// <summary>
/// Number of enabled poses in the hierarchy (read-only).
/// </summary>
public int NumEnabledPoses
{
get
{
if (m_PoseEnabled == null)
{
return 0;
}
var numEnabled = 0;
for (var i = 0; i < m_PoseEnabled.Length; i++)
{
numEnabled += m_PoseEnabled[i] ? 1 : 0;
}
return numEnabled;
}
}
/// <summary>
/// Number of total poses in the hierarchy (read-only).
/// </summary>
public int NumPoses
{
get { return m_ModelSpacePoses?.Length ?? 0; }
}
/// <summary>
/// Get the parent index of the body at the specified index.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
public int GetParentIndex(int index)
{
if (m_ParentIndices == null)
{
throw new NullReferenceException("No parent indices set");
}
return m_ParentIndices[index];
}
/// <summary>
/// Set whether the pose at the given index is enabled or disabled for observations.
/// </summary>
/// <param name="index"></param>
/// <param name="val"></param>
public void SetPoseEnabled(int index, bool val)
{
m_PoseEnabled[index] = val;
}
public bool IsPoseEnabled(int index)
{
return m_PoseEnabled[index];
}
/// <summary>
/// Initialize with the mapping of parent indices.
/// The 0th element is assumed to be -1, indicating that it's the root.
/// </summary>
/// <param name="parentIndices"></param>
protected void Setup(int[] parentIndices)
{
#if DEBUG
if (parentIndices[0] != -1)
{
throw new UnityAgentsException($"Expected parentIndices[0] to be -1, got {parentIndices[0]}");
}
#endif
m_ParentIndices = parentIndices;
var numPoses = parentIndices.Length;
m_ModelSpacePoses = new Pose[numPoses];
m_LocalSpacePoses = new Pose[numPoses];
m_ModelSpaceLinearVelocities = new Vector3[numPoses];
m_LocalSpaceLinearVelocities = new Vector3[numPoses];
m_PoseEnabled = new bool[numPoses];
// All poses are enabled by default. Generally we'll want to disable the root though.
for (var i = 0; i < numPoses; i++)
{
m_PoseEnabled[i] = true;
}
}
/// <summary>
/// Return the world space Pose of the i'th object.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected internal abstract Pose GetPoseAt(int index);
/// <summary>
/// Return the world space linear velocity of the i'th object.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected internal abstract Vector3 GetLinearVelocityAt(int index);
/// <summary>
/// Return the underlying object at the given index. This is only
/// used for display in the inspector.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected internal virtual Object GetObjectAt(int index)
{
return null;
}
/// <summary>
/// Update the internal model space transform storage based on the underlying system.
/// </summary>
public void UpdateModelSpacePoses()
{
using (TimerStack.Instance.Scoped("UpdateModelSpacePoses"))
{
if (m_ModelSpacePoses == null)
{
return;
}
var rootWorldTransform = GetPoseAt(0);
var worldToModel = rootWorldTransform.Inverse();
var rootLinearVel = GetLinearVelocityAt(0);
for (var i = 0; i < m_ModelSpacePoses.Length; i++)
{
var currentWorldSpacePose = GetPoseAt(i);
var currentModelSpacePose = worldToModel.Multiply(currentWorldSpacePose);
m_ModelSpacePoses[i] = currentModelSpacePose;
var currentBodyLinearVel = GetLinearVelocityAt(i);
var relativeVelocity = currentBodyLinearVel - rootLinearVel;
m_ModelSpaceLinearVelocities[i] = worldToModel.rotation * relativeVelocity;
}
}
}
/// <summary>
/// Update the internal local space transform storage based on the underlying system.
/// </summary>
public void UpdateLocalSpacePoses()
{
using (TimerStack.Instance.Scoped("UpdateLocalSpacePoses"))
{
if (m_LocalSpacePoses == null)
{
return;
}
for (var i = 0; i < m_LocalSpacePoses.Length; i++)
{
if (m_ParentIndices[i] != -1)
{
var parentTransform = GetPoseAt(m_ParentIndices[i]);
// This is slightly inefficient, since for a body with multiple children, we'll end up inverting
// the transform multiple times. Might be able to trade space for perf here.
var invParent = parentTransform.Inverse();
var currentTransform = GetPoseAt(i);
m_LocalSpacePoses[i] = invParent.Multiply(currentTransform);
var parentLinearVel = GetLinearVelocityAt(m_ParentIndices[i]);
var currentLinearVel = GetLinearVelocityAt(i);
m_LocalSpaceLinearVelocities[i] = invParent.rotation * (currentLinearVel - parentLinearVel);
}
else
{
m_LocalSpacePoses[i] = Pose.identity;
m_LocalSpaceLinearVelocities[i] = Vector3.zero;
}
}
}
}
/// <summary>
/// Compute the number of floats needed to represent the poses for the given PhysicsSensorSettings.
/// </summary>
/// <param name="settings"></param>
/// <returns></returns>
public int GetNumPoseObservations(PhysicsSensorSettings settings)
{
int obsPerPose = 0;
obsPerPose += settings.UseModelSpaceTranslations ? 3 : 0;
obsPerPose += settings.UseModelSpaceRotations ? 4 : 0;
obsPerPose += settings.UseLocalSpaceTranslations ? 3 : 0;
obsPerPose += settings.UseLocalSpaceRotations ? 4 : 0;
obsPerPose += settings.UseModelSpaceLinearVelocity ? 3 : 0;
obsPerPose += settings.UseLocalSpaceLinearVelocity ? 3 : 0;
return NumEnabledPoses * obsPerPose;
}
internal void DrawModelSpace(Vector3 offset)
{
UpdateLocalSpacePoses();
UpdateModelSpacePoses();
var pose = m_ModelSpacePoses;
var localPose = m_LocalSpacePoses;
for (var i = 0; i < pose.Length; i++)
{
var current = pose[i];
if (m_ParentIndices[i] == -1)
{
continue;
}
var parent = pose[m_ParentIndices[i]];
Debug.DrawLine(current.position + offset, parent.position + offset, Color.cyan);
var localUp = localPose[i].rotation * Vector3.up;
var localFwd = localPose[i].rotation * Vector3.forward;
var localRight = localPose[i].rotation * Vector3.right;
Debug.DrawLine(current.position+offset, current.position+offset+.1f*localUp, Color.red);
Debug.DrawLine(current.position+offset, current.position+offset+.1f*localFwd, Color.green);
Debug.DrawLine(current.position+offset, current.position+offset+.1f*localRight, Color.blue);
}
}
/// <summary>
/// Simplified representation of a node in the hierarchy for display.
/// </summary>
internal struct DisplayNode
{
/// <summary>
/// Underlying object in the hierarchy. Pass to EditorGUIUtility.ObjectContent() for display.
/// </summary>
public Object NodeObject;
/// <summary>
/// Whether the poses for the object are enabled.
/// </summary>
public bool Enabled;
/// <summary>
/// Depth in the hierarchy, used for adjusting the indent level.
/// </summary>
public int Depth;
/// <summary>
/// The index of the corresponding object in the PoseExtractor.
/// </summary>
public int OriginalIndex;
}
/// <summary>
/// Get a list of display nodes in depth-first order.
/// </summary>
/// <returns></returns>
internal IList<DisplayNode> GetDisplayNodes()
{
if (NumPoses == 0)
{
return Array.Empty<DisplayNode>();
}
var nodesOut = new List<DisplayNode>(NumPoses);
// List of children for each node
var tree = new Dictionary<int, List<int>>();
for (var i = 0; i < NumPoses; i++)
{
var parent = GetParentIndex(i);
if (parent == -1)
{
continue;
}
if (!tree.ContainsKey(parent))
{
tree[parent] = new List<int>();
}
tree[parent].Add(i);
}
// Store (index, depth) in the stack
var stack = new Stack<(int, int)>();
stack.Push((0, 0));
while (stack.Count != 0)
{
var (current, depth) = stack.Pop();
var obj = GetObjectAt(current);
var node = new DisplayNode
{
NodeObject = obj,
Enabled = IsPoseEnabled(current),
OriginalIndex = current,
Depth = depth
};
nodesOut.Add(node);
// Add children
if (tree.ContainsKey(current))
{
// Push to the stack in reverse order
var children = tree[current];
for (var childIdx = children.Count-1; childIdx >= 0; childIdx--)
{
stack.Push((children[childIdx], depth+1));
}
}
// Safety check
// This shouldn't even happen, but in case we have a cycle in the graph
// exit instead of looping forever and eating up all the memory.
if (nodesOut.Count > NumPoses)
{
return nodesOut;
}
}
return nodesOut;
}
}
/// <summary>
/// Extension methods for the Pose struct, in order to improve the readability of some math.
/// </summary>
public static class PoseExtensions
{
/// <summary>
/// Compute the inverse of a Pose. For any Pose P,
/// P.Inverse() * P
/// will equal the identity pose (within tolerance).
/// </summary>
/// <param name="pose"></param>
/// <returns></returns>
public static Pose Inverse(this Pose pose)
{
var rotationInverse = Quaternion.Inverse(pose.rotation);
var translationInverse = -(rotationInverse * pose.position);
return new Pose { rotation = rotationInverse, position = translationInverse };
}
/// <summary>
/// This is equivalent to Pose.GetTransformedBy(), but keeps the order more intuitive.
/// </summary>
/// <param name="pose"></param>
/// <param name="rhs"></param>
/// <returns></returns>
public static Pose Multiply(this Pose pose, Pose rhs)
{
return rhs.GetTransformedBy(pose);
}
/// <summary>
/// Transform the vector by the pose. Conceptually this is equivalent to treating the Pose
/// as a 4x4 matrix and multiplying the augmented vector.
/// See https://en.wikipedia.org/wiki/Affine_transformation#Augmented_matrix for more details.
/// </summary>
/// <param name="pose"></param>
/// <param name="rhs"></param>
/// <returns></returns>
public static Vector3 Multiply(this Pose pose, Vector3 rhs)
{
return pose.rotation * rhs + pose.position;
}
// TODO optimize inv(A)*B?
}
}
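The Pose algebra at the end of this file can be sanity-checked numerically: Inverse() is (q^-1, -(q^-1 * p)) and Multiply() composes poses, so P.Inverse().Multiply(P) should be the identity pose. The sketch below uses NumPy/SciPy with an arbitrary example pose and is only illustrative (handedness conventions differ from Unity, but the algebra is the same).

import numpy as np
from scipy.spatial.transform import Rotation as R

def inverse(rot, pos):
    # Mirrors PoseExtensions.Inverse(): (q^-1, -(q^-1 * p))
    rot_inv = rot.inv()
    return rot_inv, -rot_inv.apply(pos)

def multiply(rot_a, pos_a, rot_b, pos_b):
    # Mirrors pose_a.Multiply(pose_b): apply pose_a to pose_b.
    return rot_a * rot_b, rot_a.apply(pos_b) + pos_a

rot = R.from_quat([0.0, 0.7071068, 0.0, 0.7071068])   # 90 degrees about Y
pos = np.array([1.0, 2.0, 3.0])

inv_rot, inv_pos = inverse(rot, pos)
ident_rot, ident_pos = multiply(inv_rot, inv_pos, rot, pos)
assert np.allclose(ident_pos, 0.0, atol=1e-6)          # identity translation
assert np.allclose(ident_rot.magnitude(), 0.0, atol=1e-6)  # zero rotation angle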

209
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs


using System.Collections.Generic;
using UnityEngine;
namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// Utility class to track a hierarchy of RigidBodies. These are assumed to have a root node,
/// and child nodes are connected to their parents via Joints.
/// </summary>
public class RigidBodyPoseExtractor : PoseExtractor
{
Rigidbody[] m_Bodies;
/// <summary>
/// Optional game object used to determine the root of the poses, separate from the actual Rigidbodies
/// in the hierarchy. For locomotion tasks with ragdolls, this can provide a more stable reference frame.
/// </summary>
GameObject m_VirtualRoot;
/// <summary>
/// Initialize given a root RigidBody.
/// </summary>
/// <param name="rootBody">The root Rigidbody. This has no Joints on it (but other Joints may connect to it).</param>
/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// separate from the actual Rigidbodies in the hierarchy. For locomotion tasks, with ragdolls, this provides
/// a stabilized reference frame, which can improve learning.</param>
/// <param name="enableBodyPoses">Optional mapping of whether a body's pose should be enabled or not.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null,
GameObject virtualRoot = null, Dictionary<Rigidbody, bool> enableBodyPoses = null)
{
if (rootBody == null)
{
return;
}
Rigidbody[] rbs;
Joint[] joints;
if (rootGameObject == null)
{
rbs = rootBody.GetComponentsInChildren<Rigidbody>();
joints = rootBody.GetComponentsInChildren<Joint>();
}
else
{
rbs = rootGameObject.GetComponentsInChildren<Rigidbody>();
joints = rootGameObject.GetComponentsInChildren<Joint>();
}
if (rbs == null || rbs.Length == 0)
{
Debug.Log("No rigid bodies found!");
return;
}
if (rbs[0] != rootBody)
{
Debug.Log("Expected root body at index 0");
return;
}
// Adjust the array if we have a virtual root.
// This will be at index 0, and the "real" root will be parented to it.
if (virtualRoot != null)
{
var extendedRbs = new Rigidbody[rbs.Length + 1];
for (var i = 0; i < rbs.Length; i++)
{
extendedRbs[i + 1] = rbs[i];
}
rbs = extendedRbs;
}
var bodyToIndex = new Dictionary<Rigidbody, int>(rbs.Length);
var parentIndices = new int[rbs.Length];
parentIndices[0] = -1;
for (var i = 0; i < rbs.Length; i++)
{
if (rbs[i] != null)
{
bodyToIndex[rbs[i]] = i;
}
}
foreach (var j in joints)
{
var parent = j.connectedBody;
var child = j.GetComponent<Rigidbody>();
var parentIndex = bodyToIndex[parent];
var childIndex = bodyToIndex[child];
parentIndices[childIndex] = parentIndex;
}
if (virtualRoot != null)
{
// Make sure the original root treats the virtual root as its parent.
parentIndices[1] = 0;
m_VirtualRoot = virtualRoot;
}
m_Bodies = rbs;
Setup(parentIndices);
// By default, ignore the root
SetPoseEnabled(0, false);
if (enableBodyPoses != null)
{
foreach (var pair in enableBodyPoses)
{
var rb = pair.Key;
if (bodyToIndex.TryGetValue(rb, out var index))
{
SetPoseEnabled(index, pair.Value);
}
}
}
}
/// <inheritdoc/>
protected internal override Vector3 GetLinearVelocityAt(int index)
{
if (index == 0 && m_VirtualRoot != null)
{
// No velocity on the virtual root
return Vector3.zero;
}
return m_Bodies[index].velocity;
}
/// <inheritdoc/>
protected internal override Pose GetPoseAt(int index)
{
if (index == 0 && m_VirtualRoot != null)
{
// Use the GameObject's world transform
return new Pose
{
rotation = m_VirtualRoot.transform.rotation,
position = m_VirtualRoot.transform.position
};
}
var body = m_Bodies[index];
return new Pose { rotation = body.rotation, position = body.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
if (index == 0 && m_VirtualRoot != null)
{
return m_VirtualRoot;
}
return m_Bodies[index];
}
internal Rigidbody[] Bodies => m_Bodies;
/// <summary>
/// Get a dictionary indicating which Rigidbodies' poses are enabled or disabled.
/// </summary>
/// <returns></returns>
internal Dictionary<Rigidbody, bool> GetBodyPosesEnabled()
{
var bodyPosesEnabled = new Dictionary<Rigidbody, bool>(m_Bodies.Length);
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
continue; // skip virtual root
}
bodyPosesEnabled[rb] = IsPoseEnabled(i);
}
return bodyPosesEnabled;
}
internal IEnumerable<Rigidbody> GetEnabledRigidbodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return rb;
}
}
}
}
}
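A rough usage sketch for the extractor above (not taken from this changeset; hipsRigidbody and stabilizer are hypothetical scene references, and it assumes the PoseExtractor members used here are publicly accessible):

using UnityEngine;
using Unity.MLAgents.Extensions.Sensors;

public class RagdollPoseDebug : MonoBehaviour
{
    // Hypothetical scene references, assigned in the Inspector.
    public Rigidbody hipsRigidbody;   // root Rigidbody; other bodies connect to it via Joints
    public GameObject stabilizer;     // optional virtual root giving a stable reference frame

    void Start()
    {
        // Walks the Joint hierarchy under this GameObject. Pose 0 is the virtual root
        // (or the root body if no virtual root is given) and is disabled by default.
        var extractor = new RigidBodyPoseExtractor(hipsRigidbody, gameObject, stabilizer);
        Debug.Log($"Tracking {extractor.NumPoses} poses");

        extractor.UpdateModelSpacePoses();
        extractor.UpdateLocalSpacePoses();
    }
}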

116
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// Editor component that creates a PhysicsBodySensor for the Agent.
/// </summary>
public class RigidBodySensorComponent : SensorComponent
{
/// <summary>
/// The root Rigidbody of the system.
/// </summary>
public Rigidbody RootBody;
/// <summary>
/// Optional GameObject used to determine the root of the poses.
/// </summary>
public GameObject VirtualRoot;
/// <summary>
/// Settings defining what types of observations will be generated.
/// </summary>
[SerializeField]
public PhysicsSensorSettings Settings = PhysicsSensorSettings.Default();
/// <summary>
/// Optional sensor name. This must be unique for each Agent.
/// </summary>
[SerializeField]
public string sensorName;
[SerializeField]
[HideInInspector]
RigidBodyPoseExtractor m_PoseExtractor;
/// <summary>
/// Creates a PhysicsBodySensor.
/// </summary>
/// <returns></returns>
public override ISensor CreateSensor()
{
var _sensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{RootBody?.name}" : sensorName;
return new PhysicsBodySensor(GetPoseExtractor(), Settings, _sensorName);
}
/// <inheritdoc/>
public override int[] GetObservationShape()
{
if (RootBody == null)
{
return new[] { 0 };
}
var poseExtractor = GetPoseExtractor();
var numPoseObservations = poseExtractor.GetNumPoseObservations(Settings);
var numJointObservations = 0;
foreach (var rb in poseExtractor.GetEnabledRigidbodies())
{
var joint = rb.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(rb, joint, Settings);
}
return new[] { numPoseObservations + numJointObservations };
}
/// <summary>
/// Get the DisplayNodes of the hierarchy.
/// </summary>
/// <returns></returns>
internal IList<PoseExtractor.DisplayNode> GetDisplayNodes()
{
return GetPoseExtractor().GetDisplayNodes();
}
/// <summary>
/// Lazy construction of the PoseExtractor.
/// </summary>
/// <returns></returns>
RigidBodyPoseExtractor GetPoseExtractor()
{
if (m_PoseExtractor == null)
{
ResetPoseExtractor();
}
return m_PoseExtractor;
}
/// <summary>
/// Reset the pose extractor, trying to keep the enabled state of the corresponding poses the same.
/// </summary>
internal void ResetPoseExtractor()
{
// Get the current enabled state of each body, so that we can reinitialize with them.
Dictionary<Rigidbody, bool> bodyPosesEnabled = null;
if (m_PoseExtractor != null)
{
bodyPosesEnabled = m_PoseExtractor.GetBodyPosesEnabled();
}
m_PoseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot, bodyPosesEnabled);
}
/// <summary>
/// Toggle the pose at the given index.
/// </summary>
/// <param name="index"></param>
/// <param name="enabled"></param>
internal void SetPoseEnabled(int index, bool enabled)
{
GetPoseExtractor().SetPoseEnabled(index, enabled);
}
}
}
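A minimal setup sketch for the component above (not part of this changeset; hipsRigidbody and stabilizer are hypothetical scene references, and the settings mirror the ones used in the tests below):

using UnityEngine;
using Unity.MLAgents.Extensions.Sensors;

public class RagdollSensorSetup : MonoBehaviour
{
    // Hypothetical scene references, assigned in the Inspector.
    public Rigidbody hipsRigidbody;
    public GameObject stabilizer;

    void Awake()
    {
        var sensorComponent = gameObject.AddComponent<RigidBodySensorComponent>();
        sensorComponent.RootBody = hipsRigidbody;
        sensorComponent.VirtualRoot = stabilizer;
        sensorComponent.Settings = new PhysicsSensorSettings
        {
            UseModelSpaceTranslations = true,
            UseLocalSpaceTranslations = true,
            UseLocalSpaceLinearVelocity = true
        };
        // GetObservationShape() reports the total pose + joint observation size
        // that CreateSensor() will produce for this configuration.
        Debug.Log($"Observation size: {sensorComponent.GetObservationShape()[0]}");
    }
}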

140
com.unity.ml-agents.extensions/Tests/Editor/Sensors/ArticulationBodySensorTests.cs


#if UNITY_2020_1_OR_NEWER
using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;
namespace Unity.MLAgents.Extensions.Tests.Sensors
{
public class ArticulationBodySensorTests
{
[Test]
public void TestNullRootBody()
{
var gameObj = new GameObject();
var sensorComponent = gameObj.AddComponent<ArticulationBodySensorComponent>();
var sensor = sensorComponent.CreateSensor();
SensorTestHelper.CompareObservation(sensor, new float[0]);
}
[Test]
public void TestSingleBody()
{
var gameObj = new GameObject();
var articulationBody = gameObj.AddComponent<ArticulationBody>();
var sensorComponent = gameObj.AddComponent<ArticulationBodySensorComponent>();
sensorComponent.RootBody = articulationBody;
sensorComponent.Settings = new PhysicsSensorSettings
{
UseModelSpaceLinearVelocity = true,
UseLocalSpaceTranslations = true,
UseLocalSpaceRotations = true
};
var sensor = sensorComponent.CreateSensor();
sensor.Update();
var expected = new[]
{
0f, 0f, 0f, // ModelSpaceLinearVelocity
0f, 0f, 0f, // LocalSpaceTranslations
0f, 0f, 0f, 1f // LocalSpaceRotations
};
SensorTestHelper.CompareObservation(sensor, expected);
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
}
[Test]
public void TestBodiesWithJoint()
{
var rootObj = new GameObject();
var rootArticBody = rootObj.AddComponent<ArticulationBody>();
var middleGamObj = new GameObject();
var middleArticBody = middleGamObj.AddComponent<ArticulationBody>();
middleArticBody.AddForce(new Vector3(0f, 1f, 0f));
middleGamObj.transform.SetParent(rootObj.transform);
middleGamObj.transform.localPosition = new Vector3(13.37f, 0f, 0f);
middleArticBody.jointType = ArticulationJointType.RevoluteJoint;
var leafGameObj = new GameObject();
var leafArticBody = leafGameObj.AddComponent<ArticulationBody>();
leafGameObj.transform.SetParent(middleGamObj.transform);
leafGameObj.transform.localPosition = new Vector3(4.2f, 0f, 0f);
leafArticBody.jointType = ArticulationJointType.PrismaticJoint;
leafArticBody.linearLockZ = ArticulationDofLock.LimitedMotion;
leafArticBody.zDrive = new ArticulationDrive
{
lowerLimit = -3,
upperLimit = 1
};
#if UNITY_2020_2_OR_NEWER
// ArticulationBody.velocity is read-only in 2020.1
rootArticBody.velocity = new Vector3(1f, 0f, 0f);
middleArticBody.velocity = new Vector3(0f, 1f, 0f);
leafArticBody.velocity = new Vector3(0f, 0f, 1f);
#endif
var sensorComponent = rootObj.AddComponent<ArticulationBodySensorComponent>();
sensorComponent.RootBody = rootArticBody;
sensorComponent.Settings = new PhysicsSensorSettings
{
UseModelSpaceTranslations = true,
UseLocalSpaceTranslations = true,
#if UNITY_2020_2_OR_NEWER
UseLocalSpaceLinearVelocity = true
#endif
};
var sensor = sensorComponent.CreateSensor();
sensor.Update();
var expected = new[]
{
// Model space
0f, 0f, 0f, // Root pos
13.37f, 0f, 0f, // Middle pos
leafGameObj.transform.position.x, 0f, 0f, // Leaf pos
// Local space
0f, 0f, 0f, // Root pos
13.37f, 0f, 0f, // Attached pos
4.2f, 0f, 0f, // Leaf pos
#if UNITY_2020_2_OR_NEWER
0f, 0f, 0f, // Root vel
-1f, 1f, 0f, // Attached vel
0f, -1f, 1f // Leaf vel
#endif
};
SensorTestHelper.CompareObservation(sensor, expected);
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
// Update the settings to only process joint observations
sensorComponent.Settings = new PhysicsSensorSettings
{
UseJointForces = true,
UseJointPositionsAndAngles = true,
};
sensor = sensorComponent.CreateSensor();
sensor.Update();
expected = new[]
{
// revolute
0f, 1f, // joint1.position (sin and cos)
0f, // joint1.force
// prismatic
0.5f, // joint2.position (interpolate between limits)
0f, // joint2.force
};
SensorTestHelper.CompareObservation(sensor, expected);
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
}
}
}
#endif // #if UNITY_2020_1_OR_NEWER

136
com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodySensorTests.cs


using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Extensions.Sensors;
namespace Unity.MLAgents.Extensions.Tests.Sensors
{
public static class SensorTestHelper
{
public static void CompareObservation(ISensor sensor, float[] expected)
{
string errorMessage;
bool isOK = SensorHelper.CompareObservation(sensor, expected, out errorMessage);
Assert.IsTrue(isOK, errorMessage);
}
}
public class RigidBodySensorTests
{
[Test]
public void TestNullRootBody()
{
var gameObj = new GameObject();
var sensorComponent = gameObj.AddComponent<RigidBodySensorComponent>();
var sensor = sensorComponent.CreateSensor();
SensorTestHelper.CompareObservation(sensor, new float[0]);
}
[Test]
public void TestSingleRigidbody()
{
var gameObj = new GameObject();
var rootRb = gameObj.AddComponent<Rigidbody>();
var sensorComponent = gameObj.AddComponent<RigidBodySensorComponent>();
sensorComponent.RootBody = rootRb;
sensorComponent.Settings = new PhysicsSensorSettings
{
UseModelSpaceLinearVelocity = true,
UseLocalSpaceTranslations = true,
UseLocalSpaceRotations = true
};
var sensor = sensorComponent.CreateSensor();
sensor.Update();
// The root body is ignored since it always generates identity values
// and there are no other bodies to generate observations.
var expected = new float[0];
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
SensorTestHelper.CompareObservation(sensor, expected);
}
[Test]
public void TestBodiesWithJoint()
{
var rootObj = new GameObject();
var rootRb = rootObj.AddComponent<Rigidbody>();
rootRb.velocity = new Vector3(1f, 0f, 0f);
var middleGamObj = new GameObject();
var middleRb = middleGamObj.AddComponent<Rigidbody>();
middleRb.velocity = new Vector3(0f, 1f, 0f);
middleGamObj.transform.SetParent(rootObj.transform);
middleGamObj.transform.localPosition = new Vector3(13.37f, 0f, 0f);
var joint = middleGamObj.AddComponent<ConfigurableJoint>();
joint.connectedBody = rootRb;
var leafGameObj = new GameObject();
var leafRb = leafGameObj.AddComponent<Rigidbody>();
leafRb.velocity = new Vector3(0f, 0f, 1f);
leafGameObj.transform.SetParent(middleGamObj.transform);
leafGameObj.transform.localPosition = new Vector3(4.2f, 0f, 0f);
var joint2 = leafGameObj.AddComponent<ConfigurableJoint>();
joint2.connectedBody = middleRb;
var virtualRoot = new GameObject();
var sensorComponent = rootObj.AddComponent<RigidBodySensorComponent>();
sensorComponent.RootBody = rootRb;
sensorComponent.Settings = new PhysicsSensorSettings
{
UseModelSpaceTranslations = true,
UseLocalSpaceTranslations = true,
UseLocalSpaceLinearVelocity = true
};
sensorComponent.VirtualRoot = virtualRoot;
var sensor = sensorComponent.CreateSensor();
sensor.Update();
// Note that the VirtualRoot is ignored from the observations
var expected = new[]
{
// Model space
0f, 0f, 0f, // Root pos
13.37f, 0f, 0f, // Middle pos
leafGameObj.transform.position.x, 0f, 0f, // Leaf pos
// Local space
0f, 0f, 0f, // Root pos
13.37f, 0f, 0f, // Attached pos
4.2f, 0f, 0f, // Leaf pos
1f, 0f, 0f, // Root vel (relative to virtual root)
-1f, 1f, 0f, // Attached vel
0f, -1f, 1f // Leaf vel
};
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
SensorTestHelper.CompareObservation(sensor, expected);
// Update the settings to only process joint observations
sensorComponent.Settings = new PhysicsSensorSettings
{
UseJointPositionsAndAngles = true,
UseJointForces = true,
};
sensor = sensorComponent.CreateSensor();
sensor.Update();
expected = new[]
{
0f, 0f, 0f, // joint1.force
0f, 0f, 0f, // joint1.torque
0f, 0f, 0f, // joint2.force
0f, 0f, 0f, // joint2.torque
};
SensorTestHelper.CompareObservation(sensor, expected);
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
}
}
}

249
com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs


using System;
using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;
namespace Unity.MLAgents.Extensions.Tests.Sensors
{
public class PoseExtractorTests
{
class BasicPoseExtractor : PoseExtractor
{
protected internal override Pose GetPoseAt(int index)
{
return Pose.identity;
}
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
class UselessPoseExtractor : BasicPoseExtractor
{
public void Init(int[] parentIndices)
{
Setup(parentIndices);
}
}
[Test]
public void TestEmptyExtractor()
{
var poseExtractor = new UselessPoseExtractor();
// These should be no-ops
poseExtractor.UpdateLocalSpacePoses();
poseExtractor.UpdateModelSpacePoses();
Assert.AreEqual(0, poseExtractor.NumPoses);
// Iterating through poses and velocities should be an empty loop
foreach (var pose in poseExtractor.GetEnabledModelSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var pose in poseExtractor.GetEnabledLocalSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledModelSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledLocalSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
// Getting a parent index should throw, since Setup() has not been called yet
Assert.Throws<NullReferenceException>(
() => poseExtractor.GetParentIndex(0)
);
// DisplayNodes should be empty
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(0, displayNodes.Count);
}
[Test]
public void TestSimpleExtractor()
{
var poseExtractor = new UselessPoseExtractor();
var parentIndices = new[] { -1, 0 };
poseExtractor.Init(parentIndices);
Assert.AreEqual(2, poseExtractor.NumPoses);
}
/// <summary>
/// A simple "chain" hierarchy, where each object is parented to the one before it.
/// 0 <- 1 <- 2 <- ...
/// </summary>
class ChainPoseExtractor : PoseExtractor
{
public Vector3 offset;
public ChainPoseExtractor(int size)
{
var parents = new int[size];
for (var i = 0; i < size; i++)
{
parents[i] = i - 1;
}
Setup(parents);
}
protected internal override Pose GetPoseAt(int index)
{
var rotation = Quaternion.identity;
var translation = offset + new Vector3(index, index, index);
return new Pose
{
rotation = rotation,
position = translation
};
}
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
[Test]
public void TestChain()
{
var size = 4;
var chain = new ChainPoseExtractor(size);
chain.offset = new Vector3(.5f, .75f, .333f);
chain.UpdateModelSpacePoses();
chain.UpdateLocalSpacePoses();
var modelPoseIndex = 0;
foreach (var modelSpace in chain.GetEnabledModelSpacePoses())
{
if (modelPoseIndex == 0)
{
// Root transforms are currently always the identity.
Assert.IsTrue(modelSpace == Pose.identity);
}
else
{
var expectedModelTranslation = new Vector3(modelPoseIndex, modelPoseIndex, modelPoseIndex);
Assert.IsTrue(expectedModelTranslation == modelSpace.position);
}
modelPoseIndex++;
}
Assert.AreEqual(size, modelPoseIndex);
var localPoseIndex = 0;
foreach (var localSpace in chain.GetEnabledLocalSpacePoses())
{
if (localPoseIndex == 0)
{
// Root transforms are currently always the identity.
Assert.IsTrue(localSpace == Pose.identity);
}
else
{
var expectedLocalTranslation = new Vector3(1, 1, 1);
Assert.IsTrue(expectedLocalTranslation == localSpace.position, $"{expectedLocalTranslation} != {localSpace.position}");
}
localPoseIndex++;
}
Assert.AreEqual(size, localPoseIndex);
}
[Test]
public void TestChainDisplayNodes()
{
var size = 4;
var chain = new ChainPoseExtractor(size);
var displayNodes = chain.GetDisplayNodes();
Assert.AreEqual(size, displayNodes.Count);
for (var i = 0; i < size; i++)
{
var displayNode = displayNodes[i];
Assert.AreEqual(i, displayNode.OriginalIndex);
Assert.AreEqual(null, displayNode.NodeObject);
Assert.AreEqual(i, displayNode.Depth);
Assert.AreEqual(true, displayNode.Enabled);
}
}
[Test]
public void TestDisplayNodesLoop()
{
// Degenerate case with a loop
var poseExtractor = new UselessPoseExtractor();
poseExtractor.Init(new[] {-1, 2, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
// Self-loop
poseExtractor.Init(new[] {-1, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
}
class BadPoseExtractor : BasicPoseExtractor
{
public BadPoseExtractor()
{
var size = 2;
var parents = new int[size];
// Parents are intentionally invalid: a valid hierarchy must have -1 (no parent) at the root, but none of these do.
for (var i = 0; i < size; i++)
{
parents[i] = i;
}
Setup(parents);
}
}
[Test]
public void TestExpectedRoot()
{
Assert.Throws<UnityAgentsException>(() =>
{
var bad = new BadPoseExtractor();
});
}
}
public class PoseExtensionTests
{
[Test]
public void TestInverse()
{
Pose t = new Pose
{
rotation = Quaternion.AngleAxis(23.0f, new Vector3(1, 1, 1).normalized),
position = new Vector3(-1.0f, 2.0f, 3.0f)
};
var inverseT = t.Inverse();
var product = inverseT.Multiply(t);
Assert.IsTrue(Vector3.zero == product.position);
Assert.IsTrue(Quaternion.identity == product.rotation);
Assert.IsTrue(Pose.identity == product);
}
}
}

186
com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs


using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;
using UnityEditor;
namespace Unity.MLAgents.Extensions.Tests.Sensors
{
public class RigidBodyPoseExtractorTests
{
[TearDown]
public void RemoveGameObjects()
{
var objects = GameObject.FindObjectsOfType<GameObject>();
foreach (var o in objects)
{
UnityEngine.Object.DestroyImmediate(o);
}
}
[Test]
public void TestNullRoot()
{
var poseExtractor = new RigidBodyPoseExtractor(null);
// These should be no-ops
poseExtractor.UpdateLocalSpacePoses();
poseExtractor.UpdateModelSpacePoses();
Assert.AreEqual(0, poseExtractor.NumPoses);
}
[Test]
public void TestSingleBody()
{
var go = new GameObject();
var rootRb = go.AddComponent<Rigidbody>();
var poseExtractor = new RigidBodyPoseExtractor(rootRb);
Assert.AreEqual(1, poseExtractor.NumPoses);
// Also pass the GameObject
poseExtractor = new RigidBodyPoseExtractor(rootRb, go);
Assert.AreEqual(1, poseExtractor.NumPoses);
}
[Test]
public void TestNoBodiesFound()
{
// Check that if we can't find any bodies under the game object, we get an empty extractor
var gameObj = new GameObject();
var rootRb = gameObj.AddComponent<Rigidbody>();
var otherGameObj = new GameObject();
var poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
// Add an RB under the other GameObject. Constructor will find a rigid body, but not the root.
var otherRb = otherGameObj.AddComponent<Rigidbody>();
poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
}
[Test]
public void TestTwoBodies()
{
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1);
Assert.AreEqual(2, poseExtractor.NumPoses);
rb1.position = new Vector3(1, 0, 0);
rb1.rotation = Quaternion.Euler(0, 13.37f, 0);
rb1.velocity = new Vector3(2, 0, 0);
Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(0));
// Check DisplayNodes gives expected results
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(2, displayNodes.Count);
Assert.AreEqual(rb1, displayNodes[0].NodeObject);
Assert.AreEqual(false, displayNodes[0].Enabled);
Assert.AreEqual(rb2, displayNodes[1].NodeObject);
Assert.AreEqual(true, displayNodes[1].Enabled);
}
[Test]
public void TestTwoBodiesVirtualRoot()
{
// * virtualRoot
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var virtualRoot = new GameObject("I am vroot");
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1, null, virtualRoot);
Assert.AreEqual(3, poseExtractor.NumPoses);
// "body" 0 has no parent
Assert.AreEqual(-1, poseExtractor.GetParentIndex(0));
// body 1 has parent 0
Assert.AreEqual(0, poseExtractor.GetParentIndex(1));
var virtualRootPos = new Vector3(0,2,0);
var virtualRootRot = Quaternion.Euler(0, 42, 0);
virtualRoot.transform.position = virtualRootPos;
virtualRoot.transform.rotation = virtualRootRot;
Assert.AreEqual(virtualRootPos, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(virtualRootRot == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(Vector3.zero, poseExtractor.GetLinearVelocityAt(0));
// Same as above test, but using index 1
rb1.position = new Vector3(1, 0, 0);
rb1.rotation = Quaternion.Euler(0, 13.37f, 0);
rb1.velocity = new Vector3(2, 0, 0);
Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(1).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(1).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(1));
}
[Test]
public void TestBodyPosesEnabledDictionary()
{
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1);
// Expect the root body disabled and the attached one enabled.
Assert.IsFalse(poseExtractor.IsPoseEnabled(0));
Assert.IsTrue(poseExtractor.IsPoseEnabled(1));
var bodyPosesEnabled = poseExtractor.GetBodyPosesEnabled();
Assert.IsFalse(bodyPosesEnabled[rb1]);
Assert.IsTrue(bodyPosesEnabled[rb2]);
// Swap the values
bodyPosesEnabled[rb1] = true;
bodyPosesEnabled[rb2] = false;
var poseExtractor2 = new RigidBodyPoseExtractor(rb1, null, null, bodyPosesEnabled);
Assert.IsTrue(poseExtractor2.IsPoseEnabled(0));
Assert.IsFalse(poseExtractor2.IsPoseEnabled(1));
}
}
}

0
ml-agents/mlagents/trainers/tf/__init__.py

221
ml-agents/mlagents/trainers/tf/model_serialization.py


from distutils.util import strtobool
import os
from typing import Any, List, Set
from distutils.version import LooseVersion
try:
from tf2onnx.tfonnx import process_tf_graph, tf_optimize
from tf2onnx import optimizer
ONNX_EXPORT_ENABLED = True
except ImportError:
# Either onnx/tf2onnx are not installed, or they're not compatible with the installed version of tensorflow.
ONNX_EXPORT_ENABLED = False
from mlagents.tf_utils import tf
from tensorflow.python.platform import gfile
from tensorflow.python.framework import graph_util
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.settings import SerializationSettings
from mlagents.trainers.tf import tensorflow_to_barracuda as tf2bc
if LooseVersion(tf.__version__) < LooseVersion("1.12.0"):
# ONNX is only tested on 1.12.0 and later
ONNX_EXPORT_ENABLED = False
logger = get_logger(__name__)
POSSIBLE_INPUT_NODES = frozenset(
[
"action_masks",
"epsilon",
"prev_action",
"recurrent_in",
"sequence_length",
"vector_observation",
]
)
POSSIBLE_OUTPUT_NODES = frozenset(
["action", "action_probs", "recurrent_out", "value_estimate"]
)
MODEL_CONSTANTS = frozenset(
[
"action_output_shape",
"is_continuous_control",
"memory_size",
"version_number",
"trainer_major_version",
"trainer_minor_version",
"trainer_patch_version",
]
)
VISUAL_OBSERVATION_PREFIX = "visual_observation_"
def export_policy_model(
model_path: str,
output_filepath: str,
behavior_name: str,
graph: tf.Graph,
sess: tf.Session,
) -> None:
"""
Exports a TF graph for a Policy to .nn and/or .onnx format for Unity embedding.
:param model_path: directory where the frozen graph definition (frozen_graph_def.pb) is written
:param output_filepath: file path to output the model (without file suffix)
:param behavior_name: behavior name of the trained model
:param graph: Tensorflow Graph for the policy
:param sess: Tensorflow session for the policy
"""
frozen_graph_def = _make_frozen_graph(behavior_name, graph, sess)
if not os.path.exists(output_filepath):
os.makedirs(output_filepath)
# Save frozen graph
frozen_graph_def_path = model_path + "/frozen_graph_def.pb"
with gfile.GFile(frozen_graph_def_path, "wb") as f:
f.write(frozen_graph_def.SerializeToString())
# Convert to barracuda
if SerializationSettings.convert_to_barracuda:
tf2bc.convert(frozen_graph_def_path, f"{output_filepath}.nn")
logger.info(f"Exported {output_filepath}.nn")
# Save to onnx too (if we were able to import it)
if ONNX_EXPORT_ENABLED:
if SerializationSettings.convert_to_onnx:
try:
onnx_graph = convert_frozen_to_onnx(behavior_name, frozen_graph_def)
onnx_output_path = f"{output_filepath}.onnx"
with open(onnx_output_path, "wb") as f:
f.write(onnx_graph.SerializeToString())
logger.info(f"Converting to {onnx_output_path}")
except Exception:
# Make conversion errors fatal depending on environment variables (only done during CI)
if _enforce_onnx_conversion():
raise
logger.exception(
"Exception trying to save ONNX graph. Please report this error on "
"https://github.com/Unity-Technologies/ml-agents/issues and "
"attach a copy of frozen_graph_def.pb"
)
else:
if _enforce_onnx_conversion():
raise RuntimeError(
"ONNX conversion enforced, but couldn't import dependencies."
)
def _make_frozen_graph(
behavior_name: str, graph: tf.Graph, sess: tf.Session
) -> tf.GraphDef:
with graph.as_default():
target_nodes = ",".join(_process_graph(behavior_name, graph))
graph_def = graph.as_graph_def()
output_graph_def = graph_util.convert_variables_to_constants(
sess, graph_def, target_nodes.replace(" ", "").split(",")
)
return output_graph_def
def convert_frozen_to_onnx(behavior_name: str, frozen_graph_def: tf.GraphDef) -> Any:
# This is basically https://github.com/onnx/tensorflow-onnx/blob/master/tf2onnx/convert.py
inputs = _get_input_node_names(frozen_graph_def)
outputs = _get_output_node_names(frozen_graph_def)
logger.info(f"onnx export - inputs:{inputs} outputs:{outputs}")
frozen_graph_def = tf_optimize(
inputs, outputs, frozen_graph_def, fold_constant=True
)
with tf.Graph().as_default() as tf_graph:
tf.import_graph_def(frozen_graph_def, name="")
with tf.Session(graph=tf_graph):
g = process_tf_graph(
tf_graph,
input_names=inputs,
output_names=outputs,
opset=SerializationSettings.onnx_opset,
)
onnx_graph = optimizer.optimize_graph(g)
model_proto = onnx_graph.make_model(behavior_name)
return model_proto
def _get_input_node_names(frozen_graph_def: Any) -> List[str]:
"""
Get the list of input node names from the graph.
Names are suffixed with ":0"
"""
node_names = _get_frozen_graph_node_names(frozen_graph_def)
input_names = node_names & POSSIBLE_INPUT_NODES
# Check visual inputs sequentially, and exit as soon as we don't find one
vis_index = 0
while True:
vis_node_name = f"{VISUAL_OBSERVATION_PREFIX}{vis_index}"
if vis_node_name in node_names:
input_names.add(vis_node_name)
else:
break
vis_index += 1
# Append the port
return [f"{n}:0" for n in input_names]
def _get_output_node_names(frozen_graph_def: Any) -> List[str]:
"""
Get the list of output node names from the graph.
Also include constants, so that they will be readable by the
onnx importer.
Names are suffixed with ":0"
"""
node_names = _get_frozen_graph_node_names(frozen_graph_def)
output_names = node_names & (POSSIBLE_OUTPUT_NODES | MODEL_CONSTANTS)
# Append the port
return [f"{n}:0" for n in output_names]
def _get_frozen_graph_node_names(frozen_graph_def: Any) -> Set[str]:
"""
Get all the node names from the graph.
"""
names = set()
for node in frozen_graph_def.node:
names.add(node.name)
return names
def _process_graph(behavior_name: str, graph: tf.Graph) -> List[str]:
"""
Gets the list of the output nodes present in the graph for inference
:return: list of node names
"""
all_nodes = [x.name for x in graph.as_graph_def().node]
nodes = [x for x in all_nodes if x in POSSIBLE_OUTPUT_NODES | MODEL_CONSTANTS]
logger.info("List of nodes to export for behavior :" + behavior_name)
for n in nodes:
logger.info("\t" + n)
return nodes
def _enforce_onnx_conversion() -> bool:
env_var_name = "TEST_ENFORCE_ONNX_CONVERSION"
if env_var_name not in os.environ:
return False
val = os.environ[env_var_name]
try:
# This handles e.g. "false" converting reasonably to False
return strtobool(val)
except Exception:
return False

111
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


import numpy as np
import pytest
import torch
from mlagents.trainers.torch.components.reward_providers import (
CuriosityRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
SEED = [42]
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
curiosity_settings = CuriositySettings(32, 0.01)
curiosity_settings.strength = 0.1
curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
assert curiosity_rp.strength == 0.1
assert curiosity_rp.name == "Curiosity"
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,), (64, 66, 1)], ActionType.DISCRETE, (2, 3)),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
curiosity_settings = CuriositySettings(32, 0.01)
curiosity_rp = create_reward_provider(
RewardSignalType.CURIOSITY, behavior_spec, curiosity_settings
)
assert curiosity_rp.name == "Curiosity"
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
curiosity_settings = CuriositySettings(32, 0.01)
curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
buffer = create_agent_buffer(behavior_spec, 5)
curiosity_rp.update(buffer)
reward_old = curiosity_rp.evaluate(buffer)[0]
for _ in range(10):
curiosity_rp.update(buffer)
reward_new = curiosity_rp.evaluate(buffer)[0]
assert reward_new < reward_old
reward_old = reward_new
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)]
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
curiosity_settings = CuriositySettings(32, 0.1)
curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
buffer = create_agent_buffer(behavior_spec, 5)
for _ in range(200):
curiosity_rp.update(buffer)
prediction = curiosity_rp._network.predict_action(buffer)[0].detach()
target = buffer["actions"][0]
error = float(torch.mean((prediction - target) ** 2))
assert error < 0.001
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
curiosity_settings = CuriositySettings(32, 0.1)
curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
buffer = create_agent_buffer(behavior_spec, 5)
for _ in range(100):
curiosity_rp.update(buffer)
prediction = curiosity_rp._network.predict_next_state(buffer)[0]
target = curiosity_rp._network.get_next_state(buffer)[0]
error = float(torch.mean((prediction - target) ** 2).detach())
assert error < 0.001

56
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


import pytest
from mlagents.trainers.torch.components.reward_providers import (
ExtrinsicRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
settings = RewardSignalSettings()
settings.gamma = 0.2
extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
assert extrinsic_rp.gamma == 0.2
assert extrinsic_rp.name == "Extrinsic"
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
settings = RewardSignalSettings()
extrinsic_rp = create_reward_provider(
RewardSignalType.EXTRINSIC, behavior_spec, settings
)
assert extrinsic_rp.name == "Extrinsic"
@pytest.mark.parametrize("reward", [2.0, 3.0, 4.0])
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:
buffer = create_agent_buffer(behavior_spec, 1000, reward)
settings = RewardSignalSettings()
extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
generated_rewards = extrinsic_rp.evaluate(buffer)
assert (generated_rewards == reward).all()

138
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


from typing import Any
import numpy as np
import pytest
from unittest.mock import patch
import torch
import os
from mlagents.trainers.torch.components.reward_providers import (
GAILRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
DiscriminatorNetwork,
)
CONTINUOUS_PATH = (
os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
+ "/test.demo"
)
DISCRETE_PATH = (
os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
+ "/testdcvis.demo"
)
SEED = [42]
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)]
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = GAILRewardProvider(behavior_spec, gail_settings)
assert gail_rp.name == "GAIL"
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)]
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = create_reward_provider(
RewardSignalType.GAIL, behavior_spec, gail_settings
)
assert gail_rp.name == "GAIL"
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ActionType.CONTINUOUS, 2),
BehaviorSpec([(50,)], ActionType.DISCRETE, (2, 3, 3, 3)),
BehaviorSpec([(10,)], ActionType.DISCRETE, (20,)),
],
)
@pytest.mark.parametrize("use_actions", [False, True])
@patch(
"mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
)
def test_reward_decreases(
demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
buffer_expert = create_agent_buffer(behavior_spec, 1000)
buffer_policy = create_agent_buffer(behavior_spec, 1000)
demo_to_buffer.return_value = None, buffer_expert
gail_settings = GAILSettings(
demo_path="", learning_rate=0.05, use_vail=False, use_actions=use_actions
)
gail_rp = create_reward_provider(
RewardSignalType.GAIL, behavior_spec, gail_settings
)
init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
for _ in range(10):
gail_rp.update(buffer_policy)
reward_expert = gail_rp.evaluate(buffer_expert)[0]
reward_policy = gail_rp.evaluate(buffer_policy)[0]
assert reward_expert >= 0 # GAIL / VAIL reward always positive
assert reward_policy >= 0
reward_expert = gail_rp.evaluate(buffer_expert)[0]
reward_policy = gail_rp.evaluate(buffer_policy)[0]
assert reward_expert > reward_policy # Expert reward greater than non-expert reward
assert (
reward_expert > init_reward_expert
) # Expert reward getting better as network trains
assert (
reward_policy < init_reward_policy
) # Non-expert reward getting worse as network trains
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2),
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3, 3, 3)),
BehaviorSpec([(10,)], ActionType.DISCRETE, (20,)),
],
)
@pytest.mark.parametrize("use_actions", [False, True])
@patch(
"mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
)
def test_reward_decreases_vail(
demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
buffer_expert = create_agent_buffer(behavior_spec, 1000)
buffer_policy = create_agent_buffer(behavior_spec, 1000)
demo_to_buffer.return_value = None, buffer_expert
gail_settings = GAILSettings(
demo_path="", learning_rate=0.005, use_vail=True, use_actions=use_actions
)
DiscriminatorNetwork.initial_beta = 0.0
# we must set the initial value of beta to 0 for testing
# If we do not, the kl-loss will dominate early and will block the estimator
gail_rp = create_reward_provider(
RewardSignalType.GAIL, behavior_spec, gail_settings
)
for _ in range(100):
gail_rp.update(buffer_policy)
reward_expert = gail_rp.evaluate(buffer_expert)[0]
reward_policy = gail_rp.evaluate(buffer_policy)[0]
assert reward_expert >= 0 # GAIL / VAIL reward always positive
assert reward_policy >= 0
reward_expert = gail_rp.evaluate(buffer_expert)[0]
reward_policy = gail_rp.evaluate(buffer_policy)[0]
assert reward_expert > reward_policy # Expert reward greater than non-expert reward

32
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.trajectory import SplitObservations
def create_agent_buffer(
behavior_spec: BehaviorSpec, number: int, reward: float = 0.0
) -> AgentBuffer:
buffer = AgentBuffer()
curr_observations = [
np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
]
next_observations = [
np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
]
action = behavior_spec.create_random_action(1)[0, :]
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)
for i, _ in enumerate(curr_split_obs.visual_observations):
buffer["visual_obs%d" % i].append(curr_split_obs.visual_observations[i])
buffer["next_visual_obs%d" % i].append(
next_split_obs.visual_observations[i]
)
buffer["vector_obs"].append(curr_split_obs.vector_observations)
buffer["next_vector_in"].append(next_split_obs.vector_observations)
buffer["actions"].append(action)
buffer["done"].append(np.zeros(1, dtype=np.float32))
buffer["reward"].append(np.ones(1, dtype=np.float32) * reward)
buffer["masks"].append(np.ones(1, dtype=np.float32))
return buffer

1001
ml-agents/mlagents/trainers/tests/torch/test.demo
(Binary file; diff is too large to display.)

144
ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py


from unittest.mock import MagicMock
import pytest
import mlagents.trainers.tests.mock_brain as mb
import numpy as np
import os
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.torch.components.bc.module import BCModule
from mlagents.trainers.settings import (
TrainerSettings,
BehavioralCloningSettings,
NetworkSettings,
)
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
# model_path = env.external_brain_names[0]
trainer_config = TrainerSettings()
trainer_config.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
bc_module = BCModule(
policy,
settings=bc_settings,
policy_learning_rate=trainer_config.hyperparameters.learning_rate,
default_batch_size=trainer_config.hyperparameters.batch_size,
default_num_epoch=3,
)
return bc_module
# Test default values
def test_bcmodule_defaults():
# See if default values match
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 3
assert bc_module.batch_size == TrainerSettings().hyperparameters.batch_size
# Assign strange values and see if it overrides properly
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
num_epoch=100,
batch_size=10000,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 100
assert bc_module.batch_size == 10000
# Test with continuous control env and vector actions
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_constant_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=0,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate == bc_module.current_lr
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_linear_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=100,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
# Should decay by 10/100 * 0.0003 = 0.00003
bc_module.policy.get_current_step = MagicMock(return_value=10)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate - 0.00003 == pytest.approx(bc_module.current_lr, abs=0.01)
# Test with RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control and visual observations
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_dc_visual_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control, visual observations and RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_dc_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
if __name__ == "__main__":
pytest.main()

446
ml-agents/mlagents/trainers/tests/torch/testdcvis.demo


(Binary demo file recorded from VisualFoodCollectorLearning; diff content not shown.)
V :W�\��y�Q��7��>9Ӱ �=�����>�n��n�t"�ǿ(���{�˃��Ӵ��FH]��Wް����N�a���liݸ��w ��?z~��"���Ǚ7?��$]�K��"�謿��Xvn���I����2׵�Q޹�%s�Hc�ge�J�7���#��h~��� S8��H�s�_�� ܨ������VF�OQ��}n�?��<�L��ݧ��a���X�0�=@������bBi���˃��v}��Hd��W%dV�/F�+�1f9�§�gZ��$�����D�ҹJ�����̝���N�dȡVd!h5bo�%K��H~�������A�o�s"�7���Hi��(���i�<
�( VHLS�m�)��5.�&��K�_��i��,��� ����,������U�d�_���=I&����'��R`u�+�Z�+�s�V�Uh ��m��ל5��b�[����g��j�0ب����8�
��\��F����B��2����a��c��� ��5o��݄�3ʓI'e~�I�9VL���8��E�'\�=v�Q�$mﰁu�u�}����&ֵ��0��J��Y8&`�Y���z���<���NF0 Yg����JՙJ�\��]��B
������ ������s���6q�D���Z�i>�[�D0<���E���럞N�z��H2b-�3�?�J �2s�5?v����z�=/���1�P;��D;�.UE��l��1#����#)�v{�M�ʫi@�yrH��^��d ����`�_ψF��[���R{6�~ɛT)E�J�~��\��(�^u����몔�J��ެ��u=b/|y>����˛h� s�D�Ԩـ���G �Ul��Ni���I��@�Ƨ����~k�&Vx��5���G^��^�"ۃ����y�:a5E�dmF2K��M����Ug���-6�uI�z4K��t�F鉘��Ȗ��dv�^��-��-�>��b.�>��`V� �������jR�r<s���ω�D`V��q�Y�ʨ���*�Ì�N#ov�Y
���tT���i���{�~������ޔ�:x�P�a栭��������i��_IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"

177
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


import pytest
import numpy as np

from mlagents.trainers.ghost.trainer import GhostTrainer
from mlagents.trainers.ghost.controller import GhostController
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings, FrameworkType


@pytest.fixture
def dummy_config():
    return TrainerSettings(
        self_play=SelfPlaySettings(), framework=FrameworkType.PYTORCH
    )


VECTOR_ACTION_SPACE = 1
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 513
NUM_AGENTS = 12


@pytest.mark.parametrize("use_discrete", [True, False])
def test_load_and_set(dummy_config, use_discrete):
    mock_specs = mb.setup_test_behavior_specs(
        use_discrete,
        False,
        vector_action_space=DISCRETE_ACTION_SPACE
        if use_discrete
        else VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )
    trainer_params = dummy_config
    trainer = PPOTrainer("test", 0, trainer_params, True, False, 0, "0")
    trainer.seed = 1
    policy = trainer.create_policy("test", mock_specs)
    trainer.seed = 20  # otherwise graphs are the same
    to_load_policy = trainer.create_policy("test", mock_specs)
    weights = policy.get_weights()
    load_weights = to_load_policy.get_weights()
    try:
        for w, lw in zip(weights, load_weights):
            np.testing.assert_array_equal(w, lw)
    except AssertionError:
        pass
    to_load_policy.load_weights(weights)
    load_weights = to_load_policy.get_weights()
    for w, lw in zip(weights, load_weights):
        np.testing.assert_array_equal(w, lw)


def test_process_trajectory(dummy_config):
    mock_specs = mb.setup_test_behavior_specs(
        True, False, vector_action_space=[2], vector_obs_space=1
    )
    behavior_id_team0 = "test_brain?team=0"
    behavior_id_team1 = "test_brain?team=1"
    brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
    controller = GhostController(100)
    trainer = GhostTrainer(
        ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
    )
    # first policy encountered becomes policy trained by wrapped PPO
    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
    policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
    trainer.add_policy(parsed_behavior_id0, policy)
    trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
    trainer.subscribe_trajectory_queue(trajectory_queue0)
    # Ghost trainer should ignore this queue because off policy
    parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
    policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
    trainer.add_policy(parsed_behavior_id1, policy)
    trajectory_queue1 = AgentManagerQueue(behavior_id_team1)
    trainer.subscribe_trajectory_queue(trajectory_queue1)
    time_horizon = 15
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
        action_space=[2],
    )
    trajectory_queue0.put(trajectory)
    trainer.advance()
    # Check that trainer put trajectory in update buffer
    assert trainer.trainer.update_buffer.num_experiences == 15
    trajectory_queue1.put(trajectory)
    trainer.advance()
    # Check that ghost trainer ignored off policy queue
    assert trainer.trainer.update_buffer.num_experiences == 15
    # Check that it emptied the queue
    assert trajectory_queue1.empty()


def test_publish_queue(dummy_config):
    mock_specs = mb.setup_test_behavior_specs(
        True, False, vector_action_space=[1], vector_obs_space=8
    )
    behavior_id_team0 = "test_brain?team=0"
    behavior_id_team1 = "test_brain?team=1"
    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
    brain_name = parsed_behavior_id0.brain_name
    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
    controller = GhostController(100)
    trainer = GhostTrainer(
        ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
    )
    # First policy encountered becomes policy trained by wrapped PPO
    # This queue should remain empty after swap snapshot
    policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
    trainer.add_policy(parsed_behavior_id0, policy)
    policy_queue0 = AgentManagerQueue(behavior_id_team0)
    trainer.publish_policy_queue(policy_queue0)
    # Ghost trainer should use this queue for ghost policy swap
    parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
    policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
    trainer.add_policy(parsed_behavior_id1, policy)
    policy_queue1 = AgentManagerQueue(behavior_id_team1)
    trainer.publish_policy_queue(policy_queue1)
    # check ghost trainer swap pushes to ghost queue and not trainer
    assert policy_queue0.empty() and policy_queue1.empty()
    trainer._swap_snapshots()
    assert policy_queue0.empty() and not policy_queue1.empty()
    # clear
    policy_queue1.get_nowait()
    mock_specs = mb.setup_test_behavior_specs(
        False,
        False,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )
    buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_specs)
    # Mock out reward signal eval
    buffer["extrinsic_rewards"] = buffer["environment_rewards"]
    buffer["extrinsic_returns"] = buffer["environment_rewards"]
    buffer["extrinsic_value_estimates"] = buffer["environment_rewards"]
    buffer["curiosity_rewards"] = buffer["environment_rewards"]
    buffer["curiosity_returns"] = buffer["environment_rewards"]
    buffer["curiosity_value_estimates"] = buffer["environment_rewards"]
    buffer["advantages"] = buffer["environment_rewards"]
    trainer.trainer.update_buffer = buffer
    # when ghost trainer advance and wrapped trainer buffers full
    # the wrapped trainer pushes updated policy to correct queue
    assert policy_queue0.empty() and policy_queue1.empty()
    trainer.advance()
    assert not policy_queue0.empty() and policy_queue1.empty()


if __name__ == "__main__":
    pytest.main()
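
The three tests above all build behavior ids of the form `name?team=N`. As a short illustration of that convention, and not part of this merge request, the following sketch assumes `BehaviorIdentifiers.from_name_behavior_id` parses team ids exactly as the tests use it:

from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers

# Both team ids should map to the same brain_name, which is why a single GhostTrainer
# wraps one PPOTrainer for "test_brain?team=0" and "test_brain?team=1" above.
parsed_team0 = BehaviorIdentifiers.from_name_behavior_id("test_brain?team=0")
parsed_team1 = BehaviorIdentifiers.from_name_behavior_id("test_brain?team=1")
assert parsed_team0.brain_name == parsed_team1.brain_name == "test_brain"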

0
ml-agents/mlagents/trainers/torch/__init__.py

0
ml-agents/mlagents/trainers/torch/components/__init__.py

15
ml-agents/mlagents/trainers/torch/components/reward_providers/__init__.py


from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (  # noqa F401
    BaseRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.extrinsic_reward_provider import (  # noqa F401
    ExtrinsicRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.curiosity_reward_provider import (  # noqa F401
    CuriosityRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (  # noqa F401
    GAILRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.reward_provider_factory import (  # noqa F401
    create_reward_provider,
)

72
ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py


import numpy as np
from abc import ABC, abstractmethod
from typing import Dict

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.settings import RewardSignalSettings
from mlagents_envs.base_env import BehaviorSpec


class BaseRewardProvider(ABC):
    def __init__(self, specs: BehaviorSpec, settings: RewardSignalSettings) -> None:
        self._policy_specs = specs
        self._gamma = settings.gamma
        self._strength = settings.strength
        self._ignore_done = False

    @property
    def gamma(self) -> float:
        """
        The discount factor for the reward signal
        """
        return self._gamma

    @property
    def strength(self) -> float:
        """
        The strength multiplier of the reward provider
        """
        return self._strength

    @property
    def name(self) -> str:
        """
        The name of the reward provider. Is used for reporting and identification
        """
        class_name = self.__class__.__name__
        return class_name.replace("RewardProvider", "")

    @property
    def ignore_done(self) -> bool:
        """
        If true, when the agent is done, the rewards of the next episode must be
        used to calculate the return of the current episode.
        Is used to mitigate the positive bias in rewards with no natural end.
        """
        return self._ignore_done

    @abstractmethod
    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        """
        Evaluates the reward for the data present in the Dict mini_batch. Use this when evaluating a reward
        function drawn straight from a Buffer.
        :param mini_batch: A Dict of numpy arrays (the format used by our Buffer)
        when drawing from the update buffer.
        :return: a np.ndarray of rewards generated by the reward provider
        """
        raise NotImplementedError(
            "The reward provider's evaluate method has not been implemented "
        )

    @abstractmethod
    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        """
        Update the reward for the data present in the Dict mini_batch. Use this when updating a reward
        function drawn straight from a Buffer.
        :param mini_batch: A Dict of numpy arrays (the format used by our Buffer)
        when drawing from the update buffer.
        :return: A dictionary from string to stats values
        """
        raise NotImplementedError(
            "The reward provider's update method has not been implemented "
        )
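
To make the abstract contract above concrete, here is a minimal hypothetical subclass, a sketch that is not part of this merge request: `evaluate` returns one float32 reward per experience in the mini batch, and `update` returns a dict of stats for reporting. The class name and the constant reward are ours; the real minimal implementation is the ExtrinsicRewardProvider in the next file.

import numpy as np
from typing import Dict

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)


class ConstantRewardProvider(BaseRewardProvider):  # hypothetical example class
    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        # One (unscaled) reward per experience; gamma and strength are exposed by the
        # base-class properties for whatever consumes these rewards.
        num_experiences = len(mini_batch["environment_rewards"])
        return np.ones(num_experiences, dtype=np.float32)

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        # Nothing to learn for a constant signal, so no stats to report.
        return {}

Note that `self.name` for this sketch would be "Constant", via the class-name stripping in the `name` property above.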

15
ml-agents/mlagents/trainers/torch/components/reward_providers/extrinsic_reward_provider.py


import numpy as np
from typing import Dict

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)


class ExtrinsicRewardProvider(BaseRewardProvider):
    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        return np.array(mini_batch["environment_rewards"], dtype=np.float32)

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        return {}
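
A quick usage sketch follows, assuming the mock helpers (`mb.setup_test_behavior_specs`, `mb.simulate_rollout`) behave as they do in the ghost-trainer tests earlier in this diff and that `RewardSignalSettings()` is constructible with its defaults; none of this snippet is part of the merge request itself.

import numpy as np

from mlagents.trainers.settings import RewardSignalSettings
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.torch.components.reward_providers import ExtrinsicRewardProvider

specs = mb.setup_test_behavior_specs(
    True, False, vector_action_space=[2], vector_obs_space=1
)
# simulate_rollout is assumed to populate "environment_rewards" in the buffer.
buffer = mb.simulate_rollout(16, specs)
provider = ExtrinsicRewardProvider(specs, RewardSignalSettings())
rewards = provider.evaluate(buffer)  # a float32 copy of buffer["environment_rewards"]
assert rewards.dtype == np.float32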

43
ml-agents/mlagents/trainers/torch/components/reward_providers/reward_provider_factory.py


from typing import Dict, Type

from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.extrinsic_reward_provider import (
    ExtrinsicRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.curiosity_reward_provider import (
    CuriosityRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
    GAILRewardProvider,
)
from mlagents_envs.base_env import BehaviorSpec

NAME_TO_CLASS: Dict[RewardSignalType, Type[BaseRewardProvider]] = {
    RewardSignalType.EXTRINSIC: ExtrinsicRewardProvider,
    RewardSignalType.CURIOSITY: CuriosityRewardProvider,
    RewardSignalType.GAIL: GAILRewardProvider,
}


def create_reward_provider(
    name: RewardSignalType, specs: BehaviorSpec, settings: RewardSignalSettings
) -> BaseRewardProvider:
    """
    Creates a reward provider class based on the name and config entry provided as a dict.
    :param name: The name of the reward signal
    :param specs: The BehaviorSpecs of the policy
    :param settings: The RewardSignalSettings for that reward signal
    :return: The reward signal class instantiated
    """
    rcls = NAME_TO_CLASS.get(name)
    if not rcls:
        raise UnityTrainerException(f"Unknown reward signal type {name}")
    class_inst = rcls(specs, settings)
    return class_inst
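
A hedged usage sketch of the factory follows; the helper name `make_extrinsic_provider` is ours, and the `BehaviorSpec` is assumed to come from the environment (or from the test helpers shown earlier in this diff) rather than being constructed by hand.

from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.torch.components.reward_providers import create_reward_provider


def make_extrinsic_provider(specs: BehaviorSpec):
    # Defaults for gamma/strength come from RewardSignalSettings in settings.py.
    settings = RewardSignalSettings()
    # The factory looks the class up in NAME_TO_CLASS and instantiates it with (specs, settings).
    return create_reward_provider(RewardSignalType.EXTRINSIC, specs, settings)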

225
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


import numpy as np
from typing import Dict
import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
BaseRewardProvider,
)
from mlagents.trainers.settings import CuriositySettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Swish
from mlagents.trainers.settings import NetworkSettings, EncoderType
class CuriosityRewardProvider(BaseRewardProvider):
beta = 0.2 # Forward vs Inverse loss weight
loss_multiplier = 10.0 # Loss multiplier
def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
super().__init__(specs, settings)
self._ignore_done = True
self._network = CuriosityNetwork(specs, settings)
self.optimizer = torch.optim.Adam(
self._network.parameters(), lr=settings.learning_rate
)
self._has_updated_once = False
def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
with torch.no_grad():
rewards = self._network.compute_reward(mini_batch).detach().cpu().numpy()
rewards = np.minimum(rewards, 1.0 / self.strength)
return rewards * self._has_updated_once
def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
self._has_updated_once = True
forward_loss = self._network.compute_forward_loss(mini_batch)
inverse_loss = self._network.compute_inverse_loss(mini_batch)
loss = self.loss_multiplier * (
self.beta * forward_loss + (1.0 - self.beta) * inverse_loss
)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {
"Losses/Curiosity Forward Loss": forward_loss.detach().cpu().numpy(),
"Losses/Curiosity Inverse Loss": inverse_loss.detach().cpu().numpy(),
}
class CuriosityNetwork(torch.nn.Module):
EPSILON = 1e-10
def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
super().__init__()
self._policy_specs = specs
state_encoder_settings = NetworkSettings(
normalize=False,
hidden_units=settings.encoding_size,
num_layers=2,
vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._state_encoder = NetworkBody(
specs.observation_shapes, state_encoder_settings
)
self._action_flattener = ModelUtils.ActionFlattener(specs)
        self.inverse_model_action_prediction = torch.nn.Sequential(
linear_layer(2 * settings.encoding_size, 256),
Swish(),
linear_layer(256, self._action_flattener.flattened_size),
)
self.forward_model_next_state_prediction = torch.nn.Sequential(
linear_layer(
settings.encoding_size + self._action_flattener.flattened_size, 256
),
Swish(),
linear_layer(256, settings.encoding_size),
)
def get_current_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Extracts the current state embedding from a mini_batch.
"""
n_vis = len(self._state_encoder.visual_encoders)
hidden, _ = self._state_encoder.forward(
vec_inputs=[
ModelUtils.list_to_tensor(mini_batch["vector_obs"], dtype=torch.float)
],
vis_inputs=[
ModelUtils.list_to_tensor(
mini_batch["visual_obs%d" % i], dtype=torch.float
)
for i in range(n_vis)
],
)
return hidden
def get_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Extracts the next state embedding from a mini_batch.
"""
n_vis = len(self._state_encoder.visual_encoders)
hidden, _ = self._state_encoder.forward(
vec_inputs=[
ModelUtils.list_to_tensor(
mini_batch["next_vector_in"], dtype=torch.float
)
],
vis_inputs=[
ModelUtils.list_to_tensor(
mini_batch["next_visual_obs%d" % i], dtype=torch.float
)
for i in range(n_vis)
],
)
return hidden
def predict_action(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
In the continuous case, returns the predicted action.
        In the discrete case, returns the concatenated per-branch action probabilities.
"""
inverse_model_input = torch.cat(
(self.get_current_state(mini_batch), self.get_next_state(mini_batch)), dim=1
)
        hidden = self.inverse_model_action_prediction(inverse_model_input)
if self._policy_specs.is_action_continuous():
return hidden
else:
branches = ModelUtils.break_into_branches(
hidden, self._policy_specs.discrete_action_branches
)
branches = [torch.softmax(b, dim=1) for b in branches]
return torch.cat(branches, dim=1)
def predict_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Uses the current state embedding and the action of the mini_batch to predict
the next state embedding.
"""
if self._policy_specs.is_action_continuous():
action = ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
else:
action = torch.cat(
ModelUtils.actions_to_onehot(
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
self._policy_specs.discrete_action_branches,
),
dim=1,
)
forward_model_input = torch.cat(
(self.get_current_state(mini_batch), action), dim=1
)
return self.forward_model_next_state_prediction(forward_model_input)
def compute_inverse_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Computes the inverse loss for a mini_batch. Corresponds to the error on the
action prediction (given the current and next state).
"""
predicted_action = self.predict_action(mini_batch)
if self._policy_specs.is_action_continuous():
sq_difference = (
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
- predicted_action
) ** 2
sq_difference = torch.sum(sq_difference, dim=1)
return torch.mean(
ModelUtils.dynamic_partition(
sq_difference,
ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
2,
)[1]
)
else:
true_action = torch.cat(
ModelUtils.actions_to_onehot(
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
self._policy_specs.discrete_action_branches,
),
dim=1,
)
cross_entropy = torch.sum(
-torch.log(predicted_action + self.EPSILON) * true_action, dim=1
)
return torch.mean(
ModelUtils.dynamic_partition(
cross_entropy,
ModelUtils.list_to_tensor(
mini_batch["masks"], dtype=torch.float
), # use masks not action_masks
2,
)[1]
)
def compute_reward(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Calculates the curiosity reward for the mini_batch. Corresponds to the error
between the predicted and actual next state.
"""
predicted_next_state = self.predict_next_state(mini_batch)
target = self.get_next_state(mini_batch)
sq_difference = 0.5 * (target - predicted_next_state) ** 2
sq_difference = torch.sum(sq_difference, dim=1)
return sq_difference
def compute_forward_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Computes the loss for the next state prediction
"""
return torch.mean(
ModelUtils.dynamic_partition(
self.compute_reward(mini_batch),
ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
2,
)[1]
)
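
A standalone numeric sketch of the arithmetic used above (the real provider encodes observations with NetworkBody and the ModelUtils helpers from this diff; the arrays and the inverse-loss value below are made up):

import numpy as np

# Curiosity reward: squared error between predicted and actual next-state
# embeddings, mirroring compute_reward() above.
predicted_next = np.array([[0.1, 0.4], [0.0, 1.0]], dtype=np.float32)
actual_next = np.array([[0.0, 0.5], [0.2, 0.8]], dtype=np.float32)
reward = 0.5 * np.sum((actual_next - predicted_next) ** 2, axis=1)

# The training loss mixes forward and inverse losses as in update():
beta, loss_multiplier = 0.2, 10.0
forward_loss, inverse_loss = reward.mean(), 0.3  # inverse_loss is a placeholder
total_loss = loss_multiplier * (beta * forward_loss + (1.0 - beta) * inverse_loss)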

256
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


from typing import Optional, Dict
import numpy as np
import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
BaseRewardProvider,
)
from mlagents.trainers.settings import GAILSettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Swish, Initialization
from mlagents.trainers.settings import NetworkSettings, EncoderType
from mlagents.trainers.demo_loader import demo_to_buffer
class GAILRewardProvider(BaseRewardProvider):
def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
super().__init__(specs, settings)
self._ignore_done = True
self._discriminator_network = DiscriminatorNetwork(specs, settings)
_, self._demo_buffer = demo_to_buffer(
settings.demo_path, 1, specs
) # This is supposed to be the sequence length but we do not have access here
params = list(self._discriminator_network.parameters())
self.optimizer = torch.optim.Adam(params, lr=settings.learning_rate)
def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
with torch.no_grad():
estimates, _ = self._discriminator_network.compute_estimate(
mini_batch, use_vail_noise=False
)
return (
-torch.log(
1.0
- estimates.squeeze(dim=1)
* (1.0 - self._discriminator_network.EPSILON)
)
.detach()
.cpu()
.numpy()
)
def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
expert_batch = self._demo_buffer.sample_mini_batch(
mini_batch.num_experiences, 1
)
loss, stats_dict = self._discriminator_network.compute_loss(
mini_batch, expert_batch
)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return stats_dict
class DiscriminatorNetwork(torch.nn.Module):
gradient_penalty_weight = 10.0
z_size = 128
alpha = 0.0005
mutual_information = 0.5
EPSILON = 1e-7
initial_beta = 0.0
def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
super().__init__()
self._policy_specs = specs
self._use_vail = settings.use_vail
self._settings = settings
state_encoder_settings = NetworkSettings(
normalize=False,
hidden_units=settings.encoding_size,
num_layers=2,
vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._state_encoder = NetworkBody(
specs.observation_shapes, state_encoder_settings
)
self._action_flattener = ModelUtils.ActionFlattener(specs)
encoder_input_size = settings.encoding_size
if settings.use_actions:
encoder_input_size += (
self._action_flattener.flattened_size + 1
) # + 1 is for done
self.encoder = torch.nn.Sequential(
linear_layer(encoder_input_size, settings.encoding_size),
Swish(),
linear_layer(settings.encoding_size, settings.encoding_size),
Swish(),
)
estimator_input_size = settings.encoding_size
if settings.use_vail:
estimator_input_size = self.z_size
self._z_sigma = torch.nn.Parameter(
torch.ones((self.z_size), dtype=torch.float), requires_grad=True
)
self._z_mu_layer = linear_layer(
settings.encoding_size,
self.z_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
)
self._beta = torch.nn.Parameter(
torch.tensor(self.initial_beta, dtype=torch.float), requires_grad=False
)
self._estimator = torch.nn.Sequential(
linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
)
def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
        Creates the action Tensor. In the continuous case, this is the action itself.
        In the discrete case, it is the concatenation of one-hot action Tensors.
"""
return self._action_flattener.forward(
torch.as_tensor(mini_batch["actions"], dtype=torch.float)
)
def get_state_encoding(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Creates the observation input.
"""
n_vis = len(self._state_encoder.visual_encoders)
hidden, _ = self._state_encoder.forward(
vec_inputs=[torch.as_tensor(mini_batch["vector_obs"], dtype=torch.float)],
vis_inputs=[
torch.as_tensor(mini_batch["visual_obs%d" % i], dtype=torch.float)
for i in range(n_vis)
],
)
return hidden
def compute_estimate(
self, mini_batch: AgentBuffer, use_vail_noise: bool = False
) -> torch.Tensor:
"""
        Given a mini_batch, computes the estimate (how strongly the discriminator believes
        the data was sampled from the demonstration data).
        :param mini_batch: The AgentBuffer of data
        :param use_vail_noise: Only used with VAIL. If true, samples the latent code;
        if false, returns the mean of the code.
"""
encoder_input = self.get_state_encoding(mini_batch)
if self._settings.use_actions:
actions = self.get_action_input(mini_batch)
dones = torch.as_tensor(mini_batch["done"], dtype=torch.float)
encoder_input = torch.cat([encoder_input, actions, dones], dim=1)
hidden = self.encoder(encoder_input)
z_mu: Optional[torch.Tensor] = None
if self._settings.use_vail:
z_mu = self._z_mu_layer(hidden)
hidden = torch.normal(z_mu, self._z_sigma * use_vail_noise)
estimate = self._estimator(hidden)
return estimate, z_mu
def compute_loss(
self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
) -> torch.Tensor:
"""
Given a policy mini_batch and an expert mini_batch, computes the loss of the discriminator.
"""
total_loss = torch.zeros(1)
stats_dict: Dict[str, np.ndarray] = {}
policy_estimate, policy_mu = self.compute_estimate(
policy_batch, use_vail_noise=True
)
expert_estimate, expert_mu = self.compute_estimate(
expert_batch, use_vail_noise=True
)
stats_dict["Policy/GAIL Policy Estimate"] = (
policy_estimate.mean().detach().cpu().numpy()
)
stats_dict["Policy/GAIL Expert Estimate"] = (
expert_estimate.mean().detach().cpu().numpy()
)
discriminator_loss = -(
torch.log(expert_estimate + self.EPSILON)
+ torch.log(1.0 - policy_estimate + self.EPSILON)
).mean()
stats_dict["Losses/GAIL Loss"] = discriminator_loss.detach().cpu().numpy()
total_loss += discriminator_loss
if self._settings.use_vail:
# KL divergence loss (encourage latent representation to be normal)
kl_loss = torch.mean(
-torch.sum(
1
+ (self._z_sigma ** 2).log()
- 0.5 * expert_mu ** 2
- 0.5 * policy_mu ** 2
- (self._z_sigma ** 2),
dim=1,
)
)
vail_loss = self._beta * (kl_loss - self.mutual_information)
with torch.no_grad():
self._beta.data = torch.max(
self._beta + self.alpha * (kl_loss - self.mutual_information),
torch.tensor(0.0),
)
total_loss += vail_loss
stats_dict["Policy/GAIL Beta"] = self._beta.detach().cpu().numpy()
stats_dict["Losses/GAIL KL Loss"] = kl_loss.detach().cpu().numpy()
if self.gradient_penalty_weight > 0.0:
total_loss += (
self.gradient_penalty_weight
* self.compute_gradient_magnitude(policy_batch, expert_batch)
)
return total_loss, stats_dict
def compute_gradient_magnitude(
self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
) -> torch.Tensor:
"""
        Gradient penalty from https://arxiv.org/pdf/1704.00028. Adds stability, especially
        for off-policy algorithms. Computes gradients w.r.t. a randomly interpolated input.
"""
policy_obs = self.get_state_encoding(policy_batch)
expert_obs = self.get_state_encoding(expert_batch)
obs_epsilon = torch.rand(policy_obs.shape)
encoder_input = obs_epsilon * policy_obs + (1 - obs_epsilon) * expert_obs
if self._settings.use_actions:
policy_action = self.get_action_input(policy_batch)
            expert_action = self.get_action_input(expert_batch)
action_epsilon = torch.rand(policy_action.shape)
policy_dones = torch.as_tensor(policy_batch["done"], dtype=torch.float)
expert_dones = torch.as_tensor(expert_batch["done"], dtype=torch.float)
dones_epsilon = torch.rand(policy_dones.shape)
encoder_input = torch.cat(
[
encoder_input,
action_epsilon * policy_action
+ (1 - action_epsilon) * expert_action,
dones_epsilon * policy_dones + (1 - dones_epsilon) * expert_dones,
],
dim=1,
)
hidden = self.encoder(encoder_input)
if self._settings.use_vail:
use_vail_noise = True
z_mu = self._z_mu_layer(hidden)
hidden = torch.normal(z_mu, self._z_sigma * use_vail_noise)
hidden = self._estimator(hidden)
estimate = torch.mean(torch.sum(hidden, dim=1))
gradient = torch.autograd.grad(estimate, encoder_input)[0]
# Norm's gradient could be NaN at 0. Use our own safe_norm
safe_norm = (torch.sum(gradient ** 2, dim=1) + self.EPSILON).sqrt()
gradient_mag = torch.mean((safe_norm - 1) ** 2)
return gradient_mag
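
A small numeric sketch of the reward transform computed in evaluate() above (the estimates array is made up; the real values come from the discriminator):

import numpy as np

EPSILON = 1e-7
# Discriminator estimates close to 1 mean "looks like the demonstration data".
estimates = np.array([0.9, 0.5, 0.1], dtype=np.float32)
# Same transform as evaluate(): higher estimates yield larger rewards, and the
# (1 - EPSILON) factor keeps the argument of log away from zero.
gail_reward = -np.log(1.0 - estimates * (1.0 - EPSILON))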

0
ml-agents/mlagents/trainers/torch/components/bc/__init__.py

185
ml-agents/mlagents/trainers/torch/components/bc/module.py


from typing import Dict
import numpy as np
import torch
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.settings import BehavioralCloningSettings, ScheduleType
from mlagents.trainers.torch.utils import ModelUtils
class BCModule:
def __init__(
self,
policy: TorchPolicy,
settings: BehavioralCloningSettings,
policy_learning_rate: float,
default_batch_size: int,
default_num_epoch: int,
):
"""
A BC trainer that can be used inline with RL.
:param policy: The policy of the learning model
:param settings: The settings for BehavioralCloning including LR strength, batch_size,
num_epochs, samples_per_update and LR annealing steps.
:param policy_learning_rate: The initial Learning Rate of the policy. Used to set an appropriate learning rate
for the pretrainer.
"""
self.policy = policy
self._anneal_steps = settings.steps
self.current_lr = policy_learning_rate * settings.strength
learning_rate_schedule: ScheduleType = ScheduleType.LINEAR if self._anneal_steps > 0 else ScheduleType.CONSTANT
self.decay_learning_rate = ModelUtils.DecayedValue(
learning_rate_schedule, self.current_lr, 1e-10, self._anneal_steps
)
params = self.policy.actor_critic.parameters()
self.optimizer = torch.optim.Adam(params, lr=self.current_lr)
_, self.demonstration_buffer = demo_to_buffer(
settings.demo_path, policy.sequence_length, policy.behavior_spec
)
self.batch_size = (
settings.batch_size if settings.batch_size else default_batch_size
)
self.num_epoch = settings.num_epoch if settings.num_epoch else default_num_epoch
self.n_sequences = max(
min(self.batch_size, self.demonstration_buffer.num_experiences)
// policy.sequence_length,
1,
)
self.has_updated = False
self.use_recurrent = self.policy.use_recurrent
self.samples_per_update = settings.samples_per_update
def update(self) -> Dict[str, np.ndarray]:
"""
Updates model using buffer.
:return: The loss of the update.
"""
# Don't continue training if the learning rate has reached 0, to reduce training time.
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
if self.current_lr <= 0:
return {"Losses/Pretraining Loss": 0}
batch_losses = []
possible_demo_batches = (
self.demonstration_buffer.num_experiences // self.n_sequences
)
possible_batches = possible_demo_batches
max_batches = self.samples_per_update // self.n_sequences
n_epoch = self.num_epoch
for _ in range(n_epoch):
self.demonstration_buffer.shuffle(
sequence_length=self.policy.sequence_length
)
if max_batches == 0:
num_batches = possible_batches
else:
num_batches = min(possible_batches, max_batches)
for i in range(num_batches // self.policy.sequence_length):
demo_update_buffer = self.demonstration_buffer
start = i * self.n_sequences * self.policy.sequence_length
end = (i + 1) * self.n_sequences * self.policy.sequence_length
mini_batch_demo = demo_update_buffer.make_mini_batch(start, end)
run_out = self._update_batch(mini_batch_demo, self.n_sequences)
loss = run_out["loss"]
batch_losses.append(loss)
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.current_lr = decay_lr
self.has_updated = True
update_stats = {"Losses/Pretraining Loss": np.mean(batch_losses)}
return update_stats
def _behavioral_cloning_loss(self, selected_actions, log_probs, expert_actions):
if self.policy.use_continuous_act:
bc_loss = torch.nn.functional.mse_loss(selected_actions, expert_actions)
else:
log_prob_branches = ModelUtils.break_into_branches(
log_probs, self.policy.act_size
)
bc_loss = torch.mean(
torch.stack(
[
torch.sum(
-torch.nn.functional.log_softmax(log_prob_branch, dim=1)
* expert_actions_branch,
dim=1,
)
for log_prob_branch, expert_actions_branch in zip(
log_prob_branches, expert_actions
)
]
)
)
return bc_loss
def _update_batch(
self, mini_batch_demo: Dict[str, np.ndarray], n_sequences: int
) -> Dict[str, float]:
"""
        Helper function for update().
"""
vec_obs = [ModelUtils.list_to_tensor(mini_batch_demo["vector_obs"])]
act_masks = None
if self.policy.use_continuous_act:
expert_actions = ModelUtils.list_to_tensor(mini_batch_demo["actions"])
else:
raw_expert_actions = ModelUtils.list_to_tensor(
mini_batch_demo["actions"], dtype=torch.long
)
expert_actions = ModelUtils.actions_to_onehot(
raw_expert_actions, self.policy.act_size
)
act_masks = ModelUtils.list_to_tensor(
np.ones(
(
self.n_sequences * self.policy.sequence_length,
sum(self.policy.behavior_spec.discrete_action_branches),
),
dtype=np.float32,
)
)
memories = []
if self.policy.use_recurrent:
memories = torch.zeros(
1, self.n_sequences, self.policy.actor_critic.half_mem_size * 2
)
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(
mini_batch_demo["visual_obs%d" % idx]
)
vis_obs.append(vis_ob)
else:
vis_obs = []
selected_actions, all_log_probs, _, _, _ = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,
memories=memories,
seq_len=self.policy.sequence_length,
all_log_probs=True,
)
bc_loss = self._behavioral_cloning_loss(
selected_actions, all_log_probs, expert_actions
)
self.optimizer.zero_grad()
bc_loss.backward()
self.optimizer.step()
run_out = {"loss": bc_loss.detach().cpu().numpy()}
return run_out
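
The learning-rate anneal above delegates to ModelUtils.DecayedValue with a LINEAR schedule. A plausible standalone equivalent, written as an assumption about that behaviour rather than a copy of it, is:

def linear_decay(initial_lr: float, min_lr: float, max_steps: int, step: int) -> float:
    # Linearly anneal from initial_lr at step 0 down to min_lr at max_steps.
    if max_steps <= 0:
        return initial_lr
    fraction = min(float(step) / float(max_steps), 1.0)
    return max(min_lr, initial_lr * (1.0 - fraction))

# With the BCModule wiring above: current_lr = policy_learning_rate * settings.strength,
# annealed over settings.steps. Example (all numbers illustrative):
lr_at_half = linear_decay(3e-4, 1e-10, 10000, 5000)  # -> 1.5e-4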

77
ml-agents/mlagents/trainers/torch/model_serialization.py


import os
import torch
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.settings import SerializationSettings
logger = get_logger(__name__)
class ModelSerializer:
def __init__(self, policy):
self.policy = policy
batch_dim = [1]
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
dummy_vis_obs = (
[torch.zeros(batch_dim + list(self.policy.vis_obs_shape))]
if self.policy.vis_obs_size > 0
else []
)
dummy_masks = torch.ones(batch_dim + [sum(self.policy.actor_critic.act_size)])
dummy_memories = torch.zeros(batch_dim + [1] + [self.policy.m_size])
        # Need to pass all possible inputs since keyword arguments are not
        # supported by torch.onnx.export()
self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)
        # Input names can only contain the inputs actually used, since torch.onnx.export
        # maps input_names only to input nodes that exist in the graph
self.input_names = []
self.dynamic_axes = {"action": {0: "batch"}, "action_probs": {0: "batch"}}
if self.policy.use_vec_obs:
self.input_names.append("vector_observation")
self.dynamic_axes.update({"vector_observation": {0: "batch"}})
if self.policy.use_vis_obs:
self.input_names.append("visual_observation")
self.dynamic_axes.update({"visual_observation": {0: "batch"}})
if not self.policy.use_continuous_act:
self.input_names.append("action_masks")
self.dynamic_axes.update({"action_masks": {0: "batch"}})
if self.policy.use_recurrent:
self.input_names.append("memories")
self.dynamic_axes.update({"memories": {0: "batch"}})
self.output_names = [
"action",
"action_probs",
"version_number",
"memory_size",
"is_continuous_control",
"action_output_shape",
]
def export_policy_model(self, output_filepath: str) -> None:
"""
Exports a Torch model for a Policy to .onnx format for Unity embedding.
:param output_filepath: file path to output the model (without file suffix)
"""
if not os.path.exists(output_filepath):
os.makedirs(output_filepath)
onnx_output_path = f"{output_filepath}.onnx"
logger.info(f"Converting to {onnx_output_path}")
torch.onnx.export(
self.policy.actor_critic,
self.dummy_input,
onnx_output_path,
verbose=False,
opset_version=SerializationSettings.onnx_opset,
input_names=self.input_names,
output_names=self.output_names,
dynamic_axes=self.dynamic_axes,
)
logger.info(f"Exported {onnx_output_path}")

0
ml-agents/mlagents/trainers/saver/__init__.py

171
ml-agents/mlagents/trainers/saver/tf_saver.py


import os
import shutil
from typing import Optional, Union, cast
from mlagents_envs.exception import UnityPolicyException
from mlagents_envs.logging_util import get_logger
from mlagents.tf_utils import tf
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.tf.model_serialization import export_policy_model
from mlagents.trainers.settings import TrainerSettings, SerializationSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
from mlagents.trainers import __version__
logger = get_logger(__name__)
class TFSaver(BaseSaver):
"""
Saver class for TensorFlow
"""
def __init__(
self, trainer_settings: TrainerSettings, model_path: str, load: bool = False
):
super().__init__()
self.model_path = model_path
self.initialize_path = trainer_settings.init_path
self._keep_checkpoints = trainer_settings.keep_checkpoints
self.load = load
        # Currently only supports saving one policy; this is the one that will be saved.
self.policy: Optional[TFPolicy] = None
self.graph = None
self.sess = None
self.tf_saver = None
def register(self, module: Union[TFPolicy, TFOptimizer]) -> None:
if isinstance(module, TFPolicy):
self._register_policy(module)
elif isinstance(module, TFOptimizer):
self._register_optimizer(module)
else:
raise UnityPolicyException(
"Registering Object of unsupported type {} to Saver ".format(
type(module)
)
)
def _register_policy(self, policy: TFPolicy) -> None:
if self.policy is None:
self.policy = policy
self.graph = self.policy.graph
self.sess = self.policy.sess
with self.policy.graph.as_default():
self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
def save_checkpoint(self, behavior_name: str, step: int) -> str:
checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
# Save the TF checkpoint and graph definition
if self.graph:
with self.graph.as_default():
if self.tf_saver:
self.tf_saver.save(self.sess, f"{checkpoint_path}.ckpt")
tf.train.write_graph(
self.graph, self.model_path, "raw_graph_def.pb", as_text=False
)
# also save the policy so we have optimized model files for each checkpoint
self.export(checkpoint_path, behavior_name)
return checkpoint_path
def export(self, output_filepath: str, behavior_name: str) -> None:
# save model if there is only one worker or
# only on worker-0 if there are multiple workers
if self.policy and self.policy.rank is not None and self.policy.rank != 0:
return
export_policy_model(
self.model_path, output_filepath, behavior_name, self.graph, self.sess
)
def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None:
# If there is an initialize path, load from that. Else, load from the set model path.
# If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
# e.g., resume from an initialize path.
if policy is None:
policy = self.policy
policy = cast(TFPolicy, policy)
reset_steps = not self.load
if self.initialize_path is not None:
self._load_graph(
policy, self.initialize_path, reset_global_steps=reset_steps
)
elif self.load:
self._load_graph(policy, self.model_path, reset_global_steps=reset_steps)
else:
policy.initialize()
TFPolicy.broadcast_global_variables(0)
def _load_graph(
self, policy: TFPolicy, model_path: str, reset_global_steps: bool = False
) -> None:
with policy.graph.as_default():
logger.info(f"Loading model from {model_path}.")
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None:
raise UnityPolicyException(
"The model {} could not be loaded. Make "
"sure you specified the right "
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)
)
if self.tf_saver:
try:
self.tf_saver.restore(policy.sess, ckpt.model_checkpoint_path)
except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path
)
)
self._check_model_version(__version__)
if reset_global_steps:
policy.set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {policy.get_current_step()}.")
def _check_model_version(self, version: str) -> None:
"""
Checks whether the model being loaded was created with the same version of
ML-Agents, and throw a warning if not so.
"""
if self.policy is not None and self.policy.version_tensors is not None:
loaded_ver = tuple(
num.eval(session=self.sess) for num in self.policy.version_tensors
)
if loaded_ver != TFPolicy._convert_version_string(version):
logger.warning(
f"The model checkpoint you are loading from was saved with ML-Agents version "
f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents"
f"version is {version}. Model may not behave properly."
)
def copy_final_model(self, source_nn_path: str) -> None:
"""
Copy the .nn file at the given source to the destination.
Also copies the corresponding .onnx file if it exists.
"""
final_model_name = os.path.splitext(source_nn_path)[0]
if SerializationSettings.convert_to_barracuda:
source_path = f"{final_model_name}.nn"
destination_path = f"{self.model_path}.nn"
shutil.copyfile(source_path, destination_path)
logger.info(f"Copied {source_path} to {destination_path}.")
if SerializationSettings.convert_to_onnx:
try:
source_path = f"{final_model_name}.onnx"
destination_path = f"{self.model_path}.onnx"
shutil.copyfile(source_path, destination_path)
logger.info(f"Copied {source_path} to {destination_path}.")
except OSError:
pass
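
For orientation, the plain TF1 checkpointing pattern that this class wraps looks roughly like the following (a sketch using tensorflow.compat.v1 directly rather than the mlagents.tf_utils wrapper; paths and names are illustrative):

import os
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
os.makedirs("./model", exist_ok=True)

graph = tf.Graph()
with graph.as_default():
    step = tf.Variable(0, name="global_step", trainable=False)
    saver = tf.train.Saver(max_to_keep=5)  # mirrors keep_checkpoints
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        # Save a checkpoint, then restore from the latest one in the directory.
        saver.save(sess, "./model/MyBehavior-0.ckpt")
        ckpt = tf.train.get_checkpoint_state("./model")
        if ckpt is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)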

118
ml-agents/mlagents/trainers/saver/torch_saver.py


import os
import shutil
import torch
from typing import Dict, Union, Optional, cast
from mlagents_envs.exception import UnityPolicyException
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.settings import TrainerSettings, SerializationSettings
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.torch.model_serialization import ModelSerializer
logger = get_logger(__name__)
class TorchSaver(BaseSaver):
"""
Saver class for PyTorch
"""
def __init__(
self, trainer_settings: TrainerSettings, model_path: str, load: bool = False
):
super().__init__()
self.model_path = model_path
self.initialize_path = trainer_settings.init_path
self._keep_checkpoints = trainer_settings.keep_checkpoints
self.load = load
self.policy: Optional[TorchPolicy] = None
self.exporter: Optional[ModelSerializer] = None
        self.modules: Dict[str, torch.nn.Module] = {}
def register(self, module: Union[TorchPolicy, TorchOptimizer]) -> None:
if isinstance(module, TorchPolicy) or isinstance(module, TorchOptimizer):
self.modules.update(module.get_modules()) # type: ignore
else:
raise UnityPolicyException(
"Registering Object of unsupported type {} to Saver ".format(
type(module)
)
)
if self.policy is None and isinstance(module, TorchPolicy):
self.policy = module
self.exporter = ModelSerializer(self.policy)
def save_checkpoint(self, behavior_name: str, step: int) -> str:
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
state_dict = {
name: module.state_dict() for name, module in self.modules.items()
}
torch.save(state_dict, f"{checkpoint_path}.pt")
torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt"))
self.export(checkpoint_path, behavior_name)
return checkpoint_path
def export(self, output_filepath: str, behavior_name: str) -> None:
if self.exporter is not None:
self.exporter.export_policy_model(output_filepath)
def initialize_or_load(self, policy: Optional[TorchPolicy] = None) -> None:
# Initialize/Load registered self.policy by default.
# If given input argument policy, use the input policy instead.
# This argument is mainly for initialization of the ghost trainer's fixed policy.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_model(
self.initialize_path, policy, reset_global_steps=reset_steps
)
elif self.load:
self._load_model(self.model_path, policy, reset_global_steps=reset_steps)
def _load_model(
self,
load_path: str,
policy: Optional[TorchPolicy] = None,
reset_global_steps: bool = False,
) -> None:
model_path = os.path.join(load_path, "checkpoint.pt")
saved_state_dict = torch.load(model_path)
if policy is None:
modules = self.modules
policy = self.policy
else:
modules = policy.get_modules()
policy = cast(TorchPolicy, policy)
for name, mod in modules.items():
mod.load_state_dict(saved_state_dict[name])
if reset_global_steps:
policy.set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {policy.get_current_step()}.")
def copy_final_model(self, source_nn_path: str) -> None:
"""
Copy the .nn file at the given source to the destination.
Also copies the corresponding .onnx file if it exists.
"""
final_model_name = os.path.splitext(source_nn_path)[0]
if SerializationSettings.convert_to_onnx:
try:
source_path = f"{final_model_name}.onnx"
destination_path = f"{self.model_path}.onnx"
shutil.copyfile(source_path, destination_path)
logger.info(f"Copied {source_path} to {destination_path}.")
except OSError:
pass
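
The checkpoint written above is just a dict of per-module state_dicts; a minimal round-trip sketch (module names and paths are illustrative):

import os
import torch

net = torch.nn.Linear(4, 2)
opt = torch.optim.Adam(net.parameters(), lr=1e-3)
modules = {"Policy": net, "Optimizer:adam": opt}

os.makedirs("results/run1", exist_ok=True)
# Save one state_dict per registered module, as save_checkpoint() does.
torch.save(
    {name: mod.state_dict() for name, mod in modules.items()},
    "results/run1/checkpoint.pt",
)

# Restore, mirroring _load_model().
saved = torch.load("results/run1/checkpoint.pt")
for name, mod in modules.items():
    mod.load_state_dict(saved[name])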

11
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/OrientationCubeController.cs.meta


fileFormatVersion: 2
guid: 771e78c5e980e440e8cd19716b55075f
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

2
Project/Assets/csc.rsp


-warnaserror+
-warnaserror-:618

11
com.unity.ml-agents/Runtime/SensorHelper.cs.meta


fileFormatVersion: 2
guid: 7c1189c0af42c46f7b533350d49ad3e7
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

132
utils/validate_release_links.py


#!/usr/bin/env python3
import ast
import sys
import os
import re
import subprocess
from typing import List, Optional, Pattern
RELEASE_PATTERN = re.compile(r"release_[0-9]+(_docs)*")
TRAINER_INIT_FILE = "ml-agents/mlagents/trainers/__init__.py"
# Filename -> regex list to allow specific lines.
# To allow everything in the file, use None for the value
ALLOW_LIST = {
# Previous release table
"README.md": re.compile(r"\*\*Release [0-9]+\*\*"),
"docs/Versioning.md": None,
"com.unity.ml-agents/CHANGELOG.md": None,
"utils/make_readme_table.py": None,
"utils/validate_release_links.py": None,
}
def test_pattern():
# Just some sanity check that the regex works as expected.
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/Food.md"
)
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_4/Foo.md"
)
assert RELEASE_PATTERN.search(
"git clone --branch release_4 https://github.com/Unity-Technologies/ml-agents.git"
)
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_123_docs/Foo.md"
)
assert RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/release_123/Foo.md"
)
assert not RELEASE_PATTERN.search(
"https://github.com/Unity-Technologies/ml-agents/blob/latest_release/docs/Foo.md"
)
print("tests OK!")
def git_ls_files() -> List[str]:
"""
Run "git ls-files" and return a list with one entry per line.
This returns the list of all files tracked by git.
"""
return subprocess.check_output(["git", "ls-files"], universal_newlines=True).split(
"\n"
)
def get_release_tag() -> Optional[str]:
"""
Returns the release tag for the mlagents python package.
This will be None on the master branch.
:return:
"""
with open(TRAINER_INIT_FILE) as f:
for line in f:
if "__release_tag__" in line:
lhs, equals_string, rhs = line.strip().partition(" = ")
# Evaluate the right hand side of the expression
return ast.literal_eval(rhs)
# If we couldn't find the release tag, raise an exception
# (since we can't return None here)
raise RuntimeError("Can't determine release tag")
def check_file(filename: str, global_allow_pattern: Pattern) -> List[str]:
"""
Validate a single file and return any offending lines.
"""
bad_lines = []
with open(filename) as f:
for line in f:
if not RELEASE_PATTERN.search(line):
continue
if global_allow_pattern.search(line):
continue
if filename in ALLOW_LIST:
if ALLOW_LIST[filename] is None or ALLOW_LIST[filename].search(line):
continue
bad_lines.append(f"{filename}: {line.strip()}")
return bad_lines
def check_all_files(allow_pattern: Pattern) -> List[str]:
"""
Validate all files tracked by git.
:param allow_pattern:
"""
bad_lines = []
file_types = {".py", ".md", ".cs"}
for file_name in git_ls_files():
if "localized" in file_name or os.path.splitext(file_name)[1] not in file_types:
continue
bad_lines += check_file(file_name, allow_pattern)
return bad_lines
def main():
release_tag = get_release_tag()
if not release_tag:
print("Release tag is None, exiting")
sys.exit(0)
print(f"Release tag: {release_tag}")
allow_pattern = re.compile(f"{release_tag}(_docs)*")
bad_lines = check_all_files(allow_pattern)
if bad_lines:
print(
f"Found lines referring to previous release. Either update the files, or add an exclusion to {__file__}"
)
for line in bad_lines:
print(line)
sys.exit(1 if bad_lines else 0)
if __name__ == "__main__":
if "--test" in sys.argv:
test_pattern()
main()
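
A quick illustration of how the two patterns above interact, ignoring the per-file ALLOW_LIST (the current release tag is hypothetical):

import re

RELEASE_PATTERN = re.compile(r"release_[0-9]+(_docs)*")
allow_pattern = re.compile("release_7(_docs)*")  # built from a hypothetical current tag

line = "https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/Foo.md"
mentions_a_release = bool(RELEASE_PATTERN.search(line))  # True: refers to some release
is_current_release = bool(allow_pattern.search(line))    # False: not the current tag
flagged = mentions_a_release and not is_current_release  # True -> reported as a bad line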

36
ml-agents/mlagents/trainers/tests/test_models.py


import pytest
from mlagents.trainers.tf.models import ModelUtils
from mlagents.tf_utils import tf
from mlagents_envs.base_env import BehaviorSpec, ActionType
def create_behavior_spec(num_visual, num_vector, vector_size):
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector),
ActionType.DISCRETE,
(1,),
)
return behavior_spec
@pytest.mark.parametrize("num_visual", [1, 2, 4])
@pytest.mark.parametrize("num_vector", [1, 2, 4])
def test_create_input_placeholders(num_vector, num_visual):
vec_size = 8
name_prefix = "test123"
bspec = create_behavior_spec(num_visual, num_vector, vec_size)
vec_in, vis_in = ModelUtils.create_input_placeholders(
bspec.observation_shapes, name_prefix=name_prefix
)
assert isinstance(vis_in, list)
assert len(vis_in) == num_visual
assert isinstance(vec_in, tf.Tensor)
assert vec_in.get_shape().as_list()[1] == num_vector * 8
# Check names contain prefix and vis shapes are correct
for _vis in vis_in:
assert _vis.get_shape().as_list() == [None, 84, 84, 3]
assert _vis.name.startswith(name_prefix)
assert vec_in.name.startswith(name_prefix)

113
ml-agents/mlagents/trainers/tests/test_saver.py


import pytest
from unittest import mock
import os
import unittest
import tempfile
import numpy as np
from mlagents.tf_utils import tf
from mlagents.trainers.saver.tf_saver import TFSaver
from mlagents.trainers import __version__
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.ppo.optimizer import PPOOptimizer
def test_register(tmp_path):
trainer_params = TrainerSettings()
saver = TFSaver(trainer_params, tmp_path)
opt = mock.Mock(spec=PPOOptimizer)
saver.register(opt)
assert saver.policy is None
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params)
saver.register(policy)
assert saver.policy is not None
class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test write_stats
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
trainer_params = TrainerSettings()
mock_path = tempfile.mkdtemp()
policy = create_policy_mock(trainer_params)
saver = TFSaver(trainer_params, mock_path)
saver.register(policy)
saver._check_model_version(
"0.0.0"
) # This is not the right version for sure
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
saver._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown wth correct ver
assert len(cm.output) == 1
def test_load_save(tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params)
saver = TFSaver(trainer_params, path1)
saver.register(policy)
saver.initialize_or_load(policy)
policy.set_step(2000)
mock_brain_name = "MockBrain"
saver.save_checkpoint(mock_brain_name, 2000)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
saver = TFSaver(trainer_params, path1, load=True)
policy2 = create_policy_mock(trainer_params)
saver.register(policy2)
saver.initialize_or_load(policy2)
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params.init_path = path1
saver = TFSaver(trainer_params, path2)
policy3 = create_policy_mock(trainer_params)
saver.register(policy3)
saver.initialize_or_load(policy3)
_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
"""
Make sure two policies have the same output for the same input.
"""
decision_step, _ = mb.create_steps_from_behavior_spec(
policy1.behavior_spec, num_agents=1
)
run_out1 = policy1.evaluate(decision_step, list(decision_step.agent_id))
run_out2 = policy2.evaluate(decision_step, list(decision_step.agent_id))
np.testing.assert_array_equal(run_out2["log_probs"], run_out1["log_probs"])
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_checkpoint_conversion(tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config = TrainerSettings()
model_path = os.path.join(tmpdir, "Mock_Brain")
policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
trainer_params = TrainerSettings()
saver = TFSaver(trainer_params, model_path)
saver.register(policy)
saver.save_checkpoint("Mock_Brain", 100)
assert os.path.isfile(model_path + "/Mock_Brain-100.nn")

/ml-agents/mlagents/trainers/ppo/optimizer.py → /ml-agents/mlagents/trainers/ppo/optimizer_tf.py

/com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs.meta → /com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs.meta

Some files were not shown because too many files changed in this diff
