
[feature] Add experimental PyTorch support (#4335)

* Begin porting work

* Add ResNet and distributions

* Dynamically construct actor and critic

* Initial optimizer port

* Refactoring policy and optimizer

* Resolving a few bugs

* Share more code between tf and torch policies

* Slightly closer to running model

* Training runs, but doesn’t actually work

* Fix a couple additional bugs

* Add conditional sigma for distribution

* Fix normalization

* Support discrete actions as well

* Continuous and discrete now train

* Multi-discrete now working

* Visual observations now train as well

* GRU in-progress and dynamic cnns

* Fix for memories

* Remove unused arg

* Combine actor and critic classes. Initial export.

* Support tf and pytorch alongside one another

* Prepare model for onnx export

* Use LSTM and fix a few merge errors

* Fix bug in probs calculation

* Optimize np -> tensor operations

* Time action sample funct...
/MLA-1734-demo-provider
GitHub, 4 years ago
Current commit: 1955af9e
52 files changed, with 5,374 insertions and 156 deletions
  1. com.unity.ml-agents/CHANGELOG.md (4)
  2. ml-agents/mlagents/trainers/buffer.py (2)
  3. ml-agents/mlagents/trainers/cli_utils.py (7)
  4. ml-agents/mlagents/trainers/ghost/trainer.py (12)
  5. ml-agents/mlagents/trainers/policy/tf_policy.py (2)
  6. ml-agents/mlagents/trainers/ppo/trainer.py (76)
  7. ml-agents/mlagents/trainers/sac/trainer.py (120)
  8. ml-agents/mlagents/trainers/settings.py (14)
  9. ml-agents/mlagents/trainers/tests/test_ghost.py (7)
  10. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5)
  11. ml-agents/mlagents/trainers/tests/test_sac.py (3)
  12. ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)
  13. ml-agents/mlagents/trainers/tests/torch/test_layers.py (19)
  14. ml-agents/mlagents/trainers/tests/torch/test_networks.py (17)
  15. ml-agents/mlagents/trainers/tests/torch/test_utils.py (6)
  16. ml-agents/mlagents/trainers/tf/model_serialization.py (20)
  17. ml-agents/mlagents/trainers/torch/encoders.py (17)
  18. ml-agents/mlagents/trainers/torch/layers.py (67)
  19. ml-agents/mlagents/trainers/torch/networks.py (115)
  20. ml-agents/mlagents/trainers/torch/utils.py (4)
  21. ml-agents/mlagents/trainers/trainer/rl_trainer.py (99)
  22. ml-agents/mlagents/trainers/trainer/trainer.py (5)
  23. ml-agents/mlagents/trainers/trainer_controller.py (7)
  24. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (94)
  25. ml-agents/mlagents/trainers/policy/torch_policy.py (281)
  26. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (203)
  27. ml-agents/mlagents/trainers/sac/optimizer_torch.py (561)
  28. ml-agents/mlagents/trainers/saver/torch_saver.py (118)
  29. ml-agents/mlagents/trainers/tests/torch/test.demo (1001)
  30. ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py (144)
  31. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (177)
  32. ml-agents/mlagents/trainers/tests/torch/test_policy.py (150)
  33. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (505)
  34. ml-agents/mlagents/trainers/tests/torch/testdcvis.demo (446)
  35. ml-agents/mlagents/trainers/torch/model_serialization.py (74)
  36. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (111)
  37. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (56)
  38. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (138)
  39. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (32)
  40. ml-agents/mlagents/trainers/torch/components/__init__.py (0)
  41. ml-agents/mlagents/trainers/torch/components/bc/__init__.py (0)
  42. ml-agents/mlagents/trainers/torch/components/bc/module.py (183)
  43. ml-agents/mlagents/trainers/torch/components/reward_providers/__init__.py (15)
  44. ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py (72)
  45. ml-agents/mlagents/trainers/torch/components/reward_providers/extrinsic_reward_provider.py (15)
  46. ml-agents/mlagents/trainers/torch/components/reward_providers/reward_provider_factory.py (43)
  47. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (225)
  48. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (256)

com.unity.ml-agents/CHANGELOG.md (4)


- The interaction between EnvManager and TrainerController was changed; EnvManager.advance() was split into two stages,
and TrainerController now uses the results from the first stage to handle new behavior names. This change speeds up
Python training by approximately 5-10%. (#4259)
- Experimental PyTorch support has been added. Use `--torch` when running `mlagents-learn`, or add
`framework: pytorch` to your trainer configuration (under the behavior name) to enable it.
Note that PyTorch 1.6.0 or greater should be installed to use this feature; see
[the PyTorch website](https://pytorch.org/) for installation instructions. (#4335)
### Minor Changes
#### com.unity.ml-agents (C#)
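
For reference, both `--torch` and the `framework: pytorch` config key end up setting the new `framework` field on TrainerSettings (see the settings.py changes below). A minimal sketch of the programmatic equivalent, using only names visible in this diff:

# Sketch, not part of the PR's diff: switch a trainer to the PyTorch backend in code.
from mlagents.trainers.settings import TrainerSettings, FrameworkType

settings = TrainerSettings()                # framework defaults to FrameworkType.TENSORFLOW
settings.framework = FrameworkType.PYTORCH  # same effect as `framework: pytorch` in the YAML config
assert settings.framework == FrameworkType.PYTORCH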

ml-agents/mlagents/trainers/buffer.py (2)


Adds a list of np.arrays to the end of the list of np.arrays.
:param data: The np.array list to append.
"""
self += list(np.array(data))
self += list(np.array(data, dtype=np.float32))
def set(self, data):
"""

ml-agents/mlagents/trainers/cli_utils.py (7)


action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)
argparser.add_argument(
"--torch",
default=False,
action=DetectDefaultStoreTrue,
help="(Experimental) Use the PyTorch framework instead of TensorFlow. Install PyTorch "
"before using this option",
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(

ml-agents/mlagents/trainers/ghost/trainer.py (12)


self.trainer.save_model()
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
"""
Creates policy with the wrapped trainer's create_policy function

wrapped trainer to be trained.
"""
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy = self.trainer.create_policy(
parsed_behavior_id, behavior_spec, create_graph=True
)
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

parsed_behavior_id, behavior_spec
)
self.trainer.add_policy(parsed_behavior_id, internal_trainer_policy)
internal_trainer_policy.init_load_weights()
self.current_policy_snapshot[
parsed_behavior_id.brain_name
] = internal_trainer_policy.get_weights()

ml-agents/mlagents/trainers/policy/tf_policy.py (2)


# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self.initialize()
# Create assignment ops for Ghost Trainer
self.init_load_weights()
def _create_encoder(
self,

ml-agents/mlagents/trainers/ppo/trainer.py (76)


from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchPPOOptimizer = None # type: ignore
logger = get_logger(__name__)

trajectory.next_obs,
trajectory.done_reached and not trajectory.interrupted,
)
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),
)
# Evaluate all reward functions
self.collected_rewards["environment"][agent_id] += np.sum(

evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

local_value_estimates = agent_buffer_trajectory[
f"{name}_value_estimates"
].get_batch()
local_advantage = get_gae(
rewards=local_rewards,
value_estimates=local_value_estimates,

self._clear_update_buffer()
return True
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
Creates a PPO policy to trainers list of policies.
Creates a policy with a Tensorflow backend and PPO hyperparameters
:param parsed_behavior_id:
:param create_graph: whether to create the Tensorflow graph on construction
:return policy
"""
policy = TFPolicy(

condition_sigma_on_obs=False, # Faster training for PPO
create_tf_graph=False, # We will create the TF graph in the Optimizer
create_tf_graph=create_graph,
return policy
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Creates a policy with a PyTorch backend and PPO hyperparameters
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:return policy
"""
policy = TorchPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=False, # Faster training for PPO
separate_critic=behavior_spec.is_action_continuous(),
)
return PPOOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
if self.framework == FrameworkType.PYTORCH:
return TorchPPOOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return PPOOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
self.optimizer = self.create_ppo_optimizer()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)

ml-agents/mlagents/trainers/sac/trainer.py (120)


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, SACSettings
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchSACOptimizer = None # type: ignore
logger = get_logger(__name__)

agent_buffer_trajectory["environment_rewards"]
)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

)
for name, v in value_estimates.items():
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
np.mean(v),
)
# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.

self._update_reward_signals()
return policy_was_updated
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TFPolicy:
policy = TFPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,
)
def maybe_load_replay_buffer(self):
# Load the replay buffer if load
if self.load and self.checkpoint_replay_buffer:
try:

)
)
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> TFPolicy:
"""
Creates a policy with a Tensorflow backend and SAC hyperparameters
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:param create_graph: whether to create the Tensorflow graph on construction
:return policy
"""
policy = TFPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
tanh_squash=True,
reparameterize=True,
create_tf_graph=create_graph,
)
self.maybe_load_replay_buffer()
return policy
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Creates a policy with a PyTorch backend and SAC hyperparameters
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:return policy
"""
policy = TorchPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=True,
tanh_squash=True,
separate_critic=True,
)
self.maybe_load_replay_buffer()
return policy
def _update_sac_policy(self) -> bool:

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
if isinstance(signal, RewardSignal):
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
else:
sampled_minibatch[f"{name}_rewards"] = (
signal.evaluate(sampled_minibatch) * signal.strength
)
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():

reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug(f"Updating {name} at step {self.step}")
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
if isinstance(signal, RewardSignal):
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
update_stats = self.optimizer.update_reward_signals(
reward_signal_minibatches, n_sequences
)

self._stats_reporter.add_stat(stat, np.mean(stat_list))
def create_sac_optimizer(self) -> SACOptimizer:
return SACOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
if self.framework == FrameworkType.PYTORCH:
return TorchSACOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return SACOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

ml-agents/mlagents/trainers/settings.py (14)


return _mapping[self]
class FrameworkType(Enum):
TENSORFLOW: str = "tensorflow"
PYTORCH: str = "pytorch"
@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
trainer_type: TrainerType = TrainerType.PPO

threaded: bool = True
self_play: Optional[SelfPlaySettings] = None
behavioral_cloning: Optional[BehavioralCloningSettings] = None
framework: FrameworkType = FrameworkType.TENSORFLOW
cattr.register_structure_hook(
Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure

configured_dict["engine_settings"][key] = val
else: # Base options
configured_dict[key] = val
return RunOptions.from_dict(configured_dict)
# Apply --torch retroactively
final_runoptions = RunOptions.from_dict(configured_dict)
if "torch" in DetectDefault.non_default_args:
for trainer_set in final_runoptions.behaviors.values():
trainer_set.framework = FrameworkType.PYTORCH
return final_runoptions
@staticmethod
def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":

ml-agents/mlagents/trainers/tests/test_ghost.py (7)


trainer_params = dummy_config
trainer = PPOTrainer("test", 0, trainer_params, True, False, 0, "0")
trainer.seed = 1
policy = trainer.create_policy("test", mock_specs)
policy.create_tf_graph()
policy = trainer.create_policy("test", mock_specs, create_graph=True)
to_load_policy = trainer.create_policy("test", mock_specs)
to_load_policy.create_tf_graph()
to_load_policy.init_load_weights()
to_load_policy = trainer.create_policy("test", mock_specs, create_graph=True)
weights = policy.get_weights()
load_weights = to_load_policy.get_weights()

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5)


mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_policy(self):
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def _process_trajectory(self, trajectory):

ml-agents/mlagents/trainers/tests/test_sac.py (3)


0, mock_brain, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
policy.initialize()
optimizer.policy.initialize()
return optimizer

trainer.add_policy(behavior_id, policy)
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update = mock.Mock()
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update_reward_signals = mock.Mock()
trainer.optimizer.update_reward_signals.return_value = {}
trainer.optimizer.update.return_value = {}

ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)


RewardSignalType,
EncoderType,
ScheduleType,
FrameworkType,
)
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents_envs.side_channel.environment_parameters_channel import (

summary_freq=500,
max_steps=3000,
threaded=False,
framework=FrameworkType.TENSORFLOW,
)
SAC_CONFIG = TrainerSettings(

ml-agents/mlagents/trainers/tests/torch/test_layers.py (19)


linear_layer,
lstm_layer,
Initialization,
LSTM,
)

assert torch.all(
torch.eq(param.data[4:8], torch.ones_like(param.data[4:8]))
)
def test_lstm_class():
torch.manual_seed(0)
input_size = 12
memory_size = 64
batch_size = 8
seq_len = 16
lstm = LSTM(input_size, memory_size)
assert lstm.memory_size == memory_size
sample_input = torch.ones((batch_size, seq_len, input_size))
sample_memories = torch.ones((1, batch_size, memory_size))
out, mem = lstm(sample_input, sample_memories)
# Hidden size should be half of memory_size
assert out.shape == (batch_size, seq_len, memory_size // 2)
assert mem.shape == (1, batch_size, memory_size)

ml-agents/mlagents/trainers/tests/torch/test_networks.py (17)


assert act.shape == (1, 1)
# Test forward
actions, probs, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
# This is different from above for ONNX export
assert act.shape == (
act_size[0],
1,
) # This is different from above for ONNX export
assert act.shape == (act_size[0], 1)
assert act.shape == (1, 1)
assert act.shape == tuple(act_size)
# TODO: Once export works properly. fix the shapes here.
assert mem_size == 0
assert is_cont == int(action_type == ActionType.CONTINUOUS)
assert act_size_vec == torch.tensor(act_size)

if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
memories = torch.ones(
(
1,
network_settings.memory.sequence_length,
network_settings.memory.memory_size,
)
(1, network_settings.memory.sequence_length, actor.memory_size)
)
else:
sample_obs = torch.ones((1, obs_size))

ml-agents/mlagents/trainers/tests/torch/test_utils.py (6)


masks = torch.tensor([False, False, False, False, False])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 0.0
# Make sure it works with 2d arrays of shape (mask_length, N)
test_input = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T
masks = torch.tensor([False, False, True, True, True])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 4.0

ml-agents/mlagents/trainers/tf/model_serialization.py (20)


def export_policy_model(
model_path: str,
output_filepath: str,
brain_name: str,
behavior_name: str,
graph: tf.Graph,
sess: tf.Session,
) -> None:

:param output_filepath: file path to output the model (without file suffix)
:param brain_name: brain name of the trained model
:param behavior_name: behavior name of the trained model
frozen_graph_def = _make_frozen_graph(brain_name, graph, sess)
frozen_graph_def = _make_frozen_graph(behavior_name, graph, sess)
if not os.path.exists(output_filepath):
os.makedirs(output_filepath)
# Save frozen graph

if ONNX_EXPORT_ENABLED:
if SerializationSettings.convert_to_onnx:
try:
onnx_graph = convert_frozen_to_onnx(brain_name, frozen_graph_def)
onnx_graph = convert_frozen_to_onnx(behavior_name, frozen_graph_def)
onnx_output_path = f"{output_filepath}.onnx"
with open(onnx_output_path, "wb") as f:
f.write(onnx_graph.SerializeToString())

def _make_frozen_graph(
brain_name: str, graph: tf.Graph, sess: tf.Session
behavior_name: str, graph: tf.Graph, sess: tf.Session
target_nodes = ",".join(_process_graph(brain_name, graph))
target_nodes = ",".join(_process_graph(behavior_name, graph))
graph_def = graph.as_graph_def()
output_graph_def = graph_util.convert_variables_to_constants(
sess, graph_def, target_nodes.replace(" ", "").split(",")

def convert_frozen_to_onnx(brain_name: str, frozen_graph_def: tf.GraphDef) -> Any:
def convert_frozen_to_onnx(behavior_name: str, frozen_graph_def: tf.GraphDef) -> Any:
# This is basically https://github.com/onnx/tensorflow-onnx/blob/master/tf2onnx/convert.py
inputs = _get_input_node_names(frozen_graph_def)

)
onnx_graph = optimizer.optimize_graph(g)
model_proto = onnx_graph.make_model(brain_name)
model_proto = onnx_graph.make_model(behavior_name)
return model_proto

return names
def _process_graph(brain_name: str, graph: tf.Graph) -> List[str]:
def _process_graph(behavior_name: str, graph: tf.Graph) -> List[str]:
"""
Gets the list of the output nodes present in the graph for inference
:return: list of node names

logger.info("List of nodes to export for brain :" + brain_name)
logger.info("List of nodes to export for behavior :" + behavior_name)
for n in nodes:
logger.info("\t" + n)
return nodes

ml-agents/mlagents/trainers/torch/encoders.py (17)


super().__init__()
n_channels = [16, 32, 32] # channel for each stack
n_blocks = 2 # number of residual blocks
self.layers = []
layers = []
self.layers.append(
nn.Conv2d(last_channel, channel, [3, 3], [1, 1], padding=1)
)
self.layers.append(nn.MaxPool2d([3, 3], [2, 2]))
layers.append(nn.Conv2d(last_channel, channel, [3, 3], [1, 1], padding=1))
layers.append(nn.MaxPool2d([3, 3], [2, 2]))
self.layers.append(ResNetBlock(channel))
layers.append(ResNetBlock(channel))
self.layers.append(Swish())
layers.append(Swish())
self.dense = linear_layer(
n_channels[-1] * height * width,
final_hidden,

self.sequential = nn.Sequential(*layers)
hidden = visual_obs
for layer in self.layers:
hidden = layer(hidden)
hidden = self.sequential(visual_obs)
before_out = hidden.view(batch_size, -1)
return torch.relu(self.dense(before_out))
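
A likely reason for this encoders.py refactor (my inference; the diff itself does not say): layers appended to a plain Python list are not registered as submodules, so their parameters are invisible to .parameters() and state_dict(), while nn.Sequential registers them properly. A small standalone illustration, with arbitrary layer sizes of my own choosing:

# Illustration only; neither class below is from the PR.
from torch import nn

class ListEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Conv2d(3, 16, 3)]  # plain list: conv params are NOT registered

class SequentialEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.sequential = nn.Sequential(nn.Conv2d(3, 16, 3))  # registered as a submodule

print(len(list(ListEncoder().parameters())))        # 0
print(len(list(SequentialEncoder().parameters())))  # 2 (conv weight and bias)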

ml-agents/mlagents/trainers/torch/layers.py (67)


import torch
import abc
from typing import Tuple
from enum import Enum

forget_bias
)
return lstm
class MemoryModule(torch.nn.Module):
@abc.abstractproperty
def memory_size(self) -> int:
"""
Size of memory that is required at the start of a sequence.
"""
pass
@abc.abstractmethod
def forward(
self, input_tensor: torch.Tensor, memories: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Pass a sequence to the memory module.
:input_tensor: Tensor of shape (batch_size, seq_length, size) that represents the input.
:memories: Tensor of initial memories.
:return: Tuple of output, final memories.
"""
pass
class LSTM(MemoryModule):
"""
Memory module that implements LSTM.
"""
def __init__(
self,
input_size: int,
memory_size: int,
num_layers: int = 1,
forget_bias: float = 1.0,
kernel_init: Initialization = Initialization.XavierGlorotUniform,
bias_init: Initialization = Initialization.Zero,
):
super().__init__()
# We set hidden size to half of memory_size since the initial memory
# will be divided between the hidden state and initial cell state.
self.hidden_size = memory_size // 2
self.lstm = lstm_layer(
input_size,
self.hidden_size,
num_layers,
True,
forget_bias,
kernel_init,
bias_init,
)
@property
def memory_size(self) -> int:
return 2 * self.hidden_size
def forward(
self, input_tensor: torch.Tensor, memories: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
# We don't use torch.split here since it is not supported by Barracuda
h0 = memories[:, :, : self.hidden_size]
c0 = memories[:, :, self.hidden_size :]
hidden = (h0, c0)
lstm_out, hidden_out = self.lstm(input_tensor, hidden)
output_mem = torch.cat(hidden_out, dim=-1)
return lstm_out, output_mem
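
The memory layout used by this LSTM wrapper, as read from the code above: a single memory tensor of size memory_size carries the hidden state in its first half and the cell state in its second half, which is why hidden_size is memory_size // 2 and why forward() slices by indexing rather than torch.split (Barracuda does not support split). A short usage sketch mirroring the shapes asserted in test_layers.py; the zero initial memory is my own choice:

# Shapes follow test_lstm_class; everything else here is illustrative.
import torch
from mlagents.trainers.torch.layers import LSTM

lstm = LSTM(input_size=12, memory_size=64)  # hidden_size = 32 internally
x = torch.ones((8, 16, 12))                 # (batch, seq_len, input_size)
mem = torch.zeros((1, 8, 64))               # packed (h0 | c0) along the last dimension
out, new_mem = lstm(x, mem)
assert out.shape == (8, 16, 32)             # outputs have hidden_size features
assert new_mem.shape == (1, 8, 64)          # h and c concatenated back into one tensor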

ml-agents/mlagents/trainers/torch/networks.py (115)


from typing import Callable, List, Dict, Tuple, Optional
import attr
import abc
import torch

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads
from mlagents.trainers.torch.layers import lstm_layer
from mlagents.trainers.torch.layers import LSTM
ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
EncoderFunction = Callable[

)
if self.use_lstm:
self.lstm = lstm_layer(self.h_size, self.m_size // 2, batch_first=True)
self.lstm = LSTM(self.h_size, self.m_size)
self.lstm = None
self.lstm = None # type: ignore
def update_normalization(self, vec_inputs: List[torch.Tensor]) -> None:
for vec_input, vec_enc in zip(vec_inputs, self.vector_encoders):

for n1, n2 in zip(self.vector_encoders, other_network.vector_encoders):
n1.copy_normalization(n2)
@property
def memory_size(self) -> int:
return self.lstm.memory_size if self.use_lstm else 0
def forward(
self,
vec_inputs: List[torch.Tensor],

sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
vec_encodes = []
encodes = []
for idx, encoder in enumerate(self.vector_encoders):
vec_input = vec_inputs[idx]
if actions is not None:

vec_encodes.append(hidden)
encodes.append(hidden)
vis_encodes = []
vis_input = vis_input.permute([0, 3, 1, 2])
if not torch.onnx.is_in_onnx_export():
vis_input = vis_input.permute([0, 3, 1, 2])
vis_encodes.append(hidden)
encodes.append(hidden)
if len(vec_encodes) > 0 and len(vis_encodes) > 0:
vec_encodes_tensor = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
vis_encodes_tensor = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
encoding = torch.stack(
[vec_encodes_tensor, vis_encodes_tensor], dim=-1
).sum(dim=-1)
elif len(vec_encodes) > 0:
encoding = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
elif len(vis_encodes) > 0:
encoding = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
else:
if len(encodes) == 0:
# Constants don't work in Barracuda
encoding = encodes[0]
if len(encodes) > 1:
for _enc in encodes[1:]:
encoding += _enc
memories = torch.split(memories, self.m_size // 2, dim=-1)
memories = torch.cat(memories, dim=-1)
return encoding, memories

encoding_size = network_settings.hidden_units
self.value_heads = ValueHeads(stream_names, encoding_size, outputs_per_stream)
@property
def memory_size(self) -> int:
return self.network_body.memory_size
def forward(
self,
vec_inputs: List[torch.Tensor],

vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor, int, int, int, int]:
) -> Tuple[torch.Tensor, int, int, int, int]:
"""
Forward pass of the Actor for inference. This is required for export to ONNX, and
the inputs and outputs of this method should not be changed without a respective change

"""
pass
@abc.abstractproperty
def memory_size(self):
"""
Returns the size of the memory (same size used as input and output in the other
methods) used by this Actor.
"""
pass
class SimpleActor(nn.Module, Actor):
def __init__(

self.act_type = act_type
self.act_size = act_size
self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
self.memory_size = torch.nn.Parameter(torch.Tensor([0]))
self.is_continuous_int = torch.nn.Parameter(
torch.Tensor([int(act_type == ActionType.CONTINUOUS)])
)

self.encoding_size = network_settings.memory.memory_size // 2
else:
self.encoding_size = network_settings.hidden_units
if self.act_type == ActionType.CONTINUOUS:
self.distribution = GaussianDistribution(
self.encoding_size,

self.encoding_size, act_size
)
@property
def memory_size(self) -> int:
return self.network_body.memory_size
def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
self.network_body.update_normalization(vector_obs)

vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor, int, int, int, int]:
) -> Tuple[torch.Tensor, int, int, int, int]:
dists, _ = self.get_dists(
vec_inputs, vis_inputs, masks, memories, sequence_length
)
dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
if self.act_type == ActionType.CONTINUOUS:
action_out = sampled_actions
else:
action_out = dists[0].all_log_prob()
sampled_actions,
dists[0].pdf(sampled_actions),
action_out,
self.memory_size,
torch.Tensor([self.network_body.memory_size]),
self.is_continuous_int,
self.act_size_vector,
)

# Give the Actor only half the memories. Note we previously validate
# that memory_size must be a multiple of 4.
self.use_lstm = network_settings.memory is not None
if network_settings.memory is not None:
self.half_mem_size = network_settings.memory.memory_size // 2
new_memory_settings = attr.evolve(
network_settings.memory, memory_size=self.half_mem_size
)
use_network_settings = attr.evolve(
network_settings, memory=new_memory_settings
)
else:
use_network_settings = network_settings
self.half_mem_size = 0
use_network_settings,
network_settings,
act_type,
act_size,
conditional_sigma,

self.critic = ValueNetwork(
stream_names, observation_shapes, use_network_settings
)
self.critic = ValueNetwork(stream_names, observation_shapes, network_settings)
@property
def memory_size(self) -> int:
return self.network_body.memory_size + self.critic.memory_size
def critic_pass(
self,

actor_mem, critic_mem = None, None
if self.use_lstm:
# Use only the back half of memories for critic
actor_mem, critic_mem = torch.split(memories, self.half_mem_size, -1)
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
value_outputs, critic_mem_out = self.critic(
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
)

) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.half_mem_size, dim=-1)
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None

class GlobalSteps(nn.Module):
def __init__(self):
super().__init__()
self.global_step = torch.Tensor([0])
self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
@property
def current_step(self):
return int(self.__global_step.item())
@current_step.setter
def current_step(self, value):
self.__global_step[:] = value
self.global_step += value
self.__global_step += value
class LearningRate(nn.Module):
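
The GlobalSteps change above replaces a bare tensor attribute with a non-trainable nn.Parameter. My reading of the intent (not stated in the diff): registered parameters appear in the module's state_dict, so the step count is saved and restored together with the network weights, whereas a plain tensor attribute would be dropped. A standalone sketch of just that behavior:

# Mirrors the new GlobalSteps; the print at the end is mine.
import torch
from torch import nn

class GlobalSteps(nn.Module):
    def __init__(self):
        super().__init__()
        self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)

    @property
    def current_step(self):
        return int(self.__global_step.item())

    @current_step.setter
    def current_step(self, value):
        self.__global_step[:] = value

gs = GlobalSteps()
gs.current_step = 500
print(gs.state_dict())  # contains the step tensor, unlike a plain attribute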

ml-agents/mlagents/trainers/torch/utils.py (4)


:param tensor: Tensor which needs mean computation.
:param masks: Boolean tensor of masks with same dimension as tensor.
"""
return (tensor * masks).sum() / torch.clamp(masks.float().sum(), min=1.0)
return (tensor.T * masks).sum() / torch.clamp(
(torch.ones_like(tensor.T) * masks).float().sum(), min=1.0
)
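
A quick numeric check of the updated masked_mean, the same 2-D case exercised in test_utils.py above; the by-hand arithmetic is mine: each of the three masked rows contributes both of its columns, so the mean is (3 + 4 + 5) * 2 / 6 = 4.0.

# Mirrors the (mask_length, N) case from test_utils.py.
import torch
from mlagents.trainers.torch.utils import ModelUtils

test_input = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T  # shape (5, 2)
masks = torch.tensor([False, False, True, True, True])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 4.0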

ml-agents/mlagents/trainers/trainer/rl_trainer.py (99)


from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.components.reward_signals import RewardSignalResult
from mlagents.trainers.components.reward_signals import RewardSignalResult, RewardSignal
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.policy.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.settings import TrainerSettings, FrameworkType
from mlagents.trainers.exception import UnityTrainerException
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.saver.torch_saver import TorchSaver
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
RewardSignalResults = Dict[str, RewardSignalResult]

self._stats_reporter.add_property(
StatsPropertyType.HYPERPARAMETERS, self.trainer_settings.as_dict()
)
self.framework = self.trainer_settings.framework
logger.debug(f"Using framework {self.framework.value}")
self.trainer_settings, self.artifact_path, self.load
self.framework, self.trainer_settings, self.artifact_path, self.load
)
def end_episode(self) -> None:

for agent_id in rewards:
rewards[agent_id] = 0
@staticmethod
def create_saver(
trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
saver = TFSaver(trainer_settings, model_path, load)
return saver
def _update_end_episode_stats(self, agent_id: str, optimizer: Optimizer) -> None:
for name, rewards in self.collected_rewards.items():
if name == "environment":

self.reward_buffer.appendleft(rewards.get(agent_id, 0))
rewards[agent_id] = 0
else:
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name, rewards.get(agent_id, 0)
)
if isinstance(optimizer.reward_signals[name], RewardSignal):
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name,
rewards.get(agent_id, 0),
)
else:
self.stats_reporter.add_stat(
f"Policy/{optimizer.reward_signals[name].name.capitalize()} Reward",
rewards.get(agent_id, 0),
)
rewards[agent_id] = 0
def _clear_update_buffer(self) -> None:

"""
return False
def create_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
if self.framework == FrameworkType.PYTORCH and TorchPolicy is None:
raise UnityTrainerException(
"To use the experimental PyTorch backend, install the PyTorch Python package first."
)
elif self.framework == FrameworkType.PYTORCH:
return self.create_torch_policy(parsed_behavior_id, behavior_spec)
else:
return self.create_tf_policy(
parsed_behavior_id, behavior_spec, create_graph=create_graph
)
@abc.abstractmethod
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Create a Policy object that uses the PyTorch backend.
"""
pass
@abc.abstractmethod
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> TFPolicy:
"""
Create a Policy object that uses the TensorFlow backend.
"""
pass
@staticmethod
def create_saver(
framework: str, trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
if framework == FrameworkType.PYTORCH:
saver = TorchSaver( # type: ignore
trainer_settings, model_path, load
)
else:
saver = TFSaver( # type: ignore
trainer_settings, model_path, load
)
return saver
def _policy_mean_reward(self) -> Optional[float]:
""" Returns the mean episode reward for the current policy. """
rewards = self.cumulative_returns_since_policy_update

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
# Copy the checkpointed model files to the final output location
final_checkpoint = attr.evolve(
model_checkpoint, file_path=f"{self.saver.model_path}.nn"
)

ml-agents/mlagents/trainers/trainer/trainer.py (5)


@abc.abstractmethod
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
"""
Creates policy

ml-agents/mlagents/trainers/trainer_controller.py (7)


from mlagents.trainers.agent_processor import AgentManager
from mlagents.tf_utils.globals import get_rank
try:
import torch
except ModuleNotFoundError:
torch = None # type: ignore
class TrainerController:
def __init__(

self.kill_trainers = False
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
if torch is not None:
torch.manual_seed(training_seed)
self.rank = get_rank()
@timed

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (94)


from typing import Dict, Optional, Tuple, List
import torch
import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.components.bc.module import BCModule
from mlagents.trainers.torch.components.reward_providers import create_reward_provider
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.torch.utils import ModelUtils
class TorchOptimizer(Optimizer): # pylint: disable=W0223
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
super().__init__()
self.policy = policy
self.trainer_settings = trainer_settings
self.update_dict: Dict[str, torch.Tensor] = {}
self.value_heads: Dict[str, torch.Tensor] = {}
self.memory_in: torch.Tensor = None
self.memory_out: torch.Tensor = None
self.m_size: int = 0
self.global_step = torch.tensor(0)
self.bc_module: Optional[BCModule] = None
self.create_reward_signals(trainer_settings.reward_signals)
if trainer_settings.behavioral_cloning is not None:
self.bc_module = BCModule(
self.policy,
trainer_settings.behavioral_cloning,
policy_learning_rate=trainer_settings.hyperparameters.learning_rate,
default_batch_size=trainer_settings.hyperparameters.batch_size,
default_num_epoch=3,
)
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
pass
def create_reward_signals(self, reward_signal_configs):
"""
Create reward signals
:param reward_signal_configs: Reward signal config.
"""
for reward_signal, settings in reward_signal_configs.items():
# Name reward signals by string in case we have duplicates later
self.reward_signals[reward_signal.value] = create_reward_provider(
reward_signal, self.policy.behavior_spec, settings
)
def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
if self.policy.use_vis_obs:
visual_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
visual_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
visual_obs.append(visual_ob)
else:
visual_obs = []
memory = torch.zeros([1, 1, self.policy.m_size])
vec_vis_obs = SplitObservations.from_observations(next_obs)
next_vec_obs = [
ModelUtils.list_to_tensor(vec_vis_obs.vector_observations).unsqueeze(0)
]
next_vis_obs = [
ModelUtils.list_to_tensor(_vis_ob).unsqueeze(0)
for _vis_ob in vec_vis_obs.visual_observations
]
value_estimates, next_memory = self.policy.actor_critic.critic_pass(
vector_obs, visual_obs, memory, sequence_length=batch.num_experiences
)
next_value_estimate, _ = self.policy.actor_critic.critic_pass(
next_vec_obs, next_vis_obs, next_memory, sequence_length=1
)
for name, estimate in value_estimates.items():
value_estimates[name] = estimate.detach().cpu().numpy()
next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()
if done:
for k in next_value_estimate:
if not self.reward_signals[k].ignore_done:
next_value_estimate[k] = 0.0
return value_estimates, next_value_estimate

ml-agents/mlagents/trainers/policy/torch_policy.py (281)


from typing import Any, Dict, List, Tuple, Optional
import numpy as np
import torch
import copy
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents.trainers.policy import Policy
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents_envs.timers import timed
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.networks import (
SharedActorCritic,
SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero
class TorchPolicy(Policy):
def __init__(
self,
seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
tanh_squash: bool = False,
reparameterize: bool = False,
separate_critic: bool = True,
condition_sigma_on_obs: bool = True,
):
"""
Policy that uses a multilayer perceptron to map the observations to actions. Could
also use a CNN to encode visual input prior to the MLP. Supports discrete and
continuous action spaces, as well as recurrent networks.
:param seed: Random seed.
:param brain: Assigned BrainParameters object.
:param trainer_settings: Defined training parameters.
:param load: Whether a pre-trained model will be loaded or a new one created.
:param tanh_squash: Whether to use a tanh function on the continuous output,
or a clipped output.
:param reparameterize: Whether we are using the resampling trick to update the policy
in continuous output.
"""
super().__init__(
seed,
behavior_spec,
trainer_settings,
tanh_squash,
reparameterize,
condition_sigma_on_obs,
)
self.global_step = (
GlobalSteps()
) # could be much simpler if TorchPolicy is nn.Module
self.grads = None
torch.set_default_tensor_type(torch.FloatTensor)
reward_signal_configs = trainer_settings.reward_signals
reward_signal_names = [key.value for key, _ in reward_signal_configs.items()]
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
}
if separate_critic:
ac_class = SeparateActorCritic
else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
network_settings=trainer_settings.network_settings,
act_type=behavior_spec.action_type,
act_size=self.act_size,
stream_names=reward_signal_names,
conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,
)
# Save the m_size needed for export
self._export_m_size = self.m_size
# m_size needed for training is determined by network, not trainer settings
self.m_size = self.actor_critic.memory_size
self.actor_critic.to("cpu")
@property
def export_memory_size(self) -> int:
"""
Returns the memory size of the exported ONNX policy. This only includes the memory
of the Actor and not any auxiliary networks.
"""
return self._export_m_size
def _split_decision_step(
self, decision_requests: DecisionSteps
) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
mask = None
if not self.use_continuous_act:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)
)
return vec_vis_obs, mask
def update_normalization(self, vector_obs: np.ndarray) -> None:
"""
If this policy normalizes vector observations, this will update the norm values in the graph.
:param vector_obs: The vector observations to add to the running estimate of the distribution.
"""
vector_obs = [torch.as_tensor(vector_obs)]
if self.use_vec_obs and self.normalize:
self.actor_critic.update_normalization(vector_obs)
@timed
def sample_actions(
self,
vec_obs: List[torch.Tensor],
vis_obs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
all_log_probs: bool = False,
) -> Tuple[
torch.Tensor, torch.Tensor, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
"""
:param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
"""
dists, value_heads, memories = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
action_list, dists
)
actions = torch.stack(action_list, dim=-1)
if self.use_continuous_act:
actions = actions[:, :, 0]
else:
actions = actions[:, 0, :]
return (
actions,
all_logs if all_log_probs else log_probs,
entropies,
value_heads,
memories,
)
def evaluate_actions(
self,
vec_obs: torch.Tensor,
vis_obs: torch.Tensor,
actions: torch.Tensor,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
dists, value_heads, _ = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
)
action_list = [actions[..., i] for i in range(actions.shape[-1])]
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
return log_probs, entropies, value_heads
@timed
def evaluate(
self, decision_requests: DecisionSteps, global_agent_ids: List[str]
) -> Dict[str, Any]:
"""
Evaluates policy for the agent experiences provided.
:param global_agent_ids:
:param decision_requests: DecisionStep object containing inputs.
:return: Outputs from network as defined by self.inference_dict.
"""
vec_vis_obs, masks = self._split_decision_step(decision_requests)
vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations)]
vis_obs = [
torch.as_tensor(vis_ob) for vis_ob in vec_vis_obs.visual_observations
]
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
)
run_out = {}
with torch.no_grad():
action, log_probs, entropy, value_heads, memories = self.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
)
run_out["action"] = action.detach().cpu().numpy()
run_out["pre_action"] = action.detach().cpu().numpy()
# Todo - make pre_action difference
run_out["log_probs"] = log_probs.detach().cpu().numpy()
run_out["entropy"] = entropy.detach().cpu().numpy()
run_out["value_heads"] = {
name: t.detach().cpu().numpy() for name, t in value_heads.items()
}
run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
run_out["learning_rate"] = 0.0
if self.use_recurrent:
run_out["memory_out"] = memories.detach().cpu().numpy().squeeze(0)
return run_out
def get_action(
self, decision_requests: DecisionSteps, worker_id: int = 0
) -> ActionInfo:
"""
Decides actions given observations information, and takes them in environment.
:param worker_id:
:param decision_requests: A dictionary of brain names and BrainInfo from environment.
:return: an ActionInfo containing action, memories, values and an object
to be passed to add experiences
"""
if len(decision_requests) == 0:
return ActionInfo.empty()
global_agent_ids = [
get_global_agent_id(worker_id, int(agent_id))
for agent_id in decision_requests.agent_id
] # For 1-D array, the iterator order is correct.
run_out = self.evaluate(
decision_requests, global_agent_ids
) # pylint: disable=assignment-from-no-return
self.save_memories(global_agent_ids, run_out.get("memory_out"))
return ActionInfo(
action=run_out.get("action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=list(decision_requests.agent_id),
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0
@property
def use_vec_obs(self):
return self.vec_obs_size > 0
def get_current_step(self):
"""
Gets current model step.
:return: current model step.
"""
return self.global_step.current_step
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.
:return: The step the model was set to.
"""
self.global_step.current_step = step
return step
def increment_step(self, n_steps):
"""
Increments model step.
"""
self.global_step.increment(n_steps)
return self.get_current_step()
def load_weights(self, values: List[np.ndarray]) -> None:
self.actor_critic.load_state_dict(values)
def init_load_weights(self) -> None:
pass
def get_weights(self) -> List[np.ndarray]:
return copy.deepcopy(self.actor_critic.state_dict())
def get_modules(self):
return {"Policy": self.actor_critic, "global_step": self.global_step}

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (203)


from typing import Dict, cast
import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.utils import ModelUtils
class TorchPPOOptimizer(TorchOptimizer):
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.
The PPO optimizer has a value estimator and a loss function.
:param policy: A TorchPolicy object that will be updated by this PPO Optimizer.
:param trainer_params: Trainer parameters dictionary that specifies the
properties of the trainer.
"""
# Create the graph here to give more granular control of the TF graph to the Optimizer.
super().__init__(policy, trainer_settings)
params = list(self.policy.actor_critic.parameters())
self.hyperparameters: PPOSettings = cast(
PPOSettings, trainer_settings.hyperparameters
)
self.decay_learning_rate = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.decay_epsilon = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.epsilon,
0.1,
self.trainer_settings.max_steps,
)
self.decay_beta = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.beta,
1e-5,
self.trainer_settings.max_steps,
)
self.optimizer = torch.optim.Adam(
params, lr=self.trainer_settings.hyperparameters.learning_rate
)
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
}
self.stream_names = list(self.reward_signals.keys())
def ppo_value_loss(
self,
values: Dict[str, torch.Tensor],
old_values: Dict[str, torch.Tensor],
returns: Dict[str, torch.Tensor],
epsilon: float,
loss_masks: torch.Tensor,
) -> torch.Tensor:
"""
Evaluates value loss for PPO.
:param values: Value output of the current network.
:param old_values: Value stored with experiences in buffer.
:param returns: Computed returns.
:param epsilon: Clipping value for value estimate.
:param loss_masks: Mask for losses. Used with LSTM to ignore 0'ed out experiences.
"""
value_losses = []
for name, head in values.items():
old_val_tensor = old_values[name]
returns_tensor = returns[name]
clipped_value_estimate = old_val_tensor + torch.clamp(
head - old_val_tensor, -1 * epsilon, epsilon
)
v_opt_a = (returns_tensor - head) ** 2
v_opt_b = (returns_tensor - clipped_value_estimate) ** 2
value_loss = ModelUtils.masked_mean(torch.max(v_opt_a, v_opt_b), loss_masks)
value_losses.append(value_loss)
value_loss = torch.mean(torch.stack(value_losses))
return value_loss
def ppo_policy_loss(
self,
advantages: torch.Tensor,
log_probs: torch.Tensor,
old_log_probs: torch.Tensor,
loss_masks: torch.Tensor,
) -> torch.Tensor:
"""
Evaluate PPO policy loss.
:param advantages: Computed advantages.
:param log_probs: Current policy probabilities
:param old_log_probs: Past policy probabilities
:param loss_masks: Mask for losses. Used with LSTM to ignore 0'ed out experiences.
"""
advantage = advantages.unsqueeze(-1)
decay_epsilon = self.hyperparameters.epsilon
r_theta = torch.exp(log_probs - old_log_probs)
p_opt_a = r_theta * advantage
p_opt_b = (
torch.clamp(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * advantage
)
policy_loss = -1 * ModelUtils.masked_mean(
torch.min(p_opt_a, p_opt_b), loss_masks
)
return policy_loss
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
"""
Performs update on model.
:param batch: Batch of experiences.
:param num_sequences: Number of sequences to process.
:return: Results of update.
"""
# Get decayed parameters
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
decay_eps = self.decay_epsilon.get_value(self.policy.get_current_step())
decay_bet = self.decay_beta.get_value(self.policy.get_current_step())
returns = {}
old_values = {}
for name in self.reward_signals:
old_values[name] = ModelUtils.list_to_tensor(
batch[f"{name}_value_estimates"]
)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
vis_obs.append(vis_ob)
else:
vis_obs = []
log_probs, entropy, values = self.policy.evaluate_actions(
vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
memories=memories,
seq_len=self.policy.sequence_length,
)
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks
)
policy_loss = self.ppo_policy_loss(
ModelUtils.list_to_tensor(batch["advantages"]),
log_probs,
ModelUtils.list_to_tensor(batch["action_probs"]),
loss_masks,
)
loss = (
policy_loss
+ 0.5 * value_loss
- decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
update_stats = {
"Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
"Losses/Value Loss": value_loss.detach().cpu().numpy(),
"Policy/Learning Rate": decay_lr,
"Policy/Epsilon": decay_eps,
"Policy/Beta": decay_bet,
}
for reward_provider in self.reward_signals.values():
update_stats.update(reward_provider.update(batch))
return update_stats
def get_modules(self):
return {"Optimizer": self.optimizer}
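
For readers mapping the code back to the algorithm, ppo_policy_loss and ppo_value_loss above are the standard PPO clipped objectives, written here with the masked means over valid timesteps left implicit:

r_t(\theta) = \exp\big(\log \pi_\theta(a_t \mid s_t) - \log \pi_{\theta_{\mathrm{old}}}(a_t \mid s_t)\big)

L^{\mathrm{policy}} = -\,\mathbb{E}_t\left[\min\big(r_t(\theta)\,\hat{A}_t,\ \mathrm{clip}(r_t(\theta),\,1-\epsilon,\,1+\epsilon)\,\hat{A}_t\big)\right]

L^{\mathrm{value}} = \mathbb{E}_t\left[\max\big((R_t - V_\theta(s_t))^2,\ (R_t - V_{\mathrm{clip}})^2\big)\right],\qquad V_{\mathrm{clip}} = V_{\mathrm{old}}(s_t) + \mathrm{clip}\big(V_\theta(s_t) - V_{\mathrm{old}}(s_t),\,-\epsilon,\,\epsilon\big)

The total loss assembled in update() is then the policy loss plus 0.5 times the value loss minus beta times the masked mean entropy, with the learning rate and beta decayed on their configured schedules.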

ml-agents/mlagents/trainers/sac/optimizer_torch.py (561)


import numpy as np
from typing import Dict, List, Mapping, cast, Tuple, Optional
import torch
from torch import nn
import attr
from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import ActionType
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.networks import ValueNetwork
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
EPSILON = 1e-6 # Small value to avoid divide by zero
logger = get_logger(__name__)
class TorchSACOptimizer(TorchOptimizer):
class PolicyValueNetwork(nn.Module):
def __init__(
self,
stream_names: List[str],
observation_shapes: List[Tuple[int, ...]],
network_settings: NetworkSettings,
act_type: ActionType,
act_size: List[int],
):
super().__init__()
if act_type == ActionType.CONTINUOUS:
num_value_outs = 1
num_action_ins = sum(act_size)
else:
num_value_outs = sum(act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
)
self.q2_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
)
def forward(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
return q1_out, q2_out
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):
super().__init__(policy, trainer_params)
hyperparameters: SACSettings = cast(SACSettings, trainer_params.hyperparameters)
self.tau = hyperparameters.tau
self.init_entcoef = hyperparameters.init_entcoef
self.policy = policy
self.act_size = policy.act_size
policy_network_settings = policy.network_settings
self.burn_in_ratio = 0.0
# Non-exposed SAC parameters
self.discrete_target_entropy_scale = 0.2 # Roughly equal to e-greedy 0.05
self.continuous_target_entropy_scale = 1.0
self.stream_names = list(self.reward_signals.keys())
# Use to reduce "survivor bonus" when using Curiosity or GAIL.
self.gammas = [_val.gamma for _val in trainer_params.reward_signals.values()]
self.use_dones_in_backup = {
name: int(not self.reward_signals[name].ignore_done)
for name in self.stream_names
}
# Critics should have 1/2 of the memory of the policy
critic_memory = policy_network_settings.memory
if critic_memory is not None:
critic_memory = attr.evolve(
critic_memory, memory_size=critic_memory.memory_size // 2
)
value_network_settings = attr.evolve(
policy_network_settings, memory=critic_memory
)
self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
value_network_settings,
self.policy.behavior_spec.action_type,
self.act_size,
)
self.target_network = ValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
value_network_settings,
)
self.soft_update(self.policy.actor_critic.critic, self.target_network, 1.0)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),
requires_grad=True,
)
if self.policy.use_continuous_act:
self.target_entropy = torch.as_tensor(
-1
* self.continuous_target_entropy_scale
* np.prod(self.act_size[0]).astype(np.float32)
)
else:
self.target_entropy = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self.act_size
]
policy_params = list(self.policy.actor_critic.network_body.parameters()) + list(
self.policy.actor_critic.distribution.parameters()
)
value_params = list(self.value_network.parameters()) + list(
self.policy.actor_critic.critic.parameters()
)
logger.debug("value_vars")
for param in value_params:
logger.debug(param.shape)
logger.debug("policy_vars")
for param in policy_params:
logger.debug(param.shape)
self.decay_learning_rate = ModelUtils.DecayedValue(
hyperparameters.learning_rate_schedule,
hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.policy_optimizer = torch.optim.Adam(
policy_params, lr=hyperparameters.learning_rate
)
self.value_optimizer = torch.optim.Adam(
value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
[self._log_ent_coef], lr=hyperparameters.learning_rate
)
def sac_q_loss(
self,
q1_out: Dict[str, torch.Tensor],
q2_out: Dict[str, torch.Tensor],
target_values: Dict[str, torch.Tensor],
dones: torch.Tensor,
rewards: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
q1_losses = []
q2_losses = []
# Multiple q losses per stream
for i, name in enumerate(q1_out.keys()):
q1_stream = q1_out[name].squeeze()
q2_stream = q2_out[name].squeeze()
with torch.no_grad():
q_backup = rewards[name] + (
(1.0 - self.use_dones_in_backup[name] * dones)
* self.gammas[i]
* target_values[name]
)
_q1_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(q_backup, q1_stream), loss_masks
)
_q2_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(q_backup, q2_stream), loss_masks
)
q1_losses.append(_q1_loss)
q2_losses.append(_q2_loss)
q1_loss = torch.mean(torch.stack(q1_losses))
q2_loss = torch.mean(torch.stack(q2_losses))
return q1_loss, q2_loss
def soft_update(self, source: nn.Module, target: nn.Module, tau: float) -> None:
for source_param, target_param in zip(source.parameters(), target.parameters()):
target_param.data.copy_(
target_param.data * (1.0 - tau) + source_param.data * tau
)
def sac_value_loss(
self,
log_probs: torch.Tensor,
values: Dict[str, torch.Tensor],
q1p_out: Dict[str, torch.Tensor],
q2p_out: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
discrete: bool,
) -> torch.Tensor:
min_policy_qs = {}
with torch.no_grad():
_ent_coef = torch.exp(self._log_ent_coef)
for name in values.keys():
if not discrete:
min_policy_qs[name] = torch.min(q1p_out[name], q2p_out[name])
else:
action_probs = log_probs.exp()
_branched_q1p = ModelUtils.break_into_branches(
q1p_out[name] * action_probs, self.act_size
)
_branched_q2p = ModelUtils.break_into_branches(
q2p_out[name] * action_probs, self.act_size
)
_q1p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q1p]
),
dim=0,
)
_q2p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q2p]
),
dim=0,
)
min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:
for name in values.keys():
with torch.no_grad():
v_backup = min_policy_qs[name] - torch.sum(
_ent_coef * log_probs, dim=1
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup), loss_masks
)
value_losses.append(value_loss)
else:
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * log_probs.exp(), self.act_size
)
# We have to do entropy bonus per action branch
branched_ent_bonus = torch.stack(
[
torch.sum(_ent_coef[i] * _lp, dim=1, keepdim=True)
for i, _lp in enumerate(branched_per_action_ent)
]
)
for name in values.keys():
with torch.no_grad():
v_backup = min_policy_qs[name] - torch.mean(
branched_ent_bonus, axis=0
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup.squeeze()),
loss_masks,
)
value_losses.append(value_loss)
value_loss = torch.mean(torch.stack(value_losses))
if torch.isinf(value_loss).any() or torch.isnan(value_loss).any():
            raise UnityTrainerException("Inf or NaN found in SAC value loss")
return value_loss
def sac_policy_loss(
self,
log_probs: torch.Tensor,
q1p_outs: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
discrete: bool,
) -> torch.Tensor:
_ent_coef = torch.exp(self._log_ent_coef)
mean_q1 = torch.mean(torch.stack(list(q1p_outs.values())), axis=0)
if not discrete:
mean_q1 = mean_q1.unsqueeze(1)
batch_policy_loss = torch.mean(_ent_coef * log_probs - mean_q1, dim=1)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
else:
action_probs = log_probs.exp()
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * action_probs, self.act_size
)
branched_q_term = ModelUtils.break_into_branches(
mean_q1 * action_probs, self.act_size
)
branched_policy_loss = torch.stack(
[
torch.sum(_ent_coef[i] * _lp - _qt, dim=1, keepdim=True)
for i, (_lp, _qt) in enumerate(
zip(branched_per_action_ent, branched_q_term)
)
]
)
batch_policy_loss = torch.squeeze(branched_policy_loss)
policy_loss = torch.mean(loss_masks * batch_policy_loss)
return policy_loss
def sac_entropy_loss(
self, log_probs: torch.Tensor, loss_masks: torch.Tensor, discrete: bool
) -> torch.Tensor:
if not discrete:
with torch.no_grad():
target_current_diff = torch.sum(log_probs + self.target_entropy, dim=1)
entropy_loss = -torch.mean(
self._log_ent_coef * loss_masks * target_current_diff
)
else:
with torch.no_grad():
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * log_probs.exp(), self.act_size
)
target_current_diff_branched = torch.stack(
[
torch.sum(_lp, axis=1, keepdim=True) + _te
for _lp, _te in zip(
branched_per_action_ent, self.target_entropy
)
],
axis=1,
)
target_current_diff = torch.squeeze(
target_current_diff_branched, axis=2
)
entropy_loss = -1 * ModelUtils.masked_mean(
torch.mean(self._log_ent_coef * target_current_diff, axis=1), loss_masks
)
return entropy_loss
def _condense_q_streams(
self, q_output: Dict[str, torch.Tensor], discrete_actions: torch.Tensor
) -> Dict[str, torch.Tensor]:
condensed_q_output = {}
onehot_actions = ModelUtils.actions_to_onehot(discrete_actions, self.act_size)
for key, item in q_output.items():
branched_q = ModelUtils.break_into_branches(item, self.act_size)
only_action_qs = torch.stack(
[
torch.sum(_act * _q, dim=1, keepdim=True)
for _act, _q in zip(onehot_actions, branched_q)
]
)
condensed_q_output[key] = torch.mean(only_action_qs, dim=0)
return condensed_q_output
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
"""
Updates model using buffer.
:param num_sequences: Number of trajectories in batch.
:param batch: Experience mini-batch.
:param update_target: Whether or not to update target value network
:param reward_signal_batches: Minibatches to use for updating the reward signals,
indexed by name. If none, don't update the reward signals.
:return: Output from update process.
"""
rewards = {}
for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
next_vec_obs = [ModelUtils.list_to_tensor(batch["next_vector_in"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
memories_list = [
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
]
        # Sequence length should never be < 1 for an LSTM, but guard against indexing out of range just in case.
offset = 1 if self.policy.sequence_length > 1 else 0
next_memories_list = [
ModelUtils.list_to_tensor(
batch["memory"][i][self.policy.m_size // 2 :]
) # only pass value part of memory to target network
for i in range(offset, len(batch["memory"]), self.policy.sequence_length)
]
if len(memories_list) > 0:
memories = torch.stack(memories_list).unsqueeze(0)
next_memories = torch.stack(next_memories_list).unsqueeze(0)
else:
memories = None
next_memories = None
        # Q network memories are zeroed out, since we don't have them during inference.
q_memories = (
torch.zeros_like(next_memories) if next_memories is not None else None
)
vis_obs: List[torch.Tensor] = []
next_vis_obs: List[torch.Tensor] = []
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
vis_obs.append(vis_ob)
next_vis_ob = ModelUtils.list_to_tensor(
batch["next_visual_obs%d" % idx]
)
next_vis_obs.append(next_vis_ob)
# Copy normalizers from policy
self.value_network.q1_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
self.value_network.q2_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
_,
) = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,
memories=memories,
seq_len=self.policy.sequence_length,
all_log_probs=not self.policy.use_continuous_act,
)
if self.policy.use_continuous_act:
squeezed_actions = actions.squeeze(-1)
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
squeezed_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream, q2_stream = q1_out, q2_out
else:
with torch.no_grad():
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream = self._condense_q_streams(q1_out, actions)
q2_stream = self._condense_q_streams(q2_out, actions)
with torch.no_grad():
target_values, _ = self.target_network(
next_vec_obs,
next_vis_obs,
memories=next_memories,
sequence_length=self.policy.sequence_length,
)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
use_discrete = not self.policy.use_continuous_act
dones = ModelUtils.list_to_tensor(batch["done"])
q1_loss, q2_loss = self.sac_q_loss(
q1_stream, q2_stream, target_values, dones, rewards, masks
)
value_loss = self.sac_value_loss(
log_probs, sampled_values, q1p_out, q2p_out, masks, use_discrete
)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)
total_value_loss = q1_loss + q2_loss + value_loss
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
ModelUtils.update_learning_rate(self.policy_optimizer, decay_lr)
self.policy_optimizer.zero_grad()
policy_loss.backward()
self.policy_optimizer.step()
ModelUtils.update_learning_rate(self.value_optimizer, decay_lr)
self.value_optimizer.zero_grad()
total_value_loss.backward()
self.value_optimizer.step()
ModelUtils.update_learning_rate(self.entropy_optimizer, decay_lr)
self.entropy_optimizer.zero_grad()
entropy_loss.backward()
self.entropy_optimizer.step()
# Update target network
self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
update_stats = {
"Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
"Losses/Value Loss": value_loss.detach().cpu().numpy(),
"Losses/Q1 Loss": q1_loss.detach().cpu().numpy(),
"Losses/Q2 Loss": q2_loss.detach().cpu().numpy(),
"Policy/Entropy Coeff": torch.exp(self._log_ent_coef)
.detach()
.cpu()
.numpy(),
"Policy/Learning Rate": decay_lr,
}
for signal in self.reward_signals.values():
signal.update(batch)
return update_stats
def update_reward_signals(
self, reward_signal_minibatches: Mapping[str, AgentBuffer], num_sequences: int
) -> Dict[str, float]:
return {}
def get_modules(self):
return {
"Optimizer:value_network": self.value_network,
"Optimizer:target_network": self.target_network,
"Optimizer:policy_optimizer": self.policy_optimizer,
"Optimizer:value_optimizer": self.value_optimizer,
"Optimizer:entropy_optimizer": self.entropy_optimizer,
}
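The target critic above tracks the online critic through Polyak averaging: each parameter moves a fraction tau toward its source counterpart, and tau=1.0 at construction time performs a hard copy. A standalone sketch of that interpolation, using two small nn.Linear modules as hypothetical stand-ins for the critic and target networks:

import torch
from torch import nn

def soft_update(source: nn.Module, target: nn.Module, tau: float) -> None:
    # target <- (1 - tau) * target + tau * source, applied parameter by parameter
    for s_param, t_param in zip(source.parameters(), target.parameters()):
        t_param.data.copy_(t_param.data * (1.0 - tau) + s_param.data * tau)

critic = nn.Linear(4, 1)
target = nn.Linear(4, 1)
soft_update(critic, target, tau=1.0)    # hard copy, as done once at initialization
soft_update(critic, target, tau=0.005)  # a small tau nudges the target each update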

118
ml-agents/mlagents/trainers/saver/torch_saver.py


import os
import shutil
import torch
from typing import Dict, Union, Optional, cast
from mlagents_envs.exception import UnityPolicyException
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.settings import TrainerSettings, SerializationSettings
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.torch.model_serialization import ModelSerializer
logger = get_logger(__name__)
class TorchSaver(BaseSaver):
"""
Saver class for PyTorch
"""
def __init__(
self, trainer_settings: TrainerSettings, model_path: str, load: bool = False
):
super().__init__()
self.model_path = model_path
self.initialize_path = trainer_settings.init_path
self._keep_checkpoints = trainer_settings.keep_checkpoints
self.load = load
self.policy: Optional[TorchPolicy] = None
self.exporter: Optional[ModelSerializer] = None
        self.modules: Dict[str, torch.nn.Module] = {}
def register(self, module: Union[TorchPolicy, TorchOptimizer]) -> None:
if isinstance(module, TorchPolicy) or isinstance(module, TorchOptimizer):
self.modules.update(module.get_modules()) # type: ignore
else:
            raise UnityPolicyException(
                "Registering object of unsupported type {} to Saver".format(
                    type(module)
                )
            )
if self.policy is None and isinstance(module, TorchPolicy):
self.policy = module
self.exporter = ModelSerializer(self.policy)
def save_checkpoint(self, brain_name: str, step: int) -> str:
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
state_dict = {
name: module.state_dict() for name, module in self.modules.items()
}
torch.save(state_dict, f"{checkpoint_path}.pt")
torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt"))
self.export(checkpoint_path, brain_name)
return checkpoint_path
def export(self, output_filepath: str, brain_name: str) -> None:
if self.exporter is not None:
self.exporter.export_policy_model(output_filepath)
def initialize_or_load(self, policy: Optional[TorchPolicy] = None) -> None:
# Initialize/Load registered self.policy by default.
# If given input argument policy, use the input policy instead.
# This argument is mainly for initialization of the ghost trainer's fixed policy.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_model(
self.initialize_path, policy, reset_global_steps=reset_steps
)
elif self.load:
self._load_model(self.model_path, policy, reset_global_steps=reset_steps)
def _load_model(
self,
load_path: str,
policy: Optional[TorchPolicy] = None,
reset_global_steps: bool = False,
) -> None:
model_path = os.path.join(load_path, "checkpoint.pt")
saved_state_dict = torch.load(model_path)
if policy is None:
modules = self.modules
policy = self.policy
else:
modules = policy.get_modules()
policy = cast(TorchPolicy, policy)
for name, mod in modules.items():
mod.load_state_dict(saved_state_dict[name])
if reset_global_steps:
policy.set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {policy.get_current_step()}.")
def copy_final_model(self, source_nn_path: str) -> None:
"""
Copy the .nn file at the given source to the destination.
Also copies the corresponding .onnx file if it exists.
"""
final_model_name = os.path.splitext(source_nn_path)[0]
if SerializationSettings.convert_to_onnx:
try:
source_path = f"{final_model_name}.onnx"
destination_path = f"{self.model_path}.onnx"
shutil.copyfile(source_path, destination_path)
logger.info(f"Copied {source_path} to {destination_path}.")
except OSError:
pass
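A checkpoint written by TorchSaver is simply a dict of state_dicts keyed by the registered module names, which torch.save and torch.load round-trip without any custom serialization. A hedged sketch of that layout with hypothetical module names (the real keys come from get_modules() on the policy and optimizer):

import torch
from torch import nn

# Hypothetical registry mirroring the structure of TorchSaver.modules
modules = {"Policy": nn.Linear(8, 2), "Optimizer": nn.Linear(8, 1)}

state_dict = {name: module.state_dict() for name, module in modules.items()}
torch.save(state_dict, "checkpoint.pt")

restored = torch.load("checkpoint.pt")
for name, module in modules.items():
    module.load_state_dict(restored[name])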

1001
ml-agents/mlagents/trainers/tests/torch/test.demo
File diff content too large to display

144
ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py


from unittest.mock import MagicMock
import pytest
import mlagents.trainers.tests.mock_brain as mb
import numpy as np
import os
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.torch.components.bc.module import BCModule
from mlagents.trainers.settings import (
TrainerSettings,
BehavioralCloningSettings,
NetworkSettings,
)
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
# model_path = env.external_brain_names[0]
trainer_config = TrainerSettings()
trainer_config.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
bc_module = BCModule(
policy,
settings=bc_settings,
policy_learning_rate=trainer_config.hyperparameters.learning_rate,
default_batch_size=trainer_config.hyperparameters.batch_size,
default_num_epoch=3,
)
return bc_module
# Test default values
def test_bcmodule_defaults():
# See if default values match
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 3
assert bc_module.batch_size == TrainerSettings().hyperparameters.batch_size
# Assign strange values and see if it overrides properly
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
num_epoch=100,
batch_size=10000,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 100
assert bc_module.batch_size == 10000
# Test with continuous control env and vector actions
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_constant_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=0,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate == bc_module.current_lr
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_linear_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=100,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
# Should decay by 10/100 * 0.0003 = 0.00003
bc_module.policy.get_current_step = MagicMock(return_value=10)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate - 0.00003 == pytest.approx(bc_module.current_lr, abs=0.01)
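The expected drop of 0.00003 in the assertion above follows from linear decay of the BC learning rate over the configured number of steps. A quick arithmetic sketch under that assumption, using the default policy learning rate of 3.0e-4 and the test's steps=100 at step 10:

# Assumed values from the test above; linear decay of the BC learning rate
base_lr = 3.0e-4
total_steps = 100
current_step = 10
decayed_lr = base_lr * max(0.0, 1.0 - current_step / total_steps)
print(decayed_lr)  # 0.00027, i.e. 0.00003 below the base rate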
# Test with RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control and visual observations
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_dc_visual_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control, visual observations and RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_dc_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
if __name__ == "__main__":
pytest.main()

177
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


import pytest
import numpy as np
from mlagents.trainers.ghost.trainer import GhostTrainer
from mlagents.trainers.ghost.controller import GhostController
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings, FrameworkType
@pytest.fixture
def dummy_config():
return TrainerSettings(
self_play=SelfPlaySettings(), framework=FrameworkType.PYTORCH
)
VECTOR_ACTION_SPACE = 1
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 513
NUM_AGENTS = 12
@pytest.mark.parametrize("use_discrete", [True, False])
def test_load_and_set(dummy_config, use_discrete):
mock_specs = mb.setup_test_behavior_specs(
use_discrete,
False,
vector_action_space=DISCRETE_ACTION_SPACE
if use_discrete
else VECTOR_ACTION_SPACE,
vector_obs_space=VECTOR_OBS_SPACE,
)
trainer_params = dummy_config
trainer = PPOTrainer("test", 0, trainer_params, True, False, 0, "0")
trainer.seed = 1
policy = trainer.create_policy("test", mock_specs)
trainer.seed = 20 # otherwise graphs are the same
to_load_policy = trainer.create_policy("test", mock_specs)
weights = policy.get_weights()
load_weights = to_load_policy.get_weights()
try:
for w, lw in zip(weights, load_weights):
np.testing.assert_array_equal(w, lw)
except AssertionError:
pass
to_load_policy.load_weights(weights)
load_weights = to_load_policy.get_weights()
for w, lw in zip(weights, load_weights):
np.testing.assert_array_equal(w, lw)
def test_process_trajectory(dummy_config):
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
)
behavior_id_team0 = "test_brain?team=0"
behavior_id_team1 = "test_brain?team=1"
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(
ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
)
# first policy encountered becomes policy trained by wrapped PPO
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
trainer.add_policy(parsed_behavior_id0, policy)
trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
trainer.subscribe_trajectory_queue(trajectory_queue0)
    # Ghost trainer should ignore this queue because it is off-policy
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
trainer.add_policy(parsed_behavior_id1, policy)
trajectory_queue1 = AgentManagerQueue(behavior_id_team1)
trainer.subscribe_trajectory_queue(trajectory_queue1)
time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
action_space=[2],
)
trajectory_queue0.put(trajectory)
trainer.advance()
# Check that trainer put trajectory in update buffer
assert trainer.trainer.update_buffer.num_experiences == 15
trajectory_queue1.put(trajectory)
trainer.advance()
# Check that ghost trainer ignored off policy queue
assert trainer.trainer.update_buffer.num_experiences == 15
# Check that it emptied the queue
assert trajectory_queue1.empty()
def test_publish_queue(dummy_config):
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[1], vector_obs_space=8
)
behavior_id_team0 = "test_brain?team=0"
behavior_id_team1 = "test_brain?team=1"
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
brain_name = parsed_behavior_id0.brain_name
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(
ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
)
# First policy encountered becomes policy trained by wrapped PPO
# This queue should remain empty after swap snapshot
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
trainer.add_policy(parsed_behavior_id0, policy)
policy_queue0 = AgentManagerQueue(behavior_id_team0)
trainer.publish_policy_queue(policy_queue0)
# Ghost trainer should use this queue for ghost policy swap
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
trainer.add_policy(parsed_behavior_id1, policy)
policy_queue1 = AgentManagerQueue(behavior_id_team1)
trainer.publish_policy_queue(policy_queue1)
    # Check that the ghost trainer swap pushes to the ghost queue and not the trainer queue
assert policy_queue0.empty() and policy_queue1.empty()
trainer._swap_snapshots()
assert policy_queue0.empty() and not policy_queue1.empty()
# clear
policy_queue1.get_nowait()
mock_specs = mb.setup_test_behavior_specs(
False,
False,
vector_action_space=VECTOR_ACTION_SPACE,
vector_obs_space=VECTOR_OBS_SPACE,
)
buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_specs)
# Mock out reward signal eval
buffer["extrinsic_rewards"] = buffer["environment_rewards"]
buffer["extrinsic_returns"] = buffer["environment_rewards"]
buffer["extrinsic_value_estimates"] = buffer["environment_rewards"]
buffer["curiosity_rewards"] = buffer["environment_rewards"]
buffer["curiosity_returns"] = buffer["environment_rewards"]
buffer["curiosity_value_estimates"] = buffer["environment_rewards"]
buffer["advantages"] = buffer["environment_rewards"]
trainer.trainer.update_buffer = buffer
    # When the ghost trainer advances and the wrapped trainer's buffer is full,
    # the wrapped trainer pushes the updated policy to the correct queue
assert policy_queue0.empty() and policy_queue1.empty()
trainer.advance()
assert not policy_queue0.empty() and policy_queue1.empty()
if __name__ == "__main__":
pytest.main()

150
ml-agents/mlagents/trainers/tests/torch/test_policy.py


import pytest
import torch
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 32
NUM_AGENTS = 12
EPSILON = 1e-7
def create_policy_mock(
dummy_config: TrainerSettings,
use_rnn: bool = False,
use_discrete: bool = True,
use_visual: bool = False,
seed: int = 0,
) -> TorchPolicy:
mock_spec = mb.setup_test_behavior_specs(
use_discrete,
use_visual,
vector_action_space=DISCRETE_ACTION_SPACE
if use_discrete
else VECTOR_ACTION_SPACE,
vector_obs_space=VECTOR_OBS_SPACE,
)
trainer_settings = dummy_config
trainer_settings.keep_checkpoints = 3
trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(seed, mock_spec, trainer_settings)
return policy
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_evaluate(rnn, visual, discrete):
# Test evaluate
policy = create_policy_mock(
TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
decision_step, terminal_step = mb.create_steps_from_behavior_spec(
policy.behavior_spec, num_agents=NUM_AGENTS
)
run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
if discrete:
run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
else:
assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_evaluate_actions(rnn, visual, discrete):
policy = create_policy_mock(
TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
if policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(buffer["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
vis_obs = []
for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
vis_obs.append(vis_ob)
memories = [
ModelUtils.list_to_tensor(buffer["memory"][i])
for i in range(0, len(buffer["memory"]), policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
log_probs, entropy, values = policy.evaluate_actions(
vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
memories=memories,
seq_len=policy.sequence_length,
)
assert log_probs.shape == (64, policy.behavior_spec.action_size)
assert entropy.shape == (64, policy.behavior_spec.action_size)
for val in values.values():
assert val.shape == (64,)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_sample_actions(rnn, visual, discrete):
policy = create_policy_mock(
TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
vis_obs = []
for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
vis_obs.append(vis_ob)
memories = [
ModelUtils.list_to_tensor(buffer["memory"][i])
for i in range(0, len(buffer["memory"]), policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
memories,
) = policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,
memories=memories,
seq_len=policy.sequence_length,
all_log_probs=not policy.use_continuous_act,
)
if discrete:
assert log_probs.shape == (
64,
sum(policy.behavior_spec.discrete_action_branches),
)
else:
assert log_probs.shape == (64, policy.behavior_spec.action_shape)
assert entropies.shape == (64, policy.behavior_spec.action_size)
for val in sampled_values.values():
assert val.shape == (64,)
if rnn:
assert memories.shape == (1, 1, policy.m_size)

505
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


import math
import tempfile
import pytest
import numpy as np
import attr
from typing import Dict
from mlagents.trainers.tests.simple_test_envs import (
SimpleEnvironment,
MemoryEnvironment,
RecordEnvironment,
)
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.simple_env_manager import SimpleEnvManager
from mlagents.trainers.demo_loader import write_demo
from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary
from mlagents.trainers.settings import (
TrainerSettings,
PPOSettings,
SACSettings,
NetworkSettings,
SelfPlaySettings,
BehavioralCloningSettings,
GAILSettings,
TrainerType,
RewardSignalType,
EncoderType,
ScheduleType,
FrameworkType,
)
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents_envs.side_channel.environment_parameters_channel import (
EnvironmentParametersChannel,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
BRAIN_NAME = "1D"
PPO_CONFIG = TrainerSettings(
trainer_type=TrainerType.PPO,
hyperparameters=PPOSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=16,
buffer_size=64,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=32),
summary_freq=500,
max_steps=3000,
threaded=False,
framework=FrameworkType.PYTORCH,
)
SAC_CONFIG = TrainerSettings(
trainer_type=TrainerType.SAC,
hyperparameters=SACSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=8,
buffer_init_steps=100,
buffer_size=5000,
tau=0.01,
init_entcoef=0.01,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=16),
summary_freq=100,
max_steps=1000,
threaded=False,
)
# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list of all final rewards for each brain individually.
# This is so that we can process all final rewards in different ways for different algorithms.
# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print(f"Last {last_n_rewards} rewards:", rewards_to_use)
return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()
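As the comment above notes, a custom reward processor is just a callable over the list of final rewards for a brain. A hypothetical example (not used by these tests) that takes the median instead of the mean:

import numpy as np

def median_reward_processor(rewards, last_n_rewards=5):
    # Hypothetical alternative to default_reward_processor: median of the last N final rewards
    return float(np.median(np.array(rewards[-last_n_rewards:], dtype=np.float32)))

# Usage sketch:
# _check_environment_trains(env, {BRAIN_NAME: config}, reward_processor=median_reward_processor)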
class DebugWriter(StatsWriter):
"""
Print to stdout so stats can be viewed in pytest
"""
def __init__(self):
self._last_reward_summary: Dict[str, float] = {}
def get_last_rewards(self):
return self._last_reward_summary
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int
) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
def _check_environment_trains(
env,
trainer_config,
reward_processor=default_reward_processor,
env_parameter_manager=None,
success_threshold=0.9,
env_manager=None,
):
if env_parameter_manager is None:
env_parameter_manager = EnvironmentParameterManager()
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:
run_id = "id"
seed = 1337
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
debug_writer = DebugWriter()
StatsReporter.add_writer(debug_writer)
if env_manager is None:
env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
trainer_factory = TrainerFactory(
trainer_config=trainer_config,
output_path=dir,
train_model=True,
load_model=False,
seed=seed,
param_manager=env_parameter_manager,
multi_gpu=False,
)
tc = TrainerController(
trainer_factory=trainer_factory,
output_path=dir,
run_id=run_id,
param_manager=env_parameter_manager,
train=True,
training_seed=seed,
)
# Begin training
tc.start_learning(env_manager)
if (
success_threshold is not None
): # For tests where we are just checking setup and not reward
processed_rewards = [
reward_processor(rewards) for rewards in env.final_rewards.values()
]
assert all(not math.isnan(reward) for reward in processed_rewards)
assert all(reward > success_threshold for reward in processed_rewards)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(PPO_CONFIG)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)
config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual, use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=0,
step_size=0.2,
)
new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,
num_vector=0,
step_size=0.5,
vis_obs_size=(36, 36, 3),
)
new_networksettings = attr.evolve(
SAC_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
)
new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
config = attr.evolve(
PPO_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=500,
summary_freq=100,
)
# The number of steps is pretty small for these encoders
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
new_network_settings = attr.evolve(
PPO_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),
)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, learning_rate=1.0e-3, batch_size=64, buffer_size=128
)
config = attr.evolve(
PPO_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_network_settings,
max_steps=5000,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(SAC_CONFIG)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(SAC_CONFIG.hyperparameters, buffer_init_steps=2000)
config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_sac(num_visual, use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=0,
step_size=0.2,
)
new_hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
)
config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,
num_vector=0,
step_size=0.5,
vis_obs_size=(36, 36, 3),
)
new_networksettings = attr.evolve(
SAC_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
)
new_hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters,
batch_size=16,
learning_rate=3e-4,
buffer_init_steps=0,
)
config = attr.evolve(
SAC_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=100,
)
# The number of steps is pretty small for these encoders
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.5 if use_discrete else 0.2
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
)
new_hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters,
batch_size=128,
learning_rate=1e-3,
buffer_init_steps=1000,
steps_per_update=2,
)
config = attr.evolve(
SAC_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=5000,
)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=4000
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=None)
processed_rewards = [
default_reward_processor(rewards) for rewards in env.final_rewards.values()
]
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
# Make opponent for asymmetric case
brain_name_opp = BRAIN_NAME + "Opp"
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,
save_steps=10000,
swap_steps=10000,
team_change=400,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=4000)
_check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
# Make opponent for asymmetric case
brain_name_opp = BRAIN_NAME + "Opp"
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
)
    # This config should fail because the team that is not learning when both have reached
    # max step should be executing the initial, untrained policy.
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=0.0,
save_steps=5000,
swap_steps=5000,
team_change=2000,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=3000)
_check_environment_trains(
env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
)
processed_rewards = [
default_reward_processor(rewards) for rewards in env.final_rewards.values()
]
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)
@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
env = RecordEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,
)
# If we want to use true demos, we can solve the env in the usual way
# Otherwise, we can just call solve to execute the optimal policy
env.solve()
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
brain_param_proto = BrainParametersProto(
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)
action_type = "Discrete" if use_discrete else "Continuous"
demo_path_name = "1DTest" + action_type + ".demo"
demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
return demo_path
return record_demo
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
config = attr.evolve(
trainer_config,
reward_signals=reward_signals,
behavioral_cloning=bc_settings,
max_steps=500,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,
use_discrete=use_discrete,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3e-4)
config = attr.evolve(
PPO_CONFIG,
reward_signals=reward_signals,
hyperparameters=hyperparams,
behavioral_cloning=bc_settings,
max_steps=1000,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,
use_discrete=use_discrete,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
)
config = attr.evolve(
SAC_CONFIG,
reward_signals=reward_signals,
hyperparameters=hyperparams,
behavioral_cloning=bc_settings,
max_steps=500,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

446
ml-agents/mlagents/trainers/tests/torch/testdcvis.demo


Binary file not shown
/O^�8ϋe^�t���s���<z�p��0�H� �l��dX�
V :W�\��y�Q��7��>9Ӱ �=�����>�n��n�t"�ǿ(���{�˃��Ӵ��FH]��Wް����N�a���liݸ��w ��?z~��"���Ǚ7?��$]�K��"�謿��Xvn���I����2׵�Q޹�%s�Hc�ge�J�7���#��h~��� S8��H�s�_�� ܨ������VF�OQ��}n�?��<�L��ݧ��a���X�0�=@������bBi���˃��v}��Hd��W%dV�/F�+�1f9�§�gZ��$�����D�ҹJ�����̝���N�dȡVd!h5bo�%K��H~�������A�o�s"�7���Hi��(���i�<
�( VHLS�m�)��5.�&��K�_��i��,��� ����,������U�d�_���=I&����'��R`u�+�Z�+�s�V�Uh ��m��ל5��b�[����g��j�0ب����8�
��\��F����B��2����a��c��� ��5o��݄�3ʓI'e~�I�9VL���8��E�'\�=v�Q�$mﰁu�u�}����&ֵ��0��J��Y8&`�Y���z���<���NF0 Yg����JՙJ�\��]��B
������ ������s���6q�D���Z�i>�[�D0<���E���럞N�z��H2b-�3�?�J �2s�5?v����z�=/���1�P;��D;�.UE��l��1#����#)�v{�M�ʫi@�yrH��^��d ����`�_ψF��[���R{6�~ɛT)E�J�~��\��(�^u����몔�J��ެ��u=b/|y>����˛h� s�D�Ԩـ���G �Ul��Ni���I��@�Ƨ����~k�&Vx��5���G^��^�"ۃ����y�:a5E�dmF2K��M����Ug���-6�uI�z4K��t�F鉘��Ȗ��dv�^��-��-�>��b.�>��`V� �������jR�r<s���ω�D`V��q�Y�ʨ���*�Ì�N#ov�Y
���tT���i���{�~������ޔ�:x�P�a栭��������i��_IEND�B`�j
"
@�?�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Xͮ�D����$W�@�-j T]!$6���'�Ux ��a�W@����X���R����$�mn�1ߙ��s��6�l�u�����o��ط�7��4���J�x�䛺��ʷ+��h۾���(�+߮|g�m�.z{��m߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g��m׾���������q�������F�&Z����>o@���9�n����|����|���������ۏ��8�����׵ ���򗿿�<K����x����P����`����1��݋4�����+��6��G7�E�H���Y2;N�w������OGx�&i��=��f~��ܣ8�:�� ��{!�:Nn��?=� �/� �� ���?>�%�EP���A�q7~v�?�͵�����X��ߣG��R*�*���KB�/��.]r|�x���ePA� ��"_fK�1X,�%�A�[�'y��7��y��]F��Mje"�%@���lET����X��#r���E�<��<���i��sh�<�=|��wG�9��G�b=<�آ��[j[�|*��ّY��^ ��B�"q¡�����q6���ϔ?ܘ�G�����8 �3 �BhORn���1�̕=���P0q���a ��Ģ#����/��Φ'��ω�"�γ�Lܳ ""Po�`0@�q���+Ec���d�<��\�r����Y��^9�|�o �������hoZd�g0�fx��NOSO��T��0����"v��Y:�CSL]�Y({��ɘ3��W�V��^�����AX>�@Ap6:û ˚/��p4<�@0?Mg�Y��7,����!�A ֟ձb�% Jn�yi�_Q�5��Ѻ*IM���)�C
/O^�8ϋe^�t���s���<z�p��0�H� �l��dX�
V :W�\��y�Q��7��>9Ӱ �=�����>�n��n�t"�ǿ(���{�˃��Ӵ��FH]��Wް����N�a���liݸ��w ��?z~��"���Ǚ7?��$]�K��"�謿��Xvn���I����2׵�Q޹�%s�Hc�ge�J�7���#��h~��� S8��H�s�_�� ܨ������VF�OQ��}n�?��<�L��ݧ��a���X�0�=@������bBi���˃��v}��Hd��W%dV�/F�+�1f9�§�gZ��$�����D�ҹJ�����̝���N�dȡVd!h5bo�%K��H~�������A�o�s"�7���Hi��(���i�<
�( VHLS�m�)��5.�&��K�_��i��,��� ����,������U�d�_���=I&����'��R`u�+�Z�+�s�V�Uh ��m��ל5��b�[����g��j�0ب����8�
��\��F����B��2����a��c��� ��5o��݄�3ʓI'e~�I�9VL���8��E�'\�=v�Q�$mﰁu�u�}����&ֵ��0��J��Y8&`�Y���z���<���NF0 Yg����JՙJ�\��]��B
������ ������s���6q�D���Z�i>�[�D0<���E���럞N�z��H2b-�3�?�J �2s�5?v����z�=/���1�P;��D;�.UE��l��1#����#)�v{�M�ʫi@�yrH��^��d ����`�_ψF��[���R{6�~ɛT)E�J�~��\��(�^u����몔�J��ެ��u=b/|y>����˛h� s�D�Ԩـ���G �Ul��Ni���I��@�Ƨ����~k�&Vx��5���G^��^�"ۃ����y�:a5E�dmF2K��M����Ug���-6�uI�z4K��t�F鉘��Ȗ��dv�^��-��-�>��b.�>��`V� �������jR�r<s���ω�D`V��q�Y�ʨ���*�Ì�N#ov�Y
���tT���i���{�~������ޔ�:x�P�a栭��������i��_IEND�B`�j
"
@�?�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Xͮ�D����$W�@�-j T]!$6���'�Ux ��a�W@����X���R����$�mn�1ߙ��s��6�l�u�����o��ط�7��4���J�x�䛺��ʷ+��h۾���(�+߮|g�m�.z{��m߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g��m׾���������q�������F�&Z����>o@���9�n����|����|���������ۏ��8�����׵ ���򗿿�<K����x����P����`����1��݋4�����+��6��G7�E�H���Y2;N�w������OGx�&i��=��f~��ܣ8�:�� ��{!�:Nn��?=� �/� �� ���?>�%�EP���A�q7~v�?�͵�����X��ߣG��R*�*���KB�/��.]r|�x���ePA� ��"_fK�1X,�%�A�[�'y��7��y��]F��Mje"�%@���lET����X��#r���E�<��<���i��sh�<�=|��wG�9��G�b=<�آ��[j[�|*��ّY��^ ��B�"q¡�����q6���ϔ?ܘ�G�����8 �3 �BhORn���1�̕=���P0q���a ��Ģ#����/��Φ'��ω�"�γ�Lܳ ""Po�`0@�q���+Ec���d�<��\�r����Y��^9�|�o �������hoZd�g0�fx��NOSO��T��0����"v��Y:�CSL]�Y({��ɘ3��W�V��^�����AX>�@Ap6:û ˚/��p4<�@0?Mg�Y��7,����!�A ֟ձb�% Jn�yi�_Q�5��Ѻ*IM���)�C
/O^�8ϋe^�t���s���<z�p��0�H� �l��dX�
V :W�\��y�Q��7��>9Ӱ �=�����>�n��n�t"�ǿ(���{�˃��Ӵ��FH]��Wް����N�a���liݸ��w ��?z~��"���Ǚ7?��$]�K��"�謿��Xvn���I����2׵�Q޹�%s�Hc�ge�J�7���#��h~��� S8��H�s�_�� ܨ������VF�OQ��}n�?��<�L��ݧ��a���X�0�=@������bBi���˃��v}��Hd��W%dV�/F�+�1f9�§�gZ��$�����D�ҹJ�����̝���N�dȡVd!h5bo�%K��H~�������A�o�s"�7���Hi��(���i�<
�( VHLS�m�)��5.�&��K�_��i��,��� ����,������U�d�_���=I&����'��R`u�+�Z�+�s�V�Uh ��m��ל5��b�[����g��j�0ب����8�
��\��F����B��2����a��c��� ��5o��݄�3ʓI'e~�I�9VL���8��E�'\�=v�Q�$mﰁu�u�}����&ֵ��0��J��Y8&`�Y���z���<���NF0 Yg����JՙJ�\��]��B
������ ������s���6q�D���Z�i>�[�D0<���E���럞N�z��H2b-�3�?�J �2s�5?v����z�=/���1�P;��D;�.UE��l��1#����#)�v{�M�ʫi@�yrH��^��d ����`�_ψF��[���R{6�~ɛT)E�J�~��\��(�^u����몔�J��ެ��u=b/|y>����˛h� s�D�Ԩـ���G �Ul��Ni���I��@�Ƨ����~k�&Vx��5���G^��^�"ۃ����y�:a5E�dmF2K��M����Ug���-6�uI�z4K��t�F鉘��Ȗ��dv�^��-��-�>��b.�>��`V� �������jR�r<s���ω�D`V��q�Y�ʨ���*�Ì�N#ov�Y
���tT���i���{�~������ޔ�:x�P�a栭��������i��_IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"

74
ml-agents/mlagents/trainers/torch/model_serialization.py


import os
import torch

from mlagents_envs.logging_util import get_logger
from mlagents.trainers.settings import SerializationSettings

logger = get_logger(__name__)


class ModelSerializer:
    def __init__(self, policy):
        # ONNX only supports inputs in NCHW (channels-first) format.
        # Barracuda also expects data in NCHW, so any multi-dimensional input
        # must follow that layout or the Barracuda import will fail.
        self.policy = policy
        batch_dim = [1]
        seq_len_dim = [1]
        dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
        # Create dummy visual inputs with NCHW shapes
        # (self.policy.behavior_spec.observation_shapes stores them as NHWC).
        dummy_vis_obs = [
            torch.zeros(batch_dim + [shape[2], shape[0], shape[1]])
            for shape in self.policy.behavior_spec.observation_shapes
            if len(shape) == 3
        ]
        dummy_masks = torch.ones(batch_dim + [sum(self.policy.actor_critic.act_size)])
        dummy_memories = torch.zeros(
            batch_dim + seq_len_dim + [self.policy.export_memory_size]
        )
        self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)
        self.input_names = (
            ["vector_observation"]
            + [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)]
            + ["action_masks", "memories"]
        )
        self.output_names = [
            "action",
            "version_number",
            "memory_size",
            "is_continuous_control",
            "action_output_shape",
        ]
        self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
        self.dynamic_axes.update({"action": {0: "batch"}})

    def export_policy_model(self, output_filepath: str) -> None:
        """
        Exports a Torch model for a Policy to .onnx format for Unity embedding.

        :param output_filepath: file path to output the model (without file suffix)
        """
        if not os.path.exists(output_filepath):
            os.makedirs(output_filepath)
        onnx_output_path = f"{output_filepath}.onnx"
        logger.info(f"Converting to {onnx_output_path}")
        torch.onnx.export(
            self.policy.actor_critic,
            self.dummy_input,
            onnx_output_path,
            opset_version=SerializationSettings.onnx_opset,
            input_names=self.input_names,
            output_names=self.output_names,
            dynamic_axes=self.dynamic_axes,
        )
        logger.info(f"Exported {onnx_output_path}")

111
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


import numpy as np
import pytest
import torch

from mlagents.trainers.torch.components.reward_providers import (
    CuriosityRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)

SEED = [42]


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_settings.strength = 0.1
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    assert curiosity_rp.strength == 0.1
    assert curiosity_rp.name == "Curiosity"


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,), (64, 66, 1)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_rp = create_reward_provider(
        RewardSignalType.CURIOSITY, behavior_spec, curiosity_settings
    )
    assert curiosity_rp.name == "Curiosity"


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    curiosity_rp.update(buffer)
    reward_old = curiosity_rp.evaluate(buffer)[0]
    for _ in range(10):
        curiosity_rp.update(buffer)
        reward_new = curiosity_rp.evaluate(buffer)[0]
        assert reward_new < reward_old
        reward_old = reward_new


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec", [BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)]
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(200):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_action(buffer)[0].detach()
    target = buffer["actions"][0]
    error = float(torch.mean((prediction - target) ** 2))
    assert error < 0.001


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,), (64, 66, 3)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(100):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_next_state(buffer)[0]
    target = curiosity_rp._network.get_next_state(buffer)[0]
    error = float(torch.mean((prediction - target) ** 2).detach())
    assert error < 0.001
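
These curiosity tests pin down the intended behaviour of the intrinsic reward: it is the forward-model prediction error, so repeatedly updating the module on a fixed buffer must drive the reward down, and the inverse and forward heads must eventually predict actions and next states accurately. Below is a stripped-down sketch of the reward-decrease check, mirroring test_reward_decreases above with the same helper and hyperparameters, shown only to make the loop structure explicit; it is not additional test code from the diff.

import numpy as np
import torch
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import CuriositySettings
from mlagents.trainers.torch.components.reward_providers import CuriosityRewardProvider
from mlagents.trainers.tests.torch.test_reward_providers.utils import create_agent_buffer

np.random.seed(42)
torch.manual_seed(42)
spec = BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)
provider = CuriosityRewardProvider(spec, CuriositySettings(32, 0.01))
buffer = create_agent_buffer(spec, 5)  # 5 transitions, as in the test
provider.update(buffer)
previous = provider.evaluate(buffer)[0]
for _ in range(10):
    provider.update(buffer)            # train the forward/inverse models on the same data
    current = provider.evaluate(buffer)[0]
    assert current < previous          # prediction error, i.e. intrinsic reward, shrinks
    previous = current
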

56
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


import pytest

from mlagents.trainers.torch.components.reward_providers import (
    ExtrinsicRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    settings.gamma = 0.2
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    assert extrinsic_rp.gamma == 0.2
    assert extrinsic_rp.name == "Extrinsic"


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    extrinsic_rp = create_reward_provider(
        RewardSignalType.EXTRINSIC, behavior_spec, settings
    )
    assert extrinsic_rp.name == "Extrinsic"


@pytest.mark.parametrize("reward", [2.0, 3.0, 4.0])
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:
    buffer = create_agent_buffer(behavior_spec, 1000, reward)
    settings = RewardSignalSettings()
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    generated_rewards = extrinsic_rp.evaluate(buffer)
    assert (generated_rewards == reward).all()
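
Taken together with the curiosity tests above and the GAIL tests that follow, these files all build providers through the same create_reward_provider factory. A short sketch of that shared entry point, using only the types exercised in these tests (settings values are illustrative, and the GAIL demo path is hypothetical):

from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import (
    CuriositySettings,
    GAILSettings,
    RewardSignalSettings,
    RewardSignalType,
)
from mlagents.trainers.torch.components.reward_providers import create_reward_provider

spec = BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)
extrinsic = create_reward_provider(RewardSignalType.EXTRINSIC, spec, RewardSignalSettings())
curiosity = create_reward_provider(RewardSignalType.CURIOSITY, spec, CuriositySettings(32, 0.01))
gail = create_reward_provider(
    RewardSignalType.GAIL, spec, GAILSettings(demo_path="path/to/expert.demo")  # hypothetical path
)
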

138
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


from typing import Any
import numpy as np
import pytest
from unittest.mock import patch
import torch
import os

from mlagents.trainers.torch.components.reward_providers import (
    GAILRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
    DiscriminatorNetwork,
)

CONTINUOUS_PATH = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
    + "/test.demo"
)
DISCRETE_PATH = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
    + "/testdcvis.demo"
)
SEED = [42]


@pytest.mark.parametrize(
    "behavior_spec", [BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)]
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
    gail_rp = GAILRewardProvider(behavior_spec, gail_settings)
    assert gail_rp.name == "GAIL"


@pytest.mark.parametrize(
    "behavior_spec", [BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)]
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
    gail_rp = create_reward_provider(
        RewardSignalType.GAIL, behavior_spec, gail_settings
    )
    assert gail_rp.name == "GAIL"


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(8,), (24, 26, 1)], ActionType.CONTINUOUS, 2),
        BehaviorSpec([(50,)], ActionType.DISCRETE, (2, 3, 3, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (20,)),
    ],
)
@pytest.mark.parametrize("use_actions", [False, True])
@patch(
    "mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
)
def test_reward_decreases(
    demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.05, use_vail=False, use_actions=use_actions
    )
    gail_rp = create_reward_provider(
        RewardSignalType.GAIL, behavior_spec, gail_settings
    )
    init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
    init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
    for _ in range(10):
        gail_rp.update(buffer_policy)
        reward_expert = gail_rp.evaluate(buffer_expert)[0]
        reward_policy = gail_rp.evaluate(buffer_policy)[0]
        assert reward_expert >= 0  # GAIL / VAIL reward always positive
        assert reward_policy >= 0
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    assert reward_expert > reward_policy  # Expert reward greater than non-expert reward
    assert (
        reward_expert > init_reward_expert
    )  # Expert reward getting better as network trains
    assert (
        reward_policy < init_reward_policy
    )  # Non-expert reward getting worse as network trains


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3, 3, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (20,)),
    ],
)
@pytest.mark.parametrize("use_actions", [False, True])
@patch(
    "mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
)
def test_reward_decreases_vail(
    demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.005, use_vail=True, use_actions=use_actions
)
DiscriminatorNetwork.initial_beta = 0.0
# we must set the initial value of beta to 0 for testing
# If we do not, the kl-loss will dominate early and will block the estimator
gail_rp = create_reward_provider(
RewardSignalType.GAIL, behavior_spec, gail_settings
)
for _ in range(100):
gail_rp.update(buffer_policy)
reward_expert = gail_rp.evaluate(buffer_expert)[0]
reward_policy = gail_rp.evaluate(buffer_policy)[0]
assert reward_expert >= 0 # GAIL / VAIL reward always positive
assert reward_policy >= 0
reward_expert = gail_rp.evaluate(buffer_expert)[0]
reward_policy = gail_rp.evaluate(buffer_policy)[0]
assert reward_expert > reward_policy # Expert reward greater than non-expert reward

32
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.trajectory import SplitObservations
def create_agent_buffer(
behavior_spec: BehaviorSpec, number: int, reward: float = 0.0
) -> AgentBuffer:
buffer = AgentBuffer()
curr_observations = [
np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
]
next_observations = [
np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
]
action = behavior_spec.create_random_action(1)[0, :]
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)
for i, _ in enumerate(curr_split_obs.visual_observations):
buffer["visual_obs%d" % i].append(curr_split_obs.visual_observations[i])
buffer["next_visual_obs%d" % i].append(
next_split_obs.visual_observations[i]
)
buffer["vector_obs"].append(curr_split_obs.vector_observations)
buffer["next_vector_in"].append(next_split_obs.vector_observations)
buffer["actions"].append(action)
buffer["done"].append(np.zeros(1, dtype=np.float32))
buffer["reward"].append(np.ones(1, dtype=np.float32) * reward)
buffer["masks"].append(np.ones(1, dtype=np.float32))
return buffer
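For reference, a minimal usage sketch of this helper, mirroring the specs used in the tests above (the step count and reward value here are illustrative, not from the PR):
from mlagents_envs.base_env import BehaviorSpec, ActionType

spec = BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)
buffer = create_agent_buffer(spec, number=20, reward=1.0)
assert buffer.num_experiences == 20  # one experience appended per requested step
assert (buffer["reward"][0] == 1.0).all()  # every step carries the constant reward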

0
ml-agents/mlagents/trainers/torch/components/__init__.py

0
ml-agents/mlagents/trainers/torch/components/bc/__init__.py

183
ml-agents/mlagents/trainers/torch/components/bc/module.py


from typing import Dict
import numpy as np
import torch
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.settings import BehavioralCloningSettings, ScheduleType
from mlagents.trainers.torch.utils import ModelUtils
class BCModule:
def __init__(
self,
policy: TorchPolicy,
settings: BehavioralCloningSettings,
policy_learning_rate: float,
default_batch_size: int,
default_num_epoch: int,
):
"""
A BC trainer that can be used inline with RL.
:param policy: The policy of the learning model
:param settings: The settings for BehavioralCloning including LR strength, batch_size,
num_epochs, samples_per_update and LR annealing steps.
:param policy_learning_rate: The initial learning rate of the policy. Used to set an appropriate
learning rate for the pretrainer.
:param default_batch_size: Batch size to use when settings.batch_size is not specified.
:param default_num_epoch: Number of epochs to use when settings.num_epoch is not specified.
"""
self.policy = policy
self._anneal_steps = settings.steps
self.current_lr = policy_learning_rate * settings.strength
learning_rate_schedule: ScheduleType = (
ScheduleType.LINEAR if self._anneal_steps > 0 else ScheduleType.CONSTANT
)
self.decay_learning_rate = ModelUtils.DecayedValue(
learning_rate_schedule, self.current_lr, 1e-10, self._anneal_steps
)
params = self.policy.actor_critic.parameters()
self.optimizer = torch.optim.Adam(params, lr=self.current_lr)
_, self.demonstration_buffer = demo_to_buffer(
settings.demo_path, policy.sequence_length, policy.behavior_spec
)
self.batch_size = (
settings.batch_size if settings.batch_size else default_batch_size
)
self.num_epoch = settings.num_epoch if settings.num_epoch else default_num_epoch
self.n_sequences = max(
min(self.batch_size, self.demonstration_buffer.num_experiences)
// policy.sequence_length,
1,
)
self.has_updated = False
self.use_recurrent = self.policy.use_recurrent
self.samples_per_update = settings.samples_per_update
def update(self) -> Dict[str, np.ndarray]:
"""
Updates model using buffer.
:param max_batches: The maximum number of batches to use per update.
:return: The loss of the update.
"""
# Don't continue training if the learning rate has reached 0, to reduce training time.
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
if self.current_lr <= 0:
return {"Losses/Pretraining Loss": 0}
batch_losses = []
possible_demo_batches = (
self.demonstration_buffer.num_experiences // self.n_sequences
)
possible_batches = possible_demo_batches
max_batches = self.samples_per_update // self.n_sequences
n_epoch = self.num_epoch
for _ in range(n_epoch):
self.demonstration_buffer.shuffle(
sequence_length=self.policy.sequence_length
)
if max_batches == 0:
num_batches = possible_batches
else:
num_batches = min(possible_batches, max_batches)
for i in range(num_batches // self.policy.sequence_length):
demo_update_buffer = self.demonstration_buffer
start = i * self.n_sequences * self.policy.sequence_length
end = (i + 1) * self.n_sequences * self.policy.sequence_length
mini_batch_demo = demo_update_buffer.make_mini_batch(start, end)
run_out = self._update_batch(mini_batch_demo, self.n_sequences)
loss = run_out["loss"]
batch_losses.append(loss)
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.current_lr = decay_lr
self.has_updated = True
update_stats = {"Losses/Pretraining Loss": np.mean(batch_losses)}
return update_stats
def _behavioral_cloning_loss(self, selected_actions, log_probs, expert_actions):
if self.policy.use_continuous_act:
bc_loss = torch.nn.functional.mse_loss(selected_actions, expert_actions)
else:
log_prob_branches = ModelUtils.break_into_branches(
log_probs, self.policy.act_size
)
bc_loss = torch.mean(
torch.stack(
[
torch.sum(
-torch.nn.functional.log_softmax(log_prob_branch, dim=1)
* expert_actions_branch,
dim=1,
)
for log_prob_branch, expert_actions_branch in zip(
log_prob_branches, expert_actions
)
]
)
)
return bc_loss
def _update_batch(
self, mini_batch_demo: Dict[str, np.ndarray], n_sequences: int
) -> Dict[str, float]:
"""
Helper function for update_batch.
"""
vec_obs = [ModelUtils.list_to_tensor(mini_batch_demo["vector_obs"])]
act_masks = None
if self.policy.use_continuous_act:
expert_actions = ModelUtils.list_to_tensor(mini_batch_demo["actions"])
else:
raw_expert_actions = ModelUtils.list_to_tensor(
mini_batch_demo["actions"], dtype=torch.long
)
expert_actions = ModelUtils.actions_to_onehot(
raw_expert_actions, self.policy.act_size
)
act_masks = ModelUtils.list_to_tensor(
np.ones(
(
self.n_sequences * self.policy.sequence_length,
sum(self.policy.behavior_spec.discrete_action_branches),
),
dtype=np.float32,
)
)
memories = []
if self.policy.use_recurrent:
memories = torch.zeros(1, self.n_sequences, self.policy.m_size)
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(
mini_batch_demo["visual_obs%d" % idx]
)
vis_obs.append(vis_ob)
else:
vis_obs = []
selected_actions, all_log_probs, _, _, _ = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,
memories=memories,
seq_len=self.policy.sequence_length,
all_log_probs=True,
)
bc_loss = self._behavioral_cloning_loss(
selected_actions, all_log_probs, expert_actions
)
self.optimizer.zero_grad()
bc_loss.backward()
self.optimizer.step()
run_out = {"loss": bc_loss.detach().cpu().numpy()}
return run_out
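As a rough sketch of how a trainer-side optimizer could own this module (the policy, settings object, and numbers below are assumptions for illustration, not part of this PR):
# `policy` is assumed to be an already-constructed TorchPolicy and `bc_settings`
# a BehavioralCloningSettings whose demo_path points at a valid .demo file.
bc_module = BCModule(
    policy,
    settings=bc_settings,
    policy_learning_rate=3e-4,  # typically the policy optimizer's initial LR
    default_batch_size=512,     # fallback when settings.batch_size is unset
    default_num_epoch=3,        # fallback when settings.num_epoch is unset
)
stats = bc_module.update()      # {"Losses/Pretraining Loss": ...}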

15
ml-agents/mlagents/trainers/torch/components/reward_providers/__init__.py


from mlagents.trainers.torch.components.reward_providers.base_reward_provider import ( # noqa F401
BaseRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.extrinsic_reward_provider import ( # noqa F401
ExtrinsicRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.curiosity_reward_provider import ( # noqa F401
CuriosityRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import ( # noqa F401
GAILRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.reward_provider_factory import ( # noqa F401
create_reward_provider,
)

72
ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py


import numpy as np
from abc import ABC, abstractmethod
from typing import Dict
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.settings import RewardSignalSettings
from mlagents_envs.base_env import BehaviorSpec
class BaseRewardProvider(ABC):
def __init__(self, specs: BehaviorSpec, settings: RewardSignalSettings) -> None:
self._policy_specs = specs
self._gamma = settings.gamma
self._strength = settings.strength
self._ignore_done = False
@property
def gamma(self) -> float:
"""
The discount factor for the reward signal
"""
return self._gamma
@property
def strength(self) -> float:
"""
The strength multiplier of the reward provider
"""
return self._strength
@property
def name(self) -> str:
"""
The name of the reward provider, used for reporting and identification.
"""
class_name = self.__class__.__name__
return class_name.replace("RewardProvider", "")
@property
def ignore_done(self) -> bool:
"""
If True, the done signal is ignored when computing returns, so the rewards of the
next episode are used to calculate the return of the current episode.
This is used to mitigate the positive bias of reward signals with no natural end.
"""
return self._ignore_done
@abstractmethod
def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
"""
Evaluates the reward for the data present in the mini_batch. Use this when evaluating a reward
function drawn straight from a Buffer.
:param mini_batch: An AgentBuffer of experiences drawn from the update buffer.
:return: a np.ndarray of rewards generated by the reward provider
"""
raise NotImplementedError(
"The reward provider's evaluate method has not been implemented "
)
@abstractmethod
def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
"""
Updates the reward provider from the data present in the mini_batch. Use this when updating a reward
function drawn straight from a Buffer.
:param mini_batch: An AgentBuffer of experiences drawn from the update buffer.
:return: A dictionary from string to stats values
"""
raise NotImplementedError(
"The reward provider's update method has not been implemented "
)
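A minimal sketch of the contract this base class defines; the ConstantRewardProvider below is hypothetical and only illustrates the two abstract methods:
import numpy as np
from typing import Dict
from mlagents.trainers.buffer import AgentBuffer

class ConstantRewardProvider(BaseRewardProvider):
    """Hypothetical provider that pays 1.0 per experience and needs no training."""

    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        # One reward value per experience in the mini batch.
        return np.ones(mini_batch.num_experiences, dtype=np.float32)

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        # Nothing to learn, so there are no stats to report.
        return {}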

15
ml-agents/mlagents/trainers/torch/components/reward_providers/extrinsic_reward_provider.py


import numpy as np
from typing import Dict
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
BaseRewardProvider,
)
class ExtrinsicRewardProvider(BaseRewardProvider):
def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
return np.array(mini_batch["environment_rewards"], dtype=np.float32)
def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
return {}

43
ml-agents/mlagents/trainers/torch/components/reward_providers/reward_provider_factory.py


from typing import Dict, Type
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
BaseRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.extrinsic_reward_provider import (
ExtrinsicRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.curiosity_reward_provider import (
CuriosityRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
GAILRewardProvider,
)
from mlagents_envs.base_env import BehaviorSpec
NAME_TO_CLASS: Dict[RewardSignalType, Type[BaseRewardProvider]] = {
RewardSignalType.EXTRINSIC: ExtrinsicRewardProvider,
RewardSignalType.CURIOSITY: CuriosityRewardProvider,
RewardSignalType.GAIL: GAILRewardProvider,
}
def create_reward_provider(
name: RewardSignalType, specs: BehaviorSpec, settings: RewardSignalSettings
) -> BaseRewardProvider:
"""
Creates a reward provider instance based on the name and the settings provided.
:param name: The name of the reward signal
:param specs: The BehaviorSpecs of the policy
:param settings: The RewardSignalSettings for that reward signal
:return: The reward signal class instantiated
"""
rcls = NAME_TO_CLASS.get(name)
if not rcls:
raise UnityTrainerException(f"Unknown reward signal type {name}")
class_inst = rcls(specs, settings)
return class_inst
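Usage mirrors test_factory earlier in this section; a small sketch assuming `spec` is some BehaviorSpec:
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType

extrinsic_rp = create_reward_provider(
    RewardSignalType.EXTRINSIC, spec, RewardSignalSettings()
)
assert extrinsic_rp.name == "Extrinsic"  # class name minus the "RewardProvider" suffix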

225
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


import numpy as np
from typing import Dict
import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
BaseRewardProvider,
)
from mlagents.trainers.settings import CuriositySettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Swish
from mlagents.trainers.settings import NetworkSettings, EncoderType
class CuriosityRewardProvider(BaseRewardProvider):
beta = 0.2 # Forward vs Inverse loss weight
loss_multiplier = 10.0 # Loss multiplier
def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
super().__init__(specs, settings)
self._ignore_done = True
self._network = CuriosityNetwork(specs, settings)
self.optimizer = torch.optim.Adam(
self._network.parameters(), lr=settings.learning_rate
)
self._has_updated_once = False
def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
with torch.no_grad():
rewards = self._network.compute_reward(mini_batch).detach().cpu().numpy()
rewards = np.minimum(rewards, 1.0 / self.strength)
return rewards * self._has_updated_once
def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
self._has_updated_once = True
forward_loss = self._network.compute_forward_loss(mini_batch)
inverse_loss = self._network.compute_inverse_loss(mini_batch)
loss = self.loss_multiplier * (
self.beta * forward_loss + (1.0 - self.beta) * inverse_loss
)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {
"Losses/Curiosity Forward Loss": forward_loss.detach().cpu().numpy(),
"Losses/Curiosity Inverse Loss": inverse_loss.detach().cpu().numpy(),
}
class CuriosityNetwork(torch.nn.Module):
EPSILON = 1e-10
def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
super().__init__()
self._policy_specs = specs
state_encoder_settings = NetworkSettings(
normalize=False,
hidden_units=settings.encoding_size,
num_layers=2,
vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._state_encoder = NetworkBody(
specs.observation_shapes, state_encoder_settings
)
self._action_flattener = ModelUtils.ActionFlattener(specs)
self.inverse_model_action_prediction = torch.nn.Sequential(
linear_layer(2 * settings.encoding_size, 256),
Swish(),
linear_layer(256, self._action_flattener.flattened_size),
)
self.forward_model_next_state_prediction = torch.nn.Sequential(
linear_layer(
settings.encoding_size + self._action_flattener.flattened_size, 256
),
Swish(),
linear_layer(256, settings.encoding_size),
)
def get_current_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Extracts the current state embedding from a mini_batch.
"""
n_vis = len(self._state_encoder.visual_encoders)
hidden, _ = self._state_encoder.forward(
vec_inputs=[
ModelUtils.list_to_tensor(mini_batch["vector_obs"], dtype=torch.float)
],
vis_inputs=[
ModelUtils.list_to_tensor(
mini_batch["visual_obs%d" % i], dtype=torch.float
)
for i in range(n_vis)
],
)
return hidden
def get_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Extracts the next state embedding from a mini_batch.
"""
n_vis = len(self._state_encoder.visual_encoders)
hidden, _ = self._state_encoder.forward(
vec_inputs=[
ModelUtils.list_to_tensor(
mini_batch["next_vector_in"], dtype=torch.float
)
],
vis_inputs=[
ModelUtils.list_to_tensor(
mini_batch["next_visual_obs%d" % i], dtype=torch.float
)
for i in range(n_vis)
],
)
return hidden
def predict_action(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
In the continuous case, returns the predicted action.
In the discrete case, returns the logits.
"""
inverse_model_input = torch.cat(
(self.get_current_state(mini_batch), self.get_next_state(mini_batch)), dim=1
)
hidden = self.inverse_model_action_prediction(inverse_model_input)
if self._policy_specs.is_action_continuous():
return hidden
else:
branches = ModelUtils.break_into_branches(
hidden, self._policy_specs.discrete_action_branches
)
branches = [torch.softmax(b, dim=1) for b in branches]
return torch.cat(branches, dim=1)
def predict_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Uses the current state embedding and the action of the mini_batch to predict
the next state embedding.
"""
if self._policy_specs.is_action_continuous():
action = ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
else:
action = torch.cat(
ModelUtils.actions_to_onehot(
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
self._policy_specs.discrete_action_branches,
),
dim=1,
)
forward_model_input = torch.cat(
(self.get_current_state(mini_batch), action), dim=1
)
return self.forward_model_next_state_prediction(forward_model_input)
def compute_inverse_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Computes the inverse loss for a mini_batch. Corresponds to the error on the
action prediction (given the current and next state).
"""
predicted_action = self.predict_action(mini_batch)
if self._policy_specs.is_action_continuous():
sq_difference = (
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
- predicted_action
) ** 2
sq_difference = torch.sum(sq_difference, dim=1)
return torch.mean(
ModelUtils.dynamic_partition(
sq_difference,
ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
2,
)[1]
)
else:
true_action = torch.cat(
ModelUtils.actions_to_onehot(
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
self._policy_specs.discrete_action_branches,
),
dim=1,
)
cross_entropy = torch.sum(
-torch.log(predicted_action + self.EPSILON) * true_action, dim=1
)
return torch.mean(
ModelUtils.dynamic_partition(
cross_entropy,
ModelUtils.list_to_tensor(
mini_batch["masks"], dtype=torch.float
), # use masks not action_masks
2,
)[1]
)
def compute_reward(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Calculates the curiosity reward for the mini_batch. Corresponds to the error
between the predicted and actual next state.
"""
predicted_next_state = self.predict_next_state(mini_batch)
target = self.get_next_state(mini_batch)
sq_difference = 0.5 * (target - predicted_next_state) ** 2
sq_difference = torch.sum(sq_difference, dim=1)
return sq_difference
def compute_forward_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Computes the loss for the next state prediction
"""
return torch.mean(
ModelUtils.dynamic_partition(
self.compute_reward(mini_batch),
ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
2,
)[1]
)
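Taken together, update() takes one gradient step on the weighted forward and inverse losses, while evaluate() pays the forward-model prediction error as the intrinsic reward. A sketch of that cycle, assuming `spec` is a BehaviorSpec, `buffer` an AgentBuffer with the keys produced by create_agent_buffer, and default CuriositySettings:
from mlagents.trainers.settings import CuriositySettings

curiosity_rp = CuriosityRewardProvider(spec, CuriositySettings())
rewards = curiosity_rp.evaluate(buffer)  # all zeros until the first update
stats = curiosity_rp.update(buffer)      # one step on 10 * (0.2 * forward + 0.8 * inverse)
rewards = curiosity_rp.evaluate(buffer)  # forward prediction error, clipped at 1.0 / strength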

256
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


from typing import Dict, Optional, Tuple
import numpy as np
import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
BaseRewardProvider,
)
from mlagents.trainers.settings import GAILSettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Swish, Initialization
from mlagents.trainers.settings import NetworkSettings, EncoderType
from mlagents.trainers.demo_loader import demo_to_buffer
class GAILRewardProvider(BaseRewardProvider):
def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
super().__init__(specs, settings)
self._ignore_done = True
self._discriminator_network = DiscriminatorNetwork(specs, settings)
_, self._demo_buffer = demo_to_buffer(
settings.demo_path, 1, specs
) # This is supposed to be the sequence length but we do not have access here
params = list(self._discriminator_network.parameters())
self.optimizer = torch.optim.Adam(params, lr=settings.learning_rate)
def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
with torch.no_grad():
estimates, _ = self._discriminator_network.compute_estimate(
mini_batch, use_vail_noise=False
)
return (
-torch.log(
1.0
- estimates.squeeze(dim=1)
* (1.0 - self._discriminator_network.EPSILON)
)
.detach()
.cpu()
.numpy()
)
def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
expert_batch = self._demo_buffer.sample_mini_batch(
mini_batch.num_experiences, 1
)
loss, stats_dict = self._discriminator_network.compute_loss(
mini_batch, expert_batch
)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return stats_dict
class DiscriminatorNetwork(torch.nn.Module):
gradient_penalty_weight = 10.0
z_size = 128
alpha = 0.0005
mutual_information = 0.5
EPSILON = 1e-7
initial_beta = 0.0
def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
super().__init__()
self._policy_specs = specs
self._use_vail = settings.use_vail
self._settings = settings
state_encoder_settings = NetworkSettings(
normalize=False,
hidden_units=settings.encoding_size,
num_layers=2,
vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._state_encoder = NetworkBody(
specs.observation_shapes, state_encoder_settings
)
self._action_flattener = ModelUtils.ActionFlattener(specs)
encoder_input_size = settings.encoding_size
if settings.use_actions:
encoder_input_size += (
self._action_flattener.flattened_size + 1
) # + 1 is for done
self.encoder = torch.nn.Sequential(
linear_layer(encoder_input_size, settings.encoding_size),
Swish(),
linear_layer(settings.encoding_size, settings.encoding_size),
Swish(),
)
estimator_input_size = settings.encoding_size
if settings.use_vail:
estimator_input_size = self.z_size
self._z_sigma = torch.nn.Parameter(
torch.ones((self.z_size), dtype=torch.float), requires_grad=True
)
self._z_mu_layer = linear_layer(
settings.encoding_size,
self.z_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
)
self._beta = torch.nn.Parameter(
torch.tensor(self.initial_beta, dtype=torch.float), requires_grad=False
)
self._estimator = torch.nn.Sequential(
linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
)
def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Creates the action Tensor. In continuous case, corresponds to the action. In
the discrete case, corresponds to the concatenation of one hot action Tensors.
"""
return self._action_flattener.forward(
torch.as_tensor(mini_batch["actions"], dtype=torch.float)
)
def get_state_encoding(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""
Creates the observation input.
"""
n_vis = len(self._state_encoder.visual_encoders)
hidden, _ = self._state_encoder.forward(
vec_inputs=[torch.as_tensor(mini_batch["vector_obs"], dtype=torch.float)],
vis_inputs=[
torch.as_tensor(mini_batch["visual_obs%d" % i], dtype=torch.float)
for i in range(n_vis)
],
)
return hidden
def compute_estimate(
self, mini_batch: AgentBuffer, use_vail_noise: bool = False
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
"""
Given a mini_batch, computes the estimate (How much the discriminator believes
the data was sampled from the demonstration data).
:param mini_batch: The AgentBuffer of data
:param use_vail_noise: Only when using VAIL : If true, will sample the code, if
false, will return the mean of the code.
"""
encoder_input = self.get_state_encoding(mini_batch)
if self._settings.use_actions:
actions = self.get_action_input(mini_batch)
dones = torch.as_tensor(mini_batch["done"], dtype=torch.float)
encoder_input = torch.cat([encoder_input, actions, dones], dim=1)
hidden = self.encoder(encoder_input)
z_mu: Optional[torch.Tensor] = None
if self._settings.use_vail:
z_mu = self._z_mu_layer(hidden)
hidden = torch.normal(z_mu, self._z_sigma * use_vail_noise)
estimate = self._estimator(hidden)
return estimate, z_mu
def compute_loss(
self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
) -> Tuple[torch.Tensor, Dict[str, np.ndarray]]:
"""
Given a policy mini_batch and an expert mini_batch, computes the loss of the discriminator.
"""
total_loss = torch.zeros(1)
stats_dict: Dict[str, np.ndarray] = {}
policy_estimate, policy_mu = self.compute_estimate(
policy_batch, use_vail_noise=True
)
expert_estimate, expert_mu = self.compute_estimate(
expert_batch, use_vail_noise=True
)
stats_dict["Policy/GAIL Policy Estimate"] = (
policy_estimate.mean().detach().cpu().numpy()
)
stats_dict["Policy/GAIL Expert Estimate"] = (
expert_estimate.mean().detach().cpu().numpy()
)
discriminator_loss = -(
torch.log(expert_estimate + self.EPSILON)
+ torch.log(1.0 - policy_estimate + self.EPSILON)
).mean()
stats_dict["Losses/GAIL Loss"] = discriminator_loss.detach().cpu().numpy()
total_loss += discriminator_loss
if self._settings.use_vail:
# KL divergence loss (encourage latent representation to be normal)
kl_loss = torch.mean(
-torch.sum(
1
+ (self._z_sigma ** 2).log()
- 0.5 * expert_mu ** 2
- 0.5 * policy_mu ** 2
- (self._z_sigma ** 2),
dim=1,
)
)
vail_loss = self._beta * (kl_loss - self.mutual_information)
with torch.no_grad():
self._beta.data = torch.max(
self._beta + self.alpha * (kl_loss - self.mutual_information),
torch.tensor(0.0),
)
total_loss += vail_loss
stats_dict["Policy/GAIL Beta"] = self._beta.detach().cpu().numpy()
stats_dict["Losses/GAIL KL Loss"] = kl_loss.detach().cpu().numpy()
if self.gradient_penalty_weight > 0.0:
total_loss += (
self.gradient_penalty_weight
* self.compute_gradient_magnitude(policy_batch, expert_batch)
)
return total_loss, stats_dict
def compute_gradient_magnitude(
self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
) -> torch.Tensor:
"""
Gradient penalty from https://arxiv.org/pdf/1704.00028. Adds stability esp.
for off-policy. Compute gradients w.r.t randomly interpolated input.
"""
policy_obs = self.get_state_encoding(policy_batch)
expert_obs = self.get_state_encoding(expert_batch)
obs_epsilon = torch.rand(policy_obs.shape)
encoder_input = obs_epsilon * policy_obs + (1 - obs_epsilon) * expert_obs
if self._settings.use_actions:
policy_action = self.get_action_input(policy_batch)
expert_action = self.get_action_input(expert_batch)
action_epsilon = torch.rand(policy_action.shape)
policy_dones = torch.as_tensor(policy_batch["done"], dtype=torch.float)
expert_dones = torch.as_tensor(expert_batch["done"], dtype=torch.float)
dones_epsilon = torch.rand(policy_dones.shape)
encoder_input = torch.cat(
[
encoder_input,
action_epsilon * policy_action
+ (1 - action_epsilon) * expert_action,
dones_epsilon * policy_dones + (1 - dones_epsilon) * expert_dones,
],
dim=1,
)
hidden = self.encoder(encoder_input)
if self._settings.use_vail:
use_vail_noise = True
z_mu = self._z_mu_layer(hidden)
hidden = torch.normal(z_mu, self._z_sigma * use_vail_noise)
hidden = self._estimator(hidden)
estimate = torch.mean(torch.sum(hidden, dim=1))
# create_graph=True keeps the gradient in the graph so the penalty itself can be differentiated
gradient = torch.autograd.grad(estimate, encoder_input, create_graph=True)[0]
# Norm's gradient could be NaN at 0. Use our own safe_norm
safe_norm = (torch.sum(gradient ** 2, dim=1) + self.EPSILON).sqrt()
gradient_mag = torch.mean((safe_norm - 1) ** 2)
return gradient_mag
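The reward returned by evaluate() is -log(1 - D(s, a) * (1 - EPSILON)), so it is always non-negative and grows as the discriminator rates a sample as more expert-like. A sketch of the cycle exercised by test_reward_decreases earlier in this section, assuming `spec` is a BehaviorSpec, `policy_buffer` an AgentBuffer of policy experience, and `gail_settings` a GAILSettings with a valid demo_path:
gail_rp = GAILRewardProvider(spec, gail_settings)
for _ in range(10):
    gail_rp.update(policy_buffer)          # one discriminator step per call
rewards = gail_rp.evaluate(policy_buffer)  # per-experience -log(1 - D), always >= 0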