Multi Directional Walker and Initial Hypernetwork (#4740)

4 年前 · cc6b4564
--- a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
+++ b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
    def get_trajectory_value_estimates(
        self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
    ) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
-        vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
+        vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, 1:]]
+        goals = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, :1]]
        if self.policy.use_vis_obs:
            visual_obs = []
            for idx, _ in enumerate(

        vec_vis_obs = SplitObservations.from_observations(next_obs)
        next_vec_obs = [
-            ModelUtils.list_to_tensor(vec_vis_obs.vector_observations).unsqueeze(0)
+            ModelUtils.list_to_tensor(vec_vis_obs.vector_observations[1:]).unsqueeze(0)
        ]
        next_vis_obs = [
            ModelUtils.list_to_tensor(_vis_ob).unsqueeze(0)
+        # Goals don't change between steps, but the critic call breaks unless they are passed.
+        next_goals = [torch.as_tensor(vec_vis_obs.vector_observations[:1])]
-            vector_obs, visual_obs, memory, sequence_length=batch.num_experiences
+            vector_obs, visual_obs, goals, memory, sequence_length=batch.num_experiences
-            next_vec_obs, next_vis_obs, next_memory, sequence_length=1
+            next_vec_obs, next_vis_obs, next_goals, next_memory, sequence_length=1
        )

        for name, estimate in value_estimates.items():
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
        If this policy normalizes vector observations, this will update the norm values in the graph.
        :param vector_obs: The vector observations to add to the running estimate of the distribution.
        """
-        vector_obs = [torch.as_tensor(vector_obs)]
+        vector_obs = [torch.as_tensor(vector_obs)[:, 1:]]
        if self.use_vec_obs and self.normalize:
            self.actor_critic.update_normalization(vector_obs)

        vec_obs: List[torch.Tensor],
        vis_obs: List[torch.Tensor],
+        goals: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        seq_len: int = 1,
        :return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
        """
        actions, log_probs, entropies, _, memories = self.actor_critic.get_action_stats_and_value(
-            vec_obs, vis_obs, masks, memories, seq_len
+            vec_obs, vis_obs, goals, masks, memories, seq_len
        )
        return (actions, log_probs, entropies, memories)

        vis_obs: torch.Tensor,
+        goals: torch.Tensor,
        actions: AgentAction,
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
-            vec_obs, vis_obs, actions, masks, memories, seq_len
+            vec_obs, vis_obs, goals, actions, masks, memories, seq_len
        )
        return log_probs, entropies, value_heads

        :return: Outputs from network as defined by self.inference_dict.
        """
        vec_vis_obs, masks = self._split_decision_step(decision_requests)
-        vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations)]
+        vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations[:, 1:])]
+        goals = [torch.as_tensor(vec_vis_obs.vector_observations[:, :1])]
        memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
            0
        )
            action, log_probs, entropy, memories = self.sample_actions(
-                vec_obs, vis_obs, masks=masks, memories=memories
+                vec_obs, vis_obs, goals, masks=masks, memories=memories
            )
        action_tuple = action.to_action_tuple()
        run_out["action"] = action_tuple
--- a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
            )
            returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])

-        vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
+        vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, 1:]]
+        goals = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, :1]]
        act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
        actions = AgentAction.from_dict(batch)

        log_probs, entropy, values = self.policy.evaluate_actions(
            vec_obs,
            vis_obs,
+            goals,
            masks=act_masks,
            actions=actions,
            memories=memories,
            + 0.5 * value_loss
            - decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
        )
-
        # Set optimizer learning rate
        ModelUtils.update_learning_rate(self.optimizer, decay_lr)
        self.optimizer.zero_grad()
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
            self.seed,
            behavior_spec,
            self.trainer_settings,
-            condition_sigma_on_obs=False,  # Faster training for PPO
+            condition_sigma_on_obs=True,  # Faster training for PPO
            separate_critic=True,  # Match network architecture with TF
        )
        return policy
--- a/ml-agents/mlagents/trainers/torch/action_model.py
+++ b/ml-agents/mlagents/trainers/torch/action_model.py
    DistInstance,
    DiscreteDistInstance,
    GaussianDistribution,
+    GaussianHyperNetwork,
    MultiCategoricalDistribution,
 )
 from mlagents.trainers.torch.agent_action import AgentAction
        self._discrete_distribution = None

        if self.action_spec.continuous_size > 0:
-            self._continuous_distribution = GaussianDistribution(
-                self.encoding_size,
-                self.action_spec.continuous_size,
+            self._continuous_distribution = GaussianHyperNetwork(
+                num_layers=1,
+                layer_size=256,
+                hidden_size=self.encoding_size,
+                num_outputs=self.action_spec.continuous_size,
+                num_goals=2,
            )

        if self.action_spec.discrete_size > 0:
                discrete_action.append(discrete_dist.sample())
        return AgentAction(continuous_action, discrete_action)

-    def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> DistInstances:
+    def _get_dists(
+        self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
+    ) -> DistInstances:
        """
        Creates a DistInstances tuple using the continuous and discrete distributions
        :params inputs: The encoding from the network body
        discrete_dist: Optional[List[DiscreteDistInstance]] = None
        # This checks None because mypy complains otherwise
        if self._continuous_distribution is not None:
-            continuous_dist = self._continuous_distribution(inputs)
+            continuous_dist = self._continuous_distribution(inputs, goal)
        if self._discrete_distribution is not None:
            discrete_dist = self._discrete_distribution(inputs, masks)
        return DistInstances(continuous_dist, discrete_dist)
        return action_log_probs, entropies

    def evaluate(
-        self, inputs: torch.Tensor, masks: torch.Tensor, actions: AgentAction
+        self,
+        inputs: torch.Tensor,
+        masks: torch.Tensor,
+        actions: AgentAction,
+        goal: torch.Tensor,
    ) -> Tuple[ActionLogProbs, torch.Tensor]:
        """
        Given actions and encoding from the network body, gets the distributions and
        :params actions: The AgentAction
        :return: An ActionLogProbs tuple and a torch tensor of the distribution entropies.
        """
-        dists = self._get_dists(inputs, masks)
+        dists = self._get_dists(inputs, masks, goal)
-    def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
+    def get_action_out(
+        self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
+    ) -> torch.Tensor:
        """
        Gets the tensors corresponding to the output of the policy network to be used for
        inference. Called by the Actor's forward call.
        """
-        dists = self._get_dists(inputs, masks)
+        dists = self._get_dists(inputs, masks, goal)
        continuous_out, discrete_out, action_out_deprecated = None, None, None
        if self.action_spec.continuous_size > 0 and dists.continuous is not None:
            continuous_out = dists.continuous.exported_model_output()
        return continuous_out, discrete_out, action_out_deprecated

    def forward(
-        self, inputs: torch.Tensor, masks: torch.Tensor
+        self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
    ) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor]:
        """
        The forward method of this module. Outputs the action, log probs,
        :return: Given the input, an AgentAction of the actions generated by the policy and the corresponding
        ActionLogProbs and entropies.
        """
-        dists = self._get_dists(inputs, masks)
+        dists = self._get_dists(inputs, masks, goal)
        actions = self._sample_action(dists)
        log_probs, entropies = self._get_probs_and_entropy(actions, dists)
        # Use the sum of entropy across actions, not the mean
--- a/ml-agents/mlagents/trainers/torch/decoders.py
+++ b/ml-agents/mlagents/trainers/torch/decoders.py
 from typing import List, Dict

 from mlagents.torch_utils import torch, nn
-from mlagents.trainers.torch.layers import linear_layer
+from mlagents.trainers.torch.layers import (
+    linear_layer,
+    LinearEncoder,
+    Initialization,
+    Swish,
+)
+
+from collections import defaultdict


 class ValueHeads(nn.Module):
        for stream_name, head in self.value_heads.items():
            value_outputs[stream_name] = head(hidden).squeeze(-1)
        return value_outputs
+
+
+class ValueHeadsHyperNetwork(nn.Module):
+    """
+    Goal-conditioned value heads whose final linear layer is produced by a hypernetwork.
+
+    The goal index is one-hot encoded and passed through a small MLP (the
+    hypernetwork). For every sample, that MLP emits one flat vector holding the
+    weights and biases of a linear value head for each reward stream; the generated
+    heads are then applied to the sample's encoding.
+    """
+
+    def __init__(
+        self,
+        num_layers: int,
+        layer_size: int,
+        num_goals: int,
+        stream_names: List[str],
+        input_size: int,
+        output_size: int = 1,
+    ):
+        """
+        :param num_layers: Number of (linear, Swish) layers in the hypernetwork before
+            its output layer. The first layer is always created.
+        :param layer_size: Width of the hypernetwork's hidden layers.
+        :param num_goals: Number of distinct goals, i.e. the size of the one-hot input.
+        :param stream_names: Names of the reward streams; one value head is generated
+            per name.
+        :param input_size: Size of the encoding the generated heads are applied to.
+        :param output_size: Number of outputs per stream.
+        """
+        super().__init__()
+        self.stream_names = stream_names
+        self._num_goals = num_goals
+        self.input_size = input_size
+        self.output_size = output_size
+        self.streams_size = len(stream_names)
+        layers = []
+        layers.append(
+            linear_layer(
+                num_goals,
+                layer_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+                bias_init=Initialization.Zero,
+            )
+        )
+        layers.append(Swish())
+        for _ in range(num_layers - 1):
+            layers.append(
+                linear_layer(
+                    layer_size,
+                    layer_size,
+                    kernel_init=Initialization.KaimingHeNormal,
+                    kernel_gain=1.0,
+                    bias_init=Initialization.Zero,
+                )
+            )
+            layers.append(Swish())
+        # One flat vector per sample: all stream weights first, then all stream biases.
+        flat_output = linear_layer(
+            layer_size,
+            input_size * output_size * self.streams_size
+            + self.output_size * self.streams_size,
+            kernel_init=Initialization.KaimingHeNormal,
+            kernel_gain=0.1,
+            bias_init=Initialization.Zero,
+        )
+        self.hypernet = torch.nn.Sequential(*layers, flat_output)
+
+    def forward(
+        self, hidden: torch.Tensor, goal: torch.Tensor
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Computes one value estimate per reward stream using goal-generated heads.
+
+        :param hidden: Encoding of shape (batch, input_size).
+        :param goal: Indexable whose first element holds one goal index per sample,
+            shaped (batch,) or (batch, 1). Indices must lie in [0, num_goals).
+        :return: Dict mapping each stream name to a tensor of shape (batch,) when
+            output_size is 1, otherwise (batch, output_size).
+        """
+        # Flatten so that (batch, 1) and (batch,) goals both give a (batch, num_goals)
+        # one-hot, keeping the hypernetwork output strictly two-dimensional.
+        goal_index = goal[0].long().reshape(-1)
+        goal_onehot = torch.nn.functional.one_hot(goal_index, self._num_goals).float()
+        # (b, streams * i * o + streams * o)
+        flat_output_weights = self.hypernet(goal_onehot)
+        b = hidden.size(0)
+
+        num_weights = self.streams_size * self.input_size * self.output_size
+        num_biases = self.streams_size * self.output_size
+        output_weights, output_bias = torch.split(
+            flat_output_weights, [num_weights, num_biases], dim=-1
+        )
+        # The flat vector is laid out per sample, so the batch axis must stay leading.
+        # Reshaping straight to (streams, b, ...) would hand one sample's generated
+        # parameters to another sample whenever there is more than one stream.
+        output_weights = torch.reshape(
+            output_weights, (b, self.streams_size, self.input_size, self.output_size)
+        )
+        output_bias = torch.reshape(
+            output_bias, (b, self.streams_size, 1, self.output_size)
+        )
+        hidden = hidden.unsqueeze(dim=1)  # (b, 1, i)
+        value_outputs = {}
+        for idx, stream_name in enumerate(self.stream_names):
+            # (b, 1, i) @ (b, i, o) + (b, 1, o) -> (b, 1, o)
+            stream_value = (
+                torch.bmm(hidden, output_weights[:, idx]) + output_bias[:, idx]
+            )
+            # Drop only the singleton axes so a batch of one keeps its batch
+            # dimension, matching ValueHeads' squeeze(-1).
+            value_outputs[stream_name] = stream_value.squeeze(1).squeeze(-1)
+        return value_outputs
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
 from mlagents.torch_utils import torch, nn
 import numpy as np
 import math
-from mlagents.trainers.torch.layers import linear_layer, Initialization
+from mlagents.trainers.torch.layers import (
+    linear_layer,
+    Initialization,
+    LinearEncoder,
+    Swish,
+)
+from mlagents.trainers.torch.utils import ModelUtils

 EPSILON = 1e-7  # Small value to avoid divide by zero

            # torch.cat here instead of torch.expand() because it is not supported in the
            # verified version of Barracuda (1.0.2).
            log_sigma = torch.cat([self.log_sigma] * inputs.shape[0], axis=0)
+        if self.tanh_squash:
+            return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
+        else:
+            return GaussianDistInstance(mu, torch.exp(log_sigma))
+
+
+class GaussianHyperNetwork(nn.Module):
+    def __init__(
+        self,
+        num_layers,
+        layer_size,
+        hidden_size,
+        num_outputs,
+        conditional_sigma,
+        tanh_squash,
+        num_goals,
+    ):
+        """
+        Builds the hypernetwork that turns a one-hot goal into Gaussian head parameters.
+
+        :param num_layers: Number of (linear, Swish) layers before the output layer;
+            the first one is always created.
+        :param layer_size: Width of the hypernetwork's hidden layers.
+        :param hidden_size: Size of the encoding the generated weights are applied to.
+        :param num_outputs: Number of continuous action outputs.
+        :param conditional_sigma: If True, the hypernetwork emits parameters for both
+            mu and log sigma; otherwise it emits only mu parameters and log sigma
+            comes from a separate linear layer over the one-hot goal.
+        :param tanh_squash: Stored on the module for use when building the
+            distribution instance.
+        :param num_goals: Number of distinct goals, i.e. the size of the one-hot input.
+        """
+        super().__init__()
+        self._num_goals = num_goals
+        self.hidden_size = hidden_size
+        self.tanh_squash = tanh_squash
+        self.conditional_sigma = conditional_sigma
+        self.num_outputs = num_outputs
+
+        def _dense(in_size, out_size):
+            # Every linear layer in this module shares the same initialization.
+            return linear_layer(
+                in_size,
+                out_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=0.1,
+                bias_init=Initialization.Zero,
+            )
+
+        trunk = [_dense(num_goals, layer_size), Swish()]
+        for _ in range(num_layers - 1):
+            trunk.append(_dense(layer_size, layer_size))
+            trunk.append(Swish())
+
+        # Weights plus biases of one linear map from the encoding to the outputs.
+        head_param_count = hidden_size * num_outputs + num_outputs
+        if conditional_sigma:
+            # Two generated heads: one for mu, one for log sigma.
+            head = _dense(layer_size, 2 * head_param_count)
+            self._log_sigma_w = None
+        else:
+            # Only mu is generated; log sigma depends on the goal alone.
+            head = _dense(layer_size, head_param_count)
+            self._log_sigma_w = _dense(num_goals, num_outputs)
+        self.hypernet = torch.nn.Sequential(*trunk, head)
+
+    def forward(self, inputs: torch.Tensor, goal: torch.Tensor):
+        goal_onehot = torch.nn.functional.one_hot(
+            goal[0].long(), self._num_goals
+        ).float()
+
+        # conditional sigma: (b, 2 * (H * O + O))
+        # unconditional sigma: (b, H * O + O)
+        flat_output_weights = self.hypernet(goal_onehot)
+        b = inputs.size(0)
+        inputs = inputs.unsqueeze(dim=1)
+        if self.conditional_sigma:
+            mu_w_log_sigma_w, mu_b, log_sigma_b = torch.split(
+                flat_output_weights,
+                [
+                    2 * self.hidden_size * self.num_outputs,
+                    self.num_outputs,
+                    self.num_outputs,
+                ],
+                dim=-1,
+            )
+            mu_w_log_sigma_w = torch.reshape(
+                mu_w_log_sigma_w, (b, 2 * self.hidden_size, self.num_outputs)
+            )
+
+            mu_w, log_sigma_w = torch.split(mu_w_log_sigma_w, self.hidden_size, dim=1)
+            log_sigma = torch.bmm(inputs, log_sigma_w)
+            log_sigma = log_sigma + log_sigma_b
+            log_sigma = log_sigma.squeeze()
+            log_sigma = torch.clamp(log_sigma, min=-20, max=2)
+        else:
+            mu_w, mu_b = torch.split(
+                flat_output_weights, self.hidden_size * self.num_outputs, dim=-1
+            )
+            mu_w = torch.reshape(mu_w, (b, self.hidden_size, self.num_outputs))
+            log_sigma = self._log_sigma_w(goal_onehot)
+            log_sigma = torch.squeeze(log_sigma)
+
+        mu = torch.bmm(inputs, mu_w)
+        mu = mu + mu_b
+        mu = mu.squeeze()
        if self.tanh_squash:
            return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
        else:
--- a/ml-agents/mlagents/trainers/torch/networks.py
+++ b/ml-agents/mlagents/trainers/torch/networks.py
 from mlagents.trainers.torch.action_log_probs import ActionLogProbs
 from mlagents.trainers.settings import NetworkSettings
 from mlagents.trainers.torch.utils import ModelUtils
-from mlagents.trainers.torch.decoders import ValueHeads
+from mlagents.trainers.torch.decoders import ValueHeads, ValueHeadsHyperNetwork
 from mlagents.trainers.torch.layers import LSTM, LinearEncoder
 from mlagents.trainers.torch.model_serialization import exporting_to_onnx

            self.vector_processors,
            encoder_input_size,
        ) = ModelUtils.create_input_processors(
-            observation_shapes,
+            observation_shapes[1:],
            self.h_size,
            network_settings.vis_encode_type,
            normalize=self.normalize,
            encoding_size = network_settings.memory.memory_size // 2
        else:
            encoding_size = network_settings.hidden_units
-        self.value_heads = ValueHeads(stream_names, encoding_size, outputs_per_stream)
+        self.value_heads = ValueHeadsHyperNetwork(
+            num_layers=1,
+            layer_size=256,
+            num_goals=2,
+            stream_names=stream_names,
+            input_size=encoding_size,
+            output_size=outputs_per_stream,
+        )

    @property
    def memory_size(self) -> int:
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
+        goal: List[torch.tensor],
        actions: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
        )
-        output = self.value_heads(encoding)
+        output = self.value_heads(encoding, goal)
        return output, memories


        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
+        goal: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
        At this moment, torch.onnx.export() doesn't accept None as tensor to be exported,
        so the size of return tuple varies with action spec.
        """
+        vec_inputs = [vec_inputs[0][:, 1:]]
+        goal = [vec_inputs[0][:, :1]]
        encoding, memories_out = self.network_body(
            vec_inputs, vis_inputs, memories=memories, sequence_length=1
        )
            disc_action_out,
            action_out_deprecated,
-        ) = self.action_model.get_action_out(encoding, masks)
+        ) = self.action_model.get_action_out(encoding, masks, goal)
        export_out = [
            self.version_number,
            torch.Tensor([self.network_body.memory_size]),
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
+        goal: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
        encoding, memories = self.network_body(
            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
        )
-        log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
+        log_probs, entropies = self.action_model.evaluate(
+            encoding, masks, actions, goal
+        )
        value_outputs = self.value_heads(encoding)
        return log_probs, entropies, value_outputs

        encoding, memories = self.network_body(
            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
        )
-        action, log_probs, entropies = self.action_model(encoding, masks)
+        action, log_probs, entropies = self.action_model(encoding, masks, goal)
        value_outputs = self.value_heads(encoding)
        return action, log_probs, entropies, value_outputs, memories

        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
+        goal: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
            actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
        value_outputs, critic_mem_out = self.critic(
-            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+            vec_inputs,
+            vis_inputs,
+            goal,
+            memories=critic_mem,
+            sequence_length=sequence_length,
        )
        if actor_mem is not None:
            # Make memories with the actor mem unchanged
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
+        goals: List[torch.Tensor],
        actions: AgentAction,
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        else:
            critic_mem = None
            actor_mem = None
+
-        log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
+        log_probs, entropies = self.action_model.evaluate(
+            encoding, masks, actions, goals
+        )
-            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+            vec_inputs,
+            vis_inputs,
+            goals,
+            memories=critic_mem,
+            sequence_length=sequence_length,
        )

        return log_probs, entropies, value_outputs
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
+        goals: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
        encoding, actor_mem_outs = self.network_body(
            vec_inputs, vis_inputs, memories=actor_mem, sequence_length=sequence_length
        )
-        action, log_probs, entropies = self.action_model(encoding, masks)
+        action, log_probs, entropies = self.action_model(encoding, masks, goals)
-            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+            vec_inputs,
+            vis_inputs,
+            goals,
+            memories=critic_mem,
+            sequence_length=sequence_length,
        )
        if self.use_lstm:
            mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)
--- a/ml-agents/mlagents/trainers/trajectory.py
+++ b/ml-agents/mlagents/trainers/trajectory.py
            agent_buffer_trajectory["vector_obs"].append(
                vec_vis_obs.vector_observations
            )
+
+
            if exp.memory is not None:
                agent_buffer_trajectory["memory"].append(exp.memory)

--- a/Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab
+++ b/Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab
+%YAML 1.1
+%TAG !u! tag:unity3d.com,2011:
+--- !u!114 &4469182458895145650
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_CorrespondingSourceObject: {fileID: 0}
+  m_PrefabInstance: {fileID: 0}
+  m_PrefabAsset: {fileID: 0}
+  m_GameObject: {fileID: 1077752704035527923}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: c52bddbfaf39944a6bb673a9dfcfe4b6, type: 3}
+  m_Name: 
+  m_EditorClassIdentifier: 
+  agentParameters:
+    maxStep: 0
+  hasUpgradedFromAgentParameters: 1
+  MaxStep: 5000
+  m_TargetWalkingSpeed: 10
+  randomizeWalkSpeedEachEpisode: 0
+  target: {fileID: 4058446934158437408}
+  hips: {fileID: 1077752704392483292}
+  chest: {fileID: 7818481575961221087}
+  spine: {fileID: 7818481575902529953}
+  head: {fileID: 7818481576732930258}
+  thighL: {fileID: 7818481576528932657}
+  shinL: {fileID: 7818481576468061548}
+  footL: {fileID: 7818481575932963445}
+  thighR: {fileID: 7818481577110242841}
+  shinR: {fileID: 7818481577111017236}
+  footR: {fileID: 7818481576882516798}
+  armL: {fileID: 7818481576458883964}
+  forearmL: {fileID: 7818481576500842159}
+  handL: {fileID: 7818481576440584931}
+  armR: {fileID: 7818481575774466714}
+  forearmR: {fileID: 7818481576563420652}
+  handR: {fileID: 7818481575132336870}
+  goals: 2
+--- !u!114 &1800586501491974962
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_CorrespondingSourceObject: {fileID: 0}
+  m_PrefabInstance: {fileID: 0}
+  m_PrefabAsset: {fileID: 0}
+  m_GameObject: {fileID: 1077752704035527923}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
+  m_Name: 
+  m_EditorClassIdentifier: 
+  observationSize: 1
+--- !u!1001 &186987432828422960
+PrefabInstance:
+  m_ObjectHideFlags: 0
+  serializedVersion: 2
+  m_Modification:
+    m_TransformParent: {fileID: 0}
+    m_Modifications:
+    - target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: walkDirectionMethod
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: target
+      value: 
+      objectReference: {fileID: 4058446934158437408}
+    - target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: randomizeWalkSpeedEachEpisode
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_Enabled
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalPosition.x
+      value: -500
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalPosition.y
+      value: 2.57
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalPosition.z
+      value: -250
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalRotation.x
+      value: -0
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalRotation.y
+      value: 0.7071068
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalRotation.z
+      value: -0
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalRotation.w
+      value: 0.7071068
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_RootOrder
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalEulerAnglesHint.x
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalEulerAnglesHint.y
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalEulerAnglesHint.z
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
+      propertyPath: updateManually
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
+      propertyPath: updateViaScript
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
+      propertyPath: updatedByAgent
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_Name
+      value: MultiDirRagDoll
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_IsActive
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_BehaviorName
+      value: MultiDirWalker
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_Model
+      value: 
+      objectReference: {fileID: 5022602860645237092, guid: c5c81d94c2dfe4c2b9f7440f533957fa,
+        type: 3}
+    - target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_BrainParameters.VectorObservationSize
+      value: 243
+      objectReference: {fileID: 0}
+    - target: {fileID: 6359877977706987617, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_LocalPosition.y
+      value: -2.517
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.00000011920929
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.x
+      value: -0.699997
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: -0.000000059604645
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.00000011920929
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.x
+      value: -0.69999707
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: -0.00000023841858
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.00000011920929
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.x
+      value: 0.5000001
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: 0.00000023841858
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.29999995
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: -0.000000059604645
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.00000011920929
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.x
+      value: -0.5000001
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: -0.00000023841858
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.29999995
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: 0.000000059604645
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235354074184678, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: 0.5119995
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.00000011920929
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.x
+      value: 0.69999707
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: 0.00000023841858
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235354652902044, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: 0.3829999
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235354845945066, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: 0.3050002
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.y
+      value: -0.00000011920929
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.x
+      value: 0.699997
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: 0.000000059604645
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353272702555, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: -0.000000059604645
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353655703554, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: 0.000000059604645
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235353711811619, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: 0.000000059604645
+      objectReference: {fileID: 0}
+    - target: {fileID: 7933235354882597209, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: m_ConnectedAnchor.z
+      value: -0.000000059604645
+      objectReference: {fileID: 0}
+    m_RemovedComponents:
+    - {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f, type: 3}
+  m_SourcePrefab: {fileID: 100100000, guid: 765582efd9dda46ed98564603316353f, type: 3}
+--- !u!1 &1077752704035527923 stripped
+GameObject:
+  m_CorrespondingSourceObject: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &1077752704035527914 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &1077752704392483292 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 895268871264836332, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576528932657 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353228551169, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576468061548 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353272702556, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481575932963445 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235354882597189, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481577110242841 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353713167657, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481577111017236 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353711811620, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576882516798 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353655703566, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481575902529953 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235354652902033, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481575961221087 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235354845945071, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576458883964 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353030744140, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576500842159 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353240438175, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576440584931 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353041637843, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481575774466714 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235355057813930, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576563420652 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235353195701980, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481575132336870 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235354616748502, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!4 &7818481576732930258 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 7933235354074184674, guid: 765582efd9dda46ed98564603316353f,
+    type: 3}
+  m_PrefabInstance: {fileID: 186987432828422960}
+  m_PrefabAsset: {fileID: 0}
+--- !u!1001 &942701540323662238
+PrefabInstance:
+  m_ObjectHideFlags: 0
+  serializedVersion: 2
+  m_Modification:
+    m_TransformParent: {fileID: 1077752704035527914}
+    m_Modifications:
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalPosition.x
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalPosition.y
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalPosition.z
+      value: 1800
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalRotation.x
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalRotation.y
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalRotation.z
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalRotation.w
+      value: 1
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_RootOrder
+      value: 3
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalEulerAnglesHint.x
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalEulerAnglesHint.y
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_LocalEulerAnglesHint.z
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 3840539935788495952, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+        type: 3}
+      propertyPath: m_Name
+      value: StaticTarget
+      objectReference: {fileID: 0}
+    m_RemovedComponents: []
+  m_SourcePrefab: {fileID: 100100000, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e, type: 3}
+--- !u!4 &4058446934158437408 stripped
+Transform:
+  m_CorrespondingSourceObject: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
+    type: 3}
+  m_PrefabInstance: {fileID: 942701540323662238}
+  m_PrefabAsset: {fileID: 0}
--- a/Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta
+++ b/Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta
+fileFormatVersion: 2
+guid: d32d9be22fe544fd38de3cf5db023465
+PrefabImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity
--- a/Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta
+fileFormatVersion: 2
+guid: 0c5ba64aa7c084a63b21f8e2b900fc29
+timeCreated: 1520420566
+licenseType: Free
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs
+using System;
+using UnityEngine;
+using Unity.MLAgents;
+using Unity.MLAgents.Actuators;
+using Unity.MLAgentsExamples;
+using Unity.MLAgents.Sensors;
+using BodyPart = Unity.MLAgentsExamples.BodyPart;
+using Random = UnityEngine.Random;
+
+/// <summary>
+/// Ragdoll walker agent that is assigned a random discrete goal at the start of every episode.
+/// Goal 0 places the target on the positive world x axis, any other goal places it on the
+/// negative world x axis. The goal index is forwarded to the GoalSensorComponent attached to
+/// the same GameObject, and the per-step reward is the product of a speed-matching term and
+/// a head-orientation term.
+/// </summary>
+public class MultiDirWalkerAgent : Agent
+{
+    [Header("Walk Speed")]
+    [Range(0.1f, 10)]
+    [SerializeField]
+    //The walking speed to try and achieve
+    private float m_TargetWalkingSpeed = 10;
+    private Vector3 m_startingPos; //the position of the target when Initialize() ran
+
+    /// <summary>
+    /// Goal walking speed. Assigned values are clamped to [0.1, m_maxWalkingSpeed].
+    /// </summary>
+    public float MTargetWalkingSpeed // property
+    {
+        get { return m_TargetWalkingSpeed; }
+        set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
+    }
+
+    const float m_maxWalkingSpeed = 10; //The max walking speed
+
+    //Absolute world x coordinate at which the target is placed (sign depends on the goal)
+    const float k_TargetDistance = 1800f;
+
+    //Should the agent sample a new goal velocity each episode?
+    //If true, the walking speed will be randomly set between 0.1 and m_maxWalkingSpeed in OnEpisodeBegin()
+    //If false, the goal velocity will be the current MTargetWalkingSpeed
+    public bool randomizeWalkSpeedEachEpisode;
+
+    //The direction an agent will walk during training.
+    private Vector3 m_WorldDirToWalk = Vector3.right;
+
+    [Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
+
+    [Header("Body Parts")] public Transform hips;
+    public Transform chest;
+    public Transform spine;
+    public Transform head;
+    public Transform thighL;
+    public Transform shinL;
+    public Transform footL;
+    public Transform thighR;
+    public Transform shinR;
+    public Transform footR;
+    public Transform armL;
+    public Transform forearmL;
+    public Transform handL;
+    public Transform armR;
+    public Transform forearmR;
+    public Transform handR;
+
+    //Index of the goal chosen for the current episode
+    int m_Goal;
+    //+1 for goal 0, -1 for every other goal; used to place the target and in the look-at reward
+    float dir;
+    //Number of discrete goals to sample from (values below 1 are treated as 1)
+    public int goals;
+    //One-hot encoding of m_Goal
+    float[] m_GoalOneHot;
+    //This will be used as a stabilized model space reference point for observations
+    //Because ragdolls can move erratically during training, using a stabilized reference transform improves learning
+    OrientationCubeController m_OrientationCube;
+    //Sensor component that receives the current goal index; cached to avoid repeated GetComponent calls
+    GoalSensorComponent goalSensor;
+
+    //The indicator graphic gameobject that points towards the target
+    DirectionIndicator m_DirectionIndicator;
+    JointDriveController m_JdController;
+    EnvironmentParameters m_ResetParams;
+
+    /// <summary>
+    /// Picks a random goal, refreshes its one-hot encoding and moves the target to the
+    /// matching side of the world x axis (keeping the z coordinate recorded in m_startingPos).
+    /// </summary>
+    void SelectRandomGoal()
+    {
+        //Guard against an unset/invalid inspector value so indexing below cannot go out of range
+        var goalCount = Mathf.Max(1, goals);
+        m_Goal = Random.Range(0, goalCount);
+        //m_Goal = 0;
+
+        //(Re)allocate when missing or when the goal count changed, otherwise just zero the buffer
+        if (m_GoalOneHot == null || m_GoalOneHot.Length != goalCount)
+        {
+            m_GoalOneHot = new float[goalCount];
+        }
+        else
+        {
+            System.Array.Clear(m_GoalOneHot, 0, m_GoalOneHot.Length);
+        }
+        m_GoalOneHot[m_Goal] = 1;
+
+        //Goal 0 walks towards +x, every other goal towards -x
+        dir = m_Goal == 0 ? 1f : -1f;
+        target.position = new Vector3(dir * k_TargetDistance, 1f, m_startingPos.z);
+    }
+
+    /// <summary>
+    /// Records the target's starting position, selects an initial goal, caches component
+    /// references, registers every body part with the joint drive controller and applies
+    /// the reset parameters.
+    /// </summary>
+    public override void Initialize()
+    {
+        m_startingPos = target.position;
+        SelectRandomGoal();
+
+        m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
+        m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
+        goalSensor = GetComponent<GoalSensorComponent>();
+
+        //Setup each body part
+        m_JdController = GetComponent<JointDriveController>();
+        m_JdController.SetupBodyPart(hips);
+        m_JdController.SetupBodyPart(chest);
+        m_JdController.SetupBodyPart(spine);
+        m_JdController.SetupBodyPart(head);
+        m_JdController.SetupBodyPart(thighL);
+        m_JdController.SetupBodyPart(shinL);
+        m_JdController.SetupBodyPart(footL);
+        m_JdController.SetupBodyPart(thighR);
+        m_JdController.SetupBodyPart(shinR);
+        m_JdController.SetupBodyPart(footR);
+        m_JdController.SetupBodyPart(armL);
+        m_JdController.SetupBodyPart(forearmL);
+        m_JdController.SetupBodyPart(handL);
+        m_JdController.SetupBodyPart(armR);
+        m_JdController.SetupBodyPart(forearmR);
+        m_JdController.SetupBodyPart(handR);
+
+        m_ResetParams = Academy.Instance.EnvironmentParameters;
+
+        SetResetParameters();
+    }
+
+    /// <summary>
+    /// Selects a new goal, resets every body part, gives the hips a random yaw,
+    /// refreshes the orientation helpers and (optionally) samples a new walking speed.
+    /// </summary>
+    public override void OnEpisodeBegin()
+    {
+        SelectRandomGoal();
+
+        //Reset all of the body parts
+        foreach (var bodyPart in m_JdController.bodyPartsDict.Values)
+        {
+            bodyPart.Reset(bodyPart);
+        }
+
+        //Random start rotation to help generalize
+        hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
+
+        UpdateOrientationObjects();
+
+        //Set our goal walking speed
+        MTargetWalkingSpeed =
+            randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
+
+        SetResetParameters();
+    }
+
+    /// <summary>
+    /// Add relevant information on each body part to observations.
+    /// </summary>
+    /// <param name="bp">Body part to observe.</param>
+    /// <param name="sensor">Sensor the observations are written to.</param>
+    public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
+    {
+        //GROUND CHECK
+        sensor.AddObservation(bp.groundContact.touchingGround); // Is this bp touching the ground
+
+        //Get velocities in the context of our orientation cube's space
+        //Note: You can get these velocities in world space as well but it may not train as well.
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
+
+        //Get position relative to hips in the context of our orientation cube's space
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
+
+        //Hips and hands do not contribute local rotation / joint strength observations
+        if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
+        {
+            sensor.AddObservation(bp.rb.transform.localRotation);
+            sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
+        }
+    }
+
+    /// <summary>
+    /// Writes the agent-level observations, then the per-body-part observations, and finally
+    /// hands the current goal index to the goal sensor.
+    /// </summary>
+    /// <param name="sensor">Sensor the vector observations are written to.</param>
+    public override void CollectObservations(VectorSensor sensor)
+    {
+        var cubeForward = m_OrientationCube.transform.forward;
+
+        //velocity we want to match
+        var velGoal = cubeForward * MTargetWalkingSpeed;
+        //ragdoll's avg vel
+        var avgVel = GetAvgVelocity();
+
+        //distance between the goal velocity and the ragdoll's average velocity
+        sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
+        //avg body vel relative to cube
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
+        //vel goal relative to cube
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
+
+        //rotation deltas
+        sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
+        sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
+
+        //Position of target position relative to cube
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
+
+        foreach (var bodyPart in m_JdController.bodyPartsList)
+        {
+            CollectObservationBodyPart(bodyPart, sensor);
+        }
+
+        //sensor.AddObservation(m_GoalOneHot);
+        //Normally cached in Initialize(); look it up here only if that reference is missing
+        if (goalSensor == null)
+        {
+            goalSensor = GetComponent<GoalSensorComponent>();
+        }
+        goalSensor.AddGoal(m_Goal);
+    }
+
+    /// <summary>
+    /// Applies the received continuous actions as joint target rotations followed by joint
+    /// strengths. 39 action values are consumed in total.
+    /// </summary>
+    /// <param name="actionBuffers">Actions received for this step.</param>
+    public override void OnActionReceived(ActionBuffers actionBuffers)
+
+    {
+        var bpDict = m_JdController.bodyPartsDict;
+        var i = -1;
+
+        //The order of the reads below defines the action layout: arguments are evaluated
+        //left to right, so every ++i consumes the next action slot. Do not reorder.
+        var continuousActions = actionBuffers.ContinuousActions;
+        bpDict[chest].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
+        bpDict[spine].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
+
+        bpDict[thighL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
+        bpDict[thighR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
+        bpDict[shinL].SetJointTargetRotation(continuousActions[++i], 0, 0);
+        bpDict[shinR].SetJointTargetRotation(continuousActions[++i], 0, 0);
+        bpDict[footR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
+        bpDict[footL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
+
+        bpDict[armL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
+        bpDict[armR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
+        bpDict[forearmL].SetJointTargetRotation(continuousActions[++i], 0, 0);
+        bpDict[forearmR].SetJointTargetRotation(continuousActions[++i], 0, 0);
+        bpDict[head].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
+
+        //update joint strength settings
+        bpDict[chest].SetJointStrength(continuousActions[++i]);
+        bpDict[spine].SetJointStrength(continuousActions[++i]);
+        bpDict[head].SetJointStrength(continuousActions[++i]);
+        bpDict[thighL].SetJointStrength(continuousActions[++i]);
+        bpDict[shinL].SetJointStrength(continuousActions[++i]);
+        bpDict[footL].SetJointStrength(continuousActions[++i]);
+        bpDict[thighR].SetJointStrength(continuousActions[++i]);
+        bpDict[shinR].SetJointStrength(continuousActions[++i]);
+        bpDict[footR].SetJointStrength(continuousActions[++i]);
+        bpDict[armL].SetJointStrength(continuousActions[++i]);
+        bpDict[forearmL].SetJointStrength(continuousActions[++i]);
+        bpDict[armR].SetJointStrength(continuousActions[++i]);
+        bpDict[forearmR].SetJointStrength(continuousActions[++i]);
+    }
+
+    //Update OrientationCube and DirectionIndicator
+    void UpdateOrientationObjects()
+    {
+        m_WorldDirToWalk = target.position - hips.position;
+        m_OrientationCube.UpdateOrientation(hips, target);
+        //The direction indicator is optional
+        if (m_DirectionIndicator)
+        {
+            m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
+        }
+    }
+
+    /// <summary>
+    /// Refreshes the orientation helpers and adds this physics step's reward:
+    /// matchSpeedReward * lookAtTargetReward.
+    /// </summary>
+    /// <exception cref="ArgumentException">Thrown when either reward term evaluates to NaN.</exception>
+    void FixedUpdate()
+    {
+        UpdateOrientationObjects();
+
+        var cubeForward = m_OrientationCube.transform.forward;
+
+        // Set reward for this step according to mixture of the following elements.
+        // a. Match target speed
+        //This reward will approach 1 if it matches perfectly and approach zero as it deviates
+        var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
+
+        //Check for NaNs
+        if (float.IsNaN(matchSpeedReward))
+        {
+            throw new ArgumentException(
+                "NaN in matchSpeedReward.\n" +
+                $" cubeForward: {cubeForward}\n" +
+                $" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
+                $" maximumWalkingSpeed: {m_maxWalkingSpeed}"
+            );
+        }
+
+        // b. Rotation alignment with target direction.
+        //This reward will approach 1 when dir * head.forward is aligned with cubeForward and approach zero as it deviates
+        //NOTE(review): for dir == -1 this rewards the head facing away from cubeForward - confirm this is intended.
+        var lookAtTargetReward = (Vector3.Dot(cubeForward, dir * head.forward) + 1) * .5F;
+
+        //Check for NaNs
+        if (float.IsNaN(lookAtTargetReward))
+        {
+            throw new ArgumentException(
+                "NaN in lookAtTargetReward.\n" +
+                $" cubeForward: {cubeForward}\n" +
+                $" head.forward: {head.forward}"
+            );
+        }
+
+        //No per-step Debug.Log here: this runs every physics step and would flood the log
+        AddReward(matchSpeedReward * lookAtTargetReward);
+    }
+
+    //Returns the average velocity of all of the body parts
+    //Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
+    //...using the average helps prevent this erratic movement
+    Vector3 GetAvgVelocity()
+    {
+        var velSum = Vector3.zero;
+
+        //ALL RBS
+        var numOfRB = 0;
+        foreach (var item in m_JdController.bodyPartsList)
+        {
+            numOfRB++;
+            velSum += item.rb.velocity;
+        }
+
+        return velSum / numOfRB;
+    }
+
+    /// <summary>
+    /// Reward in [0, 1] for how closely the actual velocity matches the goal velocity.
+    /// </summary>
+    /// <param name="velocityGoal">Velocity the agent should achieve.</param>
+    /// <param name="actualVelocity">Velocity the agent currently has.</param>
+    /// <returns>(1 - (d / MTargetWalkingSpeed)^2)^2, where d is the velocity difference clamped to [0, MTargetWalkingSpeed].</returns>
+    public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
+    {
+        //distance between our actual velocity and goal velocity
+        var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
+
+        //return the value on a declining sigmoid shaped curve that decays from 1 to 0
+        //This reward will approach 1 if it matches perfectly and approach zero as it deviates
+        return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
+    }
+
+    /// <summary>
+    /// Agent touched the target: adds a reward of 1.
+    /// </summary>
+    public void TouchedTarget()
+    {
+        AddReward(1f);
+    }
+
+    /// <summary>
+    /// Sets the chest, spine and hips rigidbody masses from the environment parameters
+    /// "chest_mass", "spine_mass" and "hip_mass" (each defaulting to 8).
+    /// </summary>
+    public void SetTorsoMass()
+    {
+        m_JdController.bodyPartsDict[chest].rb.mass = m_ResetParams.GetWithDefault("chest_mass", 8);
+        m_JdController.bodyPartsDict[spine].rb.mass = m_ResetParams.GetWithDefault("spine_mass", 8);
+        m_JdController.bodyPartsDict[hips].rb.mass = m_ResetParams.GetWithDefault("hip_mass", 8);
+    }
+
+    /// <summary>
+    /// Applies all environment-driven reset parameters (currently only the torso masses).
+    /// </summary>
+    public void SetResetParameters()
+    {
+        SetTorsoMass();
+    }
+}
--- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta
+fileFormatVersion: 2
+guid: c52bddbfaf39944a6bb673a9dfcfe4b6
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx
+++ b/Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx
--- a/config/ppo/MultiDirWalker.yaml
+++ b/config/ppo/MultiDirWalker.yaml
+# PPO trainer settings for the MultiDirWalker behavior.
+behaviors:
+  # Behavior name; a prefab override in this change sets m_BehaviorName to the same value.
+  MultiDirWalker:
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2048
+      # 10 x batch_size
+      buffer_size: 20480
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 256
+      num_layers: 2
+      # NOTE(review): the agent script only writes vector observations; presumably this
+      # visual-encoder setting has no effect here - confirm.
+      vis_encode_type: simple
+    reward_signals:
+      extrinsic:
+        gamma: 0.995
+        strength: 1.0
+    keep_checkpoints: 5
+    max_steps: 30000000
+    time_horizon: 1000
+    summary_freq: 30000
+    threaded: true