from typing import Optional, Dict, List, Tuple
import numpy as np
from mlagents.torch_utils import torch, default_device
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Initialization
from mlagents.trainers.settings import NetworkSettings, EncoderType
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.buffer import AgentBuffer
self._use_vail = settings.use_vail
self._settings = settings
encoder_settings = NetworkSettings(
    normalize=False,
    hidden_units=settings.encoding_size,
    num_layers=2,
    vis_encode_type=EncoderType.SIMPLE,
)
unencoded_size = (
    self._action_flattener.flattened_size + 1 if settings.use_actions else 0
)  # +1 is for dones
self.encoder = NetworkBody(
    specs.observation_shapes, encoder_settings, unencoded_size
)
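# Illustrative sizing note: with use_actions enabled and, say, a discrete
# action space of branches (3, 2), the flattened one-hot action has size
# 3 + 2 = 5, so unencoded_size is 5 + 1 = 6 (the +1 is the "done" flag);
# the NetworkBody concatenates those extra inputs onto the encoded
# observations.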
estimator_input_size = settings.encoding_size
def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
    return self._action_flattener.forward(
        torch.as_tensor(mini_batch["actions"], dtype=torch.float)
    )
def get_state_inputs(
    self, mini_batch: AgentBuffer
) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
    n_vis = len(self.encoder.visual_processors)
    n_vec = len(self.encoder.vector_processors)
    vec_inputs = (
        [ModelUtils.list_to_tensor(mini_batch["vector_obs"], dtype=torch.float)]
        if n_vec > 0
        else []
    )
    vis_inputs = [
        ModelUtils.list_to_tensor(mini_batch["visual_obs%d" % i], dtype=torch.float)
        for i in range(n_vis)
    ]
    return vec_inputs, vis_inputs
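# Illustrative usage (assumed shapes): for a behavior with one vector
# observation of size 8 and two cameras, a 64-element mini_batch yields
# vec_inputs == [Tensor(64, 8)] and vis_inputs with two batched image
# tensors; either list is empty when the behavior has no obs of that kind.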
def compute_estimate(
    self, mini_batch: AgentBuffer, use_vail_noise: bool = False
) -> torch.Tensor:
    """
    :param use_vail_noise: Only when using VAIL. If true, will sample the code;
    if false, will return the mean of the code.
    """
    vec_inputs, vis_inputs = self.get_state_inputs(mini_batch)
    if self._settings.use_actions:
        actions = self.get_action_input(mini_batch)
        dones = torch.as_tensor(mini_batch["done"], dtype=torch.float).unsqueeze(1)
        action_inputs = torch.cat([actions, dones], dim=1)
        hidden, _ = self.encoder(vec_inputs, vis_inputs, action_inputs)
    else:
        hidden, _ = self.encoder(vec_inputs, vis_inputs)
    z_mu: Optional[torch.Tensor] = None
    if self._settings.use_vail:
        z_mu = self._z_mu_layer(hidden)
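        # VAIL (https://arxiv.org/abs/1810.00821) squeezes the discriminator
        # input through a stochastic code z ~ N(z_mu, z_sigma); with
        # use_vail_noise=True the code is sampled for training, otherwise the
        # mean z_mu is used, as the docstring above describes.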
def compute_gradient_magnitude(
    self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
) -> torch.Tensor:
    """
    Gradient penalty from https://arxiv.org/pdf/1704.00028. Adds stability,
    especially for off-policy. Computes gradients w.r.t. randomly interpolated input.
    """
    policy_vec_inputs, policy_vis_inputs = self.get_state_inputs(policy_batch)
    expert_vec_inputs, expert_vis_inputs = self.get_state_inputs(expert_batch)
    interp_vec_inputs = []
    for policy_vec_input, expert_vec_input in zip(
        policy_vec_inputs, expert_vec_inputs
    ):
        obs_epsilon = torch.rand(policy_vec_input.shape)
        interp_vec_input = (
            obs_epsilon * policy_vec_input + (1 - obs_epsilon) * expert_vec_input
        )
        interp_vec_input.requires_grad = True  # For gradient calculation
        interp_vec_inputs.append(interp_vec_input)
    interp_vis_inputs = []
    for policy_vis_input, expert_vis_input in zip(
        policy_vis_inputs, expert_vis_inputs
    ):
        obs_epsilon = torch.rand(policy_vis_input.shape)
        interp_vis_input = (
            obs_epsilon * policy_vis_input + (1 - obs_epsilon) * expert_vis_input
        )
        interp_vis_input.requires_grad = True  # For gradient calculation
        interp_vis_inputs.append(interp_vis_input)
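    # Illustrative: obs_epsilon is drawn per element, so each interpolated
    # observation sits at a random point on the segment between a policy
    # sample and an expert sample; WGAN-GP penalizes the discriminator's
    # gradient norm at exactly these interpolated points.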
    if self._settings.use_actions:
        policy_action = self.get_action_input(policy_batch)
        expert_action = self.get_action_input(expert_batch)
        action_epsilon = torch.rand(policy_action.shape)
        policy_dones = torch.as_tensor(
            policy_batch["done"], dtype=torch.float
        ).unsqueeze(1)
        expert_dones = torch.as_tensor(
            expert_batch["done"], dtype=torch.float
        ).unsqueeze(1)
        dones_epsilon = torch.rand(policy_dones.shape)
        action_inputs = torch.cat(
            [
                action_epsilon * policy_action
                + (1 - action_epsilon) * expert_action,
                dones_epsilon * policy_dones + (1 - dones_epsilon) * expert_dones,
            ],
            dim=1,
        )
        action_inputs.requires_grad = True  # For gradient calculation
        hidden, _ = self.encoder(
            interp_vec_inputs, interp_vis_inputs, action_inputs
        )
        encoder_input = tuple(
            interp_vec_inputs + interp_vis_inputs + [action_inputs]
        )
    else:
        hidden, _ = self.encoder(interp_vec_inputs, interp_vis_inputs)
        encoder_input = tuple(interp_vec_inputs + interp_vis_inputs)
    # Sum to a scalar so a single autograd.grad call covers the whole batch.
    estimate = self._estimator(hidden).squeeze(1).sum()
    gradient = torch.autograd.grad(estimate, encoder_input, create_graph=True)[0]
    # Norm's gradient could be NaN at 0. Use our own safe_norm
    safe_norm = (torch.sum(gradient ** 2, dim=1) + self.EPSILON).sqrt()
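    # Why not torch.norm: its gradient is NaN at an all-zero input. A minimal
    # standalone repro (illustrative, not part of this module):
    #
    #   x = torch.zeros(3, requires_grad=True)
    #   torch.norm(x).backward()                    # x.grad -> all NaN
    #   y = torch.zeros(3, requires_grad=True)
    #   ((y ** 2).sum() + 1e-7).sqrt().backward()   # y.grad -> finite zeros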