Merge remote-tracking branch 'origin/master' into develop-add-fire

5 年前 · 2b3a6347
--- a/.yamato/com.unity.ml-agents-test.yml
+++ b/.yamato/com.unity.ml-agents-test.yml
  triggers:
    cancel_old_ci: true
    {% if platform.name == "mac" %}
-    changes:
-      only:
-        - "com.unity.ml-agents/**"
-        - "ml-agents/tests/yamato/**"
-        - ".yamato/com.unity.ml-agents-test.yml"
+    expression: |
+      (pull_request.target eq "master" OR
+      pull_request.target match "release.+") AND
+      NOT pull_request.draft AND
+      (pull_request.changes.any match "com.unity.ml-agents/**" OR
+      pull_request.changes.any match ".yamato/com.unity.ml-agents-test.yml")
    {% endif %}
  {% endfor %}
 {% endfor %}
--- a/.yamato/gym-interface-test.yml
+++ b/.yamato/gym-interface-test.yml
    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
  triggers:
    cancel_old_ci: true
-    changes:
-      only:
-        - "com.unity.ml-agents/**"
-        - "Project/**"
-        - "ml-agents/**"
-        - "ml-agents-envs/**"
-        - ".yamato/gym-interface-test.yml"
-      except:
-        - "*.md"
-        - "com.unity.ml-agents/*.md"
-        - "com.unity.ml-agents/**/*.md"
+    # Run for non-draft PRs targeting "master" or a "release.+" branch that
+    # touch any of the listed paths, unless every changed file is Markdown.
+    expression: |
+      (pull_request.target eq "master" OR
+      pull_request.target match "release.+") AND
+      NOT pull_request.draft AND
+      (pull_request.changes.any match "com.unity.ml-agents/**" OR
+      pull_request.changes.any match "Project/**" OR
+      pull_request.changes.any match "ml-agents/**" OR
+      pull_request.changes.any match "ml-agents-envs/**" OR
+      pull_request.changes.any match "gym-unity/**" OR
+      pull_request.changes.any match ".yamato/gym-interface-test.yml") AND
+      NOT pull_request.changes.all match "**/*.md"
 {% endfor %}
--- a/.yamato/protobuf-generation-test.yml
+++ b/.yamato/protobuf-generation-test.yml
      git diff -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" > artifacts/proto.patch; exit $GIT_ERR; }
  triggers:
    cancel_old_ci: true
-    changes:
-      only:
-        - "protobuf-definitions/**"
-        - ".yamato/protobuf-generation-test.yml"
-      except:
-        - "protobuf-definitions/*.md"
-        - "protobuf-definitions/**/*.md"
+    expression: |
+      (pull_request.target eq "master" OR
+      pull_request.target match "release.+") AND
+      NOT pull_request.draft AND
+      (pull_request.changes.any match "protobuf-definitions/**" OR
+      pull_request.changes.any match ".yamato/protobuf-generation-test.yml") AND
+      NOT pull_request.changes.all match "protobuf-definitions/**/*.md"
  artifacts:
    patch:
      paths:
--- a/.yamato/python-ll-api-test.yml
+++ b/.yamato/python-ll-api-test.yml
    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
  triggers:
    cancel_old_ci: true
-    changes:
-      only:
-        - "com.unity.ml-agents/**"
-        - "Project/**"
-        - "ml-agents/**"
-        - "ml-agents-envs/**"
-        - ".yamato/python-ll-api-test.yml"
-      except:
-        - "*.md"
-        - "com.unity.ml-agents/*.md"
-        - "com.unity.ml-agents/**/*.md"
+    # Run for non-draft PRs targeting "master" or a "release.+" branch that
+    # touch any of the listed paths, unless every changed file is Markdown.
+    expression: |
+      (pull_request.target eq "master" OR
+      pull_request.target match "release.+") AND
+      NOT pull_request.draft AND
+      (pull_request.changes.any match "com.unity.ml-agents/**" OR
+      pull_request.changes.any match "Project/**" OR
+      pull_request.changes.any match "ml-agents/**" OR
+      pull_request.changes.any match "ml-agents-envs/**" OR
+      pull_request.changes.any match ".yamato/python-ll-api-test.yml") AND
+      NOT pull_request.changes.all match "**/*.md"
 {% endfor %}
--- a/.yamato/standalone-build-test.yml
+++ b/.yamato/standalone-build-test.yml
    - pip install pyyaml
    - python -u -m ml-agents.tests.yamato.standalone_build_tests
    - python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
-    - python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity 
-    - python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity 
+    - python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
+    - python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
-    changes:
-      only:
-        - "com.unity.ml-agents/**"
-        - "Project/**"
-        - ".yamato/standalone-build-test.yml"
-      except:
-        - "*.md"
-        - "com.unity.ml-agents/*.md"
-        - "com.unity.ml-agents/**/*.md"
+    expression: |
+      (pull_request.target eq "master" OR
+      pull_request.target match "release.+") AND
+      NOT pull_request.draft AND
+      (pull_request.changes.any match "com.unity.ml-agents/**" OR
+      pull_request.changes.any match ".yamato/standalone-build-test.yml") AND
+      NOT pull_request.changes.all match "**/*.md"
  artifacts:
    logs:
      paths:
--- a/.yamato/training-int-tests.yml
+++ b/.yamato/training-int-tests.yml
    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
  triggers:
    cancel_old_ci: true
-    changes:
-      only:
-        - "com.unity.ml-agents/**"
-        - "Project/**"
-        - "ml-agents/**"
-        - "ml-agents-envs/**"
-        - ".yamato/training-int-tests.yml"
-      except:
-        - "*.md"
-        - "com.unity.ml-agents/*.md"
-        - "com.unity.ml-agents/**/*.md"
+    expression: |
+      (pull_request.target eq "master" OR
+      pull_request.target match "release.+") AND
+      NOT pull_request.draft AND
+      (pull_request.changes.any match "com.unity.ml-agents/**" OR
+      pull_request.changes.any match "Project/**" OR
+      pull_request.changes.any match "ml-agents/**" OR
+      pull_request.changes.any match "ml-agents-envs/**" OR
+      pull_request.changes.any match ".yamato/training-int-tests.yml") AND
+      NOT pull_request.changes.all match "**/*.md"
  artifacts:
    logs:
      paths:
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
    {
        const string k_CommandLineModelOverrideFlag = "--mlagents-override-model";
        const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
+        const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";

        // The attached Agent
        Agent m_Agent;
        int m_MaxEpisodes;

        int m_NumSteps;
+
+        bool m_QuitOnLoadFailure;

        /// <summary>
        /// Get the asset path to use from the commandline arguments.
            var maxEpisodes = 0;

            var args = Environment.GetCommandLineArgs();
-            for (var i = 0; i < args.Length - 1; i++)
+            for (var i = 0; i < args.Length; i++)
            {
                if (args[i] == k_CommandLineModelOverrideFlag && i < args.Length-2)
                {
                }
-                else if (args[i] == k_CommandLineQuitAfterEpisodesFlag)
+                else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length-1)
+                }
+                else if (args[i] == k_CommandLineQuitOnLoadFailure)
+                {
+                    m_QuitOnLoadFailure = true;
                }
            }

            var behaviorName = bp.BehaviorName;

            var nnModel = GetModelForBehaviorName(behaviorName);
-            Debug.Log($"Overriding behavior {behaviorName} for agent with model {nnModel?.name}");
+            if (nnModel == null && m_QuitOnLoadFailure)
+            {
+                Debug.Log(
+                    $"Didn't find a model for behaviorName {behaviorName}. Make " +
+                    $"sure the behaviorName is set correctly in the commandline " +
+                    $"and that the model file exists"
+                );
+                Application.Quit(1);
+            }
+            var modelName = nnModel != null ? nnModel.name : "<null>";
+            Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}");
            // This might give a null model; that's better because we'll fall back to the Heuristic
            m_Agent.SetModel($"Override_{behaviorName}", nnModel);

--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 #### com.unity.ml-agents (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
+- `beta` and `epsilon` in `PPO` are no longer decayed by default but follow the same schedule as learning rate. (#3940)
 ### Minor Changes
 #### com.unity.ml-agents (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
--- a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
+++ b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
        {
            var agentInfoProto = ai.ToAgentInfoProto();

-            var agentActionProto = new AgentActionProto
+            var agentActionProto = new AgentActionProto();
+            if(ai.storedVectorActions != null)
-                VectorActions = { ai.storedVectorActions }
-            };
+                agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
+            }

            return new AgentInfoActionPairProto
            {
            var brainParametersProto = new BrainParametersProto
            {
                VectorActionSize = { bp.VectorActionSize },
-                VectorActionSpaceType =
-                    (SpaceTypeProto)bp.VectorActionSpaceType,
+                VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
-            brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
+            if(bp.VectorActionDescriptions != null)
+            {
+                brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
+            }
            return brainParametersProto;
        }

        /// </summary>
        public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
        {
+            var demonstrationName = dm.demonstrationName ?? "";
            var demoProto = new DemonstrationMetaProto
            {
                ApiVersion = DemonstrationMetaData.ApiVersion,
-                DemonstrationName = dm.demonstrationName
+                DemonstrationName = demonstrationName
            };
            return demoProto;
        }
--- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
    {
        public Action OnRequestDecision;
        ObservationWriter m_ObsWriter = new ObservationWriter();
-        public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
-            foreach(var sensor in sensors){
+        public void RequestDecision(AgentInfo info, List<ISensor> sensors)
+        {
+            foreach (var sensor in sensors)
+            {
                sensor.GetObservationProto(m_ObsWriter);
            }
            OnRequestDecision?.Invoke();
            agent1.SetPolicy(policy);

            StackingSensor sensor = null;
-            foreach(ISensor s in agent1.sensors){
-                if (s is  StackingSensor){
+            foreach (ISensor s in agent1.sensors)
+            {
+                if (s is  StackingSensor)
+                {
                    sensor = s as StackingSensor;
                }
            }
            {
                agent1.RequestDecision();
                aca.EnvironmentStep();
-
            }

            policy.OnRequestDecision = () =>  SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});
--- a/docs/Learning-Environment-Design-Agents.md
+++ b/docs/Learning-Environment-Design-Agents.md
  0, rays will be used instead of spheres. Rays may be more efficient,
  especially in complex scenes.
 - _Ray Length_ The length of the casts
+- _Ray Layer Mask_ The [LayerMask](https://docs.unity3d.com/ScriptReference/LayerMask.html)
+  passed to the raycast or spherecast. This can be used to ignore certain types
+  of objects when casting.
 - _Observation Stacks_ The number of previous results to "stack" with the cast
  results. Note that this can be independent of the "Stacked Vectors" setting in
  `Behavior Parameters`.
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
    data in the new MonoBehaviour instead.
  - If the class overrode the virtual methods, create a new MonoBehaviour and
    move the logic to it:
-    - Move the InitializeAcademy code to MonoBehaviour.OnAwake
+    - Move the InitializeAcademy code to MonoBehaviour.Awake
    - Move the AcademyStep code to MonoBehaviour.FixedUpdate
    - Move the OnDestroy code to MonoBehaviour.OnDestroy.
    - Move the AcademyReset code to a new method and add it to the
--- a/docs/Python-API.md
+++ b/docs/Python-API.md

 ```python
 from mlagents_envs.environment import UnityEnvironment
+# This is a non-blocking call that only loads the environment.
+# Start interacting with the environment.
+env.reset()
+behavior_names = env.get_behavior_names()
+...
+**NOTE:** Please see [Interacting with a Unity Environment](#interacting-with-a-unity-environment)
+to learn more about how you can interact with the Unity environment from Python.

 - `file_name` is the name of the environment binary (located in the root
  directory of the python project).
--- a/ml-agents-envs/mlagents_envs/tests/test_side_channel.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_side_channel.py
 import uuid
+import pytest
+from mlagents_envs.side_channel.engine_configuration_channel import (
+    EngineConfigurationChannel,
+    EngineConfig,
+)
+from mlagents_envs.side_channel.environment_parameters_channel import (
+    EnvironmentParametersChannel,
+)
+from mlagents_envs.side_channel.stats_side_channel import (
+    StatsSideChannel,
+    StatsAggregationMethod,
+)
+from mlagents_envs.exception import (
+    UnitySideChannelException,
+    UnityCommunicationException,
+)


 class IntChannel(SideChannel):
    # Test reading with defaults
    assert [] == msg_in.read_float32_list()
    assert val == msg_in.read_float32_list(default_value=val)
+
+
+def test_engine_configuration():
+    sender = EngineConfigurationChannel()
+    # We use a raw bytes channel to interpred the data
+    receiver = RawBytesChannel(sender.channel_id)
+
+    config = EngineConfig.default_config()
+    sender.set_configuration(config)
+    data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
+    UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
+
+    received_data = receiver.get_and_clear_received_messages()
+    assert len(received_data) == 5  # 5 different messages one for each setting
+
+    sent_time_scale = 4.5
+    sender.set_configuration_parameters(time_scale=sent_time_scale)
+    data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
+    UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
+
+    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
+    message.read_int32()
+    time_scale = message.read_float32()
+    assert time_scale == sent_time_scale
+
+    with pytest.raises(UnitySideChannelException):
+        sender.set_configuration_parameters(width=None, height=42)
+
+    with pytest.raises(UnityCommunicationException):
+        # try to send data to the EngineConfigurationChannel
+        sender.set_configuration_parameters(time_scale=sent_time_scale)
+        data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
+        UnityEnvironment._parse_side_channel_message(
+            {receiver.channel_id: sender}, data
+        )
+
+
+def test_environment_parameters():
+    sender = EnvironmentParametersChannel()
+    # We use a raw bytes channel to interpred the data
+    receiver = RawBytesChannel(sender.channel_id)
+
+    sender.set_float_parameter("param-1", 0.1)
+    data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
+    UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
+
+    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
+    key = message.read_string()
+    dtype = message.read_int32()
+    value = message.read_float32()
+    assert key == "param-1"
+    assert dtype == EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT
+    assert value - 0.1 < 1e-8
+
+    sender.set_float_parameter("param-1", 0.1)
+    sender.set_float_parameter("param-2", 0.1)
+    sender.set_float_parameter("param-3", 0.1)
+
+    data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
+    UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
+
+    assert len(receiver.get_and_clear_received_messages()) == 3
+
+    with pytest.raises(UnityCommunicationException):
+        # try to send data to the EngineConfigurationChannel
+        sender.set_float_parameter("param-1", 0.1)
+        data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
+        UnityEnvironment._parse_side_channel_message(
+            {receiver.channel_id: sender}, data
+        )
+
+
+def test_stats_channel():
+    receiver = StatsSideChannel()
+    message = OutgoingMessage()
+    message.write_string("stats-1")
+    message.write_float32(42.0)
+    message.write_int32(1)  # corresponds to StatsAggregationMethod.MOST_RECENT
+
+    receiver.on_message_received(IncomingMessage(message.buffer))
+
+    stats = receiver.get_and_reset_stats()
+
+    assert len(stats) == 1
+    val, method = stats["stats-1"]
+    assert val - 42.0 < 1e-8
+    assert method == StatsAggregationMethod.MOST_RECENT
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
    RESNET = "resnet"


-class LearningRateSchedule(Enum):
+class ScheduleType(Enum):
    CONSTANT = "constant"
    LINEAR = "linear"

        return global_step, increment_step, steps_to_increment

    @staticmethod
-    def create_learning_rate(
-        lr_schedule: LearningRateSchedule,
-        lr: float,
+    def create_schedule(
+        schedule: ScheduleType,
+        parameter: float,
+        min_value: float,
    ) -> tf.Tensor:
        """
        Create a learning rate tensor.
        :param max_step: The maximum number of steps in the training run.
        :return: A Tensor containing the learning rate.
        """
-        if lr_schedule == LearningRateSchedule.CONSTANT:
-            learning_rate = tf.Variable(lr)
-        elif lr_schedule == LearningRateSchedule.LINEAR:
-            learning_rate = tf.train.polynomial_decay(
-                lr, global_step, max_step, 1e-10, power=1.0
+        if schedule == ScheduleType.CONSTANT:
+            parameter_rate = tf.Variable(parameter, trainable=False)
+        elif schedule == ScheduleType.LINEAR:
+            parameter_rate = tf.train.polynomial_decay(
+                parameter, global_step, max_step, min_value, power=1.0
-            raise UnityTrainerException(
-                "The learning rate schedule {} is invalid.".format(lr_schedule)
-            )
-        return learning_rate
+            raise UnityTrainerException("The schedule {} is invalid.".format(schedule))
+        return parameter_rate

    @staticmethod
    def scaled_init(scale):
--- a/ml-agents/mlagents/trainers/ppo/optimizer_tf.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer_tf.py
 import numpy as np
 from mlagents.tf_utils import tf
 from mlagents_envs.timers import timed
-from mlagents.trainers.models import ModelUtils, EncoderType, LearningRateSchedule
+from mlagents.trainers.models import ModelUtils, EncoderType, ScheduleType
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
 from mlagents.trainers.buffer import AgentBuffer
                super().__init__(policy, trainer_params)

                lr = float(trainer_params["learning_rate"])
-                lr_schedule = LearningRateSchedule(
+                self._schedule = ScheduleType(
                    trainer_params.get("learning_rate_schedule", "linear")
                )
                h_size = int(trainer_params["hidden_units"])
                    "Losses/Value Loss": "value_loss",
                    "Losses/Policy Loss": "policy_loss",
                    "Policy/Learning Rate": "learning_rate",
+                    "Policy/Epsilon": "decay_epsilon",
+                    "Policy/Beta": "decay_beta",
                }
                if self.policy.use_recurrent:
                    self.m_size = self.policy.m_size
                else:
                    self._create_dc_critic(h_size, num_layers, vis_encode_type)

-                self.learning_rate = ModelUtils.create_learning_rate(
-                    lr_schedule, lr, self.policy.global_step, int(max_step)
+                self.learning_rate = ModelUtils.create_schedule(
+                    self._schedule,
+                    lr,
+                    self.policy.global_step,
+                    int(max_step),
+                    min_value=1e-10,
                )
                self._create_losses(
                    self.policy.total_log_probs,
                    "policy_loss": self.abs_policy_loss,
                    "update_batch": self.update_batch,
                    "learning_rate": self.learning_rate,
+                    "decay_epsilon": self.decay_epsilon,
+                    "decay_beta": self.decay_beta,
                }
            )

        )
        advantage = tf.expand_dims(self.advantage, -1)

-        decay_epsilon = tf.train.polynomial_decay(
-            epsilon, self.policy.global_step, max_step, 0.1, power=1.0
+        self.decay_epsilon = ModelUtils.create_schedule(
+            self._schedule, epsilon, self.policy.global_step, max_step, min_value=0.1
-        decay_beta = tf.train.polynomial_decay(
-            beta, self.policy.global_step, max_step, 1e-5, power=1.0
+        self.decay_beta = ModelUtils.create_schedule(
+            self._schedule, beta, self.policy.global_step, max_step, min_value=1e-5
        )

        value_losses = []
-                -decay_epsilon,
-                decay_epsilon,
+                -self.decay_epsilon,
+                self.decay_epsilon,
            )
            v_opt_a = tf.squared_difference(
                self.returns_holders[name], tf.reduce_sum(head, axis=1)
        r_theta = tf.exp(probs - old_probs)
        p_opt_a = r_theta * advantage
        p_opt_b = (
-            tf.clip_by_value(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon)
+            tf.clip_by_value(
+                r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
+            )
            * advantage
        )
        self.policy_loss = -tf.reduce_mean(
        self.loss = (
            self.policy_loss
            + 0.5 * self.value_loss
-            - decay_beta
+            - self.decay_beta
            * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
        )

--- a/ml-agents/mlagents/trainers/sac/optimizer.py
+++ b/ml-agents/mlagents/trainers/sac/optimizer.py

 from mlagents_envs.logging_util import get_logger
 from mlagents.trainers.sac.network import SACPolicyNetwork, SACTargetNetwork
-from mlagents.trainers.models import LearningRateSchedule, EncoderType, ModelUtils
+from mlagents.trainers.models import ScheduleType, EncoderType, ModelUtils
 from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.buffer import AgentBuffer
            with tf.variable_scope(""):
                super().__init__(policy, trainer_params)
                lr = float(trainer_params["learning_rate"])
-                lr_schedule = LearningRateSchedule(
+                lr_schedule = ScheduleType(
                    trainer_params.get("learning_rate_schedule", "constant")
                )
                self.policy = policy
                # The optimizer's m_size is 3 times the policy (Q1, Q2, and Value)
                self.m_size = 3 * self.policy.m_size
                self._create_inputs_and_outputs()
-                self.learning_rate = ModelUtils.create_learning_rate(
-                    lr_schedule, lr, self.policy.global_step, int(max_step)
+                self.learning_rate = ModelUtils.create_schedule(
+                    lr_schedule,
+                    lr,
+                    self.policy.global_step,
+                    int(max_step),
+                    min_value=1e-10,
                )
                self._create_losses(
                    self.policy_network.q1_heads,
--- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py
+++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py
        step_size=0.2,
    )
    override_vals = {
-        "max_steps": 750,
+        "max_steps": 1000,
-        "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},
+        "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1500},
        "reward_signals": {
            "gail": {
                "strength": 1.0,
--- a/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
+using NUnit.Framework;
+using UnityEngine;
+using Unity.MLAgents.Policies;
+using Unity.MLAgents.Demonstrations;
+using Unity.MLAgents.Sensors;
+
+namespace Unity.MLAgents.Tests
+{
+    /// <summary>
+    /// Smoke tests: each one converts a default-constructed object to its
+    /// proto form and passes as long as no exception is thrown.
+    /// </summary>
+    [TestFixture]
+    public class GrpcExtensionsTests
+    {
+        [Test]
+        public void TestDefaultBrainParametersToProto()
+        {
+            // Untouched BrainParameters must convert without throwing.
+            var defaultParameters = new BrainParameters();
+            defaultParameters.ToProto("foo", false);
+        }
+
+        [Test]
+        public void TestDefaultAgentInfoToProto()
+        {
+            // Untouched AgentInfo must convert through both proto paths.
+            var defaultInfo = new AgentInfo();
+            defaultInfo.ToInfoActionPairProto();
+            defaultInfo.ToAgentInfoProto();
+        }
+
+        [Test]
+        public void TestDefaultDemonstrationMetaDataToProto()
+        {
+            // Untouched DemonstrationMetaData must convert without throwing.
+            var defaultMetaData = new DemonstrationMetaData();
+            defaultMetaData.ToProto();
+        }
+    }
+}
--- a/docs/Versioning.md
+++ b/docs/Versioning.md
+# ML-Agents Versioning
+
+## Context
+As the ML-Agents project evolves into a more mature product, we want to communicate the process
+we use to version our packages and the data that flows into, through, and out of them clearly.
+Our project now has four packages (1 Unity, 3 Python) along with artifacts that are produced as
+well as consumed.  This document covers the versioning for these packages and artifacts.
+
+## GitHub Releases
+Up until now, all packages were in lockstep in terms of versioning. As a result, the GitHub releases
+were tagged with the version of all those packages (e.g. v0.15.0, v0.15.1) and labeled accordingly.
+With the decoupling of package versions, we now need to revisit our GitHub release tagging.
+The proposal is that we move towards an integer release numbering for our repo and each such
+release will call out specific version upgrades of each package. For instance, with
+[the April 30th release](https://github.com/Unity-Technologies/ml-agents/releases/tag/release_1),
+we will have:
+- GitHub Release 1 (branch name: *release_1_branch*)
+  - com.unity.ml-agents release 1.0.0
+  - ml-agents release 0.16.0
+  - ml-agents-envs release 0.16.0
+  - gym-unity release 0.16.0
+
+Our release cadence will not be affected by these versioning changes.  We will keep having
+monthly releases to fix bugs and release new features.
+
+## Packages
+All of the software packages, and their generated artifacts will be versioned.  Any automation
+tools will not be versioned.
+
+### Unity package
+Package name: com.unity.ml-agents
+- Versioned following [Semantic Versioning Guidelines](https://www.semver.org)
+- This package consumes an artifact of the training process: the `.nn` file.  These files
+    are integer versioned and currently at version 2. The com.unity.ml-agents package
+    will need to support the version of `.nn` files which existed at its 1.0.0 release.
+    For example, consider that com.unity.ml-agents is at version 1.0.0 and the NN files
+    are at version 2.  If the NN files change to version 3, the next release of
+    com.unity.ml-agents at version 1.1.0 guarantees it will be able to read both of these
+    formats.  If the NN files were to change to version 4 and com.unity.ml-agents to
+    version 2.0.0, support for NN versions 2 and 3 could be dropped for com.unity.ml-agents
+    version 2.0.0.
+- This package produces one artifact, the `.demo` files.  These files will have integer
+    versioning. This means their version will increment by 1 at each change.  The
+    com.unity.ml-agents package must be backward compatible with version changes
+    that occur between minor versions.
+- To summarize, the artifacts produced and consumed by com.unity.ml-agents are guaranteed
+    to be supported for 1.x.x versions of com.unity.ml-agents.  We intend to provide stability
+    for our users by moving to a 1.0.0 release of com.unity.ml-agents.
+
+
+### Python Packages
+Package names: ml-agents / ml-agents-envs / gym-unity
+- The python packages remain in "Beta."  This means that breaking changes to the public
+    API of the python packages can be made without a major version bump.
+    Historically, the python and C# packages were in version lockstep.  This is no longer
+    the case.  The python packages will remain in lockstep with each other for now, while the
+    C# package will follow its own versioning as is appropriate.  However, the python package
+    versions may diverge in the future.
+- While the python packages will remain in Beta for now, we acknowledge that the most
+    heavily used portion of our python interface is the `mlagents-learn` CLI and strive
+    to make this part of our API backward compatible. We are actively working on this and
+    expect to have a stable CLI in the next few weeks.
+
+## Communicator
+
+Packages which communicate: com.unity.ml-agents / ml-agents-envs
+
+Another entity of the ML-Agents Toolkit that requires versioning is the communication layer
+between C# and Python, which will also follow semantic versioning.  This guarantees a level of
+backward compatibility between different versions of C# and Python packages which communicate.
+Any Communicator version 1.x.x of the Unity package should be compatible with any 1.x.x
+Communicator Version in Python.
+
+An RLCapabilities struct keeps track of which features exist. This struct is passed from C# to
+Python, and another from Python to C#.  With this feature level granularity, we can notify users
+more specifically about feature limitations based on what's available in both C# and Python.
+These notifications will be logged to the python terminal, or to the Unity Editor Console.
+
+
+## Side Channels
+
+The communicator is what manages data transfer between Unity and Python for the core
+training loop. Side Channels are another means of data transfer between Unity and Python.
+Side Channels are not versioned, but have been designed to support backward compatibility
+for what they are. As of today, we provide 4 side channels:
+- FloatProperties: shared float data between Unity - Python (bidirectional)
+- RawBytes: raw data that can be sent Unity - Python (bidirectional)
+- EngineConfig: a set of numeric fields in a pre-defined order sent from Python to Unity
+- Stats: (name, value, agg) messages sent from Unity to Python
+
+Aside from the specific implementations of side channels we provide (and use ourselves),
+the Side Channel interface is made available for users to create their own custom side
+channels. As such, we guarantee that the built in SideChannel interface between Unity and
+Python is backward compatible in packages that share the same major version.
+