namespace MLAgents
{
/// <summary>
/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status, that is sent to the Brain.
/// </summary>
public struct AgentInfo
agentInfoProto . VisualObservations . Add (
ByteString . CopyFrom ( obs . EncodeToPNG ( ) )
) ;
}
return agentInfoProto ;
}
/// <summary>
/// Remove the visual observations from memory. Call at each timestep
/// to avoid memory leaks.
/// </summary>
public void ClearVisualObs ( )
{
foreach ( Texture2D obs in visualObservations )
{
return agentInfoProto ;
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
public struct AgentAction
}
/// <summary>
/// Struct that contains all the Agent-specific parameters provided in the
/// Editor. This excludes the Brain linked to the Agent since it can be
/// modified programmatically.
/// </summary>
/// observations.
/// </summary>
public List < Camera > agentCameras = new List < Camera > ( ) ;
/// <summary>
/// The list of the RenderTextures the agent uses for visual
/// observations.
/// </summary>
/// <summary>
/// The maximum number of steps the agent takes before being done.
/// </summary>
/// <remarks>
/// If set to 0, the agent can only be set to done programmatically (or
public bool resetOnDone = true ;
/// <summary>
/// Whether to enable On Demand Decisions or make a decision at
/// every step.
/// </summary>
public bool onDemandDecision ;
/// <summary>
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the cameras attached
/// to the agent in addition to the vector observations implemented by the
/// user in <see cref="CollectObservations"/>. On the other hand, actions
/// are determined by decisions produced by a linked Brain. Currently, this
/// however, an agent need not send its observation at every step since very
/// little may have changed between successive steps. Currently, how often an
/// agent updates its brain with a fresh observation is determined by the
/// Academy.
///
/// At any step, an agent may be considered <see cref="done"/>.
///
/// episodes. The academy controls the global episode count and each agent
/// controls its own local episode count and can reset and start a new local
/// episode independently (based on its own experience). Thus an academy
/// (global) episode can be viewed as the upper-bound on an agent's episode
/// value takes precedence (since the agent max step will never be reached).
///
/// Implementation-wise, it is required that this class is extended and the
/// virtual methods overridden. For sample implementations of agent behavior,
/// see the Examples/ directory within this Unity project.
{
/// <summary>
/// The Brain attached to this agent. A brain can be attached either
/// directly from the Editor through AgentEditor or
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
/// to not have a brain, as long as no decision is requested.
/// </summary>
actionMasker = new ActionMasker ( param ) ;
// If we haven't initialized vectorActions, initialize to 0. This should only
// happen during the creation of the Agent. In subsequent episodes, vectorAction
// should stay the previous action before the Done(), so that it is properly recorded.
if ( action . vectorActions = = null )
{
if ( param . vectorActionSpaceType = = SpaceType . continuous )
brain . brainParameters . vectorObservationSize ,
info . vectorObservation . Count ) ) ;
}
Utilities . ReplaceRange ( info . stackedVectorObservation , info . vectorObservation ,
Utilities . ReplaceRange ( info . stackedVectorObservation , info . vectorObservation ,
info . stackedVectorObservation . Count - info . vectorObservation . Count ) ;
info . visualObservations . Clear ( ) ;
param . cameraResolutions [ i ] . height ) ;
info . visualObservations . Add ( obsTexture ) ;
}
//Then add all renderTextures
var camCount = agentParameters . agentCameras . Count ;
for ( int i = 0 ; i < agentParameters . agentRenderTextures . Count ; i + + )
/// <summary>
/// Collects the (vector, visual, text) observations of the agent.
/// The agent observation describes the current environment from the
/// observation could include distances to friends or enemies, or the
/// current level of ammunition at its disposal.
/// Recall that an Agent may attach vector, visual or textual observations.
/// Vector observations are added by calling the provided helper methods:
/// needs to match the vectorObservationSize attribute of the linked Brain.
/// Visual observations are implicitly added from the cameras attached to
/// the Agent.
/// Lastly, textual observations are added using
/// <see cref="SetTextObs(string)"/>.
/// </remarks>
public virtual void CollectObservations ( )
}
/// <summary>
/// Specifies the agent behavior when done and
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
/// used to remove the agent from the scene.
/// </summary>
{
action . memories = memories ;
}
public List < float > GetMemoriesAction ( )
{
return action . memories ;
/// <summary>
/// Sets the status of the agent.
/// </summary>
/// <param name="academyMaxStep">If set to <c>true</c>
/// <param name="academyDone">If set to <c>true</c>
/// The agent must set done.</param>
/// <param name="academyStepCounter">Number of current steps in episode</param>
void SetStatus ( bool academyMaxStep , bool academyDone , int academyStepCounter )
maxStepReached = true ;
}
// If the Academy needs to reset, the agent should reset
// even if it reset recently.
if ( academyDone )
{
/// Signals the agent that it must reset if its done flag is set to true.
void ResetIfDone ( )
{
// If an agent is done, then it will also
// request for a decision and an action
if ( IsDone ( ) )
{
obsCamera . Render ( ) ;
texture2D . ReadPixels ( new Rect ( 0 , 0 , texture2D . width , texture2D . height ) , 0 , 0 ) ;
obsCamera . targetTexture = prevCameraRT ;
obsCamera . rect = oldRec ;
RenderTexture . active = prevActiveRT ;
/// <summary>
/// Converts a RenderTexture and corresponding resolution to a 2D texture.
/// </summary>
{
texture2D . Resize ( width , height ) ;
}
if ( width ! = obsTexture . width | | height ! = obsTexture . height )
{
throw new UnityAgentsException ( string . Format (
{
info . customObservation = customObservation ;
}
}
}
}