Compare commits

...
This merge request contains changes that conflict with the target branch.
/gym-unity/README.md
/gym-unity/gym_unity/__init__.py
/docs/Learning-Environment-Create-New.md
/docs/Training-ML-Agents.md
/docs/Installation.md
/docs/Migrating.md
/ml-agents/mlagents/trainers/learn.py
/ml-agents/mlagents/trainers/__init__.py
/ml-agents-envs/mlagents/envs/environment.py
/ml-agents-envs/mlagents/envs/__init__.py
/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
/notebooks/getting-started.ipynb
/ml-agents/mlagents/trainers/tf_policy.py
/ml-agents/mlagents/trainers/ppo/policy.py

5 commits

15 files changed, with 345 insertions and 46 deletions (per-file change counts in parentheses):
  1. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (42)
  2. notebooks/getting-started.ipynb (8)
  3. docs/Installation.md (21)
  4. docs/Learning-Environment-Create-New.md (3)
  5. docs/Migrating.md (1)
  6. docs/Training-ML-Agents.md (6)
  7. gym-unity/README.md (2)
  8. gym-unity/gym_unity/__init__.py (2)
  9. ml-agents-envs/mlagents/envs/environment.py (10)
  10. ml-agents-envs/mlagents/envs/__init__.py (2)
  11. ml-agents/mlagents/trainers/tf_policy.py (12)
  12. ml-agents/mlagents/trainers/learn.py (14)
  13. ml-agents/mlagents/trainers/ppo/policy.py (7)
  14. ml-agents/mlagents/trainers/__init__.py (2)
  15. docs/images/barracuda-package.png (259)

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (42)


public abstract class Academy : MonoBehaviour
{
    const string k_ApiVersion = "API-12";
    const int k_EditorTrainingPort = 5004;
    /// Temporary storage for global gravity value
    /// Used to restore original value when deriving Academy modifies it

    }

    // Used to read Python-provided environment parameters
    static int ReadArgs()
    static int ReadPortFromArgs()
    {
        var args = System.Environment.GetCommandLineArgs();
        var inputPort = "";

            }
        }

        return int.Parse(inputPort);
        try
        {
            return int.Parse(inputPort);
        }
        catch
        {
            // No arg passed, or malformed port number.
#if UNITY_EDITOR
            // Try connecting on the default editor port
            return k_EditorTrainingPort;
#else
            // This is an executable, so we don't try to connect.
            return -1;
#endif
        }
    }

    /// <summary>

        InitializeAcademy();

        // Try to launch the communicator by using the arguments passed at launch
        try
        {
            Communicator = new RpcCommunicator(
                new CommunicatorInitParameters
                {
                    port = ReadArgs()
                });
        }
        catch
        {
#if UNITY_EDITOR
            Communicator = new RpcCommunicator(
                new CommunicatorInitParameters
                {
                    port = 5004
                });
#endif
        }
        var port = ReadPortFromArgs();
        if (port > 0)
        {
            Communicator = new RpcCommunicator(
                new CommunicatorInitParameters
                {
                    port = port
                }
            );
        }

        if (Communicator != null)

            }
            catch
            {
                Debug.Log($"" +
                    $"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. " +
                    "Will perform inference instead."
                );
                Communicator = null;
            }
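In this hunk the old try/catch around ReadArgs() is replaced by ReadPortFromArgs() plus an explicit port > 0 check, so a build launched without a --port argument now skips the communicator and runs inference instead of throwing, while the Editor falls back to k_EditorTrainingPort (5004). For context on where --port comes from, a minimal Python-side sketch against the mlagents.envs API touched in this PR; the GridWorld path is a placeholder and worker_id/base_port are the usual constructor arguments:

from mlagents.envs.environment import UnityEnvironment

# Launching a build: the library starts the executable and passes the chosen
# port (base_port + worker_id) to it as a --port command-line argument, which
# Academy.ReadPortFromArgs() then parses on the C# side.
env = UnityEnvironment(file_name="./envs/GridWorld", worker_id=0, base_port=5005)
env.reset(train_mode=True)
env.close()

# With file_name=None nothing is launched; the library waits for the Editor,
# logging "Start training by pressing the Play button in the Unity Editor."
# env = UnityEnvironment(file_name=None)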

notebooks/getting-started.ipynb (8)


"outputs": [],
"source": [
"env = UnityEnvironment(file_name=env_name)\n",
"# Step once to make sure brains are sent over.\n",
"env.step()\n",
"\n",
"# Set the default brain to work with\n",
"default_brain = env.external_brain_names[0]\n",

"metadata": {},
"source": [
"### 5. Take random actions in the environment\n",
"Once we restart an environment, we can step the environment forward and provide actions to all of the agents within the environment. Here we simply choose random actions based on the `action_space_type` of the default brain. \n",
"Once we restart an environment, we can step the environment forward and provide actions to all of the agents within the environment. Here we simply choose random actions based on the `action_space_type` of the default brain.\n",
"\n",
"Once this cell is executed, 10 messages will be printed that detail how much reward will be accumulated for the next 10 episodes. The Unity environment will then pause, waiting for further signals telling it what to do next. Thus, not seeing any animation is expected when running this cell."
]

" while not done:\n",
" action_size = brain.vector_action_space_size\n",
" if brain.vector_action_space_type == 'continuous':\n",
" env_info = env.step(np.random.randn(len(env_info.agents), \n",
" env_info = env.step(np.random.randn(len(env_info.agents),\n",
" action_size[0]))[default_brain]\n",
" else:\n",
" action = np.column_stack([np.random.randint(0, action_size[i], size=(len(env_info.agents))) for i in range(len(action_size))])\n",

"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
},
"nbformat": 4,
"nbformat_minor": 1

docs/Installation.md (21)


It also contains many [example environments](Learning-Environment-Examples.md)
to help you get started.
### Package Installation
If you intend to copy the `UnitySDK` folder into your project, ensure that
you have the [Barracuda preview package](https://docs.unity3d.com/Packages/com.unity.barracuda@0.3/manual/index.html) installed.
To install the Barracuda package in Unity **2017.4.x**, copy the
`UnityPackageManager` folder under the `UnitySDK` folder to the root directory of your
project.
To install the Barracuda package in later versions of Unity, open the Package
Manager window from the menu `Window` -> `Package Manager`. Click on the
`Advanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
is checked. Search for or select the `Barracuda` package and install the latest version.
<p align="center">
<img src="images/barracuda-package.png"
alt="Barracuda Package Manager"
width="710" border="10"
height="569" />
</p>
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement
learning trainers to use with Unity environments.
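A quick way to check that the Python side of the installation picked up the versions bumped in this merge request is to print the __version__ strings touched below; a small sketch, assuming the mlagents and mlagents-envs packages are installed in the active environment:

import mlagents.envs
import mlagents.trainers

# Both should report "0.12.1" after this merge request.
print("ml-agents:     ", mlagents.trainers.__version__)
print("ml-agents-envs:", mlagents.envs.__version__)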

docs/Learning-Environment-Create-New.md (3)


calculate the rewards used for reinforcement training. You can also implement
optional methods to reset the Agent when it has finished or failed its task.
4. Add your Agent subclasses to appropriate GameObjects, typically, the object
in the scene that represents the Agent in the simulation. Each Agent object
must be assigned a Brain object.
in the scene that represents the Agent in the simulation.
**Note:** If you are unfamiliar with Unity, refer to
[Learning the interface](https://docs.unity3d.com/Manual/LearningtheInterface.html)

docs/Migrating.md (1)


* If you use RayPerception3D, replace it with RayPerceptionSensorComponent3D (and similarly for 2D). The settings, such as ray angles and detectable tags, are configured on the component now.
RayPerception3D would contribute `(# of rays) * (# of tags + 2)` to the State Size in Behavior Parameters, but this is no longer necessary, so you should reduce the State Size by this amount.
Making this change will require retraining your model, since the observations that RayPerceptionSensorComponent3D produces are different from the old behavior.
* If you see messages such as `The type or namespace 'Barracuda' could not be found` or `The type or namespace 'Google' could not be found`, you will need to [install the Barracuda preview package](Installation.md#package-installation).
## Migrating from ML-Agents toolkit v0.10 to v0.11.0
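As an aside on the State Size arithmetic in the RayPerception3D note above, a worked example with a hypothetical configuration of 3 rays and 2 detectable tags:

# Hypothetical values; RayPerception3D contributed (# of rays) * (# of tags + 2)
# entries to the State Size, so reduce the Behavior Parameters State Size by this
# amount after switching to RayPerceptionSensorComponent3D.
num_rays = 3
num_detectable_tags = 2
state_size_reduction = num_rays * (num_detectable_tags + 2)
print(state_size_reduction)  # 3 * (2 + 2) = 12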

docs/Training-ML-Agents.md (6)


The output of the training process is a model file containing the optimized
policy. This model file is a TensorFlow data graph containing the mathematical
operations and the optimized weights selected during the training process. You
can use the generated model file with the Learning Brain type in your Unity
project to decide the best course of action for an agent.
can set the generated model file in the Behavior Parameters under your
Agent in your Unity project to decide the best course of action for an agent.
Use the command `mlagents-learn` to train your agents. This command is installed
with the `mlagents` package and its implementation can be found at

each Brain.
each Behavior.
For a broader overview of reinforcement learning, imitation learning and the
ML-Agents training process, see [ML-Agents Toolkit
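The mlagents-learn command referenced above ultimately goes through parse_command_line, which appears in the learn.py hunk further down. A hedged sketch of the equivalent call from Python; the trainer config path and run id are placeholders, and the positional argument plus the --train flag reflect typical 0.12 usage:

from mlagents.trainers.learn import parse_command_line

# Roughly equivalent to: mlagents-learn config/trainer_config.yaml --run-id=firstRun --train
options = parse_command_line(
    ["config/trainer_config.yaml", "--run-id=firstRun", "--train"]
)
print(options)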

gym-unity/README.md (2)


from baselines import deepq
from baselines import logger
from gym_unity.envs.unity_env import UnityEnv
from gym_unity.envs import UnityEnv
def main():
env = UnityEnv("./envs/GridWorld", 0, use_visual=True, uint8_visual=True)

gym-unity/gym_unity/__init__.py (2)


__version__ = "0.12.0"
__version__ = "0.12.1"

ml-agents-envs/mlagents/envs/environment.py (10)


self.executable_launcher(file_name, docker_training, no_graphics, args)
else:
logger.info(
"Start training by pressing the Play button in the Unity Editor."
f"Listening on port {self.port}. "
f"Start training by pressing the Play button in the Unity Editor."
)
self._loaded = True

if self._unity_version != self._version_:
self._close()
raise UnityEnvironmentException(
"The API number is not compatible between Unity and python. Python API : {0}, Unity API : "
"{1}.\nPlease go to https://github.com/Unity-Technologies/ml-agents to download the latest version "
"of ML-Agents.".format(self._version_, self._unity_version)
f"The API number is not compatible between Unity and python. "
f"Python API: {self._version_}, Unity API: {self._unity_version}.\n"
f"Please go to https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release"
f"to download the latest version of ML-Agents."
)
self._n_agents: Dict[str, int] = {}
self._is_first_message = True

ml-agents-envs/mlagents/envs/__init__.py (2)


__version__ = "0.12.0"
__version__ = "0.12.1"

ml-agents/mlagents/trainers/tf_policy.py (12)


self.seed = seed
self.brain = brain
self.use_recurrent = trainer_parameters["use_recurrent"]
self.memory_dict: Dict[int, np.ndarray] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.previous_action_dict: Dict[int, np.array] = {}
self.previous_action_dict: Dict[str, np.array] = {}
self.normalize = trainer_parameters.get("normalize", False)
self.use_continuous_act = brain.vector_action_space_type == "continuous"
if self.use_continuous_act:

return np.zeros((num_agents, self.m_size), dtype=np.float)
def save_memories(
self, agent_ids: List[int], memory_matrix: Optional[np.ndarray]
self, agent_ids: List[str], memory_matrix: Optional[np.ndarray]
) -> None:
if memory_matrix is None:
return

def retrieve_memories(self, agent_ids: List[int]) -> np.ndarray:
def retrieve_memories(self, agent_ids: List[str]) -> np.ndarray:
memory_matrix = np.zeros((len(agent_ids), self.m_size), dtype=np.float)
for index, agent_id in enumerate(agent_ids):
if agent_id in self.memory_dict:

return np.zeros((num_agents, self.num_branches), dtype=np.int)
def save_previous_action(
self, agent_ids: List[int], action_matrix: Optional[np.ndarray]
self, agent_ids: List[str], action_matrix: Optional[np.ndarray]
) -> None:
if action_matrix is None:
return

def retrieve_previous_action(self, agent_ids: List[int]) -> np.ndarray:
def retrieve_previous_action(self, agent_ids: List[str]) -> np.ndarray:
action_matrix = np.zeros((len(agent_ids), self.num_branches), dtype=np.int)
for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_action_dict:

ml-agents/mlagents/trainers/learn.py (14)


def get_version_string() -> str:
return f""" Version information:\n
ml-agents: {mlagents.trainers.__version__},
ml-agents-envs: {mlagents.envs.__version__},
Communicator API: {UnityEnvironment.API_VERSION},
TensorFlow: {tf_utils.tf.__version__}
"""
return f""" Version information:
ml-agents: {mlagents.trainers.__version__},
ml-agents-envs: {mlagents.envs.__version__},
Communicator API: {UnityEnvironment.API_VERSION},
TensorFlow: {tf_utils.tf.__version__}"""
def parse_command_line(argv: Optional[List[str]] = None) -> CommandLineOptions:

"--cpu", default=False, action="store_true", help="Run with CPU only"
)
parser.add_argument("--version", action="version", version=get_version_string())
parser.add_argument("--version", action="version", version="")
args = parser.parse_args(argv)
return CommandLineOptions.from_argparse(args)

)
except Exception:
print("\n\n\tUnity Technologies\n")
print(get_version_string())
options = parse_command_line()
trainer_logger = logging.getLogger("mlagents.trainers")
env_logger = logging.getLogger("mlagents.envs")

ml-agents/mlagents/trainers/ppo/policy.py (7)


]
if self.use_vec_obs:
feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]]
agent_id = brain_info.agents[idx]
feed_dict[self.model.memory_in] = self.retrieve_memories([idx])
feed_dict[self.model.memory_in] = self.retrieve_memories([agent_id])
feed_dict[self.model.prev_action] = self.retrieve_previous_action([idx])
feed_dict[self.model.prev_action] = self.retrieve_previous_action(
[agent_id]
)
value_estimates = self.sess.run(self.model.value_heads, feed_dict)
value_estimates = {k: float(v) for k, v in value_estimates.items()}

ml-agents/mlagents/trainers/__init__.py (2)


__version__ = "0.12.0"
__version__ = "0.12.1"

docs/images/barracuda-package.png (259)

New image: width 701, height 569, 79 KiB.