
merging dev-broadcast-curriculum

/tag-0.2.0
vincentpierre, 7 years ago
Current commit
3b00302a
30 files changed, including 1,032 insertions and 278 deletions
  1. docs/Making-a-new-Unity-Environment.md (23)
  2. python/PPO.ipynb (35)
  3. python/ppo.py (35)
  4. python/ppo/models.py (9)
  5. python/ppo/trainer.py (3)
  6. python/test_unityagents.py (7)
  7. python/unityagents/__init__.py (1)
  8. python/unityagents/brain.py (3)
  9. python/unityagents/environment.py (117)
  10. unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DDecision.cs (3)
  11. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs (21)
  12. unity-environment/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (6)
  13. unity-environment/Assets/ML-Agents/Examples/Tennis/Tennis.unity (242)
  14. unity-environment/Assets/ML-Agents/Scripts/Academy.cs (8)
  15. unity-environment/Assets/ML-Agents/Scripts/Agent.cs (18)
  16. unity-environment/Assets/ML-Agents/Scripts/Brain.cs (35)
  17. unity-environment/Assets/ML-Agents/Scripts/Communicator.cs (7)
  18. unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs (32)
  19. unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs (18)
  20. unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (51)
  21. unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs (19)
  22. unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs (102)
  23. unity-environment/Assets/ML-Agents/Template/Scripts/TemplateDecision.cs (19)
  24. python/curriculum.json (12)
  25. python/unityagents/curriculum.py (71)
  26. unity-environment/Assets/ML-Agents/Scripts/Monitor.cs (380)
  27. unity-environment/Assets/ML-Agents/Scripts/Monitor.cs.meta (12)
  28. unity-environment/Assets/ML-Agents/Scripts/AgentMonitor.cs.meta (12)
  29. unity-environment/Assets/ML-Agents/Resources.meta (9)

23
docs/Making-a-new-Unity-Environment.md


Note that the reward is reset to 0 at every step; you must add to the reward (`reward += rewardIncrement`) rather than set it. If you use `skipFrame` in the Academy and set your rewards instead of incrementing them, you might lose information, since the reward is sent at every step, not at every frame.
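For illustration, here is a minimal sketch (not part of this change) of an agent script that accumulates reward inside `AgentStep`; the class name and the `ReachedGoal` helper are hypothetical:
```csharp
// Hypothetical agent: always *add* to the reward, never overwrite it,
// so nothing is lost when the Academy skips frames.
public class ExampleRewardAgent : Agent
{
    public override void AgentStep(float[] act)
    {
        reward += 0.01f;      // small per-step increment (illustrative)
        if (ReachedGoal())    // hypothetical helper
        {
            reward += 1.0f;   // bonus added on top of the step reward
            done = true;
        }
    }

    bool ReachedGoal()
    {
        return false;         // placeholder for environment-specific logic
    }
}
```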
## Agent Monitor
* You can add the script `AgentMonitor.cs` to any gameObject that has a `YourNameAgent.cs` component. In the inspector of this component, you will see:
* `Fixed Position`: If this box is checked, the monitor will sit in the left corner of the screen and remain there. Note that you can only have one agent with a fixed monitor; otherwise the monitors will overlap.
* `Vertical Offset`: If `Fixed Position` is unchecked, the monitor will follow the Agent on the screen. Use `Vertical Offset` to decide how far above the agent the monitor should be.
* `Display Brain Name`: If this box is checked, the name of the brain will appear in the monitor. (This can be useful if you have similar agents using different brains.)
* `Display Brain Type`: If this box is checked, the type of the agent's brain will be displayed.
* `Display FrameCount`: If this box is checked, the number of frames that have elapsed since the agent was reset will be displayed.
* `Display Current Reward`: If this box is checked, the current reward of the agent will be displayed.
* `Display Max Reward`: If this box is checked, the maximum reward obtained during this training session will be displayed.
* `Display State`: If this box is checked, the current state of the agent will be displayed.
* `Display Action`: If this box is checked, the action the agent is currently performing will be displayed.
Environment monitoring has changed: you can now track many different things, not only agents. Use the `Log` function anywhere in your code:
```csharp
Monitor.Log(key, value, displayType, target)
```
* *`key`* is the name of the information you want to display.
* *`value`* is the information you want to display.
* *`displayType`* is a `MonitorType` that can be either `text`, `slider`, `bar`, or `hist`.
    * `text` will convert `value` into a string and display it. It can be useful for displaying error messages!
    * `slider` is used to display a single float between -1 and 1. Note that `value` must be a float if you want to use a slider. If the value is positive, the slider will be green; if the value is negative, it will be red.
    * `hist` is used to display multiple floats. Note that `value` must be a list or array of floats. The histogram will be a sequence of vertical sliders.
    * `bar` is used to visualize proportions. Note that `value` must be a list or array of positive floats. For each float in `value`, a rectangle whose width is that float divided by the sum of all values will be shown.
* *`target`* is the transform to which you want to attach information. If the transform is `null`, the information will be attached to the global monitor.
If you pass a `value` from an external brain, it will be displayed as a bar (green if the value is positive, red if it is negative) above the monitor. The bar's maximum value is set to 1 by default, but if an agent's value exceeds this number, it becomes the new maximum.
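For reference, the calls below are a rough usage sketch (not part of this diff) showing one `Log` call per display type from a component; the keys, values, and the `MonitorExample` class are made up for illustration:
```csharp
using UnityEngine;

// Hypothetical component that logs one entry of each display type.
public class MonitorExample : MonoBehaviour
{
    void Update()
    {
        Monitor.Log("Message", "hello", MonitorType.text);                    // attached to the global monitor
        Monitor.Log("Value estimate", 0.42f, MonitorType.slider, transform);  // follows this object
        Monitor.Log("Action probabilities",
                    new float[4] { 0.1f, 0.2f, 0.3f, 0.4f }, MonitorType.hist, transform);
        Monitor.Log("Proportions",
                    new float[2] { 0.7f, 0.3f }, MonitorType.bar, transform);
        // Logging null removes the entry stored under that key.
        // Monitor.Log("Message", null);
    }
}
```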

35
python/PPO.ipynb


"summary_freq = 10000 # Frequency at which to save training statistics.\n",
"save_freq = 50000 # Frequency at which to save model.\n",
"env_name = \"environment\" # Name of the training environment file.\n",
"curriculum_file = None\n",
"\n",
"### Algorithm-specific parameters for tuning\n",
"gamma = 0.99 # Reward discount rate.\n",

{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"env = UnityEnvironment(file_name=env_name)\n",
"env = UnityEnvironment(file_name=env_name, curriculum=curriculum_file)\n",
"print(str(env))\n",
"brain_name = env.brain_names[0]"
]

"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],

"if curriculum_file == \"None\":\n",
" curriculum_file = None\n",
"\n",
"\n",
"def get_progress():\n",
" if curriculum_file is not None:\n",
" if env._curriculum.measure_type == \"progress\":\n",
" return steps / max_steps\n",
" elif env._curriculum.measure_type == \"reward\":\n",
" return last_reward\n",
" else:\n",
" return None\n",
" else:\n",
" return None\n",
"\n",
"# Create the Tensorflow model graph\n",
"ppo_model = create_agent_model(env, lr=learning_rate,\n",
" h_size=hidden_units, epsilon=epsilon,\n",

" saver.restore(sess, ckpt.model_checkpoint_path)\n",
" else:\n",
" sess.run(init)\n",
" steps = sess.run(ppo_model.global_step)\n",
" steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward]) \n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
" # Decide and take an action\n",
" new_info = trainer.take_action(info, env, brain_name)\n",
" info = new_info\n",

" trainer.update_model(batch_size, num_epoch)\n",
" if steps % summary_freq == 0 and steps != 0 and train_model:\n",
" # Write training statistics to tensorboard.\n",
" trainer.write_summary(summary_writer, steps)\n",
" trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)\n",
" if len(trainer.stats['cumulative_reward']) > 0:\n",
" mean_reward = np.mean(trainer.stats['cumulative_reward'])\n",
" sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})\n",
" last_reward = sess.run(ppo_model.last_reward)\n",
" # Final save Tensorflow model\n",
" if steps != 0 and train_model:\n",
" save_model(sess, model_path=model_path, steps=steps, saver=saver)\n",

35
python/ppo.py


Options:
--help Show this message.
--max-steps=<n> Maximum number of steps to run environment [default: 1e6].
--curriculum=<file> Curriculum json file for environment [default: None]
--max-steps=<n> Maximum number of steps to run environment [default: 1e6].
--train Whether to train model, or only run inference [default: True].
--train Whether to train model, or only run inference [default: False].
--summary-freq=<n> Frequency at which to save training statistics [default: 10000].
--save-freq=<n> Frequency at which to save model [default: 50000].
--gamma=<n> Reward discount rate [default: 0.99].

env_name = options['<env>']
keep_checkpoints = int(options['--keep-checkpoints'])
worker_id = int(options['--worker-id'])
curriculum_file = str(options['--curriculum'])
if curriculum_file == "None":
curriculum_file = None
# Algorithm-specific parameters for tuning
gamma = float(options['--gamma'])

hidden_units = int(options['--hidden-units'])
batch_size = int(options['--batch-size'])
env = UnityEnvironment(file_name=env_name, worker_id=worker_id)
env = UnityEnvironment(file_name=env_name, worker_id=worker_id, curriculum=curriculum_file)
print(str(env))
brain_name = env.brain_names[0]

init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=keep_checkpoints)
def get_progress():
if curriculum_file is not None:
if env._curriculum.measure_type == "progress":
return steps / max_steps
elif env._curriculum.measure_type == "reward":
return last_reward
else:
return None
else:
return None
with tf.Session() as sess:
# Instantiate model parameters
if load_model:

else:
sess.run(init)
steps = sess.run(ppo_model.global_step)
steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
info = env.reset(train_mode=train_model)[brain_name]
info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
info = env.reset(train_mode=train_model)[brain_name]
info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
# Decide and take an action
new_info = trainer.take_action(info, env, brain_name)
info = new_info

trainer.update_model(batch_size, num_epoch)
if steps % summary_freq == 0 and steps != 0 and train_model:
# Write training statistics to tensorboard.
trainer.write_summary(summary_writer, steps)
trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)
if len(trainer.stats['cumulative_reward']) > 0:
mean_reward = np.mean(trainer.stats['cumulative_reward'])
sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})
last_reward = sess.run(ppo_model.last_reward)
# Final save Tensorflow model
if steps != 0 and train_model:
save_model(sess, model_path=model_path, steps=steps, saver=saver)

9
python/ppo/models.py


class PPOModel(object):
def create_reward_encoder(self):
self.last_reward = tf.Variable(0, name="last_reward", trainable=False, dtype=tf.float32)
self.new_reward = tf.placeholder(shape=[], dtype=tf.float32, name='new_reward')
self.update_reward = tf.assign(self.last_reward, self.new_reward)
def create_visual_encoder(self, o_size_h, o_size_w, bw, h_size, num_streams, activation):
"""
Builds a set of visual (CNN) encoders.

s_size = brain.state_space_size
a_size = brain.action_space_size
self.create_reward_encoder()
hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None
if brain.number_observations > 0:
h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']

:param brain: State-space size
:param h_size: Hidden layer size
"""
self.create_reward_encoder()
hidden_state, hidden_visual, hidden = None, None, None
if brain.number_observations > 0:
h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']

3
python/ppo/trainer.py


for key in self.history_dict:
self.history_dict[key] = empty_local_history(self.history_dict[key])
def write_summary(self, summary_writer, steps):
def write_summary(self, summary_writer, steps, lesson_number):
"""
Saves training statistics to Tensorboard.
:param summary_writer: writer associated with Tensorflow session.

stat_mean = float(np.mean(self.stats[key]))
summary.value.add(tag='Info/{}'.format(key), simple_value=stat_mean)
self.stats[key] = []
summary.value.add(tag='Info/Lesson', simple_value=lesson_number)
summary_writer.add_summary(summary, steps)
summary_writer.flush()

7
python/test_unityagents.py


"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"brainParameters": [{
"stateSize": 3,
"actionSize": 2,

"agents": [1,2],
"states": [1,2,3,4,5,6],
"rewards": [1,2],
"actions": null,
"actions": [1,2,3,4],
"memories": [],
"dones": [false, false]
}'''.encode(),

"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": null,
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, false]
}'''.encode(),

"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": null,
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, true]
}'''.encode(),

1
python/unityagents/__init__.py


from .environment import *
from .brain import *
from .exception import *
from .curriculum import *

3
python/unityagents/brain.py


class BrainInfo:
def __init__(self, observation, state, memory=None, reward=None, agents=None, local_done=None):
def __init__(self, observation, state, memory=None, reward=None, agents=None, local_done=None, action =None):
"""
Describes experience at current step of all agents linked to a brain.
"""

self.rewards = reward
self.local_done = local_done
self.agents = agents
self.actions = action
class BrainParameters:

117
python/unityagents/environment.py


import os
import socket
import subprocess
import struct
from .curriculum import Curriculum
from PIL import Image
from sys import platform

class UnityEnvironment(object):
def __init__(self, file_name, worker_id=0,
base_port=5005):
base_port=5005, curriculum = None):
"""
Starts a new unity environment and establishes a connection with the environment.
Notice: Currently communication between Unity and Python takes place over an open socket without authentication.

atexit.register(self.close)
self.port = base_port + worker_id
self._buffer_size = 120000
self._buffer_size = 12000
self._loaded = False
self._open_socket = False

"The Unity environment took too long to respond. Make sure {} does not need user interaction to launch "
"and that the Academy and the external Brain(s) are attached to objects in the Scene.".format(
str(file_name)))
self._data = {}
self._global_done = None
self._academy_name = p["AcademyName"]
self._brains = {}
self._brain_names = p["brainNames"]
self._external_brain_names = p["externalBrainNames"]
self._external_brain_names = [] if self._external_brain_names is None else self._external_brain_names
self._num_brains = len(self._brain_names)
self._num_external_brains = len(self._external_brain_names)
self._resetParameters = p["resetParameters"]
self._curriculum = Curriculum(curriculum, self._resetParameters)
for i in range(self._num_brains):
self._brains[self._brain_names[i]] = BrainParameters(self._brain_names[i], p["brainParameters"][i])
self._loaded = True
logger.info("\n'{}' started successfully!".format(self._academy_name))
if (self._num_external_brains == 0):
logger.warning(" No External Brains found in the Unity Environment. "
"You will not be able to pass actions to your agent(s).")
self._data = {}
self._global_done = None
self._academy_name = p["AcademyName"]
self._num_brains = len(p["brainParameters"])
self._brains = {}
self._brain_names = p["brainNames"]
self._resetParameters = p["resetParameters"]
for i in range(self._num_brains):
self._brains[self._brain_names[i]] = BrainParameters(self._brain_names[i], p["brainParameters"][i])
self._conn.send(b".")
self._loaded = True
logger.info("\n'{}' started successfully!".format(self._academy_name))
@property
def brains(self):
return self._brains

return self._num_brains
@property
def number_external_brains(self):
return self._num_external_brains
@property
@property
def external_brain_names(self):
return self._external_brain_names
@staticmethod
def _process_pixels(image_bytes=None, bw=False):
"""

for k in self._resetParameters])) + '\n' + \
'\n'.join([str(self._brains[b]) for b in self._brains])
def _recv_bytes(self):
s = self._conn.recv(self._buffer_size)
message_length = struct.unpack("I", bytearray(s[:4]))[0]
s = s[4:]
while len(s) != message_length:
s += self._conn.recv(self._buffer_size)
return s
def _get_state_image(self, bw):
"""
Receives observation from socket, and confirms.

s = self._conn.recv(self._buffer_size)
s = self._recv_bytes()
s = self._process_pixels(image_bytes=s, bw=bw)
self._conn.send(b"RECEIVED")
return s

Receives dictionary of state information from socket, and confirms.
:return:
"""
state = self._conn.recv(self._buffer_size).decode('utf-8')
state = self._recv_bytes().decode('utf-8')
def reset(self, train_mode=True, config=None):
def reset(self, train_mode=True, config=None, progress=None):
config = config or {}
old_lesson = self._curriculum.get_lesson_number()
config = self._curriculum.get_lesson(progress) if config is None else config
if old_lesson != self._curriculum.get_lesson_number():
logger.info("\nLesson changed. Now in Lesson {0} : \t{1}"
.format(self._curriculum.get_lesson_number(),
', '.join([str(x)+' -> '+str(config[x]) for x in config])))
else:
logger.info("\nEpisode Reset. In Lesson {0} : \t{1}"
.format(self._curriculum.get_lesson_number(),
', '.join([str(x)+' -> '+str(config[x]) for x in config])))
if self._loaded:
self._conn.send(b"RESET")
self._conn.recv(self._buffer_size)

rewards = state_dict["rewards"]
dones = state_dict["dones"]
agents = state_dict["agents"]
# actions = state_dict["actions"]
if n_agent > 0 :
actions = np.array(state_dict["actions"]).reshape((n_agent, -1))
else :
actions = np.array([])
observations = []
for o in range(self._brains[b].number_observations):

observations.append(np.array(obs_n))
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, dones)
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, dones, actions)
self._global_done = self._conn.recv(self._buffer_size).decode('utf-8') == 'True'

arr = [float(x) for x in arr]
return arr
def step(self, action, memory=None, value=None):
def step(self, action = None, memory=None, value=None):
"""
Provides the environment with an action, moves the environment dynamics forward accordingly, and returns
observation, state, and reward information to the agent.

:return: A Data structure corresponding to the new state of the environment.
"""
action = {} if action is None else action
if self._num_brains > 1:
if self._num_external_brains == 1:
action = {self._external_brain_names[0]: action}
elif self._num_external_brains > 1:
action = {self._brain_names[0]: action}
raise UnityActionException(
"There are no external brains in the environment, "
"step cannot take an action input")
if self._num_brains > 1:
if self._num_external_brains == 1:
memory = {self._external_brain_names[0]: memory}
elif self._num_external_brains > 1:
memory = {self._brain_names[0]: memory}
raise UnityActionException(
"There are no external brains in the environment, "
"step cannot take a memory input")
if self._num_brains > 1:
if self._num_external_brains == 1:
value = {self._external_brain_names[0]: value}
elif self._num_external_brains > 1:
value = {self._brain_names[0]: value}
raise UnityActionException(
"There are no external brains in the environment, "
"step cannot take a value input")
for b in self._brain_names:
for brain_name in list(action.keys()) + list(memory.keys()) + list(value.keys()):
if brain_name not in self._external_brain_names:
raise UnityActionException(
"The name {0} does not correspond to an external brain "
"in the environment". format(brain_name))
for b in self._external_brain_names:
n_agent = len(self._data[b].agents)
if b not in action:
raise UnityActionException("You need to input an action for the brain {0}".format(b))

3
unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DDecision.cs


{
if (gameObject.GetComponent<Brain>().brainParameters.actionSpaceType == StateType.continuous)
{
return new float[4]{ 0f, 0f, 0f, 0.0f };
return new float[2]{ -10*(state[4]-state[2]), -10*(state[2]+state[4])};
}
else

21
unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs


using System.Collections.Generic;
using UnityEngine;
public class BasicDecision : MonoBehaviour, Decision {
public class BasicDecision : MonoBehaviour, Decision
{
public float[] Decide(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[1]{ 1f };
public float[] Decide (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
}
}
public float[] MakeMemory(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[0];
public float[] MakeMemory (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
}
}
}

6
unity-environment/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


{
float moveX = 0.0f;
float moveY = 0.0f;
if (act[0] == 0f)
if (act[0] == 1f)
if (act[0] == 1f)
if (act[0] == 2f)
if (act[0] == 2f)
if (act[0] == 0f)
{
moveX = 0.0f;
}

242
unity-environment/Assets/ML-Agents/Examples/Tennis/Tennis.unity


tileSize: 256
accuratePlacement: 0
m_NavMeshData: {fileID: 0}
--- !u!114 &2702986
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 1948813725}
--- !u!114 &21374022
--- !u!114 &10967279
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}

m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)
continuousPlayerActions:
- key: 97
index: 0
value: -1
- key: 100
index: 0
value: 1
- key: 0
index: 0
value: 0
- key: 0
index: 0
value: 0
discretePlayerActions:
- key: 97
value: 0
- key: 100
value: 1
- key: 0
value: 0
- key: 32
value: 3
defaultAction: -1
brain: {fileID: 1948813725}
graphModel: {fileID: 0}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
StatePlacholderName: state
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
ObservationPlaceholderName: []
ActionPlaceholderName: action
brain: {fileID: 0}
--- !u!1 &26143720
GameObject:
m_ObjectHideFlags: 0

m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 36699497}
--- !u!114 &86818458
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 459283648}
--- !u!1 &459283646
GameObject:
m_ObjectHideFlags: 0

stateSpaceType: 1
brainType: 2
CoreBrains:
- {fileID: 1364070226}
- {fileID: 86818458}
- {fileID: 1447029077}
- {fileID: 532686528}
instanceID: 10250
--- !u!114 &532686528
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)
m_EditorClassIdentifier:
graphModel: {fileID: 4900000, guid: c917523464309409996933c3b7063a9f, type: 3}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
StatePlacholderName: state
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
ObservationPlaceholderName: []
ActionPlaceholderName: action
brain: {fileID: 459283648}
- {fileID: 1308476791}
- {fileID: 1784104787}
- {fileID: 1864001037}
- {fileID: 1245752352}
instanceID: 19292
--- !u!1 &629009137
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: e51a3fb0b3186433ea84fc1e0549cc91, type: 3}
m_Name:
m_EditorClassIdentifier:
brain: {fileID: 459283648}
brain: {fileID: 1948813725}
observations: []
maxStep: 5000
resetOnDone: 1

m_Interpolate: 0
m_Constraints: 122
m_CollisionDetection: 0
--- !u!114 &668404469
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 0}
--- !u!1 &731033571
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 05eee2a5536934f5684a65f151efd304, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!114 &1245752352
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
graphModel: {fileID: 4900000, guid: c917523464309409996933c3b7063a9f, type: 3}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
StatePlacholderName: state
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
ObservationPlaceholderName: []
ActionPlaceholderName: action
brain: {fileID: 459283648}
--- !u!114 &1247671385
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 1948813725}
--- !u!1 &1261870887
GameObject:
m_ObjectHideFlags: 0

m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1261870887}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!114 &1364070226
--- !u!114 &1308476791
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}

m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
value: 0
value: 1
value: 1
value: 2
defaultAction: -1
defaultAction: 0
--- !u!114 &1447029077
--- !u!114 &1344977993
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}

m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
brain: {fileID: 459283648}
brain: {fileID: 1948813725}
--- !u!1 &1605015604
GameObject:
m_ObjectHideFlags: 0

m_Father: {fileID: 0}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 60, y: 30, z: 0}
--- !u!114 &1784104787
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 459283648}
--- !u!1 &1838949272
GameObject:
m_ObjectHideFlags: 0

m_Father: {fileID: 2097046871}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: -90, y: 0, z: 0}
--- !u!114 &1864001037
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 459283648}
--- !u!1 &1871669621
GameObject:
m_ObjectHideFlags: 0

m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1871669621}
--- !u!114 &1880810220
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
continuousPlayerActions:
- key: 97
index: 0
value: -1
- key: 100
index: 0
value: 1
- key: 0
index: 0
value: 0
- key: 0
index: 0
value: 0
discretePlayerActions:
- key: 97
value: 1
- key: 100
value: 2
- key: 0
value: 0
- key: 32
value: 3
defaultAction: 0
brain: {fileID: 1948813725}
--- !u!1 &1948813723
GameObject:
m_ObjectHideFlags: 0

m_Component:
- component: {fileID: 1948813724}
- component: {fileID: 1948813725}
- component: {fileID: 1948813726}
m_Layer: 0
m_Name: MyBrain
m_TagString: Untagged

-
actionSpaceType: 0
stateSpaceType: 1
brainType: 0
brainType: 1
- {fileID: 21374022}
- {fileID: 668404469}
- {fileID: 2702986}
instanceID: 12574
- {fileID: 1880810220}
- {fileID: 1344977993}
- {fileID: 1247671385}
- {fileID: 10967279}
instanceID: 19482
--- !u!114 &1948813726
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1948813723}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: c3e2acdaec6974f37a5ca11872f71ae8, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &2073469450
GameObject:
m_ObjectHideFlags: 0

8
unity-environment/Assets/ML-Agents/Scripts/Academy.cs


GetBrains(gameObject, brains);
InitializeAcademy();
communicator = new ExternalCommunicator(this);
if (!communicator.CommunicatorHandShake())
{
communicator = null;
}
windowResize = true;
done = true;
acceptingSteps = true;

18
unity-environment/Assets/ML-Agents/Scripts/Agent.cs


if (brain != null)
{
brain.agents.Add(id, gameObject.GetComponent<Agent>());
agentStoredAction = new float[brain.brainParameters.actionSize];
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
agentStoredAction = new float[brain.brainParameters.actionSize];
}
else
{
agentStoredAction = new float[1];
}
memory = new float[brain.brainParameters.memorySize];
}
InitializeAgent();

RemoveBrain();
brain = b;
brain.agents.Add(id, gameObject.GetComponent<Agent>());
agentStoredAction = new float[brain.brainParameters.actionSize];
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
agentStoredAction = new float[brain.brainParameters.actionSize];
}
else
{
agentStoredAction = new float[1];
}
memory = new float[brain.brainParameters.memorySize];
}

35
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


External,
Internal
}
Player,
Heuristic,
External,
Player,
Heuristic,
External,
}
#endif

public enum StateType
{
discrete,
continuous
}
continuous}
;
/** Only need to be modified in the brain's inpector.

}
}
else
{
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{
if ((int)bt >= CoreBrains.Length)
break;
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] = ScriptableObject.CreateInstance("CoreBrain" + bt.ToString());
}
}
}
// If the length of CoreBrains does not match the number of BrainTypes,
// we increase the length of CoreBrains

{
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{
CoreBrains[(int)bt] = ScriptableObject.Instantiate(CoreBrains[(int)bt]);
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] = ScriptableObject.CreateInstance("CoreBrain" + bt.ToString());
}
else
{
CoreBrains[(int)bt] = ScriptableObject.Instantiate(CoreBrains[(int)bt]);
}
}
instanceID = gameObject.GetInstanceID();
}

foreach (KeyValuePair<int, Agent> idAgent in agents)
{
List<float> states = idAgent.Value.CollectState();
if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous ))
if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous))
if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete ))
if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete))
{
throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count));

7
unity-environment/Assets/ML-Agents/Scripts/Communicator.cs


public Dictionary<string, float> resetParameters;
/**< \brief The default reset parameters are sent via socket*/
public List<string> brainNames;
/**< \brief A list of the External brains names sent via socket*/
/**< \brief A list of the all the brains names sent via socket*/
public List<string> externalBrainNames;
/**< \brief A list of the External brains names sent via socket*/
}
public enum ExternalCommand

/// Implement this method to allow brains to subscribe to the
/// decisions made outside of Unity
void SubscribeBrain(Brain brain);
/// First contact between Communicator and external process
bool CommunicatorHandShake();
/// Implement this method to initialize the communicator
void InitializeCommunicator();

32
unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs


{
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = new ExternalCommunicator(brain.gameObject.transform.parent.gameObject.GetComponent<Academy>());
brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator = coord;
coord.SubscribeBrain(brain);
throw new UnityAgentsException(string.Format("The brain {0} was set to" +
" External mode" +
" but Unity was unable to read the" +
" arguments passed at launch.", brain.gameObject.name));
coord = null;
else
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
/// Uses the communicator to retrieve the actions, memories and values and

brain.SendActions(coord.GetDecidedAction(brain.gameObject.name));
brain.SendMemories(coord.GetMemories(brain.gameObject.name));
brain.SendValues(coord.GetValues(brain.gameObject.name));
if (coord != null)
{
brain.SendActions(coord.GetDecidedAction(brain.gameObject.name));
brain.SendMemories(coord.GetMemories(brain.gameObject.name));
brain.SendValues(coord.GetValues(brain.gameObject.name));
}
}
/// Uses the communicator to send the states, observations, rewards and

coord.giveBrainInfo(brain);
if (coord != null)
{
coord.giveBrainInfo(brain);
}
}
/// Nothing needs to appear in the inspector

18
unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs


public Brain brain;
/**< Reference to the brain that uses this CoreBrainHeuristic */
ExternalCommunicator coord;
public Decision decision;
/**< Reference to the Decision component used to decide the actions */

public void InitializeCoreBrain()
{
decision = brain.gameObject.GetComponent<Decision>();
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = null;
}
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
}
/// Uses the Decision Component to decide that action to take

/// Nothing needs to be implemented, the states are collected in DecideAction
public void SendState()
{
if (coord!=null)
{
coord.giveBrainInfo(brain);
}
}
/// Displays an error if no decision component is attached to the brain

EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
if (brain.gameObject.GetComponent<Decision>() == null)
{
EditorGUILayout.HelpBox("You need to add a 'Decision' component to this gameObject", MessageType.Error);

51
unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs


public enum tensorType
{
Integer,
FloatingPoint
};
FloatingPoint}
;
public string name;
public tensorType valueType;

}
ExternalCommunicator coord;
/// Modify only in inspector : Reference to the Graph asset
public TextAsset graphModel;

}
#endif
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = null;
}
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
if (graphModel != null)
{

currentBatchSize = brain.agents.Count;
if (currentBatchSize == 0)
{
if (coord != null)
{
coord.giveBrainInfo(brain);
}
return;
}

i++;
}
}
#endif
if (coord != null)
{
coord.giveBrainInfo(brain);
}
#endif
}

// Create the state tensor
if (hasState)
{
runner.AddInput(graph[graphScope + StatePlacholderName][0], inputState);
if (brain.brainParameters.stateSpaceType == StateType.discrete)
{
int[,] discreteInputState = new int[currentBatchSize, 1];
for (int i = 0; i < currentBatchSize; i++)
{
discreteInputState[i, 0] = (int)inputState[i, 0];
}
runner.AddInput(graph[graphScope + StatePlacholderName][0], discreteInputState);
}
else
{
runner.AddInput(graph[graphScope + StatePlacholderName][0], inputState);
}
}
// Create the observation tensors

}
if (hasRecurrent)
{
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
}
TFTensor[] networkOutput;
try
{

{
Dictionary<int, float[]> new_memories = new Dictionary<int, float[]>();
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
float[,] recurrent_tensor = networkOutput[1].GetValue() as float[,];
int i = 0;

19
unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs


public int index;
public float value;
}
ExternalCommunicator coord;
[SerializeField]
/// Contains the mapping from input to continuous actions

private DiscretePlayerAction[] discretePlayerActions;
[SerializeField]
private int defaultAction = -1;
private int defaultAction = 0;
/// Reference to the brain that uses this CoreBrainPlayer
public Brain brain;

/// Nothing to implement
public void InitializeCoreBrain()
{
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = null;
}
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
}
/// Uses the continuous inputs or dicrete inputs of the player to

/// decisions
public void SendState()
{
if (coord!=null)
{
coord.giveBrainInfo(brain);
}
}
/// Displays continuous or discrete input mapping in the inspector

102
unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs


private class StepMessage
{
public string brain_name { get; set; }
public List<bool> dones { get; set; }
}

public Dictionary<string, List<float>> value { get; set; }
}

public Dictionary<string, float> parameters { get; set; }
public bool train_model { get; set; }
}

hasSentState[brain.gameObject.name] = false;
}
/// Contains the logic for the initializtation of the socket.
public void InitializeCommunicator()
{
public bool CommunicatorHandShake(){
try
{
ReadArgs();

throw new UnityAgentsException("One of the brains was set isExternal" +
" but Unity was unable to read the" +
" arguments passed at launch");
return false;
return true;
}
/// Contains the logic for the initializtation of the socket.
public void InitializeCommunicator()
{
messageHolder = new byte[messageLength];
// Create a TCP/IP socket.

AcademyParameters accParamerters = new AcademyParameters();
accParamerters.brainParameters = new List<BrainParameters>();
accParamerters.brainNames = new List<string>();
accParamerters.externalBrainNames = new List<string>();
if (b.brainType == BrainType.External)
{
accParamerters.externalBrainNames.Add(b.gameObject.name);
}
}
accParamerters.AcademyName = academy.gameObject.name;
accParamerters.resetParameters = academy.resetParameters;

}
/// Sends Academy parameters to external agent
private void SendParameters(AcademyParameters envParams)
private void SendParameters(AcademyParameters envParams)
Receive();
}
/// Receives messages from external agent

return bytes;
}
private byte[] AppendLength(byte[] input){
byte[] newArray = new byte[input.Length + 4];
input.CopyTo(newArray, 4);
System.BitConverter.GetBytes(input.Length).CopyTo(newArray, 0);
return newArray;
}
/// Collects the information from the brains and sends it accross the socket
public void giveBrainInfo(Brain brain)
{

List<float> concatenatedRewards = new List<float>();
List<float> concatenatedMemories = new List<float>();
List<bool> concatenatedDones = new List<bool>();
List<float> concatenatedActions = new List<float>();
Dictionary<int, float[]> collectedActions = brain.CollectActions();
foreach (int id in current_agents[brainName])
{

concatenatedDones.Add(collectedDones[id]);
concatenatedActions = concatenatedActions.Concat(collectedActions[id].ToList()).ToList();
}
StepMessage message = new StepMessage()
{

rewards = concatenatedRewards,
//actions = actionDict,
actions = concatenatedActions,
sender.Send(Encoding.ASCII.GetBytes(envMessage));
sender.Send(AppendLength(Encoding.ASCII.GetBytes(envMessage)));
Receive();
int i = 0;
foreach (resolution res in brain.brainParameters.cameraResolutions)

sender.Send(TexToByteArray(brain.ObservationToTex(collectedObservations[id][i], res.width, res.height)));
sender.Send(AppendLength(TexToByteArray(brain.ObservationToTex(collectedObservations[id][i], res.width, res.height))));
Receive();
}
i++;

foreach (Brain brain in brains)
{
string brainName = brain.gameObject.name;
Dictionary<int, float[]> actionDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
if (brain.brainType == BrainType.External)
if (brain.brainParameters.actionSpaceType == StateType.continuous)
string brainName = brain.gameObject.name;
Dictionary<int, float[]> actionDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i * brain.brainParameters.actionSize, brain.brainParameters.actionSize).ToArray());
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i * brain.brainParameters.actionSize, brain.brainParameters.actionSize).ToArray());
}
else
{
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i, 1).ToArray());
}
else
storedActions[brainName] = actionDict;
Dictionary<int, float[]> memoryDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i, 1).ToArray());
memoryDict.Add(current_agents[brainName][i],
agentMessage.memory[brainName].GetRange(i * brain.brainParameters.memorySize, brain.brainParameters.memorySize).ToArray());
}
storedActions[brainName] = actionDict;
storedMemories[brainName] = memoryDict;
Dictionary<int, float[]> memoryDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
memoryDict.Add(current_agents[brainName][i],
agentMessage.memory[brainName].GetRange(i * brain.brainParameters.memorySize, brain.brainParameters.memorySize).ToArray());
Dictionary<int, float> valueDict = new Dictionary<int, float>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
valueDict.Add(current_agents[brainName][i],
agentMessage.value[brainName][i]);
}
storedValues[brainName] = valueDict;
storedMemories[brainName] = memoryDict;
Dictionary<int, float> valueDict = new Dictionary<int, float>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
valueDict.Add(current_agents[brainName][i],
agentMessage.value[brainName][i]);
}
storedValues[brainName] = valueDict;
}
}

19
unity-environment/Assets/ML-Agents/Template/Scripts/TemplateDecision.cs


using System.Collections.Generic;
using UnityEngine;
public class TemplateDecision : MonoBehaviour, Decision {
public class TemplateDecision : MonoBehaviour, Decision
{
public float[] Decide (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
public float[] Decide(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[0];
}
}
public float[] MakeMemory (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
public float[] MakeMemory(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[0];
}
}
}

12
python/curriculum.json


{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : true,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20, 15],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}

71
python/unityagents/curriculum.py


import json
import numpy as np
from .exception import UnityEnvironmentException
class Curriculum(object):
def __init__(self, location, default_reset_parameters):
self.lesson_number = 0
self.lesson_length = 0
self.measure_type = None
if location is None:
self.data = None
else:
try:
with open(location) as data_file:
self.data = json.load(data_file)
except FileNotFoundError:
raise UnityEnvironmentException(
"The file {0} could not be found.".format(location))
except UnicodeDecodeError:
raise UnityEnvironmentException("There was an error decoding {}".format(location))
self.smoothing_value = 0
for key in ['parameters', 'measure', 'thresholds',
'min_lesson_length', 'signal_smoothing']:
if key not in self.data:
raise UnityEnvironmentException("{0} does not contain a "
"{1} field.".format(location, key))
parameters = self.data['parameters']
self.measure_type = self.data['measure']
self.max_lesson_number = len(self.data['thresholds'])
for key in parameters:
if key not in default_reset_parameters:
raise UnityEnvironmentException(
"The parameter {0} in Curriculum {1} is not present in "
"the Environment".format(key, location))
for key in parameters:
if len(parameters[key]) != self.max_lesson_number + 1:
raise UnityEnvironmentException(
"The parameter {0} in Curriculum {1} must have {2} values "
"but {3} were found".format(key, location,
self.max_lesson_number + 1, len(parameters[key])))
@property
def measure(self):
return self.measure_type
def get_lesson_number(self):
return self.lesson_number
def set_lesson_number(self, value):
self.lesson_length = 0
self.lesson_number = max(0, min(value, self.max_lesson_number))
def get_lesson(self, progress):
if self.data is None or progress is None:
return {}
if self.data["signal_smoothing"]:
progress = self.smoothing_value * 0.9 + 0.1 * progress
self.smoothing_value = progress
self.lesson_length += 1
if self.lesson_number < self.max_lesson_number:
if ((progress > self.data['thresholds'][self.lesson_number]) and
(self.lesson_length > self.data['min_lesson_length'])):
self.lesson_length = 0
self.lesson_number += 1
config = {}
parameters = self.data["parameters"]
for key in parameters:
config[key] = parameters[key][self.lesson_number]
return config

380
unity-environment/Assets/ML-Agents/Scripts/Monitor.cs


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
using Newtonsoft.Json;
using System.Linq;
/** The type of monitor the information must be displayed in.
* <slider> corresponds to a slingle rectangle which width is given
* by a float between -1 and 1. (green is positive, red is negative)
* <hist> corresponds to n vertical sliders.
* <text> is a text field.
* <bar> is a rectangle of fixed length to represent the proportions
* of a list of floats.
*/
public enum MonitorType
{
slider,
hist,
text,
bar
}
/** Monitor is used to display information. Use the log function to add
* information to your monitor.
*/
public class Monitor : MonoBehaviour
{
static bool isInstanciated;
static GameObject canvas;
private struct DisplayValue
{
public float time;
public object value;
public MonitorType monitorDisplayType;
}
static Dictionary<Transform, Dictionary<string, DisplayValue>> displayTransformValues;
static private Color[] barColors;
[HideInInspector]
static public float verticalOffset = 3f;
/**< \brief This float represents how high above the target the monitors will be. */
static GUIStyle keyStyle;
static GUIStyle valueStyle;
static GUIStyle greenStyle;
static GUIStyle redStyle;
static GUIStyle[] colorStyle;
static bool initialized;
/** Use the Monitor.Log static function to attach information to a transform.
* If displayType is <text>, value can be any object.
* If sidplayType is <slider>, value must be a float.
* If sidplayType is <hist>, value must be a List or Array of floats.
* If sidplayType is <bar>, value must be a list or Array of positive floats.
* Note that <slider> and <hist> caps values between -1 and 1.
* @param key The name of the information you wish to Log.
* @param value The value you want to display.
* @param displayType The type of display.
* @param target The transform you want to attach the information to.
*/
public static void Log(
string key,
object value,
MonitorType displayType = MonitorType.text,
Transform target = null)
{
if (!isInstanciated)
{
InstanciateCanvas();
isInstanciated = true;
}
if (target == null)
{
target = canvas.transform;
}
if (!displayTransformValues.Keys.Contains(target))
{
displayTransformValues[target] = new Dictionary<string, DisplayValue>();
}
Dictionary<string, DisplayValue> displayValues = displayTransformValues[target];
if (value == null)
{
RemoveValue(target, key);
return;
}
if (!displayValues.ContainsKey(key))
{
DisplayValue dv = new DisplayValue();
dv.time = Time.timeSinceLevelLoad;
dv.value = value;
dv.monitorDisplayType = displayType;
displayValues[key] = dv;
while (displayValues.Count > 20)
{
string max = displayValues.Aggregate((l, r) => l.Value.time < r.Value.time ? l : r).Key;
RemoveValue(target, max);
}
}
else
{
DisplayValue dv = displayValues[key];
dv.value = value;
displayValues[key] = dv;
}
}
/** Remove a value from a monitor
* @param target The transform to which the information is attached
* @param key The key of the information you want to remove
*/
public static void RemoveValue(Transform target, string key)
{
if (target == null)
{
target = canvas.transform;
}
if (displayTransformValues.Keys.Contains(target))
{
if (displayTransformValues[target].ContainsKey(key))
{
displayTransformValues[target].Remove(key);
if (displayTransformValues[target].Keys.Count == 0)
{
displayTransformValues.Remove(target);
}
}
}
}
/** Remove all information from a monitor
* @param target The transform to which the information is attached
*/
public static void RemoveAllValues(Transform target)
{
if (target == null)
{
target = canvas.transform;
}
if (displayTransformValues.Keys.Contains(target))
{
displayTransformValues.Remove(target);
}
}
/** Use SetActive to enable or disable the Monitor via script
* @param active Set the Monitor's status to the value of active
*/
public static void SetActive(bool active){
if (!isInstanciated)
{
InstanciateCanvas();
isInstanciated = true;
}
canvas.SetActive(active);
}
private static void InstanciateCanvas()
{
canvas = GameObject.Find("AgentMonitorCanvas");
if (canvas == null)
{
canvas = new GameObject();
canvas.name = "AgentMonitorCanvas";
canvas.AddComponent<Monitor>();
}
displayTransformValues = new Dictionary<Transform, Dictionary< string , DisplayValue>>();
}
private float[] ToFloatArray(object input)
{
try
{
return JsonConvert.DeserializeObject<float[]>(
JsonConvert.SerializeObject(input, Formatting.None));
}
catch
{
}
try
{
return new float[1]
{JsonConvert.DeserializeObject<float>(
JsonConvert.SerializeObject(input, Formatting.None))
};
}
catch
{
}
return new float[0];
}
void OnGUI()
{
if (!initialized)
{
Initialize();
initialized = true;
}
var toIterate = displayTransformValues.Keys.ToList();
foreach (Transform target in toIterate)
{
if (target == null)
{
displayTransformValues.Remove(target);
continue;
}
float widthScaler = (Screen.width / 1000f);
float keyPixelWidth = 100 * widthScaler;
float keyPixelHeight = 20 * widthScaler;
float paddingwidth = 10 * widthScaler;
float scale = 1f;
Vector2 origin = new Vector3(0, Screen.height);
if (!(target == canvas.transform))
{
Vector3 cam2obj = target.position - Camera.main.transform.position;
scale = Mathf.Min(1, 20f / (Vector3.Dot(cam2obj, Camera.main.transform.forward)));
Vector3 worldPosition = Camera.main.WorldToScreenPoint(target.position + new Vector3(0, verticalOffset, 0));
origin = new Vector3(worldPosition.x - keyPixelWidth * scale, Screen.height - worldPosition.y);
}
keyPixelWidth *= scale;
keyPixelHeight *= scale;
paddingwidth *= scale;
keyStyle.fontSize = (int)(keyPixelHeight * 0.8f);
if (keyStyle.fontSize < 2)
{
continue;
}
Dictionary<string, DisplayValue> displayValues = displayTransformValues[target];
int index = 0;
foreach (string key in displayValues.Keys.OrderBy(x => -displayValues[x].time))
{
keyStyle.alignment = TextAnchor.MiddleRight;
GUI.Label(new Rect(origin.x, origin.y - (index + 1) * keyPixelHeight, keyPixelWidth, keyPixelHeight), key, keyStyle);
if (displayValues[key].monitorDisplayType == MonitorType.text)
{
valueStyle.alignment = TextAnchor.MiddleLeft;
GUI.Label(new Rect(
origin.x + paddingwidth + keyPixelWidth,
origin.y - (index + 1) * keyPixelHeight,
keyPixelWidth, keyPixelHeight),
JsonConvert.SerializeObject(displayValues[key].value, Formatting.None), valueStyle);
}
else if (displayValues[key].monitorDisplayType == MonitorType.slider)
{
float sliderValue = 0f;
if (displayValues[key].value.GetType() == typeof(float))
{
sliderValue = (float)displayValues[key].value;
}
else
{
Debug.LogError(string.Format("The value for {0} could not be displayed as " +
"a slider because it is not a number.", key));
}
sliderValue = Mathf.Min(1f, sliderValue);
GUIStyle s = greenStyle;
if (sliderValue < 0)
{
sliderValue = Mathf.Min(1f, -sliderValue);
s = redStyle;
}
GUI.Box(new Rect(
origin.x + paddingwidth + keyPixelWidth,
origin.y - (index + 0.9f) * keyPixelHeight,
keyPixelWidth * sliderValue, keyPixelHeight * 0.8f),
GUIContent.none, s);
}
else if (displayValues[key].monitorDisplayType == MonitorType.hist)
{
float histWidth = 0.15f;
float[] vals = ToFloatArray(displayValues[key].value);
for (int i = 0; i < vals.Length; i++)
{
float value = Mathf.Min(vals[i], 1);
GUIStyle s = greenStyle;
if (value < 0)
{
value = Mathf.Min(1f, -value);
s = redStyle;
}
GUI.Box(new Rect(
origin.x + paddingwidth + keyPixelWidth + (keyPixelWidth * histWidth + paddingwidth / 2) * i,
origin.y - (index + 0.1f) * keyPixelHeight,
keyPixelWidth * histWidth, -keyPixelHeight * value),
GUIContent.none, s);
}
}
else if (displayValues[key].monitorDisplayType == MonitorType.bar)
{
float[] vals = ToFloatArray(displayValues[key].value);
float valsSum = 0f;
float valsCum = 0f;
foreach (float f in vals)
{
valsSum += Mathf.Max(f, 0);
}
if (valsSum == 0)
{
Debug.LogError(string.Format("The Monitor value for key {0} must be "
+ "a list or array of positive values and cannot be empty.", key));
}
else
{
for (int i = 0; i < vals.Length; i++)
{
float value = Mathf.Max(vals[i], 0) / valsSum;
GUI.Box(new Rect(
origin.x + paddingwidth + keyPixelWidth + keyPixelWidth * valsCum,
origin.y - (index + 0.9f) * keyPixelHeight,
keyPixelWidth * value, keyPixelHeight * 0.8f),
GUIContent.none, colorStyle[i % colorStyle.Length]);
valsCum += value;
}
}
}
index++;
}
}
}
private void Initialize()
{
keyStyle = GUI.skin.label;
valueStyle = GUI.skin.label;
valueStyle.clipping = TextClipping.Overflow;
valueStyle.wordWrap = false;
barColors = new Color[6]{ Color.magenta, Color.blue, Color.cyan, Color.green, Color.yellow, Color.red };
colorStyle = new GUIStyle[barColors.Length];
for (int i = 0; i < barColors.Length; i++)
{
Texture2D texture = new Texture2D(1, 1, TextureFormat.ARGB32, false);
texture.SetPixel(0, 0, barColors[i]);
texture.Apply();
GUIStyle staticRectStyle = new GUIStyle();
staticRectStyle.normal.background = texture;
colorStyle[i] = staticRectStyle;
}
greenStyle = colorStyle[3];
redStyle = colorStyle[5];
}
}

12
unity-environment/Assets/ML-Agents/Scripts/Monitor.cs.meta


fileFormatVersion: 2
guid: e59a31a1cc2f5464d9a61bef0bc9a53b
timeCreated: 1508031727
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

12
unity-environment/Assets/ML-Agents/Scripts/AgentMonitor.cs.meta


fileFormatVersion: 2
guid: e040eaa8759024abbbb14994dc4c55ee
timeCreated: 1502056030
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

9
unity-environment/Assets/ML-Agents/Resources.meta


fileFormatVersion: 2
guid: 10f3eff160a3b46fcb86042594151eae
folderAsset: yes
timeCreated: 1501551323
licenseType: Free
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant: