
Fix discrete state (#33)

* Made BrainParameters a class to set default values;
modified the error message if the state is discrete

* Add discrete state support to PPO and provide discrete state example environment

* Add flexibility to continuous control as well

* Finish PPO flexible model generation implementation

* Fix formatting

* Support color observations

* Add best practices document

* Bug fix for non-square observations

* Update Readme.md

* Remove scipy dependency

* Add installation doc
Branch: /develop-generalizationTraining-TrainerController
GitHub, 7 years ago
Current commit
aee5d336
27 files changed, with 1284 additions and 113 deletions
1. docs/Getting-Started-with-Balance-Ball.md (1 line changed)
2. docs/Readme.md (2 lines changed)
3. python/PPO.ipynb (10 lines changed)
4. python/ppo.py (5 lines changed)
5. python/ppo/models.py (202 lines changed)
6. python/ppo/trainer.py (40 lines changed)
7. unity-environment/Assets/ML-Agents/Scripts/Brain.cs (24 lines changed)
8. docs/best-practices.md (20 lines changed)
9. docs/installation.md (51 lines changed)
10. unity-environment/Assets/ML-Agents/Examples/Basic.meta (9 lines changed)
11. unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta (9 lines changed)
12. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat (76 lines changed)
13. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta (9 lines changed)
14. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat (76 lines changed)
15. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta (9 lines changed)
16. unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity (702 lines changed)
17. unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta (8 lines changed)
18. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta (9 lines changed)
19. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs (17 lines changed)
20. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta (12 lines changed)
21. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (64 lines changed)
22. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta (12 lines changed)
23. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs (18 lines changed)
24. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta (12 lines changed)

docs/Getting-Started-with-Balance-Ball.md (1 line changed)


* numpy
* Pillow
* Python (2 or 3)
- * scipy
* TensorFlow (1.0+)
### Installing Dependencies

docs/Readme.md (2 lines changed)


## Basic
* [Unity ML Agents Overview](Unity-Agents-Overview.md)
+ * [Installation & Set-up](installation.md)
+ * [Best practices when designing an Environment](best-practices.md)
* [How to organize the Scene](Organizing-the-Scene.md)
* [How to use the Python API](Unity-Agents---Python-API.md)
* [How to use TensorflowSharp inside Unity [Experimental]](Using-TensorFlow-Sharp-in-Unity-(Experimental).md)

python/PPO.ipynb (10 lines changed)


"train_model = True # Whether to train the model.\n",
"summary_freq = 10000 # Frequency at which to save training statistics.\n",
"save_freq = 50000 # Frequency at which to save model.\n",
"env_name = \"simple\" # Name of the training environment file.\n",
"env_name = \"environment\" # Name of the training environment file.\n",
"\n",
"### Algorithm-specific parameters for tuning\n",
"gamma = 0.99 # Reward discount rate.\n",

{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"env = UnityEnvironment(file_name=env_name)\n",

"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],

"\n",
"is_continuous = (env.brains[brain_name].action_space_type == \"continuous\")\n",
"use_observations = (env.brains[brain_name].number_observations > 0)\n",
"use_states = (env.brains[brain_name].state_space_size > 0)\n",
"\n",
"model_path = './models/{}'.format(run_path)\n",
"summary_path = './summaries/{}'.format(run_path)\n",

" steps = sess.run(ppo_model.global_step)\n",
" summary_writer = tf.summary.FileWriter(summary_path)\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)\n",
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)\n",
" while steps <= max_steps:\n",
" if env.global_done:\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",

python/ppo.py (5 lines changed)


Options:
--help Show this message.
- --max-steps=<n> Maximum number of steps to run environment [default: 5e6].
+ --max-steps=<n> Maximum number of steps to run environment [default: 1e6].
--run-path=<path> The sub-directory name for model and summary statistics [default: ppo].
--load Whether to load the model or randomly initialize [default: False].
--train Whether to train model, or only run inference [default: True].

is_continuous = (env.brains[brain_name].action_space_type == "continuous")
use_observations = (env.brains[brain_name].number_observations > 0)
use_states = (env.brains[brain_name].state_space_size > 0)
if not os.path.exists(model_path):
os.makedirs(model_path)

steps = sess.run(ppo_model.global_step)
summary_writer = tf.summary.FileWriter(summary_path)
info = env.reset(train_mode=train_model)[brain_name]
- trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)
+ trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)
while steps <= max_steps or not train_model:
if env.global_done:
info = env.reset(train_mode=train_model)[brain_name]
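Read together, the fragments above give ppo.py the following overall shape. This is a condensed sketch for orientation, not the verbatim file; the experience-processing and update steps are elided behind a comment:

```python
# Condensed sketch of the ppo.py main loop (illustrative, not the verbatim file).
steps = sess.run(ppo_model.global_step)
info = env.reset(train_mode=train_model)[brain_name]
trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)
while steps <= max_steps or not train_model:
    if env.global_done:
        info = env.reset(train_mode=train_model)[brain_name]
    # Collect one step of experience; returns the new BrainInfo.
    info = trainer.take_action(info, env, brain_name)
    # ... experience processing and PPO updates advance ppo_model.global_step ...
    steps = sess.run(ppo_model.global_step)
```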

python/ppo/models.py (202 lines changed)


def create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=5e6):
"""
- Takes a Unity environment and model-specific hyperparameters and returns the
+ Takes a Unity environment and model-specific hyper-parameters and returns the
appropriate PPO agent model for the environment.
:param env: a Unity environment.
:param lr: Learning rate.

:param max_step: Total number of training steps.
:return: a sub-class of PPOAgent tailored to the environment.
-    if env.brains[brain_name].action_space_type == "continuous":
-        if env.brains[brain_name].number_observations == 0:
-            return ContinuousControlModel(lr, env.brains[brain_name].state_space_size,
-                                          env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step)
-        else:
-            raise UnityEnvironmentException("There is currently no PPO model which supports both a continuous "
-                                            "action space and camera observations.")
-    if env.brains[brain_name].action_space_type == "discrete":
-        if env.brains[brain_name].number_observations == 0:
-            return DiscreteControlModel(lr, env.brains[brain_name].state_space_size,
-                                        env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step)
-        else:
-            brain = env.brains[brain_name]
-            if env.brains[brain_name].state_space_size > 0:
-                print("This brain contains agents with both observations and states. There is currently no PPO model"
-                      "which supports this. Defaulting to Vision-based PPO model.")
-            h, w = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height']
-            return VisualDiscreteControlModel(lr, h, w, env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step)
+    brain = env.brains[brain_name]
+    if brain.action_space_type == "continuous":
+        return ContinuousControlModel(lr, brain, h_size, epsilon, max_step)
+    if brain.action_space_type == "discrete":
+        return DiscreteControlModel(lr, brain, h_size, epsilon, beta, max_step)
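With the dispatch reduced to the brain object, building a model from a launched environment looks like this. A minimal usage sketch, assuming a built environment binary named `environment` (the hyperparameter values are the defaults from the signature above):

```python
# Usage sketch for the refactored factory (the file name is a placeholder).
from unityagents import UnityEnvironment
from ppo.models import create_agent_model

env = UnityEnvironment(file_name="environment")
# Inspects env.brains[...] and returns a ContinuousControlModel or
# DiscreteControlModel wired for states, observations, or both.
model = create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2,
                           beta=1e-3, max_step=5e6)
```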
def save_model(sess, saver, model_path="./", steps=0):

class PPOModel(object):
def __init__(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
def create_visual_encoder(self, o_size_h, o_size_w, bw, h_size, num_streams, activation):
"""
Builds a set of visual (CNN) encoders.
:param o_size_h: Height observation size.
:param o_size_w: Width observation size.
:param bw: Whether image is greyscale {True} or color {False}.
:param h_size: Hidden layer size.
:param num_streams: Number of visual streams to construct.
:param activation: What type of activation function to use for layers.
:return: List of hidden layer tensors.
"""
if bw:
c_channels = 1
else:
c_channels = 3
self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32,
name='observation_0')
streams = []
for i in range(num_streams):
self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=activation)
self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=activation)
hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=activation)
streams.append(hidden)
return streams
def create_continuous_state_encoder(self, s_size, h_size, num_streams, activation):
"""
Builds a set of hidden state encoders.
:param s_size: state input size.
:param h_size: Hidden layer size.
:param num_streams: Number of state streams to construct.
:param activation: What type of activation function to use for layers.
:return: List of hidden layer tensors.
"""
self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
streams = []
for i in range(num_streams):
hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=activation)
hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=activation)
streams.append(hidden_2)
return streams
def create_discrete_state_encoder(self, s_size, h_size, num_streams, activation):
"""
Builds a set of hidden state encoders from discrete state input.
:param s_size: state input size (discrete).
:param h_size: Hidden layer size.
:param num_streams: Number of state streams to construct.
:param activation: What type of activation function to use for layers.
:return: List of hidden layer tensors.
"""
self.state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state')
state_in = tf.reshape(self.state_in, [-1])
state_onehot = c_layers.one_hot_encoding(state_in, s_size)
streams = []
for i in range(num_streams):
hidden = tf.layers.dense(state_onehot, h_size, use_bias=False, activation=activation)
streams.append(hidden)
return streams
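Concretely, the discrete encoder turns the single integer state into an `s_size`-wide one-hot vector before the dense layer. A small numpy illustration of the reshape-then-one-hot step (numpy stands in for the TensorFlow ops):

```python
import numpy as np

s_size = 20                       # number of possible discrete states
state_in = np.array([[3], [7]])   # batch of shape [batch, 1], as in the placeholder
flat = state_in.reshape(-1)       # equivalent of tf.reshape(self.state_in, [-1])
one_hot = np.eye(s_size)[flat]    # equivalent of c_layers.one_hot_encoding(state_in, s_size)
print(one_hot.shape)              # (2, 20); row 0 is 1.0 at index 3, row 1 at index 7
```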
def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
"""
Creates training-specific Tensorflow ops for PPO models.
:param probs: Current policy probabilities

:param entropy: Current policy entropy
:param epsilon: Value for policy-divergence threshold
:param lr: Learning rate
:param max_step: Total number of training steps.
"""
self.returns_holder = tf.placeholder(shape=[None], dtype=tf.float32, name='discounted_rewards')
self.advantage = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='advantages')

class ContinuousControlModel(PPOModel):
-    def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step):
+    def __init__(self, lr, brain, h_size, epsilon, max_step):
-        :param s_size: State-space size
-        :param a_size: Action-space size
+        :param brain: Parameters of the brain the model is built for
-        self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
+        s_size = brain.state_space_size
+        a_size = brain.action_space_size
+        hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None
+        if brain.number_observations > 0:
+            height, width = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']
+            bw = brain.camera_resolutions[0]['blackAndWhite']
+            hidden_visual = self.create_visual_encoder(height, width, bw, h_size, 2, tf.nn.tanh)
+        if brain.state_space_size > 0:
+            s_size = brain.state_space_size
+            if brain.state_space_type == "continuous":
+                hidden_state = self.create_continuous_state_encoder(s_size, h_size, 2, tf.nn.tanh)
+            else:
+                hidden_state = self.create_discrete_state_encoder(s_size, h_size, 2, tf.nn.tanh)
+        if hidden_visual is None and hidden_state is None:
+            raise Exception("No valid network configuration possible. "
+                            "There are no states or observations in this brain")
+        elif hidden_visual is not None and hidden_state is None:
+            hidden_policy, hidden_value = hidden_visual
+        elif hidden_visual is None and hidden_state is not None:
+            hidden_policy, hidden_value = hidden_state
+        elif hidden_visual is not None and hidden_state is not None:
+            hidden_policy = tf.concat([hidden_visual[0], hidden_state[0]], axis=1)
+            hidden_value = tf.concat([hidden_visual[1], hidden_state[1]], axis=1)
-        hidden_policy = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.tanh)
-        hidden_value = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.tanh)
-        hidden_policy_2 = tf.layers.dense(hidden_policy, h_size, use_bias=False, activation=tf.nn.tanh)
-        hidden_value_2 = tf.layers.dense(hidden_value, h_size, use_bias=False, activation=tf.nn.tanh)
-        self.mu = tf.layers.dense(hidden_policy_2, a_size, activation=None, use_bias=False,
+        self.mu = tf.layers.dense(hidden_policy, a_size, activation=None, use_bias=False,
                                   kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
        self.log_sigma_sq = tf.Variable(tf.zeros([a_size]))
        self.sigma_sq = tf.exp(self.log_sigma_sq)

        self.entropy = tf.reduce_sum(0.5 * tf.log(2 * np.pi * np.e * self.sigma_sq))
-        self.value = tf.layers.dense(hidden_value_2, 1, activation=None, use_bias=False)
+        self.value = tf.layers.dense(hidden_value, 1, activation=None, use_bias=False)
-        PPOModel.__init__(self, self.probs, self.old_probs, self.value, self.entropy, 0.0, epsilon, lr, max_step)
+        self.create_ppo_optimizer(self.probs, self.old_probs, self.value, self.entropy, 0.0, epsilon, lr, max_step)
-    def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step):
+    def __init__(self, lr, brain, h_size, epsilon, beta, max_step):
-        :param s_size: State-space size
-        :param a_size: Action-space size
+        :param brain: Parameters of the brain the model is built for
-        self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.elu)
hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=tf.nn.elu)
self.policy = tf.layers.dense(hidden_2, a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
self.probs = tf.nn.softmax(self.policy)
self.action = tf.multinomial(self.policy, 1)
self.output = tf.identity(self.action, name='action')
self.value = tf.layers.dense(hidden_2, 1, activation=None, use_bias=False)
hidden_state, hidden_visual, hidden = None, None, None
        if brain.number_observations > 0:
            height, width = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']
            bw = brain.camera_resolutions[0]['blackAndWhite']
            hidden_visual = self.create_visual_encoder(height, width, bw, h_size, 1, tf.nn.elu)[0]
if brain.state_space_size > 0:
s_size = brain.state_space_size
if brain.state_space_type == "continuous":
hidden_state = self.create_continuous_state_encoder(s_size, h_size, 1, tf.nn.elu)[0]
else:
hidden_state = self.create_discrete_state_encoder(s_size, h_size, 1, tf.nn.elu)[0]
self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)
self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size)
self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32, name='old_probabilities')
self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs,
self.value, self.entropy, beta, epsilon, lr, max_step)
if hidden_visual is None and hidden_state is None:
raise Exception("No valid network configuration possible. "
"There are no states or observations in this brain")
elif hidden_visual is not None and hidden_state is None:
hidden = hidden_visual
elif hidden_visual is None and hidden_state is not None:
hidden = hidden_state
elif hidden_visual is not None and hidden_state is not None:
hidden = tf.concat([hidden_visual, hidden_state], axis=1)
a_size = brain.action_space_size
class VisualDiscreteControlModel(PPOModel):
def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta, max_step):
"""
Creates Discrete Control Actor-Critic model for use with visual observations (images).
:param o_size_h: Observation height.
:param o_size_w: Observation width.
:param a_size: Action-space size.
:param h_size: Hidden layer size.
"""
self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32,
name='observation_0')
self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=tf.nn.elu)
self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=tf.nn.elu)
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32)
hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=tf.nn.elu)
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
self.policy = tf.layers.dense(hidden, a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
self.probs = tf.nn.softmax(self.policy)

self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
-        PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs,
-                          self.value, self.entropy, beta, epsilon, lr, max_step)
+        self.create_ppo_optimizer(self.responsible_probs, self.old_responsible_probs,
+                                  self.value, self.entropy, beta, epsilon, lr, max_step)

python/ppo/trainer.py (40 lines changed)


class Trainer(object):
-    def __init__(self, ppo_model, sess, info, is_continuous, use_observations):
+    def __init__(self, ppo_model, sess, info, is_continuous, use_observations, use_states):
"""
Responsible for collecting experiences and training the PPO model.
:param ppo_model: Tensorflow graph defining model.

self.is_continuous = is_continuous
self.use_observations = use_observations
self.use_states = use_states
def take_action(self, info, env, brain_name):
"""

:return: BrainInfo corresponding to new environment state.
"""
        epsi = None
+        feed_dict = {self.model.batch_size: len(info.states)}
-            feed_dict = {self.model.state_in: info.states, self.model.batch_size: len(info.states),
-                         self.model.epsilon: epsi}
-        elif self.use_observations:
-            feed_dict = {self.model.observation_in: np.vstack(info.observations),
-                         self.model.batch_size: len(info.states)}
-        else:
-            feed_dict = {self.model.state_in: info.states, self.model.batch_size: len(info.states)}
+            feed_dict[self.model.epsilon] = epsi
+        if self.use_observations:
+            feed_dict[self.model.observation_in] = np.vstack(info.observations)
+        if self.use_states:
+            feed_dict[self.model.state_in] = info.states
actions, a_dist, value, ent, learn_rate = self.sess.run([self.model.output, self.model.probs,
self.model.value, self.model.entropy,
self.model.learning_rate],

            if not info.local_done[idx]:
                if self.use_observations:
                    history['observations'].append(info.observations[idx])
-                else:
-                    history['states'].append(info.states[idx])
+                if self.use_states:
+                    history['states'].append(info.states[idx])
                if self.is_continuous:
                    history['epsilons'].append(epsi[idx])
                history['value_estimates'].append(value[idx][0])
                history['cumulative_reward'] += next_info.rewards[idx]
                history['episode_steps'] += 1

            if info.local_done[l]:
                value_next = 0.0
            else:
+                feed_dict = {self.model.batch_size: len(info.states)}
                if self.use_observations:
-                    feed_dict = {self.model.observation_in: np.vstack(info.observations),
-                                 self.model.batch_size: len(info.states)}
-                else:
-                    feed_dict = {self.model.state_in: info.states,
-                                 self.model.batch_size: len(info.states)}
+                    feed_dict[self.model.observation_in] = np.vstack(info.observations)
+                if self.use_states:
+                    feed_dict[self.model.state_in] = info.states
                value_next = self.sess.run(self.model.value, feed_dict)[l]
history = vectorize_history(self.history_dict[info.agents[l]])
history['advantages'] = get_gae(rewards=history['rewards'],

                             self.model.old_probs: np.vstack(training_buffer['action_probs'][start:end])}
                if self.is_continuous:
                    feed_dict[self.model.epsilon] = np.vstack(training_buffer['epsilons'][start:end])
-                    feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end])
-                if self.use_observations:
-                    feed_dict[self.model.observation_in] = np.vstack(training_buffer['observations'][start:end])
-                else:
-                    feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end])
+                if self.use_states:
+                    feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end])
+                if self.use_observations:
+                    feed_dict[self.model.observation_in] = np.vstack(training_buffer['observations'][start:end])
v_loss, p_loss, _ = self.sess.run([self.model.value_loss, self.model.policy_loss,
self.model.update_batch], feed_dict=feed_dict)
total_v += v_loss
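The pattern behind all three rewritten sites is the same: start from the inputs every model consumes, then extend the dictionary per optional input, so a brain with both camera observations and states feeds both placeholders. A condensed sketch of that pattern (the helper name is hypothetical; the Trainer inlines this logic):

```python
import numpy as np

def build_feed_dict(model, info, is_continuous, use_observations, use_states, epsi=None):
    """Hypothetical helper illustrating the composable feed_dict pattern."""
    feed_dict = {model.batch_size: len(info.states)}
    if is_continuous:
        feed_dict[model.epsilon] = epsi  # exploration noise for continuous actions
    if use_observations:
        feed_dict[model.observation_in] = np.vstack(info.observations)
    if use_states:
        feed_dict[model.state_in] = info.states
    return feed_dict
```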

unity-environment/Assets/ML-Agents/Scripts/Brain.cs (24 lines changed)


* Defines brain-specific parameters
*/
[System.Serializable]
-public struct BrainParameters
+public class BrainParameters
-    public int stateSize;
+    public int stateSize = 1;
-    public int actionSize;
+    public int actionSize = 1;
-    public int memorySize;
+    public int memorySize = 0;
    /**< \brief The length of the float vector that holds the memory for the agent */
    public resolution[] cameraResolutions;
    /**<\brief The list of observation resolutions for the brain */

-    public StateType actionSpaceType;
+    public StateType actionSpaceType = StateType.discrete;
-    public StateType stateSpaceType;
+    public StateType stateSpaceType = StateType.continuous;
}
/**

*/
public class Brain : MonoBehaviour
{
-    public BrainParameters brainParameters;
+    public BrainParameters brainParameters = new BrainParameters();
/**< \brief Defines brain specific parameters such as the state size*/
public BrainType brainType;
/**< \brief Defines what is the type of the brain :

        foreach (KeyValuePair<int, Agent> idAgent in agents)
        {
            List<float> states = idAgent.Value.CollectState();
-            if (states.Count != brainParameters.stateSize)
+            if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous))
            {
                throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
-Was expecting {1} states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count));
+Was expecting {1} continuous states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count));
            }
+            if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete))
+            {
+                throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
+Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count));
+            }
            result.Add(idAgent.Key, states);
        }

docs/best-practices.md (20 lines changed)


# Environment Design Best Practices
## General
* It is often helpful to begin with the simplest version of the problem, to ensure the agent can learn it. From there, increase
complexity over time.
* When possible, it is often helpful to ensure that you can complete the task by using a Player Brain to control the agent.
## Rewards
* The magnitude of any given reward should typically not be greater than 1.0 in order to ensure a more stable learning process.
* Positive rewards are often more helpful to shaping the desired behavior of an agent than negative rewards.
* For locomotion tasks, a small positive reward (+0.1) for forward progress is typically used.
* If you want the agent to finish a task quickly, it is often helpful to provide a small penalty every step (-0.1).
## States
* The magnitude of each state variable should be normalized to around 1.0.
* States should include all variables relevant to allowing the agent to make an optimally informed decision.
* Categorical state variables such as type of object (Sword, Shield, Bow) should be encoded in one-hot fashion (i.e. `3` -> `0, 0, 1`); see the sketch after this list.
## Actions
* When using continuous control, action values should be clipped to an appropriate range.
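As an illustration of the one-hot suggestion above, shown in Python for brevity (the `categories` values are a hypothetical example):

```python
# One-hot encoding for a categorical state variable with three categories:
# "Sword" -> [1, 0, 0], "Shield" -> [0, 1, 0], "Bow" -> [0, 0, 1].
categories = ["Sword", "Shield", "Bow"]

def one_hot(item):
    return [1.0 if c == item else 0.0 for c in categories]

print(one_hot("Bow"))  # [0.0, 0.0, 1.0]
```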

docs/installation.md (51 lines changed)


# Installation & Set-up
## Install **Unity 2017.1** or later (required)
Download link available [here](https://store.unity.com/download?ref=update).
## Clone the repository
Once installed, you will want to clone the Agents GitHub repository. References will be made
throughout to `unity-environment` and `python` directories. Both are located at the root of the repository.
## Installing Python API
In order to train an agent within the framework, you will need to install Python 2 or 3, and the dependencies described below.
### Windows Users
If you are a Windows user who is new to Python/TensorFlow, follow [this guide](https://nitishmutha.github.io/tensorflow/2017/01/22/TensorFlow-with-gpu-for-windows.html) to set up your Python environment.
### Requirements
* Jupyter
* Matplotlib
* numpy
* Pillow
* Python (2 or 3)
* docopt (Training)
* TensorFlow (1.0+) (Training)
### Installing Dependencies
To install dependencies, go into the `python` directory and run (depending on your python version):
`pip install .`
or
`pip3 install .`
If your Python environment doesn't include `pip`, see these [instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers) on installing it.
Once the requirements are successfully installed, the next step is to check out the [Getting Started guide](Getting-Started-with-Balance-Ball.md).
## Installation Help
### Using Jupyter Notebook
For a walkthrough of how to use Jupyter notebook, see [here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/execute.html).
### General Issues
If you run into issues while attempting to install and run Unity ML Agents, see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Limitations-&-Common-Issues.md) for a list of common issues and solutions.
If you have an issue that isn't covered here, feel free to contact us at ml-agents@unity3d.com. Alternatively, feel free to create an issue on the repository.
Be sure to include relevant information on OS, Python version, and exact error message if possible.

unity-environment/Assets/ML-Agents/Examples/Basic.meta (9 lines changed)


fileFormatVersion: 2
guid: 230c334ab2f144bcda6eea42d18ebdc8
folderAsset: yes
timeCreated: 1506189168
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta (9 lines changed)


fileFormatVersion: 2
guid: 0f9b2a7b3f61045b8a791eeae8175dc5
folderAsset: yes
timeCreated: 1506189694
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat (76 lines changed)


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!21 &2100000
Material:
serializedVersion: 6
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_Name: agent
m_Shader: {fileID: 46, guid: 0000000000000000f000000000000000, type: 0}
m_ShaderKeywords:
m_LightmapFlags: 4
m_EnableInstancingVariants: 0
m_DoubleSidedGI: 0
m_CustomRenderQueue: -1
stringTagMap: {}
disabledShaderPasses: []
m_SavedProperties:
serializedVersion: 3
m_TexEnvs:
- _BumpMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailAlbedoMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailMask:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailNormalMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _EmissionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MainTex:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MetallicGlossMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _OcclusionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _ParallaxMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
m_Floats:
- _BumpScale: 1
- _Cutoff: 0.5
- _DetailNormalMapScale: 1
- _DstBlend: 0
- _GlossMapScale: 1
- _Glossiness: 0.5
- _GlossyReflections: 1
- _Metallic: 0
- _Mode: 0
- _OcclusionStrength: 1
- _Parallax: 0.02
- _SmoothnessTextureChannel: 0
- _SpecularHighlights: 1
- _SrcBlend: 1
- _UVSec: 0
- _ZWrite: 1
m_Colors:
- _Color: {r: 0.10980392, g: 0.6039216, b: 1, a: 0.8392157}
- _EmissionColor: {r: 0, g: 0, b: 0, a: 1}

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta (9 lines changed)


fileFormatVersion: 2
guid: 260483cdfc6b14e26823a02f23bd8baa
timeCreated: 1506189720
licenseType: Pro
NativeFormatImporter:
mainObjectFileID: 2100000
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat (76 lines changed)


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!21 &2100000
Material:
serializedVersion: 6
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_Name: goal
m_Shader: {fileID: 46, guid: 0000000000000000f000000000000000, type: 0}
m_ShaderKeywords:
m_LightmapFlags: 4
m_EnableInstancingVariants: 0
m_DoubleSidedGI: 0
m_CustomRenderQueue: -1
stringTagMap: {}
disabledShaderPasses: []
m_SavedProperties:
serializedVersion: 3
m_TexEnvs:
- _BumpMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailAlbedoMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailMask:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailNormalMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _EmissionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MainTex:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MetallicGlossMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _OcclusionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _ParallaxMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
m_Floats:
- _BumpScale: 1
- _Cutoff: 0.5
- _DetailNormalMapScale: 1
- _DstBlend: 0
- _GlossMapScale: 1
- _Glossiness: 0.5
- _GlossyReflections: 1
- _Metallic: 0
- _Mode: 0
- _OcclusionStrength: 1
- _Parallax: 0.02
- _SmoothnessTextureChannel: 0
- _SpecularHighlights: 1
- _SrcBlend: 1
- _UVSec: 0
- _ZWrite: 1
m_Colors:
- _Color: {r: 0.5058824, g: 0.74509805, b: 0.25490198, a: 1}
- _EmissionColor: {r: 0, g: 0, b: 0, a: 1}

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta (9 lines changed)


fileFormatVersion: 2
guid: 624b24bbec31f44babfb57ef2dfbc537
timeCreated: 1506189863
licenseType: Pro
NativeFormatImporter:
mainObjectFileID: 2100000
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity (702 lines changed)


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!29 &1
OcclusionCullingSettings:
m_ObjectHideFlags: 0
serializedVersion: 2
m_OcclusionBakeSettings:
smallestOccluder: 5
smallestHole: 0.25
backfaceThreshold: 100
m_SceneGUID: 00000000000000000000000000000000
m_OcclusionCullingData: {fileID: 0}
--- !u!104 &2
RenderSettings:
m_ObjectHideFlags: 0
serializedVersion: 8
m_Fog: 0
m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
m_FogMode: 3
m_FogDensity: 0.01
m_LinearFogStart: 0
m_LinearFogEnd: 300
m_AmbientSkyColor: {r: 0.212, g: 0.227, b: 0.259, a: 1}
m_AmbientEquatorColor: {r: 0.114, g: 0.125, b: 0.133, a: 1}
m_AmbientGroundColor: {r: 0.047, g: 0.043, b: 0.035, a: 1}
m_AmbientIntensity: 1
m_AmbientMode: 0
m_SubtractiveShadowColor: {r: 0.42, g: 0.478, b: 0.627, a: 1}
m_SkyboxMaterial: {fileID: 10304, guid: 0000000000000000f000000000000000, type: 0}
m_HaloStrength: 0.5
m_FlareStrength: 1
m_FlareFadeSpeed: 3
m_HaloTexture: {fileID: 0}
m_SpotCookie: {fileID: 10001, guid: 0000000000000000e000000000000000, type: 0}
m_DefaultReflectionMode: 0
m_DefaultReflectionResolution: 128
m_ReflectionBounces: 1
m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0
serializedVersion: 11
m_GIWorkflowMode: 1
m_GISettings:
serializedVersion: 2
m_BounceScale: 1
m_IndirectOutputScale: 1
m_AlbedoBoost: 1
m_TemporalCoherenceThreshold: 1
m_EnvironmentLightingMode: 0
m_EnableBakedLightmaps: 1
m_EnableRealtimeLightmaps: 1
m_LightmapEditorSettings:
serializedVersion: 9
m_Resolution: 2
m_BakeResolution: 40
m_TextureWidth: 1024
m_TextureHeight: 1024
m_AO: 0
m_AOMaxDistance: 1
m_CompAOExponent: 1
m_CompAOExponentDirect: 0
m_Padding: 2
m_LightmapParameters: {fileID: 0}
m_LightmapsBakeMode: 1
m_TextureCompression: 1
m_FinalGather: 0
m_FinalGatherFiltering: 1
m_FinalGatherRayCount: 256
m_ReflectionCompression: 2
m_MixedBakeMode: 2
m_BakeBackend: 0
m_PVRSampling: 1
m_PVRDirectSampleCount: 32
m_PVRSampleCount: 500
m_PVRBounces: 2
m_PVRFiltering: 0
m_PVRFilteringMode: 1
m_PVRCulling: 1
m_PVRFilteringGaussRadiusDirect: 1
m_PVRFilteringGaussRadiusIndirect: 5
m_PVRFilteringGaussRadiusAO: 2
m_PVRFilteringAtrousColorSigma: 1
m_PVRFilteringAtrousNormalSigma: 1
m_PVRFilteringAtrousPositionSigma: 1
m_LightingDataAsset: {fileID: 0}
m_UseShadowmask: 1
--- !u!196 &4
NavMeshSettings:
serializedVersion: 2
m_ObjectHideFlags: 0
m_BuildSettings:
serializedVersion: 2
agentTypeID: 0
agentRadius: 0.5
agentHeight: 2
agentSlope: 45
agentClimb: 0.4
ledgeDropHeight: 0
maxJumpAcrossDistance: 0
minRegionArea: 2
manualCellSize: 0
cellSize: 0.16666667
manualTileSize: 0
tileSize: 256
accuratePlacement: 0
m_NavMeshData: {fileID: 0}
--- !u!1 &282272644
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 282272648}
- component: {fileID: 282272647}
- component: {fileID: 282272646}
- component: {fileID: 282272645}
- component: {fileID: 282272649}
m_Layer: 0
m_Name: Agent
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!23 &282272645
MeshRenderer:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_Materials:
- {fileID: 2100000, guid: 260483cdfc6b14e26823a02f23bd8baa, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &282272646
BoxCollider:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!33 &282272647
MeshFilter:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!4 &282272648
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &282272649
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 624480a72e46148118ab2e2d89b537de, type: 3}
m_Name:
m_EditorClassIdentifier:
brain: {fileID: 846768605}
observations: []
maxStep: 0
resetOnDone: 1
reward: 0
done: 0
value: 0
CummulativeReward: 0
stepCounter: 0
agentStoredAction: []
memory: []
id: 0
position: 0
smallGoalPosition: -3
largeGoalPosition: 7
largeGoal: {fileID: 984725368}
smallGoal: {fileID: 1178588871}
minPosition: -10
maxPosition: 10
--- !u!114 &395380616
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)
m_EditorClassIdentifier:
brain: {fileID: 0}
--- !u!114 &577874698
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)
m_EditorClassIdentifier:
continuousPlayerActions: []
discretePlayerActions:
- key: 97
value: 0
- key: 100
value: 1
defaultAction: -1
brain: {fileID: 846768605}
--- !u!1 &762086410
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 762086412}
- component: {fileID: 762086411}
m_Layer: 0
m_Name: Directional Light
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!108 &762086411
Light:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 762086410}
m_Enabled: 1
serializedVersion: 8
m_Type: 1
m_Color: {r: 1, g: 0.95686275, b: 0.8392157, a: 1}
m_Intensity: 1
m_Range: 10
m_SpotAngle: 30
m_CookieSize: 10
m_Shadows:
m_Type: 2
m_Resolution: -1
m_CustomResolution: -1
m_Strength: 1
m_Bias: 0.05
m_NormalBias: 0.4
m_NearPlane: 0.2
m_Cookie: {fileID: 0}
m_DrawHalo: 0
m_Flare: {fileID: 0}
m_RenderMode: 0
m_CullingMask:
serializedVersion: 2
m_Bits: 4294967295
m_Lightmapping: 4
m_AreaSize: {x: 1, y: 1}
m_BounceIntensity: 1
m_ColorTemperature: 6570
m_UseColorTemperature: 0
m_ShadowRadius: 0
m_ShadowAngle: 0
--- !u!4 &762086412
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 762086410}
m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261}
m_LocalPosition: {x: 0, y: 3, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0}
--- !u!1 &846768603
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 846768604}
- component: {fileID: 846768605}
m_Layer: 0
m_Name: Brain
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &846768604
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 846768603}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 1574236049}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &846768605
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 846768603}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: c676a8ddf5a5f4f64b35e9ed5028679d, type: 3}
m_Name:
m_EditorClassIdentifier:
brainParameters:
stateSize: 1
actionSize: 2
memorySize: 0
cameraResolutions: []
actionDescriptions:
- Left
- Right
actionSpaceType: 0
stateSpaceType: 0
brainType: 0
CoreBrains:
- {fileID: 577874698}
- {fileID: 395380616}
- {fileID: 1503497339}
instanceID: 10208
--- !u!1 &984725368
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 984725372}
- component: {fileID: 984725371}
- component: {fileID: 984725370}
- component: {fileID: 984725369}
m_Layer: 0
m_Name: largeGoal
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!23 &984725369
MeshRenderer:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_Materials:
- {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!135 &984725370
SphereCollider:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Radius: 0.5
m_Center: {x: 0, y: 0, z: 0}
--- !u!33 &984725371
MeshFilter:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_Mesh: {fileID: 10207, guid: 0000000000000000e000000000000000, type: 0}
--- !u!4 &984725372
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &1178588871
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 1178588875}
- component: {fileID: 1178588874}
- component: {fileID: 1178588873}
- component: {fileID: 1178588872}
m_Layer: 0
m_Name: smallGoal
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!23 &1178588872
MeshRenderer:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_Materials:
- {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!135 &1178588873
SphereCollider:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Radius: 0.5
m_Center: {x: 0, y: 0, z: 0}
--- !u!33 &1178588874
MeshFilter:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_Mesh: {fileID: 10207, guid: 0000000000000000e000000000000000, type: 0}
--- !u!4 &1178588875
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 0.5, y: 0.5, z: 0.5}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 5
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1503497339
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)
m_EditorClassIdentifier:
brain: {fileID: 846768605}
--- !u!1 &1574236047
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 1574236049}
- component: {fileID: 1574236048}
m_Layer: 0
m_Name: Academy
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!114 &1574236048
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1574236047}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 19276d4dc78ee49f1ba258293f17636c, type: 3}
m_Name:
m_EditorClassIdentifier:
maxSteps: 0
frameToSkip: 0
waitTime: 0.5
trainingConfiguration:
width: 80
height: 80
qualityLevel: 1
timeScale: 100
targetFrameRate: 60
inferenceConfiguration:
width: 1280
height: 720
qualityLevel: 5
timeScale: 1
targetFrameRate: 60
defaultResetParameters: []
done: 0
episodeCount: 1
currentStep: 0
isInference: 0
windowResize: 0
--- !u!4 &1574236049
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1574236047}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0.71938086, y: 0.27357092, z: 4.1970553}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 846768604}
m_Father: {fileID: 0}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &1715640920
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 1715640925}
- component: {fileID: 1715640924}
- component: {fileID: 1715640923}
- component: {fileID: 1715640922}
- component: {fileID: 1715640921}
m_Layer: 0
m_Name: Main Camera
m_TagString: MainCamera
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!81 &1715640921
AudioListener:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!124 &1715640922
Behaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!92 &1715640923
Behaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!20 &1715640924
Camera:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
serializedVersion: 2
m_ClearFlags: 2
m_BackGroundColor: {r: 0.7411765, g: 0.7411765, b: 0.7529412, a: 0}
m_NormalizedViewPortRect:
serializedVersion: 2
x: 0
y: 0
width: 1
height: 1
near clip plane: 0.3
far clip plane: 1000
field of view: 60
orthographic: 0
orthographic size: 5
m_Depth: -1
m_CullingMask:
serializedVersion: 2
m_Bits: 4294967295
m_RenderingPath: -1
m_TargetTexture: {fileID: 0}
m_TargetDisplay: 0
m_TargetEye: 3
m_HDR: 1
m_AllowMSAA: 1
m_ForceIntoRT: 0
m_OcclusionCulling: 1
m_StereoConvergence: 10
m_StereoSeparation: 0.022
m_StereoMirrorMode: 0
--- !u!4 &1715640925
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 1, z: -10}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}

unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta (8 lines changed)


fileFormatVersion: 2
guid: cf1d119a8748d406e90ecb623b45f92f
timeCreated: 1504127824
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta (9 lines changed)


fileFormatVersion: 2
guid: fbcbd038eb29041f580c463e454e10fc
folderAsset: yes
timeCreated: 1503355437
licenseType: Free
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs (17 lines changed)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class BasicAcademy : Academy {

    public override void AcademyReset()
    {
    }

    public override void AcademyStep()
    {
    }
}

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta (12 lines changed)


fileFormatVersion: 2
guid: 19276d4dc78ee49f1ba258293f17636c
timeCreated: 1503355437
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (64 lines changed)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class BasicAgent : Agent
{
    public int position;
    public int smallGoalPosition;
    public int largeGoalPosition;
    public GameObject largeGoal;
    public GameObject smallGoal;
    public int minPosition;
    public int maxPosition;

    public override List<float> CollectState()
    {
        List<float> state = new List<float>();
        state.Add(position);
        return state;
    }

    public override void AgentStep(float[] act)
    {
        float movement = act[0];
        int direction = 0;
        if (movement == 0) { direction = -1; }
        if (movement == 1) { direction = 1; }
        position += direction;
        if (position < minPosition) { position = minPosition; }
        if (position > maxPosition) { position = maxPosition; }
        gameObject.transform.position = new Vector3(position, 0f, 0f);
        if (position == smallGoalPosition)
        {
            done = true;
            reward = 0.1f;
        }
        if (position == largeGoalPosition)
        {
            done = true;
            reward = 1f;
        }
    }

    public override void AgentReset()
    {
        position = 0;
        minPosition = -10;
        maxPosition = 10;
        smallGoalPosition = -3;
        largeGoalPosition = 7;
        smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f);
        largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f);
    }

    public override void AgentOnDone()
    {
    }
}
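Seen from the Python API, this agent exposes a single-integer state and two discrete actions. A hedged interaction sketch, where the environment file name is a placeholder for a build of this scene:

```python
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="basic")  # placeholder name for a Basic scene build
brain_name = env.brain_names[0]
info = env.reset(train_mode=False)[brain_name]
print(info.states)                  # e.g. [[0.0]] -- the agent's position
info = env.step([1])[brain_name]    # action 1 moves the agent right (action 0: left)
```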

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta (12 lines changed)


fileFormatVersion: 2
guid: 624480a72e46148118ab2e2d89b537de
timeCreated: 1503355437
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs (18 lines changed)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class BasicDecision : MonoBehaviour, Decision {

    public float[] Decide(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
    {
        return default(float[]);
    }

    public float[] MakeMemory(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
    {
        return default(float[]);
    }
}

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta (12 lines changed)


fileFormatVersion: 2
guid: 99399d2439f894b149d8e67b85b6e07a
timeCreated: 1503355437
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant: