
Fix discrete state (#33)

* Made BrainParameters a class to set default values;
modified the error message if the state is discrete

* Add discrete state support to PPO and provide discrete state example environment

* Add flexibility to continuous control as well

* Finish PPO flexible model generation implementation

* Fix formatting

* Support color observations

* Add best practices document

* Bug fix for non-square observations

* Update Readme.md

* Remove scipy dependency

* Add installation doc
Branch: /develop-generalizationTraining-TrainerController
GitHub, 7 years ago
Current commit
aee5d336
27 files changed, with 1284 additions and 113 deletions
1. docs/Getting-Started-with-Balance-Ball.md (1 line changed)
2. docs/Readme.md (2 lines changed)
3. python/PPO.ipynb (10 lines changed)
4. python/ppo.py (5 lines changed)
5. python/ppo/models.py (202 lines changed)
6. python/ppo/trainer.py (40 lines changed)
7. unity-environment/Assets/ML-Agents/Scripts/Brain.cs (24 lines changed)
8. docs/best-practices.md (20 lines changed)
9. docs/installation.md (51 lines changed)
10. unity-environment/Assets/ML-Agents/Examples/Basic.meta (9 lines changed)
11. unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta (9 lines changed)
12. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat (76 lines changed)
13. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta (9 lines changed)
14. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat (76 lines changed)
15. unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta (9 lines changed)
16. unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity (702 lines changed)
17. unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta (8 lines changed)
18. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta (9 lines changed)
19. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs (17 lines changed)
20. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta (12 lines changed)
21. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (64 lines changed)
22. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta (12 lines changed)
23. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs (18 lines changed)
24. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta (12 lines changed)

docs/Getting-Started-with-Balance-Ball.md (1 line changed)


* numpy
* Pillow
* Python (2 or 3)
- * scipy
* TensorFlow (1.0+)
### Installing Dependencies

docs/Readme.md (2 lines changed)


## Basic
* [Unity ML Agents Overview](Unity-Agents-Overview.md)
+ * [Installation & Set-up](installation.md)
+ * [Best practices when designing an Environment](best-practices.md)
* [How to organize the Scene](Organizing-the-Scene.md)
* [How to use the Python API](Unity-Agents---Python-API.md)
* [How to use TensorflowSharp inside Unity [Experimental]](Using-TensorFlow-Sharp-in-Unity-(Experimental).md)

python/PPO.ipynb (10 lines changed)


"train_model = True # Whether to train the model.\n",
"summary_freq = 10000 # Frequency at which to save training statistics.\n",
"save_freq = 50000 # Frequency at which to save model.\n",
"env_name = \"simple\" # Name of the training environment file.\n",
"env_name = \"environment\" # Name of the training environment file.\n",
"\n",
"### Algorithm-specific parameters for tuning\n",
"gamma = 0.99 # Reward discount rate.\n",

{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"env = UnityEnvironment(file_name=env_name)\n",

"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],

"\n",
"is_continuous = (env.brains[brain_name].action_space_type == \"continuous\")\n",
"use_observations = (env.brains[brain_name].number_observations > 0)\n",
"use_states = (env.brains[brain_name].state_space_size > 0)\n",
"\n",
"model_path = './models/{}'.format(run_path)\n",
"summary_path = './summaries/{}'.format(run_path)\n",

" steps = sess.run(ppo_model.global_step)\n",
" summary_writer = tf.summary.FileWriter(summary_path)\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)\n",
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)\n",
" while steps <= max_steps:\n",
" if env.global_done:\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",

python/ppo.py (5 lines changed)


Options:
--help Show this message.
- --max-steps=<n> Maximum number of steps to run environment [default: 5e6].
+ --max-steps=<n> Maximum number of steps to run environment [default: 1e6].
--run-path=<path> The sub-directory name for model and summary statistics [default: ppo].
--load Whether to load the model or randomly initialize [default: False].
--train Whether to train model, or only run inference [default: True].

is_continuous = (env.brains[brain_name].action_space_type == "continuous")
use_observations = (env.brains[brain_name].number_observations > 0)
use_states = (env.brains[brain_name].state_space_size > 0)
if not os.path.exists(model_path):
os.makedirs(model_path)

steps = sess.run(ppo_model.global_step)
summary_writer = tf.summary.FileWriter(summary_path)
info = env.reset(train_mode=train_model)[brain_name]
- trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)
+ trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)
while steps <= max_steps or not train_model:
if env.global_done:
info = env.reset(train_mode=train_model)[brain_name]
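Read together, the fragments above give ppo.py the following overall shape. This is a condensed sketch for orientation, not the verbatim file; the experience-processing and update steps are elided behind a comment:

```python
# Condensed sketch of the ppo.py main loop (illustrative, not the verbatim file).
steps = sess.run(ppo_model.global_step)
info = env.reset(train_mode=train_model)[brain_name]
trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)
while steps <= max_steps or not train_model:
    if env.global_done:
        info = env.reset(train_mode=train_model)[brain_name]
    # Collect one step of experience; returns the new BrainInfo.
    info = trainer.take_action(info, env, brain_name)
    # ... experience processing and PPO updates advance ppo_model.global_step ...
    steps = sess.run(ppo_model.global_step)
```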

python/ppo/models.py (202 lines changed)


def create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=5e6):
"""
- Takes a Unity environment and model-specific hyperparameters and returns the
+ Takes a Unity environment and model-specific hyper-parameters and returns the
appropriate PPO agent model for the environment.
:param env: a Unity environment.
:param lr: Learning rate.

:param max_step: Total number of training steps.
:return: a sub-class of PPOAgent tailored to the environment.
-    if env.brains[brain_name].action_space_type == "continuous":
-        if env.brains[brain_name].number_observations == 0:
-            return ContinuousControlModel(lr, env.brains[brain_name].state_space_size,
-                                          env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step)
-        else:
-            raise UnityEnvironmentException("There is currently no PPO model which supports both a continuous "
-                                            "action space and camera observations.")
-    if env.brains[brain_name].action_space_type == "discrete":
-        if env.brains[brain_name].number_observations == 0:
-            return DiscreteControlModel(lr, env.brains[brain_name].state_space_size,
-                                        env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step)
-        else:
-            brain = env.brains[brain_name]
-            if env.brains[brain_name].state_space_size > 0:
-                print("This brain contains agents with both observations and states. There is currently no PPO model"
-                      "which supports this. Defaulting to Vision-based PPO model.")
-            h, w = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height']
-            return VisualDiscreteControlModel(lr, h, w, env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step)
+    brain = env.brains[brain_name]
+    if brain.action_space_type == "continuous":
+        return ContinuousControlModel(lr, brain, h_size, epsilon, max_step)
+    if brain.action_space_type == "discrete":
+        return DiscreteControlModel(lr, brain, h_size, epsilon, beta, max_step)
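With the dispatch reduced to the brain object, building a model from a launched environment looks like this. A minimal usage sketch, assuming a built environment binary named `environment` (the hyperparameter values are the defaults from the signature above):

```python
# Usage sketch for the refactored factory (the file name is a placeholder).
from unityagents import UnityEnvironment
from ppo.models import create_agent_model

env = UnityEnvironment(file_name="environment")
# Inspects env.brains[...] and returns a ContinuousControlModel or
# DiscreteControlModel wired for states, observations, or both.
model = create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2,
                           beta=1e-3, max_step=5e6)
```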
def save_model(sess, saver, model_path="./", steps=0):

class PPOModel(object):
def __init__(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
def create_visual_encoder(self, o_size_h, o_size_w, bw, h_size, num_streams, activation):
"""
Builds a set of visual (CNN) encoders.
:param o_size_h: Height observation size.
:param o_size_w: Width observation size.
:param bw: Whether image is greyscale {True} or color {False}.
:param h_size: Hidden layer size.
:param num_streams: Number of visual streams to construct.
:param activation: What type of activation function to use for layers.
:return: List of hidden layer tensors.
"""
if bw:
c_channels = 1
else:
c_channels = 3
self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32,
name='observation_0')
streams = []
for i in range(num_streams):
self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=activation)
self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=activation)
hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=activation)
streams.append(hidden)
return streams
def create_continuous_state_encoder(self, s_size, h_size, num_streams, activation):
"""
Builds a set of hidden state encoders.
:param s_size: state input size.
:param h_size: Hidden layer size.
:param num_streams: Number of state streams to construct.
:param activation: What type of activation function to use for layers.
:return: List of hidden layer tensors.
"""
self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
streams = []
for i in range(num_streams):
hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=activation)
hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=activation)
streams.append(hidden_2)
return streams
def create_discrete_state_encoder(self, s_size, h_size, num_streams, activation):
"""
Builds a set of hidden state encoders from discrete state input.
:param s_size: state input size (discrete).
:param h_size: Hidden layer size.
:param num_streams: Number of state streams to construct.
:param activation: What type of activation function to use for layers.
:return: List of hidden layer tensors.
"""
self.state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state')
state_in = tf.reshape(self.state_in, [-1])
state_onehot = c_layers.one_hot_encoding(state_in, s_size)
streams = []
for i in range(num_streams):
hidden = tf.layers.dense(state_onehot, h_size, use_bias=False, activation=activation)
streams.append(hidden)
return streams
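Concretely, the discrete encoder turns the single integer state into an `s_size`-wide one-hot vector before the dense layer. A small numpy illustration of the reshape-then-one-hot step (numpy stands in for the TensorFlow ops):

```python
import numpy as np

s_size = 20                       # number of possible discrete states
state_in = np.array([[3], [7]])   # batch of shape [batch, 1], as in the placeholder
flat = state_in.reshape(-1)       # equivalent of tf.reshape(self.state_in, [-1])
one_hot = np.eye(s_size)[flat]    # equivalent of c_layers.one_hot_encoding(state_in, s_size)
print(one_hot.shape)              # (2, 20); row 0 is 1.0 at index 3, row 1 at index 7
```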
def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
"""
Creates training-specific Tensorflow ops for PPO models.
:param probs: Current policy probabilities

:param entropy: Current policy entropy
:param epsilon: Value for policy-divergence threshold
:param lr: Learning rate
:param max_step: Total number of training steps.
"""
self.returns_holder = tf.placeholder(shape=[None], dtype=tf.float32, name='discounted_rewards')
self.advantage = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='advantages')

class ContinuousControlModel(PPOModel):
-    def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step):
+    def __init__(self, lr, brain, h_size, epsilon, max_step):
-        :param s_size: State-space size
-        :param a_size: Action-space size
+        :param brain: Parameters of the brain the model is built for
-        self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
+        s_size = brain.state_space_size
+        a_size = brain.action_space_size
+        hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None
+        if brain.number_observations > 0:
+            height, width = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']
+            bw = brain.camera_resolutions[0]['blackAndWhite']
+            hidden_visual = self.create_visual_encoder(height, width, bw, h_size, 2, tf.nn.tanh)
+        if brain.state_space_size > 0:
+            s_size = brain.state_space_size
+            if brain.state_space_type == "continuous":
+                hidden_state = self.create_continuous_state_encoder(s_size, h_size, 2, tf.nn.tanh)
+            else:
+                hidden_state = self.create_discrete_state_encoder(s_size, h_size, 2, tf.nn.tanh)
+        if hidden_visual is None and hidden_state is None:
+            raise Exception("No valid network configuration possible. "
+                            "There are no states or observations in this brain")
+        elif hidden_visual is not None and hidden_state is None:
+            hidden_policy, hidden_value = hidden_visual
+        elif hidden_visual is None and hidden_state is not None:
+            hidden_policy, hidden_value = hidden_state
+        elif hidden_visual is not None and hidden_state is not None:
+            hidden_policy = tf.concat([hidden_visual[0], hidden_state[0]], axis=1)
+            hidden_value = tf.concat([hidden_visual[1], hidden_state[1]], axis=1)
-        hidden_policy = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.tanh)
-        hidden_value = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.tanh)
-        hidden_policy_2 = tf.layers.dense(hidden_policy, h_size, use_bias=False, activation=tf.nn.tanh)
-        hidden_value_2 = tf.layers.dense(hidden_value, h_size, use_bias=False, activation=tf.nn.tanh)
-        self.mu = tf.layers.dense(hidden_policy_2, a_size, activation=None, use_bias=False,
+        self.mu = tf.layers.dense(hidden_policy, a_size, activation=None, use_bias=False,
                                   kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
        self.log_sigma_sq = tf.Variable(tf.zeros([a_size]))
        self.sigma_sq = tf.exp(self.log_sigma_sq)

        self.entropy = tf.reduce_sum(0.5 * tf.log(2 * np.pi * np.e * self.sigma_sq))
-        self.value = tf.layers.dense(hidden_value_2, 1, activation=None, use_bias=False)
+        self.value = tf.layers.dense(hidden_value, 1, activation=None, use_bias=False)
-        PPOModel.__init__(self, self.probs, self.old_probs, self.value, self.entropy, 0.0, epsilon, lr, max_step)
+        self.create_ppo_optimizer(self.probs, self.old_probs, self.value, self.entropy, 0.0, epsilon, lr, max_step)
-    def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step):
+    def __init__(self, lr, brain, h_size, epsilon, beta, max_step):
-        :param s_size: State-space size
-        :param a_size: Action-space size
+        :param brain: Parameters of the brain the model is built for
-        self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.elu)
hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=tf.nn.elu)
self.policy = tf.layers.dense(hidden_2, a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
self.probs = tf.nn.softmax(self.policy)
self.action = tf.multinomial(self.policy, 1)
self.output = tf.identity(self.action, name='action')
self.value = tf.layers.dense(hidden_2, 1, activation=None, use_bias=False)
hidden_state, hidden_visual, hidden = None, None, None
        if brain.number_observations > 0:
            height, width = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']
            bw = brain.camera_resolutions[0]['blackAndWhite']
            hidden_visual = self.create_visual_encoder(height, width, bw, h_size, 1, tf.nn.elu)[0]
if brain.state_space_size > 0:
s_size = brain.state_space_size
if brain.state_space_type == "continuous":
hidden_state = self.create_continuous_state_encoder(s_size, h_size, 1, tf.nn.elu)[0]
else:
hidden_state = self.create_discrete_state_encoder(s_size, h_size, 1, tf.nn.elu)[0]
self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)
self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size)
self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32, name='old_probabilities')
self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs,
self.value, self.entropy, beta, epsilon, lr, max_step)
if hidden_visual is None and hidden_state is None:
raise Exception("No valid network configuration possible. "
"There are no states or observations in this brain")
elif hidden_visual is not None and hidden_state is None:
hidden = hidden_visual
elif hidden_visual is None and hidden_state is not None:
hidden = hidden_state
elif hidden_visual is not None and hidden_state is not None:
hidden = tf.concat([hidden_visual, hidden_state], axis=1)
a_size = brain.action_space_size
class VisualDiscreteControlModel(PPOModel):
def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta, max_step):
"""
Creates Discrete Control Actor-Critic model for use with visual observations (images).
:param o_size_h: Observation height.
:param o_size_w: Observation width.
:param a_size: Action-space size.
:param h_size: Hidden layer size.
"""
self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32,
name='observation_0')
self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=tf.nn.elu)
self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2],
use_bias=False, activation=tf.nn.elu)
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32)
hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=tf.nn.elu)
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
self.policy = tf.layers.dense(hidden, a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
self.probs = tf.nn.softmax(self.policy)

self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
-        PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs,
-                          self.value, self.entropy, beta, epsilon, lr, max_step)
+        self.create_ppo_optimizer(self.responsible_probs, self.old_responsible_probs,
+                                  self.value, self.entropy, beta, epsilon, lr, max_step)

python/ppo/trainer.py (40 lines changed)


class Trainer(object):
-    def __init__(self, ppo_model, sess, info, is_continuous, use_observations):
+    def __init__(self, ppo_model, sess, info, is_continuous, use_observations, use_states):
"""
Responsible for collecting experiences and training the PPO model.
:param ppo_model: Tensorflow graph defining model.

self.is_continuous = is_continuous
self.use_observations = use_observations
self.use_states = use_states
def take_action(self, info, env, brain_name):
"""

:return: BrainInfo corresponding to new environment state.
"""
        epsi = None
+        feed_dict = {self.model.batch_size: len(info.states)}
-            feed_dict = {self.model.state_in: info.states, self.model.batch_size: len(info.states),
-                         self.model.epsilon: epsi}
-        elif self.use_observations:
-            feed_dict = {self.model.observation_in: np.vstack(info.observations),
-                         self.model.batch_size: len(info.states)}
-        else:
-            feed_dict = {self.model.state_in: info.states, self.model.batch_size: len(info.states)}
+            feed_dict[self.model.epsilon] = epsi
+        if self.use_observations:
+            feed_dict[self.model.observation_in] = np.vstack(info.observations)
+        if self.use_states:
+            feed_dict[self.model.state_in] = info.states
actions, a_dist, value, ent, learn_rate = self.sess.run([self.model.output, self.model.probs,
self.model.value, self.model.entropy,
self.model.learning_rate],

            if not info.local_done[idx]:
                if self.use_observations:
                    history['observations'].append(info.observations[idx])
-                else:
-                    history['states'].append(info.states[idx])
+                if self.use_states:
+                    history['states'].append(info.states[idx])
                if self.is_continuous:
                    history['epsilons'].append(epsi[idx])
                history['value_estimates'].append(value[idx][0])
                history['cumulative_reward'] += next_info.rewards[idx]
                history['episode_steps'] += 1

            if info.local_done[l]:
                value_next = 0.0
            else:
+                feed_dict = {self.model.batch_size: len(info.states)}
                if self.use_observations:
-                    feed_dict = {self.model.observation_in: np.vstack(info.observations),
-                                 self.model.batch_size: len(info.states)}
-                else:
-                    feed_dict = {self.model.state_in: info.states,
-                                 self.model.batch_size: len(info.states)}
+                    feed_dict[self.model.observation_in] = np.vstack(info.observations)
+                if self.use_states:
+                    feed_dict[self.model.state_in] = info.states
                value_next = self.sess.run(self.model.value, feed_dict)[l]
history = vectorize_history(self.history_dict[info.agents[l]])
history['advantages'] = get_gae(rewards=history['rewards'],

                             self.model.old_probs: np.vstack(training_buffer['action_probs'][start:end])}
                if self.is_continuous:
                    feed_dict[self.model.epsilon] = np.vstack(training_buffer['epsilons'][start:end])
-                    feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end])
-                if self.use_observations:
-                    feed_dict[self.model.observation_in] = np.vstack(training_buffer['observations'][start:end])
-                else:
-                    feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end])
+                if self.use_states:
+                    feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end])
+                if self.use_observations:
+                    feed_dict[self.model.observation_in] = np.vstack(training_buffer['observations'][start:end])
v_loss, p_loss, _ = self.sess.run([self.model.value_loss, self.model.policy_loss,
self.model.update_batch], feed_dict=feed_dict)
total_v += v_loss
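The pattern behind all three rewritten sites is the same: start from the inputs every model consumes, then extend the dictionary per optional input, so a brain with both camera observations and states feeds both placeholders. A condensed sketch of that pattern (the helper name is hypothetical; the Trainer inlines this logic):

```python
import numpy as np

def build_feed_dict(model, info, is_continuous, use_observations, use_states, epsi=None):
    """Hypothetical helper illustrating the composable feed_dict pattern."""
    feed_dict = {model.batch_size: len(info.states)}
    if is_continuous:
        feed_dict[model.epsilon] = epsi  # exploration noise for continuous actions
    if use_observations:
        feed_dict[model.observation_in] = np.vstack(info.observations)
    if use_states:
        feed_dict[model.state_in] = info.states
    return feed_dict
```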

unity-environment/Assets/ML-Agents/Scripts/Brain.cs (24 lines changed)


* Defines brain-specific parameters
*/
[System.Serializable]
-public struct BrainParameters
+public class BrainParameters
-    public int stateSize;
+    public int stateSize = 1;
-    public int actionSize;
+    public int actionSize = 1;
-    public int memorySize;
+    public int memorySize = 0;
    /**< \brief The length of the float vector that holds the memory for the agent */
    public resolution[] cameraResolutions;
    /**<\brief The list of observation resolutions for the brain */

-    public StateType actionSpaceType;
+    public StateType actionSpaceType = StateType.discrete;
-    public StateType stateSpaceType;
+    public StateType stateSpaceType = StateType.continuous;
}
/**

*/
public class Brain : MonoBehaviour
{
-    public BrainParameters brainParameters;
+    public BrainParameters brainParameters = new BrainParameters();
/**< \brief Defines brain specific parameters such as the state size*/
public BrainType brainType;
/**< \brief Defines what is the type of the brain :

        foreach (KeyValuePair<int, Agent> idAgent in agents)
        {
            List<float> states = idAgent.Value.CollectState();
-            if (states.Count != brainParameters.stateSize)
+            if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous))
            {
                throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
-Was expecting {1} states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count));
+Was expecting {1} continuous states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count));
            }
+            if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete))
+            {
+                throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
+Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count));
+            }
            result.Add(idAgent.Key, states);
        }

docs/best-practices.md (20 lines changed)


# Environment Design Best Practices
## General
* It is often helpful to begin with the simplest version of the problem, to ensure the agent can learn it. From there, increase
complexity over time.
* When possible, it is often helpful to ensure that you can complete the task by using a Player Brain to control the agent.
## Rewards
* The magnitude of any given reward should typically not be greater than 1.0 in order to ensure a more stable learning process.
* Positive rewards are often more helpful to shaping the desired behavior of an agent than negative rewards.
* For locomotion tasks, a small positive reward (+0.1) for forward progress is typically used.
* If you want the agent to finish a task quickly, it is often helpful to provide a small penalty every step (-0.1).
## States
* The magnitude of each state variable should be normalized to around 1.0.
* States should include all variables relevant to allowing the agent to make an optimally informed decision.
* Categorical state variables such as type of object (Sword, Shield, Bow) should be encoded in one-hot fashion (i.e. `3` -> `0, 0, 1`); see the sketch after this list.
## Actions
* When using continuous control, action values should be clipped to an appropriate range.
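As an illustration of the one-hot suggestion above, shown in Python for brevity (the `categories` values are a hypothetical example):

```python
# One-hot encoding for a categorical state variable with three categories:
# "Sword" -> [1, 0, 0], "Shield" -> [0, 1, 0], "Bow" -> [0, 0, 1].
categories = ["Sword", "Shield", "Bow"]

def one_hot(item):
    return [1.0 if c == item else 0.0 for c in categories]

print(one_hot("Bow"))  # [0.0, 0.0, 1.0]
```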

docs/installation.md (51 lines changed)


# Installation & Set-up
## Install **Unity 2017.1** or later (required)
Download link available [here](https://store.unity.com/download?ref=update).
## Clone the repository
Once installed, you will want to clone the Agents GitHub repository. References will be made
throughout to `unity-environment` and `python` directories. Both are located at the root of the repository.
## Installing Python API
In order to train an agent within the framework, you will need to install Python 2 or 3, and the dependencies described below.
### Windows Users
If you are a Windows user who is new to Python/TensorFlow, follow [this guide](https://nitishmutha.github.io/tensorflow/2017/01/22/TensorFlow-with-gpu-for-windows.html) to set up your Python environment.
### Requirements
* Jupyter
* Matplotlib
* numpy
* Pillow
* Python (2 or 3)
* docopt (Training)
* TensorFlow (1.0+) (Training)
### Installing Dependencies
To install dependencies, go into the `python` directory and run (depending on your python version):
`pip install .`
or
`pip3 install .`
If your Python environment doesn't include `pip`, see these [instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers) on installing it.
Once the requirements are successfully installed, the next step is to check out the [Getting Started guide](Getting-Started-with-Balance-Ball.md).
## Installation Help
### Using Jupyter Notebook
For a walkthrough of how to use Jupyter notebook, see [here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/execute.html).
### General Issues
If you run into issues while attempting to install and run Unity ML Agents, see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Limitations-&-Common-Issues.md) for a list of common issues and solutions.
If you have an issue that isn't covered here, feel free to contact us at ml-agents@unity3d.com. Alternatively, feel free to create an issue on the repository.
Be sure to include relevant information on OS, Python version, and exact error message if possible.

unity-environment/Assets/ML-Agents/Examples/Basic.meta (9 lines changed)


fileFormatVersion: 2
guid: 230c334ab2f144bcda6eea42d18ebdc8
folderAsset: yes
timeCreated: 1506189168
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta (9 lines changed)


fileFormatVersion: 2
guid: 0f9b2a7b3f61045b8a791eeae8175dc5
folderAsset: yes
timeCreated: 1506189694
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat (76 lines changed)


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!21 &2100000
Material:
serializedVersion: 6
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_Name: agent
m_Shader: {fileID: 46, guid: 0000000000000000f000000000000000, type: 0}
m_ShaderKeywords:
m_LightmapFlags: 4
m_EnableInstancingVariants: 0
m_DoubleSidedGI: 0
m_CustomRenderQueue: -1
stringTagMap: {}
disabledShaderPasses: []
m_SavedProperties:
serializedVersion: 3
m_TexEnvs:
- _BumpMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailAlbedoMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailMask:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailNormalMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _EmissionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MainTex:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MetallicGlossMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _OcclusionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _ParallaxMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
m_Floats:
- _BumpScale: 1
- _Cutoff: 0.5
- _DetailNormalMapScale: 1
- _DstBlend: 0
- _GlossMapScale: 1
- _Glossiness: 0.5
- _GlossyReflections: 1
- _Metallic: 0
- _Mode: 0
- _OcclusionStrength: 1
- _Parallax: 0.02
- _SmoothnessTextureChannel: 0
- _SpecularHighlights: 1
- _SrcBlend: 1
- _UVSec: 0
- _ZWrite: 1
m_Colors:
- _Color: {r: 0.10980392, g: 0.6039216, b: 1, a: 0.8392157}
- _EmissionColor: {r: 0, g: 0, b: 0, a: 1}

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta (9 lines changed)


fileFormatVersion: 2
guid: 260483cdfc6b14e26823a02f23bd8baa
timeCreated: 1506189720
licenseType: Pro
NativeFormatImporter:
mainObjectFileID: 2100000
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat (76 lines changed)


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!21 &2100000
Material:
serializedVersion: 6
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_Name: goal
m_Shader: {fileID: 46, guid: 0000000000000000f000000000000000, type: 0}
m_ShaderKeywords:
m_LightmapFlags: 4
m_EnableInstancingVariants: 0
m_DoubleSidedGI: 0
m_CustomRenderQueue: -1
stringTagMap: {}
disabledShaderPasses: []
m_SavedProperties:
serializedVersion: 3
m_TexEnvs:
- _BumpMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailAlbedoMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailMask:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _DetailNormalMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _EmissionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MainTex:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _MetallicGlossMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _OcclusionMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _ParallaxMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
m_Floats:
- _BumpScale: 1
- _Cutoff: 0.5
- _DetailNormalMapScale: 1
- _DstBlend: 0
- _GlossMapScale: 1
- _Glossiness: 0.5
- _GlossyReflections: 1
- _Metallic: 0
- _Mode: 0
- _OcclusionStrength: 1
- _Parallax: 0.02
- _SmoothnessTextureChannel: 0
- _SpecularHighlights: 1
- _SrcBlend: 1
- _UVSec: 0
- _ZWrite: 1
m_Colors:
- _Color: {r: 0.5058824, g: 0.74509805, b: 0.25490198, a: 1}
- _EmissionColor: {r: 0, g: 0, b: 0, a: 1}

unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta (9 lines changed)


fileFormatVersion: 2
guid: 624b24bbec31f44babfb57ef2dfbc537
timeCreated: 1506189863
licenseType: Pro
NativeFormatImporter:
mainObjectFileID: 2100000
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity (702 lines changed)


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!29 &1
OcclusionCullingSettings:
m_ObjectHideFlags: 0
serializedVersion: 2
m_OcclusionBakeSettings:
smallestOccluder: 5
smallestHole: 0.25
backfaceThreshold: 100
m_SceneGUID: 00000000000000000000000000000000
m_OcclusionCullingData: {fileID: 0}
--- !u!104 &2
RenderSettings:
m_ObjectHideFlags: 0
serializedVersion: 8
m_Fog: 0
m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
m_FogMode: 3
m_FogDensity: 0.01
m_LinearFogStart: 0
m_LinearFogEnd: 300
m_AmbientSkyColor: {r: 0.212, g: 0.227, b: 0.259, a: 1}
m_AmbientEquatorColor: {r: 0.114, g: 0.125, b: 0.133, a: 1}
m_AmbientGroundColor: {r: 0.047, g: 0.043, b: 0.035, a: 1}
m_AmbientIntensity: 1
m_AmbientMode: 0
m_SubtractiveShadowColor: {r: 0.42, g: 0.478, b: 0.627, a: 1}
m_SkyboxMaterial: {fileID: 10304, guid: 0000000000000000f000000000000000, type: 0}
m_HaloStrength: 0.5
m_FlareStrength: 1
m_FlareFadeSpeed: 3
m_HaloTexture: {fileID: 0}
m_SpotCookie: {fileID: 10001, guid: 0000000000000000e000000000000000, type: 0}
m_DefaultReflectionMode: 0
m_DefaultReflectionResolution: 128
m_ReflectionBounces: 1
m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0
serializedVersion: 11
m_GIWorkflowMode: 1
m_GISettings:
serializedVersion: 2
m_BounceScale: 1
m_IndirectOutputScale: 1
m_AlbedoBoost: 1
m_TemporalCoherenceThreshold: 1
m_EnvironmentLightingMode: 0
m_EnableBakedLightmaps: 1
m_EnableRealtimeLightmaps: 1
m_LightmapEditorSettings:
serializedVersion: 9
m_Resolution: 2
m_BakeResolution: 40
m_TextureWidth: 1024
m_TextureHeight: 1024
m_AO: 0
m_AOMaxDistance: 1
m_CompAOExponent: 1
m_CompAOExponentDirect: 0
m_Padding: 2
m_LightmapParameters: {fileID: 0}
m_LightmapsBakeMode: 1
m_TextureCompression: 1
m_FinalGather: 0
m_FinalGatherFiltering: 1
m_FinalGatherRayCount: 256
m_ReflectionCompression: 2
m_MixedBakeMode: 2
m_BakeBackend: 0
m_PVRSampling: 1
m_PVRDirectSampleCount: 32
m_PVRSampleCount: 500
m_PVRBounces: 2
m_PVRFiltering: 0
m_PVRFilteringMode: 1
m_PVRCulling: 1
m_PVRFilteringGaussRadiusDirect: 1
m_PVRFilteringGaussRadiusIndirect: 5
m_PVRFilteringGaussRadiusAO: 2
m_PVRFilteringAtrousColorSigma: 1
m_PVRFilteringAtrousNormalSigma: 1
m_PVRFilteringAtrousPositionSigma: 1
m_LightingDataAsset: {fileID: 0}
m_UseShadowmask: 1
--- !u!196 &4
NavMeshSettings:
serializedVersion: 2
m_ObjectHideFlags: 0
m_BuildSettings:
serializedVersion: 2
agentTypeID: 0
agentRadius: 0.5
agentHeight: 2
agentSlope: 45
agentClimb: 0.4
ledgeDropHeight: 0
maxJumpAcrossDistance: 0
minRegionArea: 2
manualCellSize: 0
cellSize: 0.16666667
manualTileSize: 0
tileSize: 256
accuratePlacement: 0
m_NavMeshData: {fileID: 0}
--- !u!1 &282272644
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 282272648}
- component: {fileID: 282272647}
- component: {fileID: 282272646}
- component: {fileID: 282272645}
- component: {fileID: 282272649}
m_Layer: 0
m_Name: Agent
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!23 &282272645
MeshRenderer:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_Materials:
- {fileID: 2100000, guid: 260483cdfc6b14e26823a02f23bd8baa, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &282272646
BoxCollider:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!33 &282272647
MeshFilter:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!4 &282272648
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &282272649
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 282272644}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 624480a72e46148118ab2e2d89b537de, type: 3}
m_Name:
m_EditorClassIdentifier:
brain: {fileID: 846768605}
observations: []
maxStep: 0
resetOnDone: 1
reward: 0
done: 0
value: 0
CummulativeReward: 0
stepCounter: 0
agentStoredAction: []
memory: []
id: 0
position: 0
smallGoalPosition: -3
largeGoalPosition: 7
largeGoal: {fileID: 984725368}
smallGoal: {fileID: 1178588871}
minPosition: -10
maxPosition: 10
--- !u!114 &395380616
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)
m_EditorClassIdentifier:
brain: {fileID: 0}
--- !u!114 &577874698
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)
m_EditorClassIdentifier:
continuousPlayerActions: []
discretePlayerActions:
- key: 97
value: 0
- key: 100
value: 1
defaultAction: -1
brain: {fileID: 846768605}
--- !u!1 &762086410
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 762086412}
- component: {fileID: 762086411}
m_Layer: 0
m_Name: Directional Light
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!108 &762086411
Light:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 762086410}
m_Enabled: 1
serializedVersion: 8
m_Type: 1
m_Color: {r: 1, g: 0.95686275, b: 0.8392157, a: 1}
m_Intensity: 1
m_Range: 10
m_SpotAngle: 30
m_CookieSize: 10
m_Shadows:
m_Type: 2
m_Resolution: -1
m_CustomResolution: -1
m_Strength: 1
m_Bias: 0.05
m_NormalBias: 0.4
m_NearPlane: 0.2
m_Cookie: {fileID: 0}
m_DrawHalo: 0
m_Flare: {fileID: 0}
m_RenderMode: 0
m_CullingMask:
serializedVersion: 2
m_Bits: 4294967295
m_Lightmapping: 4
m_AreaSize: {x: 1, y: 1}
m_BounceIntensity: 1
m_ColorTemperature: 6570
m_UseColorTemperature: 0
m_ShadowRadius: 0
m_ShadowAngle: 0
--- !u!4 &762086412
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 762086410}
m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261}
m_LocalPosition: {x: 0, y: 3, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0}
--- !u!1 &846768603
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 846768604}
- component: {fileID: 846768605}
m_Layer: 0
m_Name: Brain
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &846768604
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 846768603}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 1574236049}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &846768605
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 846768603}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: c676a8ddf5a5f4f64b35e9ed5028679d, type: 3}
m_Name:
m_EditorClassIdentifier:
brainParameters:
stateSize: 1
actionSize: 2
memorySize: 0
cameraResolutions: []
actionDescriptions:
- Left
- Right
actionSpaceType: 0
stateSpaceType: 0
brainType: 0
CoreBrains:
- {fileID: 577874698}
- {fileID: 395380616}
- {fileID: 1503497339}
instanceID: 10208
--- !u!1 &984725368
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 984725372}
- component: {fileID: 984725371}
- component: {fileID: 984725370}
- component: {fileID: 984725369}
m_Layer: 0
m_Name: largeGoal
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!23 &984725369
MeshRenderer:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_Materials:
- {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!135 &984725370
SphereCollider:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Radius: 0.5
m_Center: {x: 0, y: 0, z: 0}
--- !u!33 &984725371
MeshFilter:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_Mesh: {fileID: 10207, guid: 0000000000000000e000000000000000, type: 0}
--- !u!4 &984725372
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 984725368}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &1178588871
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 1178588875}
- component: {fileID: 1178588874}
- component: {fileID: 1178588873}
- component: {fileID: 1178588872}
m_Layer: 0
m_Name: smallGoal
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!23 &1178588872
MeshRenderer:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_Materials:
- {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!135 &1178588873
SphereCollider:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Radius: 0.5
m_Center: {x: 0, y: 0, z: 0}
--- !u!33 &1178588874
MeshFilter:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_Mesh: {fileID: 10207, guid: 0000000000000000e000000000000000, type: 0}
--- !u!4 &1178588875
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1178588871}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 0.5, y: 0.5, z: 0.5}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 5
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1503497339
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)
m_EditorClassIdentifier:
brain: {fileID: 846768605}
--- !u!1 &1574236047
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 1574236049}
- component: {fileID: 1574236048}
m_Layer: 0
m_Name: Academy
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!114 &1574236048
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1574236047}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 19276d4dc78ee49f1ba258293f17636c, type: 3}
m_Name:
m_EditorClassIdentifier:
maxSteps: 0
frameToSkip: 0
waitTime: 0.5
trainingConfiguration:
width: 80
height: 80
qualityLevel: 1
timeScale: 100
targetFrameRate: 60
inferenceConfiguration:
width: 1280
height: 720
qualityLevel: 5
timeScale: 1
targetFrameRate: 60
defaultResetParameters: []
done: 0
episodeCount: 1
currentStep: 0
isInference: 0
windowResize: 0
--- !u!4 &1574236049
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1574236047}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0.71938086, y: 0.27357092, z: 4.1970553}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 846768604}
m_Father: {fileID: 0}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &1715640920
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_Component:
- component: {fileID: 1715640925}
- component: {fileID: 1715640924}
- component: {fileID: 1715640923}
- component: {fileID: 1715640922}
- component: {fileID: 1715640921}
m_Layer: 0
m_Name: Main Camera
m_TagString: MainCamera
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!81 &1715640921
AudioListener:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!124 &1715640922
Behaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!92 &1715640923
Behaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!20 &1715640924
Camera:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
serializedVersion: 2
m_ClearFlags: 2
m_BackGroundColor: {r: 0.7411765, g: 0.7411765, b: 0.7529412, a: 0}
m_NormalizedViewPortRect:
serializedVersion: 2
x: 0
y: 0
width: 1
height: 1
near clip plane: 0.3
far clip plane: 1000
field of view: 60
orthographic: 0
orthographic size: 5
m_Depth: -1
m_CullingMask:
serializedVersion: 2
m_Bits: 4294967295
m_RenderingPath: -1
m_TargetTexture: {fileID: 0}
m_TargetDisplay: 0
m_TargetEye: 3
m_HDR: 1
m_AllowMSAA: 1
m_ForceIntoRT: 0
m_OcclusionCulling: 1
m_StereoConvergence: 10
m_StereoSeparation: 0.022
m_StereoMirrorMode: 0
--- !u!4 &1715640925
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 1, z: -10}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}

unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta (8 lines changed)


fileFormatVersion: 2
guid: cf1d119a8748d406e90ecb623b45f92f
timeCreated: 1504127824
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta (9 lines changed)


fileFormatVersion: 2
guid: fbcbd038eb29041f580c463e454e10fc
folderAsset: yes
timeCreated: 1503355437
licenseType: Free
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs (17 lines changed)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class BasicAcademy : Academy {

    public override void AcademyReset()
    {
    }

    public override void AcademyStep()
    {
    }
}

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta (12 lines changed)


fileFormatVersion: 2
guid: 19276d4dc78ee49f1ba258293f17636c
timeCreated: 1503355437
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (64 lines changed)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class BasicAgent : Agent
{
    public int position;
    public int smallGoalPosition;
    public int largeGoalPosition;
    public GameObject largeGoal;
    public GameObject smallGoal;
    public int minPosition;
    public int maxPosition;

    public override List<float> CollectState()
    {
        List<float> state = new List<float>();
        state.Add(position);
        return state;
    }

    public override void AgentStep(float[] act)
    {
        float movement = act[0];
        int direction = 0;
        if (movement == 0) { direction = -1; }
        if (movement == 1) { direction = 1; }
        position += direction;
        if (position < minPosition) { position = minPosition; }
        if (position > maxPosition) { position = maxPosition; }
        gameObject.transform.position = new Vector3(position, 0f, 0f);
        if (position == smallGoalPosition)
        {
            done = true;
            reward = 0.1f;
        }
        if (position == largeGoalPosition)
        {
            done = true;
            reward = 1f;
        }
    }

    public override void AgentReset()
    {
        position = 0;
        minPosition = -10;
        maxPosition = 10;
        smallGoalPosition = -3;
        largeGoalPosition = 7;
        smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f);
        largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f);
    }

    public override void AgentOnDone()
    {
    }
}
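Seen from the Python API, this agent exposes a single-integer state and two discrete actions. A hedged interaction sketch, where the environment file name is a placeholder for a build of this scene:

```python
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="basic")  # placeholder name for a Basic scene build
brain_name = env.brain_names[0]
info = env.reset(train_mode=False)[brain_name]
print(info.states)                  # e.g. [[0.0]] -- the agent's position
info = env.step([1])[brain_name]    # action 1 moves the agent right (action 0: left)
```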

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta (12 lines changed)


fileFormatVersion: 2
guid: 624480a72e46148118ab2e2d89b537de
timeCreated: 1503355437
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs (18 lines changed)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class BasicDecision : MonoBehaviour, Decision {

    public float[] Decide(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
    {
        return default(float[]);
    }

    public float[] MakeMemory(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
    {
        return default(float[]);
    }
}

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta (12 lines changed)


fileFormatVersion: 2
guid: 99399d2439f894b149d8e67b85b6e07a
timeCreated: 1503355437
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant: