
merging dev-broadcast-curriculum

/tag-0.2.0
vincentpierre, 7 years ago
Current commit
3b00302a
30 files changed, including 1,032 insertions and 278 deletions
  1. docs/Making-a-new-Unity-Environment.md (23)
  2. python/PPO.ipynb (35)
  3. python/ppo.py (35)
  4. python/ppo/models.py (9)
  5. python/ppo/trainer.py (3)
  6. python/test_unityagents.py (7)
  7. python/unityagents/__init__.py (1)
  8. python/unityagents/brain.py (3)
  9. python/unityagents/environment.py (117)
  10. unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DDecision.cs (3)
  11. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs (21)
  12. unity-environment/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (6)
  13. unity-environment/Assets/ML-Agents/Examples/Tennis/Tennis.unity (242)
  14. unity-environment/Assets/ML-Agents/Scripts/Academy.cs (8)
  15. unity-environment/Assets/ML-Agents/Scripts/Agent.cs (18)
  16. unity-environment/Assets/ML-Agents/Scripts/Brain.cs (35)
  17. unity-environment/Assets/ML-Agents/Scripts/Communicator.cs (7)
  18. unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs (32)
  19. unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs (18)
  20. unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (51)
  21. unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs (19)
  22. unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs (102)
  23. unity-environment/Assets/ML-Agents/Template/Scripts/TemplateDecision.cs (19)
  24. python/curriculum.json (12)
  25. python/unityagents/curriculum.py (71)
  26. unity-environment/Assets/ML-Agents/Scripts/Monitor.cs (380)
  27. unity-environment/Assets/ML-Agents/Scripts/Monitor.cs.meta (12)
  28. unity-environment/Assets/ML-Agents/Scripts/AgentMonitor.cs.meta (12)
  29. unity-environment/Assets/ML-Agents/Resources.meta (9)

23
docs/Making-a-new-Unity-Environment.md


Note that the reward is reset to 0 at every step; you must add to the reward (`reward += rewardIncrement`) rather than set it. If you use `skipFrame` in the Academy and set your rewards instead of incrementing them, you might lose information, since the reward is sent at every step, not at every frame.
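For illustration, here is a minimal sketch (not part of this change) of an agent script that accumulates reward inside `AgentStep`; the class name and the `ReachedGoal` helper are hypothetical:
```csharp
// Hypothetical agent: always *add* to the reward, never overwrite it,
// so nothing is lost when the Academy skips frames.
public class ExampleRewardAgent : Agent
{
    public override void AgentStep(float[] act)
    {
        reward += 0.01f;      // small per-step increment (illustrative)
        if (ReachedGoal())    // hypothetical helper
        {
            reward += 1.0f;   // bonus added on top of the step reward
            done = true;
        }
    }

    bool ReachedGoal()
    {
        return false;         // placeholder for environment-specific logic
    }
}
```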
## Agent Monitor
* You can add the script `AgentMonitor.cs` to any gameObject that has a `YourNameAgent.cs` component. In the inspector of this component, you will see:
* `Fixed Position`: If this box is checked, the monitor will sit in the left corner of the screen and remain there. Note that you can only have one agent with a fixed monitor; otherwise the monitors will overlap.
* `Vertical Offset`: If `Fixed Position` is unchecked, the monitor will follow the Agent on the screen. Use `Vertical Offset` to decide how far above the agent the monitor should be.
* `Display Brain Name`: If this box is checked, the name of the brain will appear in the monitor. (This can be useful if you have similar agents using different brains.)
* `Display Brain Type`: If this box is checked, the type of the agent's brain will be displayed.
* `Display FrameCount`: If this box is checked, the number of frames that have elapsed since the agent was reset will be displayed.
* `Display Current Reward`: If this box is checked, the current reward of the agent will be displayed.
* `Display Max Reward`: If this box is checked, the maximum reward obtained during this training session will be displayed.
* `Display State`: If this box is checked, the current state of the agent will be displayed.
* `Display Action`: If this box is checked, the action the agent is currently performing will be displayed.
Environment monitoring has changed: you can now track many different things, not only agents. Use the `Log` function anywhere in your code:
```csharp
Monitor.Log(key, value, displayType, target)
```
* *`key`* is the name of the information you want to display.
* *`value`* is the information you want to display.
* *`displayType`* is a `MonitorType` that can be either `text`, `slider`, `bar`, or `hist`.
    * `text` will convert `value` into a string and display it. It can be useful for displaying error messages!
    * `slider` is used to display a single float between -1 and 1. Note that `value` must be a float if you want to use a slider. If the value is positive, the slider will be green; if the value is negative, it will be red.
    * `hist` is used to display multiple floats. Note that `value` must be a list or array of floats. The histogram will be a sequence of vertical sliders.
    * `bar` is used to visualize proportions. Note that `value` must be a list or array of positive floats. For each float in `value`, a rectangle whose width is that float divided by the sum of all values will be shown.
* *`target`* is the transform to which you want to attach information. If the transform is `null`, the information will be attached to the global monitor.
If you pass a `value` from an external brain, it will be displayed as a bar (green if the value is positive, red if it is negative) above the monitor. The bar's maximum value is set to 1 by default, but if an agent's value exceeds this number, it becomes the new maximum.
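For reference, the calls below are a rough usage sketch (not part of this diff) showing one `Log` call per display type from a component; the keys, values, and the `MonitorExample` class are made up for illustration:
```csharp
using UnityEngine;

// Hypothetical component that logs one entry of each display type.
public class MonitorExample : MonoBehaviour
{
    void Update()
    {
        Monitor.Log("Message", "hello", MonitorType.text);                    // attached to the global monitor
        Monitor.Log("Value estimate", 0.42f, MonitorType.slider, transform);  // follows this object
        Monitor.Log("Action probabilities",
                    new float[4] { 0.1f, 0.2f, 0.3f, 0.4f }, MonitorType.hist, transform);
        Monitor.Log("Proportions",
                    new float[2] { 0.7f, 0.3f }, MonitorType.bar, transform);
        // Logging null removes the entry stored under that key.
        // Monitor.Log("Message", null);
    }
}
```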

35
python/PPO.ipynb


"summary_freq = 10000 # Frequency at which to save training statistics.\n",
"save_freq = 50000 # Frequency at which to save model.\n",
"env_name = \"environment\" # Name of the training environment file.\n",
"curriculum_file = None\n",
"\n",
"### Algorithm-specific parameters for tuning\n",
"gamma = 0.99 # Reward discount rate.\n",

{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"env = UnityEnvironment(file_name=env_name)\n",
"env = UnityEnvironment(file_name=env_name, curriculum=curriculum_file)\n",
"print(str(env))\n",
"brain_name = env.brain_names[0]"
]

"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],

"if curriculum_file == \"None\":\n",
" curriculum_file = None\n",
"\n",
"\n",
"def get_progress():\n",
" if curriculum_file is not None:\n",
" if env._curriculum.measure_type == \"progress\":\n",
" return steps / max_steps\n",
" elif env._curriculum.measure_type == \"reward\":\n",
" return last_reward\n",
" else:\n",
" return None\n",
" else:\n",
" return None\n",
"\n",
"# Create the Tensorflow model graph\n",
"ppo_model = create_agent_model(env, lr=learning_rate,\n",
" h_size=hidden_units, epsilon=epsilon,\n",

" saver.restore(sess, ckpt.model_checkpoint_path)\n",
" else:\n",
" sess.run(init)\n",
" steps = sess.run(ppo_model.global_step)\n",
" steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward]) \n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]\n",
" # Decide and take an action\n",
" new_info = trainer.take_action(info, env, brain_name)\n",
" info = new_info\n",

" trainer.update_model(batch_size, num_epoch)\n",
" if steps % summary_freq == 0 and steps != 0 and train_model:\n",
" # Write training statistics to tensorboard.\n",
" trainer.write_summary(summary_writer, steps)\n",
" trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)\n",
" if len(trainer.stats['cumulative_reward']) > 0:\n",
" mean_reward = np.mean(trainer.stats['cumulative_reward'])\n",
" sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})\n",
" last_reward = sess.run(ppo_model.last_reward)\n",
" # Final save Tensorflow model\n",
" if steps != 0 and train_model:\n",
" save_model(sess, model_path=model_path, steps=steps, saver=saver)\n",

35
python/ppo.py


Options:
--help Show this message.
--max-steps=<n> Maximum number of steps to run environment [default: 1e6].
--curriculum=<file> Curriculum json file for environment [default: None]
--max-steps=<n> Maximum number of steps to run environment [default: 1e6].
--train Whether to train model, or only run inference [default: True].
--train Whether to train model, or only run inference [default: False].
--summary-freq=<n> Frequency at which to save training statistics [default: 10000].
--save-freq=<n> Frequency at which to save model [default: 50000].
--gamma=<n> Reward discount rate [default: 0.99].

env_name = options['<env>']
keep_checkpoints = int(options['--keep-checkpoints'])
worker_id = int(options['--worker-id'])
curriculum_file = str(options['--curriculum'])
if curriculum_file == "None":
curriculum_file = None
# Algorithm-specific parameters for tuning
gamma = float(options['--gamma'])

hidden_units = int(options['--hidden-units'])
batch_size = int(options['--batch-size'])
env = UnityEnvironment(file_name=env_name, worker_id=worker_id)
env = UnityEnvironment(file_name=env_name, worker_id=worker_id, curriculum=curriculum_file)
print(str(env))
brain_name = env.brain_names[0]

init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=keep_checkpoints)
def get_progress():
if curriculum_file is not None:
if env._curriculum.measure_type == "progress":
return steps / max_steps
elif env._curriculum.measure_type == "reward":
return last_reward
else:
return None
else:
return None
with tf.Session() as sess:
# Instantiate model parameters
if load_model:

else:
sess.run(init)
steps = sess.run(ppo_model.global_step)
steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
info = env.reset(train_mode=train_model)[brain_name]
info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
info = env.reset(train_mode=train_model)[brain_name]
info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
# Decide and take an action
new_info = trainer.take_action(info, env, brain_name)
info = new_info

trainer.update_model(batch_size, num_epoch)
if steps % summary_freq == 0 and steps != 0 and train_model:
# Write training statistics to tensorboard.
trainer.write_summary(summary_writer, steps)
trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)
if len(trainer.stats['cumulative_reward']) > 0:
mean_reward = np.mean(trainer.stats['cumulative_reward'])
sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})
last_reward = sess.run(ppo_model.last_reward)
# Final save Tensorflow model
if steps != 0 and train_model:
save_model(sess, model_path=model_path, steps=steps, saver=saver)

9
python/ppo/models.py


class PPOModel(object):
def create_reward_encoder(self):
self.last_reward = tf.Variable(0, name="last_reward", trainable=False, dtype=tf.float32)
self.new_reward = tf.placeholder(shape=[], dtype=tf.float32, name='new_reward')
self.update_reward = tf.assign(self.last_reward, self.new_reward)
def create_visual_encoder(self, o_size_h, o_size_w, bw, h_size, num_streams, activation):
"""
Builds a set of visual (CNN) encoders.

s_size = brain.state_space_size
a_size = brain.action_space_size
self.create_reward_encoder()
hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None
if brain.number_observations > 0:
h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']

:param brain: State-space size
:param h_size: Hidden layer size
"""
self.create_reward_encoder()
hidden_state, hidden_visual, hidden = None, None, None
if brain.number_observations > 0:
h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']

3
python/ppo/trainer.py


for key in self.history_dict:
self.history_dict[key] = empty_local_history(self.history_dict[key])
def write_summary(self, summary_writer, steps):
def write_summary(self, summary_writer, steps, lesson_number):
"""
Saves training statistics to Tensorboard.
:param summary_writer: writer associated with Tensorflow session.

stat_mean = float(np.mean(self.stats[key]))
summary.value.add(tag='Info/{}'.format(key), simple_value=stat_mean)
self.stats[key] = []
summary.value.add(tag='Info/Lesson', simple_value=lesson_number)
summary_writer.add_summary(summary, steps)
summary_writer.flush()

7
python/test_unityagents.py


"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"brainParameters": [{
"stateSize": 3,
"actionSize": 2,

"agents": [1,2],
"states": [1,2,3,4,5,6],
"rewards": [1,2],
"actions": null,
"actions": [1,2,3,4],
"memories": [],
"dones": [false, false]
}'''.encode(),

"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": null,
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, false]
}'''.encode(),

"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": null,
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, true]
}'''.encode(),

1
python/unityagents/__init__.py


from .environment import *
from .brain import *
from .exception import *
from .curriculum import *

3
python/unityagents/brain.py


class BrainInfo:
def __init__(self, observation, state, memory=None, reward=None, agents=None, local_done=None):
def __init__(self, observation, state, memory=None, reward=None, agents=None, local_done=None, action =None):
"""
Describes experience at current step of all agents linked to a brain.
"""

self.rewards = reward
self.local_done = local_done
self.agents = agents
self.actions = action
class BrainParameters:

117
python/unityagents/environment.py


import os
import socket
import subprocess
import struct
from .curriculum import Curriculum
from PIL import Image
from sys import platform

class UnityEnvironment(object):
def __init__(self, file_name, worker_id=0,
base_port=5005):
base_port=5005, curriculum = None):
"""
Starts a new unity environment and establishes a connection with the environment.
Notice: Currently communication between Unity and Python takes place over an open socket without authentication.

atexit.register(self.close)
self.port = base_port + worker_id
self._buffer_size = 120000
self._buffer_size = 12000
self._loaded = False
self._open_socket = False

"The Unity environment took too long to respond. Make sure {} does not need user interaction to launch "
"and that the Academy and the external Brain(s) are attached to objects in the Scene.".format(
str(file_name)))
self._data = {}
self._global_done = None
self._academy_name = p["AcademyName"]
self._brains = {}
self._brain_names = p["brainNames"]
self._external_brain_names = p["externalBrainNames"]
self._external_brain_names = [] if self._external_brain_names is None else self._external_brain_names
self._num_brains = len(self._brain_names)
self._num_external_brains = len(self._external_brain_names)
self._resetParameters = p["resetParameters"]
self._curriculum = Curriculum(curriculum, self._resetParameters)
for i in range(self._num_brains):
self._brains[self._brain_names[i]] = BrainParameters(self._brain_names[i], p["brainParameters"][i])
self._loaded = True
logger.info("\n'{}' started successfully!".format(self._academy_name))
if (self._num_external_brains == 0):
logger.warning(" No External Brains found in the Unity Environment. "
"You will not be able to pass actions to your agent(s).")
self._data = {}
self._global_done = None
self._academy_name = p["AcademyName"]
self._num_brains = len(p["brainParameters"])
self._brains = {}
self._brain_names = p["brainNames"]
self._resetParameters = p["resetParameters"]
for i in range(self._num_brains):
self._brains[self._brain_names[i]] = BrainParameters(self._brain_names[i], p["brainParameters"][i])
self._conn.send(b".")
self._loaded = True
logger.info("\n'{}' started successfully!".format(self._academy_name))
@property
def brains(self):
return self._brains

return self._num_brains
@property
def number_external_brains(self):
return self._num_external_brains
@property
@property
def external_brain_names(self):
return self._external_brain_names
@staticmethod
def _process_pixels(image_bytes=None, bw=False):
"""

for k in self._resetParameters])) + '\n' + \
'\n'.join([str(self._brains[b]) for b in self._brains])
def _recv_bytes(self):
s = self._conn.recv(self._buffer_size)
message_length = struct.unpack("I", bytearray(s[:4]))[0]
s = s[4:]
while len(s) != message_length:
s += self._conn.recv(self._buffer_size)
return s
def _get_state_image(self, bw):
"""
Receives observation from socket, and confirms.

s = self._conn.recv(self._buffer_size)
s = self._recv_bytes()
s = self._process_pixels(image_bytes=s, bw=bw)
self._conn.send(b"RECEIVED")
return s

Receives dictionary of state information from socket, and confirms.
:return:
"""
state = self._conn.recv(self._buffer_size).decode('utf-8')
state = self._recv_bytes().decode('utf-8')
def reset(self, train_mode=True, config=None):
def reset(self, train_mode=True, config=None, progress=None):
config = config or {}
old_lesson = self._curriculum.get_lesson_number()
config = self._curriculum.get_lesson(progress) if config is None else config
if old_lesson != self._curriculum.get_lesson_number():
logger.info("\nLesson changed. Now in Lesson {0} : \t{1}"
.format(self._curriculum.get_lesson_number(),
', '.join([str(x)+' -> '+str(config[x]) for x in config])))
else:
logger.info("\nEpisode Reset. In Lesson {0} : \t{1}"
.format(self._curriculum.get_lesson_number(),
', '.join([str(x)+' -> '+str(config[x]) for x in config])))
if self._loaded:
self._conn.send(b"RESET")
self._conn.recv(self._buffer_size)

rewards = state_dict["rewards"]
dones = state_dict["dones"]
agents = state_dict["agents"]
# actions = state_dict["actions"]
if n_agent > 0 :
actions = np.array(state_dict["actions"]).reshape((n_agent, -1))
else :
actions = np.array([])
observations = []
for o in range(self._brains[b].number_observations):

observations.append(np.array(obs_n))
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, dones)
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, dones, actions)
self._global_done = self._conn.recv(self._buffer_size).decode('utf-8') == 'True'

arr = [float(x) for x in arr]
return arr
def step(self, action, memory=None, value=None):
def step(self, action = None, memory=None, value=None):
"""
Provides the environment with an action, moves the environment dynamics forward accordingly, and returns
observation, state, and reward information to the agent.

:return: A Data structure corresponding to the new state of the environment.
"""
action = {} if action is None else action
if self._num_brains > 1:
if self._num_external_brains == 1:
action = {self._external_brain_names[0]: action}
elif self._num_external_brains > 1:
action = {self._brain_names[0]: action}
raise UnityActionException(
"There are no external brains in the environment, "
"step cannot take an action input")
if self._num_brains > 1:
if self._num_external_brains == 1:
memory = {self._external_brain_names[0]: memory}
elif self._num_external_brains > 1:
memory = {self._brain_names[0]: memory}
raise UnityActionException(
"There are no external brains in the environment, "
"step cannot take a memory input")
if self._num_brains > 1:
if self._num_external_brains == 1:
value = {self._external_brain_names[0]: value}
elif self._num_external_brains > 1:
value = {self._brain_names[0]: value}
raise UnityActionException(
"There are no external brains in the environment, "
"step cannot take a value input")
for b in self._brain_names:
for brain_name in list(action.keys()) + list(memory.keys()) + list(value.keys()):
if brain_name not in self._external_brain_names:
raise UnityActionException(
"The name {0} does not correspond to an external brain "
"in the environment". format(brain_name))
for b in self._external_brain_names:
n_agent = len(self._data[b].agents)
if b not in action:
raise UnityActionException("You need to input an action for the brain {0}".format(b))

3
unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DDecision.cs


{
if (gameObject.GetComponent<Brain>().brainParameters.actionSpaceType == StateType.continuous)
{
return new float[4]{ 0f, 0f, 0f, 0.0f };
return new float[2]{ -10*(state[4]-state[2]), -10*(state[2]+state[4])};
}
else

21
unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs


using System.Collections.Generic;
using UnityEngine;
public class BasicDecision : MonoBehaviour, Decision {
public class BasicDecision : MonoBehaviour, Decision
{
public float[] Decide(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[1]{ 1f };
public float[] Decide (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
}
}
public float[] MakeMemory(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[0];
public float[] MakeMemory (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
}
}
}

6
unity-environment/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


{
float moveX = 0.0f;
float moveY = 0.0f;
if (act[0] == 0f)
if (act[0] == 1f)
if (act[0] == 1f)
if (act[0] == 2f)
if (act[0] == 2f)
if (act[0] == 0f)
{
moveX = 0.0f;
}

242
unity-environment/Assets/ML-Agents/Examples/Tennis/Tennis.unity


tileSize: 256
accuratePlacement: 0
m_NavMeshData: {fileID: 0}
--- !u!114 &2702986
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 1948813725}
--- !u!114 &21374022
--- !u!114 &10967279
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}

m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)
continuousPlayerActions:
- key: 97
index: 0
value: -1
- key: 100
index: 0
value: 1
- key: 0
index: 0
value: 0
- key: 0
index: 0
value: 0
discretePlayerActions:
- key: 97
value: 0
- key: 100
value: 1
- key: 0
value: 0
- key: 32
value: 3
defaultAction: -1
brain: {fileID: 1948813725}
graphModel: {fileID: 0}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
StatePlacholderName: state
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
ObservationPlaceholderName: []
ActionPlaceholderName: action
brain: {fileID: 0}
--- !u!1 &26143720
GameObject:
m_ObjectHideFlags: 0

m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 36699497}
--- !u!114 &86818458
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 459283648}
--- !u!1 &459283646
GameObject:
m_ObjectHideFlags: 0

stateSpaceType: 1
brainType: 2
CoreBrains:
- {fileID: 1364070226}
- {fileID: 86818458}
- {fileID: 1447029077}
- {fileID: 532686528}
instanceID: 10250
--- !u!114 &532686528
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)
m_EditorClassIdentifier:
graphModel: {fileID: 4900000, guid: c917523464309409996933c3b7063a9f, type: 3}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
StatePlacholderName: state
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
ObservationPlaceholderName: []
ActionPlaceholderName: action
brain: {fileID: 459283648}
- {fileID: 1308476791}
- {fileID: 1784104787}
- {fileID: 1864001037}
- {fileID: 1245752352}
instanceID: 19292
--- !u!1 &629009137
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: e51a3fb0b3186433ea84fc1e0549cc91, type: 3}
m_Name:
m_EditorClassIdentifier:
brain: {fileID: 459283648}
brain: {fileID: 1948813725}
observations: []
maxStep: 5000
resetOnDone: 1

m_Interpolate: 0
m_Constraints: 122
m_CollisionDetection: 0
--- !u!114 &668404469
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 0}
--- !u!1 &731033571
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 05eee2a5536934f5684a65f151efd304, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!114 &1245752352
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
graphModel: {fileID: 4900000, guid: c917523464309409996933c3b7063a9f, type: 3}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
StatePlacholderName: state
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
ObservationPlaceholderName: []
ActionPlaceholderName: action
brain: {fileID: 459283648}
--- !u!114 &1247671385
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 1948813725}
--- !u!1 &1261870887
GameObject:
m_ObjectHideFlags: 0

m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1261870887}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!114 &1364070226
--- !u!114 &1308476791
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}

m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
value: 0
value: 1
value: 1
value: 2
defaultAction: -1
defaultAction: 0
--- !u!114 &1447029077
--- !u!114 &1344977993
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}

m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
brain: {fileID: 459283648}
brain: {fileID: 1948813725}
--- !u!1 &1605015604
GameObject:
m_ObjectHideFlags: 0

m_Father: {fileID: 0}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 60, y: 30, z: 0}
--- !u!114 &1784104787
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 459283648}
--- !u!1 &1838949272
GameObject:
m_ObjectHideFlags: 0

m_Father: {fileID: 2097046871}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: -90, y: 0, z: 0}
--- !u!114 &1864001037
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 459283648}
--- !u!1 &1871669621
GameObject:
m_ObjectHideFlags: 0

m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1871669621}
--- !u!114 &1880810220
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
continuousPlayerActions:
- key: 97
index: 0
value: -1
- key: 100
index: 0
value: 1
- key: 0
index: 0
value: 0
- key: 0
index: 0
value: 0
discretePlayerActions:
- key: 97
value: 1
- key: 100
value: 2
- key: 0
value: 0
- key: 32
value: 3
defaultAction: 0
brain: {fileID: 1948813725}
--- !u!1 &1948813723
GameObject:
m_ObjectHideFlags: 0

m_Component:
- component: {fileID: 1948813724}
- component: {fileID: 1948813725}
- component: {fileID: 1948813726}
m_Layer: 0
m_Name: MyBrain
m_TagString: Untagged

-
actionSpaceType: 0
stateSpaceType: 1
brainType: 0
brainType: 1
- {fileID: 21374022}
- {fileID: 668404469}
- {fileID: 2702986}
instanceID: 12574
- {fileID: 1880810220}
- {fileID: 1344977993}
- {fileID: 1247671385}
- {fileID: 10967279}
instanceID: 19482
--- !u!114 &1948813726
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1948813723}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: c3e2acdaec6974f37a5ca11872f71ae8, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &2073469450
GameObject:
m_ObjectHideFlags: 0

8
unity-environment/Assets/ML-Agents/Scripts/Academy.cs


GetBrains(gameObject, brains);
InitializeAcademy();
communicator = new ExternalCommunicator(this);
if (!communicator.CommunicatorHandShake())
{
communicator = null;
}
windowResize = true;
done = true;
acceptingSteps = true;

18
unity-environment/Assets/ML-Agents/Scripts/Agent.cs


if (brain != null)
{
brain.agents.Add(id, gameObject.GetComponent<Agent>());
agentStoredAction = new float[brain.brainParameters.actionSize];
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
agentStoredAction = new float[brain.brainParameters.actionSize];
}
else
{
agentStoredAction = new float[1];
}
memory = new float[brain.brainParameters.memorySize];
}
InitializeAgent();

RemoveBrain();
brain = b;
brain.agents.Add(id, gameObject.GetComponent<Agent>());
agentStoredAction = new float[brain.brainParameters.actionSize];
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
agentStoredAction = new float[brain.brainParameters.actionSize];
}
else
{
agentStoredAction = new float[1];
}
memory = new float[brain.brainParameters.memorySize];
}

35
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


External,
Internal
}
Player,
Heuristic,
External,
Player,
Heuristic,
External,
}
#endif

public enum StateType
{
discrete,
continuous
}
continuous}
;
/** Only need to be modified in the brain's inpector.

}
}
else
{
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{
if ((int)bt >= CoreBrains.Length)
break;
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] = ScriptableObject.CreateInstance("CoreBrain" + bt.ToString());
}
}
}
// If the length of CoreBrains does not match the number of BrainTypes,
// we increase the length of CoreBrains

{
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{
CoreBrains[(int)bt] = ScriptableObject.Instantiate(CoreBrains[(int)bt]);
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] = ScriptableObject.CreateInstance("CoreBrain" + bt.ToString());
}
else
{
CoreBrains[(int)bt] = ScriptableObject.Instantiate(CoreBrains[(int)bt]);
}
}
instanceID = gameObject.GetInstanceID();
}

foreach (KeyValuePair<int, Agent> idAgent in agents)
{
List<float> states = idAgent.Value.CollectState();
if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous ))
if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous))
if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete ))
if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete))
{
throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count));

7
unity-environment/Assets/ML-Agents/Scripts/Communicator.cs


public Dictionary<string, float> resetParameters;
/**< \brief The default reset parameters are sent via socket*/
public List<string> brainNames;
/**< \brief A list of the External brains names sent via socket*/
/**< \brief A list of the all the brains names sent via socket*/
public List<string> externalBrainNames;
/**< \brief A list of the External brains names sent via socket*/
}
public enum ExternalCommand

/// Implement this method to allow brains to subscribe to the
/// decisions made outside of Unity
void SubscribeBrain(Brain brain);
/// First contact between Communicator and external process
bool CommunicatorHandShake();
/// Implement this method to initialize the communicator
void InitializeCommunicator();

32
unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs


{
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = new ExternalCommunicator(brain.gameObject.transform.parent.gameObject.GetComponent<Academy>());
brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator = coord;
coord.SubscribeBrain(brain);
throw new UnityAgentsException(string.Format("The brain {0} was set to" +
" External mode" +
" but Unity was unable to read the" +
" arguments passed at launch.", brain.gameObject.name));
coord = null;
else
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
/// Uses the communicator to retrieve the actions, memories and values and

brain.SendActions(coord.GetDecidedAction(brain.gameObject.name));
brain.SendMemories(coord.GetMemories(brain.gameObject.name));
brain.SendValues(coord.GetValues(brain.gameObject.name));
if (coord != null)
{
brain.SendActions(coord.GetDecidedAction(brain.gameObject.name));
brain.SendMemories(coord.GetMemories(brain.gameObject.name));
brain.SendValues(coord.GetValues(brain.gameObject.name));
}
}
/// Uses the communicator to send the states, observations, rewards and

coord.giveBrainInfo(brain);
if (coord != null)
{
coord.giveBrainInfo(brain);
}
}
/// Nothing needs to appear in the inspector

18
unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs


public Brain brain;
/**< Reference to the brain that uses this CoreBrainHeuristic */
ExternalCommunicator coord;
public Decision decision;
/**< Reference to the Decision component used to decide the actions */

public void InitializeCoreBrain()
{
decision = brain.gameObject.GetComponent<Decision>();
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = null;
}
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
}
/// Uses the Decision Component to decide that action to take

/// Nothing needs to be implemented, the states are collected in DecideAction
public void SendState()
{
if (coord!=null)
{
coord.giveBrainInfo(brain);
}
}
/// Displays an error if no decision component is attached to the brain

EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
if (brain.gameObject.GetComponent<Decision>() == null)
{
EditorGUILayout.HelpBox("You need to add a 'Decision' component to this gameObject", MessageType.Error);

51
unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs


public enum tensorType
{
Integer,
FloatingPoint
};
FloatingPoint}
;
public string name;
public tensorType valueType;

}
ExternalCommunicator coord;
/// Modify only in inspector : Reference to the Graph asset
public TextAsset graphModel;

}
#endif
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = null;
}
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
if (graphModel != null)
{

currentBatchSize = brain.agents.Count;
if (currentBatchSize == 0)
{
if (coord != null)
{
coord.giveBrainInfo(brain);
}
return;
}

i++;
}
}
#endif
if (coord != null)
{
coord.giveBrainInfo(brain);
}
#endif
}

// Create the state tensor
if (hasState)
{
runner.AddInput(graph[graphScope + StatePlacholderName][0], inputState);
if (brain.brainParameters.stateSpaceType == StateType.discrete)
{
int[,] discreteInputState = new int[currentBatchSize, 1];
for (int i = 0; i < currentBatchSize; i++)
{
discreteInputState[i, 0] = (int)inputState[i, 0];
}
runner.AddInput(graph[graphScope + StatePlacholderName][0], discreteInputState);
}
else
{
runner.AddInput(graph[graphScope + StatePlacholderName][0], inputState);
}
}
// Create the observation tensors

}
if (hasRecurrent)
{
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
}
TFTensor[] networkOutput;
try
{

{
Dictionary<int, float[]> new_memories = new Dictionary<int, float[]>();
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
float[,] recurrent_tensor = networkOutput[1].GetValue() as float[,];
int i = 0;

19
unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs


public int index;
public float value;
}
ExternalCommunicator coord;
[SerializeField]
/// Contains the mapping from input to continuous actions

private DiscretePlayerAction[] discretePlayerActions;
[SerializeField]
private int defaultAction = -1;
private int defaultAction = 0;
/// Reference to the brain that uses this CoreBrainPlayer
public Brain brain;

/// Nothing to implement
public void InitializeCoreBrain()
{
if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator == null)
{
coord = null;
}
else if (brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator is ExternalCommunicator)
{
coord = (ExternalCommunicator)brain.gameObject.transform.parent.gameObject.GetComponent<Academy>().communicator;
coord.SubscribeBrain(brain);
}
}
/// Uses the continuous inputs or dicrete inputs of the player to

/// decisions
public void SendState()
{
if (coord!=null)
{
coord.giveBrainInfo(brain);
}
}
/// Displays continuous or discrete input mapping in the inspector

102
unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs


private class StepMessage
{
public string brain_name { get; set; }
public List<bool> dones { get; set; }
}

public Dictionary<string, List<float>> value { get; set; }
}

public Dictionary<string, float> parameters { get; set; }
public bool train_model { get; set; }
}

hasSentState[brain.gameObject.name] = false;
}
/// Contains the logic for the initializtation of the socket.
public void InitializeCommunicator()
{
public bool CommunicatorHandShake(){
try
{
ReadArgs();

throw new UnityAgentsException("One of the brains was set isExternal" +
" but Unity was unable to read the" +
" arguments passed at launch");
return false;
return true;
}
/// Contains the logic for the initializtation of the socket.
public void InitializeCommunicator()
{
messageHolder = new byte[messageLength];
// Create a TCP/IP socket.

AcademyParameters accParamerters = new AcademyParameters();
accParamerters.brainParameters = new List<BrainParameters>();
accParamerters.brainNames = new List<string>();
accParamerters.externalBrainNames = new List<string>();
if (b.brainType == BrainType.External)
{
accParamerters.externalBrainNames.Add(b.gameObject.name);
}
}
accParamerters.AcademyName = academy.gameObject.name;
accParamerters.resetParameters = academy.resetParameters;

}
/// Sends Academy parameters to external agent
private void SendParameters(AcademyParameters envParams)
private void SendParameters(AcademyParameters envParams)
Receive();
}
/// Receives messages from external agent

return bytes;
}
private byte[] AppendLength(byte[] input){
byte[] newArray = new byte[input.Length + 4];
input.CopyTo(newArray, 4);
System.BitConverter.GetBytes(input.Length).CopyTo(newArray, 0);
return newArray;
}
/// Collects the information from the brains and sends it accross the socket
public void giveBrainInfo(Brain brain)
{

List<float> concatenatedRewards = new List<float>();
List<float> concatenatedMemories = new List<float>();
List<bool> concatenatedDones = new List<bool>();
List<float> concatenatedActions = new List<float>();
Dictionary<int, float[]> collectedActions = brain.CollectActions();
foreach (int id in current_agents[brainName])
{

concatenatedDones.Add(collectedDones[id]);
concatenatedActions = concatenatedActions.Concat(collectedActions[id].ToList()).ToList();
}
StepMessage message = new StepMessage()
{

rewards = concatenatedRewards,
//actions = actionDict,
actions = concatenatedActions,
sender.Send(Encoding.ASCII.GetBytes(envMessage));
sender.Send(AppendLength(Encoding.ASCII.GetBytes(envMessage)));
Receive();
int i = 0;
foreach (resolution res in brain.brainParameters.cameraResolutions)

sender.Send(TexToByteArray(brain.ObservationToTex(collectedObservations[id][i], res.width, res.height)));
sender.Send(AppendLength(TexToByteArray(brain.ObservationToTex(collectedObservations[id][i], res.width, res.height))));
Receive();
}
i++;

foreach (Brain brain in brains)
{
string brainName = brain.gameObject.name;
Dictionary<int, float[]> actionDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
if (brain.brainType == BrainType.External)
if (brain.brainParameters.actionSpaceType == StateType.continuous)
string brainName = brain.gameObject.name;
Dictionary<int, float[]> actionDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i * brain.brainParameters.actionSize, brain.brainParameters.actionSize).ToArray());
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i * brain.brainParameters.actionSize, brain.brainParameters.actionSize).ToArray());
}
else
{
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i, 1).ToArray());
}
else
storedActions[brainName] = actionDict;
Dictionary<int, float[]> memoryDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i, 1).ToArray());
memoryDict.Add(current_agents[brainName][i],
agentMessage.memory[brainName].GetRange(i * brain.brainParameters.memorySize, brain.brainParameters.memorySize).ToArray());
}
storedActions[brainName] = actionDict;
storedMemories[brainName] = memoryDict;
Dictionary<int, float[]> memoryDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
memoryDict.Add(current_agents[brainName][i],
agentMessage.memory[brainName].GetRange(i * brain.brainParameters.memorySize, brain.brainParameters.memorySize).ToArray());
Dictionary<int, float> valueDict = new Dictionary<int, float>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
valueDict.Add(current_agents[brainName][i],
agentMessage.value[brainName][i]);
}
storedValues[brainName] = valueDict;
storedMemories[brainName] = memoryDict;
Dictionary<int, float> valueDict = new Dictionary<int, float>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
valueDict.Add(current_agents[brainName][i],
agentMessage.value[brainName][i]);
}
storedValues[brainName] = valueDict;
}
}

19
unity-environment/Assets/ML-Agents/Template/Scripts/TemplateDecision.cs


using System.Collections.Generic;
using UnityEngine;
public class TemplateDecision : MonoBehaviour, Decision {
public class TemplateDecision : MonoBehaviour, Decision
{
public float[] Decide (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
public float[] Decide(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[0];
}
}
public float[] MakeMemory (List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return default(float[]);
public float[] MakeMemory(List<float> state, List<Camera> observation, float reward, bool done, float[] memory)
{
return new float[0];
}
}
}

12
python/curriculum.json


{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : true,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20, 15],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}

71
python/unityagents/curriculum.py


import json
import numpy as np
from .exception import UnityEnvironmentException
class Curriculum(object):
def __init__(self, location, default_reset_parameters):
self.lesson_number = 0
self.lesson_length = 0
self.measure_type = None
if location is None:
self.data = None
else:
try:
with open(location) as data_file:
self.data = json.load(data_file)
except FileNotFoundError:
raise UnityEnvironmentException(
"The file {0} could not be found.".format(location))
except UnicodeDecodeError:
raise UnityEnvironmentException("There was an error decoding {}".format(location))
self.smoothing_value = 0
for key in ['parameters', 'measure', 'thresholds',
'min_lesson_length', 'signal_smoothing']:
if key not in self.data:
raise UnityEnvironmentException("{0} does not contain a "
"{1} field.".format(location, key))
parameters = self.data['parameters']
self.measure_type = self.data['measure']
self.max_lesson_number = len(self.data['thresholds'])
for key in parameters:
if key not in default_reset_parameters:
raise UnityEnvironmentException(
"The parameter {0} in Curriculum {1} is not present in "
"the Environment".format(key, location))
for key in parameters:
if len(parameters[key]) != self.max_lesson_number + 1:
raise UnityEnvironmentException(
"The parameter {0} in Curriculum {1} must have {2} values "
"but {3} were found".format(key, location,
self.max_lesson_number + 1, len(parameters[key])))
@property
def measure(self):
return self.measure_type
def get_lesson_number(self):
return self.lesson_number
def set_lesson_number(self, value):
self.lesson_length = 0
self.lesson_number = max(0, min(value, self.max_lesson_number))
def get_lesson(self, progress):
if self.data is None or progress is None:
return {}
if self.data["signal_smoothing"]:
progress = self.smoothing_value * 0.9 + 0.1 * progress
self.smoothing_value = progress
self.lesson_length += 1
if self.lesson_number < self.max_lesson_number:
if ((progress > self.data['thresholds'][self.lesson_number]) and
(self.lesson_length > self.data['min_lesson_length'])):
self.lesson_length = 0
self.lesson_number += 1
config = {}
parameters = self.data["parameters"]
for key in parameters:
config[key] = parameters[key][self.lesson_number]
return config

380
unity-environment/Assets/ML-Agents/Scripts/Monitor.cs


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
using Newtonsoft.Json;
using System.Linq;
/** The type of monitor the information must be displayed in.
* <slider> corresponds to a slingle rectangle which width is given
* by a float between -1 and 1. (green is positive, red is negative)
* <hist> corresponds to n vertical sliders.
* <text> is a text field.
* <bar> is a rectangle of fixed length to represent the proportions
* of a list of floats.
*/
public enum MonitorType
{
slider,
hist,
text,
bar
}
/** Monitor is used to display information. Use the log function to add
* information to your monitor.
*/
public class Monitor : MonoBehaviour
{
static bool isInstanciated;
static GameObject canvas;
private struct DisplayValue
{
public float time;
public object value;
public MonitorType monitorDisplayType;
}
static Dictionary<Transform, Dictionary<string, DisplayValue>> displayTransformValues;
static private Color[] barColors;
[HideInInspector]
static public float verticalOffset = 3f;
/**< \brief This float represents how high above the target the monitors will be. */
static GUIStyle keyStyle;
static GUIStyle valueStyle;
static GUIStyle greenStyle;
static GUIStyle redStyle;
static GUIStyle[] colorStyle;
static bool initialized;
/** Use the Monitor.Log static function to attach information to a transform.
* If displayType is <text>, value can be any object.
* If sidplayType is <slider>, value must be a float.
* If sidplayType is <hist>, value must be a List or Array of floats.
* If sidplayType is <bar>, value must be a list or Array of positive floats.
* Note that <slider> and <hist> caps values between -1 and 1.
* @param key The name of the information you wish to Log.
* @param value The value you want to display.
* @param displayType The type of display.
* @param target The transform you want to attach the information to.
*/
public static void Log(
string key,
object value,
MonitorType displayType = MonitorType.text,
Transform target = null)
{
if (!isInstanciated)
{
InstanciateCanvas();
isInstanciated = true;
}
if (target == null)
{
target = canvas.transform;
}
if (!displayTransformValues.Keys.Contains(target))
{
displayTransformValues[target] = new Dictionary<string, DisplayValue>();
}
Dictionary<string, DisplayValue> displayValues = displayTransformValues[target];
if (value == null)
{
RemoveValue(target, key);
return;
}
if (!displayValues.ContainsKey(key))
{
DisplayValue dv = new DisplayValue();
dv.time = Time.timeSinceLevelLoad;
dv.value = value;
dv.monitorDisplayType = displayType;
displayValues[key] = dv;
while (displayValues.Count > 20)
{
string max = displayValues.Aggregate((l, r) => l.Value.time < r.Value.time ? l : r).Key;
RemoveValue(target, max);
}
}
else
{
DisplayValue dv = displayValues[key];
dv.value = value;
displayValues[key] = dv;
}
}
/** Remove a value from a monitor
* @param target The transform to which the information is attached
* @param key The key of the information you want to remove
*/
public static void RemoveValue(Transform target, string key)
{
if (target == null)
{
target = canvas.transform;
}
if (displayTransformValues.Keys.Contains(target))
{
if (displayTransformValues[target].ContainsKey(key))
{
displayTransformValues[target].Remove(key);
if (displayTransformValues[target].Keys.Count == 0)
{
displayTransformValues.Remove(target);
}
}
}
}
/** Remove all information from a monitor
* @param target The transform to which the information is attached
*/
public static void RemoveAllValues(Transform target)
{
if (target == null)
{
target = canvas.transform;
}
if (displayTransformValues.Keys.Contains(target))
{
displayTransformValues.Remove(target);
}
}
/** Use SetActive to enable or disable the Monitor via script
* @param active Set the Monitor's status to the value of active
*/
public static void SetActive(bool active){
if (!isInstanciated)
{
InstanciateCanvas();
isInstanciated = true;
}
canvas.SetActive(active);
}
private static void InstanciateCanvas()
{
canvas = GameObject.Find("AgentMonitorCanvas");
if (canvas == null)
{
canvas = new GameObject();
canvas.name = "AgentMonitorCanvas";
canvas.AddComponent<Monitor>();
}
displayTransformValues = new Dictionary<Transform, Dictionary< string , DisplayValue>>();
}
private float[] ToFloatArray(object input)
{
try
{
return JsonConvert.DeserializeObject<float[]>(
JsonConvert.SerializeObject(input, Formatting.None));
}
catch
{
}
try
{
return new float[1]
{JsonConvert.DeserializeObject<float>(
JsonConvert.SerializeObject(input, Formatting.None))
};
}
catch
{
}
return new float[0];
}
void OnGUI()
{
if (!initialized)
{
Initialize();
initialized = true;
}
var toIterate = displayTransformValues.Keys.ToList();
foreach (Transform target in toIterate)
{
if (target == null)
{
displayTransformValues.Remove(target);
continue;
}
float widthScaler = (Screen.width / 1000f);
float keyPixelWidth = 100 * widthScaler;
float keyPixelHeight = 20 * widthScaler;
float paddingwidth = 10 * widthScaler;
float scale = 1f;
Vector2 origin = new Vector3(0, Screen.height);
if (!(target == canvas.transform))
{
Vector3 cam2obj = target.position - Camera.main.transform.position;
scale = Mathf.Min(1, 20f / (Vector3.Dot(cam2obj, Camera.main.transform.forward)));
Vector3 worldPosition = Camera.main.WorldToScreenPoint(target.position + new Vector3(0, verticalOffset, 0));
origin = new Vector3(worldPosition.x - keyPixelWidth * scale, Screen.height - worldPosition.y);
}
keyPixelWidth *= scale;
keyPixelHeight *= scale;
paddingwidth *= scale;
keyStyle.fontSize = (int)(keyPixelHeight * 0.8f);
if (keyStyle.fontSize < 2)
{
continue;
}
Dictionary<string, DisplayValue> displayValues = displayTransformValues[target];
int index = 0;
foreach (string key in displayValues.Keys.OrderBy(x => -displayValues[x].time))
{
keyStyle.alignment = TextAnchor.MiddleRight;
GUI.Label(new Rect(origin.x, origin.y - (index + 1) * keyPixelHeight, keyPixelWidth, keyPixelHeight), key, keyStyle);
if (displayValues[key].monitorDisplayType == MonitorType.text)
{
valueStyle.alignment = TextAnchor.MiddleLeft;
GUI.Label(new Rect(
origin.x + paddingwidth + keyPixelWidth,
origin.y - (index + 1) * keyPixelHeight,
keyPixelWidth, keyPixelHeight),
JsonConvert.SerializeObject(displayValues[key].value, Formatting.None), valueStyle);
}
else if (displayValues[key].monitorDisplayType == MonitorType.slider)
{
float sliderValue = 0f;
if (displayValues[key].value.GetType() == typeof(float))
{
sliderValue = (float)displayValues[key].value;
}
else
{
Debug.LogError(string.Format("The value for {0} could not be displayed as " +
"a slider because it is not a number.", key));
}
sliderValue = Mathf.Min(1f, sliderValue);
GUIStyle s = greenStyle;
if (sliderValue < 0)
{
sliderValue = Mathf.Min(1f, -sliderValue);
s = redStyle;
}
GUI.Box(new Rect(
origin.x + paddingwidth + keyPixelWidth,
origin.y - (index + 0.9f) * keyPixelHeight,
keyPixelWidth * sliderValue, keyPixelHeight * 0.8f),
GUIContent.none, s);
}
else if (displayValues[key].monitorDisplayType == MonitorType.hist)
{
float histWidth = 0.15f;
float[] vals = ToFloatArray(displayValues[key].value);
for (int i = 0; i < vals.Length; i++)
{
float value = Mathf.Min(vals[i], 1);
GUIStyle s = greenStyle;
if (value < 0)
{
value = Mathf.Min(1f, -value);
s = redStyle;
}
GUI.Box(new Rect(
origin.x + paddingwidth + keyPixelWidth + (keyPixelWidth * histWidth + paddingwidth / 2) * i,
origin.y - (index + 0.1f) * keyPixelHeight,
keyPixelWidth * histWidth, -keyPixelHeight * value),
GUIContent.none, s);
}
}
else if (displayValues[key].monitorDisplayType == MonitorType.bar)
{
float[] vals = ToFloatArray(displayValues[key].value);
float valsSum = 0f;
float valsCum = 0f;
foreach (float f in vals)
{
valsSum += Mathf.Max(f, 0);
}
if (valsSum == 0)
{
Debug.LogError(string.Format("The Monitor value for key {0} must be "
+ "a list or array of positive values and cannot be empty.", key));
}
else
{
for (int i = 0; i < vals.Length; i++)
{
float value = Mathf.Max(vals[i], 0) / valsSum;
GUI.Box(new Rect(
origin.x + paddingwidth + keyPixelWidth + keyPixelWidth * valsCum,
origin.y - (index + 0.9f) * keyPixelHeight,
keyPixelWidth * value, keyPixelHeight * 0.8f),
GUIContent.none, colorStyle[i % colorStyle.Length]);
valsCum += value;
}
}
}
index++;
}
}
}
private void Initialize()
{
keyStyle = GUI.skin.label;
valueStyle = GUI.skin.label;
valueStyle.clipping = TextClipping.Overflow;
valueStyle.wordWrap = false;
barColors = new Color[6]{ Color.magenta, Color.blue, Color.cyan, Color.green, Color.yellow, Color.red };
colorStyle = new GUIStyle[barColors.Length];
for (int i = 0; i < barColors.Length; i++)
{
Texture2D texture = new Texture2D(1, 1, TextureFormat.ARGB32, false);
texture.SetPixel(0, 0, barColors[i]);
texture.Apply();
GUIStyle staticRectStyle = new GUIStyle();
staticRectStyle.normal.background = texture;
colorStyle[i] = staticRectStyle;
}
greenStyle = colorStyle[3];
redStyle = colorStyle[5];
}
}

12
unity-environment/Assets/ML-Agents/Scripts/Monitor.cs.meta


fileFormatVersion: 2
guid: e59a31a1cc2f5464d9a61bef0bc9a53b
timeCreated: 1508031727
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

12
unity-environment/Assets/ML-Agents/Scripts/AgentMonitor.cs.meta


fileFormatVersion: 2
guid: e040eaa8759024abbbb14994dc4c55ee
timeCreated: 1502056030
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

9
unity-environment/Assets/ML-Agents/Resources.meta


fileFormatVersion: 2
guid: 10f3eff160a3b46fcb86042594151eae
folderAsset: yes
timeCreated: 1501551323
licenseType: Free
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant: