
Python Testing & Image Inference Improvements (#353)

* Reorganized Python tests into a separate folder, and made individual test files for the different (sub)modules (see the pytest sketch after this list).
* Added tests for trainer_controller, PPO, and behavioral cloning. More to come soon.
* Fixed minor bugs discovered while writing the tests.
* Reworked GridWorld to reset much faster.
* Cleaned up ObservationToTex and reworked GetObservationMatrixList to be 3x faster.
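The reorganized tests live under python/tests/, and each module can also be run on its own with pytest. A minimal sketch of running them, assuming pytest is installed and the working directory is python/ (the paths follow the new layout in this PR):

    import pytest

    if __name__ == '__main__':
        # Run the reorganized test modules added/moved in this PR.
        pytest.main(["tests/test_unityagents.py",
                     "tests/test_unitytrainers.py",
                     "tests/test_ppo.py",
                     "tests/test_bc.py"])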
/develop-generalizationTraining-TrainerController
GitHub, 7 years ago
Current commit
e11dae1d
25 files changed, with 888 additions and 595 deletions
  1. python/unityagents/curriculum.py (11)
  2. python/unityagents/environment.py (1)
  3. python/unitytrainers/bc/trainer.py (2)
  4. python/unitytrainers/models.py (28)
  5. python/unitytrainers/ppo/trainer.py (26)
  6. python/unitytrainers/trainer_controller.py (40)
  7. unity-environment/Assets/ML-Agents/Examples/GridWorld/GridWorld.unity (152)
  8. unity-environment/Assets/ML-Agents/Examples/GridWorld/Resources/agent.prefab (22)
  9. unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAcademy.cs (80)
  10. unity-environment/Assets/ML-Agents/Scripts/Brain.cs (59)
  11. python/unitytrainers/__init__.py (7)
  12. python/unitytrainers/bc/__init__.py (2)
  13. python/unitytrainers/bc/models.py (37)
  14. python/unitytrainers/ppo/__init__.py (2)
  15. python/tests/__init__.py (2)
  16. python/tests/test_bc.py (103)
  17. python/tests/test_ppo.py (105)
  18. python/tests/test_unityagents.py (215)
  19. python/tests/test_unitytrainers.py (205)
  20. python/unitytrainers/bc/bc_models.py (37)
  21. python/test_unityagents.py (347)
  22. /python/unitytrainers/bc/trainer.py (0)
  23. /python/unitytrainers/ppo/models.py (0)
  24. /python/unitytrainers/ppo/trainer.py (0)

11
python/unityagents/curriculum.py


import json
import numpy as np
class Curriculum(object):
def __init__(self, location, default_reset_parameters):

:param progress: Measure of progress (either reward or percentage steps completed).
"""
if self.data is None or progress is None:
return
return
if self.data["signal_smoothing"]:
progress = self.smoothing_value * 0.25 + 0.75 * progress
self.smoothing_value = progress

config[key] = parameters[key][self.lesson_number]
logger.info("\nLesson changed. Now in Lesson {0} : \t{1}"
.format(self.lesson_number,
', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))
', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))
def get_config(self, lesson = None):
def get_config(self, lesson=None):
"""
Returns reset parameters which correspond to the lesson.
:param lesson: The lesson you want to get the config of. If None, the current lesson is returned.
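The signal_smoothing branch in the hunk above blends the new progress measure with the previous smoothed value using fixed 0.25/0.75 weights. A small standalone sketch of that update (the function name and example numbers are illustrative; the weights match the diff):

    def smooth_progress(previous_smoothed, progress):
        # Exponential smoothing as applied when data["signal_smoothing"] is true.
        return previous_smoothed * 0.25 + 0.75 * progress

    # e.g. a previous smoothed value of 0.4 and a new reward-based progress of 0.8
    # give 0.4 * 0.25 + 0.75 * 0.8 = 0.7, which becomes the next smoothing_value.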

1
python/unityagents/environment.py


from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from .curriculum import Curriculum
from datetime import datetime
from PIL import Image
from sys import platform

2
python/unitytrainers/bc/trainer.py


import numpy as np
import tensorflow as tf
from unitytrainers.bc.bc_models import BehavioralCloningModel
from unitytrainers.bc.models import BehavioralCloningModel
from unitytrainers.buffer import Buffer
from unitytrainers.trainer import UnityTrainerException, Trainer

28
python/unitytrainers/models.py


hidden = tf.layers.dense(hidden, h_size, use_bias=False, activation=activation)
return hidden
def create_new_obs(self, num_streams, h_size, num_layers):
def create_new_obs(self, num_streams, h_size, num_layers, activation_fn):
if brain.action_space_type == "continuous":
activation_fn = tf.nn.tanh
else:
activation_fn = tf.nn.elu
self.observation_in = []
for i in range(brain.number_observations):

def create_dc_actor_critic(self, h_size, num_layers):
num_streams = 1
hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
hidden_streams = self.create_new_obs(num_streams, h_size, num_layers, tf.nn.elu)
self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)
self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)
self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32, name="action_input")
self.probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
self.old_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
self.probs = tf.reduce_sum(self.all_probs * self.selected_actions, axis=1)
self.old_probs = tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1)
hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
hidden_streams = self.create_new_obs(num_streams, h_size, num_layers, tf.nn.tanh)
if self.use_recurrent:
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')

self.output = tf.identity(self.output, name='action')
a = tf.exp(-1 * tf.pow(tf.stop_gradient(self.output) - self.mu, 2) / (2 * self.sigma_sq))
b = 1 / tf.sqrt(2 * self.sigma_sq * np.pi)
self.probs = tf.multiply(a, b, name="action_probs")
self.all_probs = tf.multiply(a, b, name="action_probs")
self.probs = tf.identity(self.all_probs)
self.entropy = tf.reduce_sum(0.5 * tf.log(2 * np.pi * np.e * self.sigma_sq))
self.value = tf.layers.dense(hidden_value, 1, activation=None)
self.value = tf.identity(self.value, name="value_estimate")
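The create_new_obs change above makes the encoder's activation an explicit argument instead of deriving it inside the helper: the discrete actor-critic passes tf.nn.elu and the continuous one passes tf.nn.tanh. A minimal sketch of that call pattern (pick_activation is a made-up helper for illustration):

    import tensorflow as tf

    def pick_activation(action_space_type):
        # Mirrors the choice in the diff: tanh for continuous control, elu for discrete.
        return tf.nn.tanh if action_space_type == "continuous" else tf.nn.elu

    # Callers then pass the activation explicitly, e.g.
    #   hidden_streams = self.create_new_obs(num_streams, h_size, num_layers,
    #                                        pick_activation(brain.action_space_type))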

26
python/unitytrainers/ppo/trainer.py


import tensorflow as tf
from unitytrainers.buffer import Buffer
from unitytrainers.ppo.ppo_models import PPOModel
from unitytrainers.ppo.models import PPOModel
from unitytrainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("unityagents")

with tf.variable_scope(self.variable_scope):
tf.set_random_seed(seed)
self.model = PPOModel(env.brains[brain_name],
lr=float(trainer_parameters['learning_rate']),
h_size=int(trainer_parameters['hidden_units']),
epsilon=float(trainer_parameters['epsilon']),
beta=float(trainer_parameters['beta']),
max_step=float(trainer_parameters['max_steps']),
normalize=trainer_parameters['normalize'],
use_recurrent=trainer_parameters['use_recurrent'],
num_layers=int(trainer_parameters['num_layers']),
m_size=self.m_size)
lr=float(trainer_parameters['learning_rate']),
h_size=int(trainer_parameters['hidden_units']),
epsilon=float(trainer_parameters['epsilon']),
beta=float(trainer_parameters['beta']),
max_step=float(trainer_parameters['max_steps']),
normalize=trainer_parameters['normalize'],
use_recurrent=trainer_parameters['use_recurrent'],
num_layers=int(trainer_parameters['num_layers']),
m_size=self.m_size)
stats = {'cumulative_reward': [], 'episode_length': [], 'value_estimate': [],
'entropy': [], 'value_loss': [], 'policy_loss': [], 'learning_rate': []}

steps = self.get_step
info = info[self.brain_name]
feed_dict = {self.model.batch_size: len(info.states), self.model.sequence_length: 1}
run_list = [self.model.output, self.model.probs, self.model.value, self.model.entropy,
run_list = [self.model.output, self.model.all_probs, self.model.value, self.model.entropy,
self.model.learning_rate]
if self.is_continuous:
run_list.append(self.model.epsilon)

epsi = 0
if self.is_continuous:
epsi = take_action_outputs[self.model.epsilon]
a_dist = take_action_outputs[self.model.probs]
a_dist = take_action_outputs[self.model.all_probs]
value = take_action_outputs[self.model.value]
for agent_id in info.agents:
if agent_id in next_info.agents:

for l in range(len(info.agents)):
agent_actions = self.training_buffer[info.agents[l]]['actions']
if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
and len(agent_actions) > 0):
and len(agent_actions) > 0):
if info.local_done[l] and not info.max_reached[l]:
value_next = 0.0
else:

40
python/unitytrainers/trainer_controller.py


import tensorflow as tf
import yaml
from datetime import datetime
from unitytrainers.ppo.ppo_trainer import PPOTrainer
from unitytrainers.bc.bc_trainer import BehavioralCloningTrainer
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unityagents import UnityEnvironment, UnityEnvironmentException

self.env_name = (env_name.strip().replace('.app', '').replace('.exe', '').replace('.x86_64', '')
.replace('.x86', ''))
self.env_name = os.path.basename(os.path.normpath(self.env_name))
self.logger.info(str(self.env))
progress = 0
progress = 0
progress = 0
for brain_name in self.env.external_brain_names:
progress += self.trainers[brain_name].get_last_reward
return progress

def _initialize_trainers(self, trainer_config, sess):
trainer_parameters_dict = {}
self.trainers = {}
for brain_name in self.env.external_brain_names:
trainer_parameters = trainer_config['default'].copy()
if len(self.env.external_brain_names) > 1:

for brain_name in self.env.external_brain_names:
if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
trainer_parameters_dict[brain_name],
self.train_model, self.seed)
trainer_parameters_dict[brain_name],
self.train_model, self.seed)
elif trainer_parameters_dict[brain_name]['trainer'] == "ppo":
self.trainers[brain_name] = PPOTrainer(sess, self.env, brain_name, trainer_parameters_dict[brain_name],
self.train_model, self.seed)

def start_learning(self):
self.env.curriculum.set_lesson_number(self.lesson)
self.logger.info(str(self.env))
tf.reset_default_graph()
@staticmethod
def _load_config(config_filename):
with open("trainer_config.yaml") as data_file:
with open(config_filename) as data_file:
return trainer_config
except IOError:
raise UnityEnvironmentException("The file {} could not be found. Will use default Hyperparameters"
.format("trainer_config.yaml"))

@staticmethod
def _create_model_path(model_path):
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
if not os.path.exists(model_path):
os.makedirs(model_path)
.format(self.model_path))
.format(model_path))
def start_learning(self):
self.env.curriculum.set_lesson_number(self.lesson)
trainer_config = self._load_config("trainer_config.yaml")
self._create_model_path(self.model_path)
tf.reset_default_graph()
with tf.Session() as sess:
self._initialize_trainers(trainer_config, sess)
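_load_config now opens the filename it is given rather than a hard-coded "trainer_config.yaml". A minimal standalone sketch of the corrected behaviour (simplified error handling; yaml.safe_load is used here to keep the example self-contained, while the PR itself keeps yaml.load and UnityEnvironmentException):

    import yaml

    def load_config(config_filename):
        # Load trainer hyperparameters from the path passed by the caller.
        try:
            with open(config_filename) as data_file:
                return yaml.safe_load(data_file)
        except IOError:
            raise Exception("The file {} could not be found. "
                            "Will use default Hyperparameters".format(config_filename))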

152
unity-environment/Assets/ML-Agents/Examples/GridWorld/GridWorld.unity


--- !u!104 &2
RenderSettings:
m_ObjectHideFlags: 0
serializedVersion: 8
serializedVersion: 9
m_Fog: 0
m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
m_FogMode: 3

m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.43668893, g: 0.4842832, b: 0.56452656, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_EnableBakedLightmaps: 1
m_EnableRealtimeLightmaps: 1
m_LightmapEditorSettings:
serializedVersion: 9
serializedVersion: 10
m_TextureWidth: 1024
m_TextureHeight: 1024
m_AtlasSize: 1024
m_AO: 0
m_AOMaxDistance: 1
m_CompAOExponent: 1

m_EditorClassIdentifier:
maxSteps: 0
frameToSkip: 0
waitTime: 0.2
waitTime: 0
isInference: 0
trainingConfiguration:
width: 84

episodeCount: 0
currentStep: 0
actorObjs: []
players: []
players:
agentPref: {fileID: 1657514749044530, guid: 628960e910f094ad1909ecc88cc8016d, type: 2}
goalPref: {fileID: 1508142483324970, guid: 1ec4e4e96e7514d45b7ebc3ba5a9a481, type: 2}
pitPref: {fileID: 1811317785436014, guid: d13ee2db77b3a4dcc8664d2fe2a0f219, type: 2}
--- !u!4 &2047664
Transform:
m_ObjectHideFlags: 0

m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 0
m_IsActive: 1
--- !u!124 &231883444
Behaviour:
m_ObjectHideFlags: 0

m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0, y: 0}
--- !u!114 &467853281
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
brain: {fileID: 1535917239}
--- !u!1 &486401523
GameObject:
m_ObjectHideFlags: 0

m_Component:
- component: {fileID: 489340224}
- component: {fileID: 489340228}
- component: {fileID: 489340227}
- component: {fileID: 489340226}
m_Layer: 0
m_Name: agentCam
m_TagString: Untagged

m_Father: {fileID: 0}
m_RootOrder: 5
m_LocalEulerAnglesHint: {x: 90, y: 0, z: 0}
--- !u!92 &489340226
Behaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 489340223}
m_Enabled: 1
--- !u!124 &489340227
Behaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 489340223}
m_Enabled: 1
--- !u!20 &489340228
Camera:
m_ObjectHideFlags: 0

m_TargetDisplay: 0
m_TargetEye: 3
m_HDR: 0
m_AllowMSAA: 1
m_AllowMSAA: 0
--- !u!114 &551668186
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 1535917239}
--- !u!114 &633896473
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
continuousPlayerActions: []
discretePlayerActions:
- key: 273
value: 0
- key: 274
value: 1
- key: 276
value: 2
- key: 275
value: 3
defaultAction: -1
brain: {fileID: 1535917239}
--- !u!1 &742849316
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 214660f4189b04cada2137381f5c3607, type: 2}
m_StaticBatchInfo:

m_Father: {fileID: 486401524}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &997290694
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
continuousPlayerActions: []
discretePlayerActions:
- key: 273
value: 0
- key: 274
value: 1
- key: 276
value: 2
- key: 275
value: 3
defaultAction: -1
brain: {fileID: 1535917239}
--- !u!1 &1045409640
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 214660f4189b04cada2137381f5c3607, type: 2}
m_StaticBatchInfo:

m_Father: {fileID: 486401524}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1094263695
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
brain: {fileID: 1535917239}
--- !u!1 &1208586857
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 8d8e8962a89d44eb28cf1b21b88014ec, type: 2}
m_StaticBatchInfo:

- Right
actionSpaceType: 0
stateSpaceType: 1
brainType: 2
brainType: 3
- {fileID: 633896473}
- {fileID: 467853281}
- {fileID: 551668186}
- {fileID: 997290694}
- {fileID: 1094263695}
- {fileID: 2113356186}
instanceID: 14224
instanceID: 12304
--- !u!1 &1553342942
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 214660f4189b04cada2137381f5c3607, type: 2}
m_StaticBatchInfo:

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 214660f4189b04cada2137381f5c3607, type: 2}
m_StaticBatchInfo:

propertyPath: m_Name
value: trueAgent
objectReference: {fileID: 0}
- target: {fileID: 54942316128460260, guid: 628960e910f094ad1909ecc88cc8016d,
type: 2}
- target: {fileID: 0}
- {fileID: 0}
m_ParentPrefab: {fileID: 100100000, guid: 628960e910f094ad1909ecc88cc8016d, type: 2}
m_IsPrefabParent: 0
--- !u!1 &2008405822 stripped

memory: []
id: 0
academy: {fileID: 2047663}
--- !u!114 &2113356186
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 1535917239}

22
unity-environment/Assets/ML-Agents/Examples/GridWorld/Resources/agent.prefab


- component: {fileID: 33731433020831250}
- component: {fileID: 65005393801495654}
- component: {fileID: 114841131957396212}
- component: {fileID: 54942316128460260}
m_Layer: 8
m_Name: agent
m_TagString: agent

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 0457b417479684a52a3403f88f1b6b72, type: 2}
m_StaticBatchInfo:

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 0457b417479684a52a3403f88f1b6b72, type: 2}
m_StaticBatchInfo:

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 0457b417479684a52a3403f88f1b6b72, type: 2}
m_StaticBatchInfo:

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 0457b417479684a52a3403f88f1b6b72, type: 2}
m_StaticBatchInfo:

m_PrefabInternal: {fileID: 100100000}
m_GameObject: {fileID: 1124730825420414}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!54 &54942316128460260
Rigidbody:
m_ObjectHideFlags: 1
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 100100000}
m_GameObject: {fileID: 1657514749044530}
serializedVersion: 2
m_Mass: 1
m_Drag: 0
m_AngularDrag: 0.05
m_UseGravity: 0
m_IsKinematic: 0
m_Interpolate: 0
m_Constraints: 0
m_CollisionDetection: 0
--- !u!65 &65005393801495654
BoxCollider:
m_ObjectHideFlags: 1

maxStep: 100
resetOnDone: 1
state: []
stackedStates: []
maxStepReached: 0
value: 0
CumulativeReward: 0
stepCounter: 0

80
unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAcademy.cs


using System.Collections;
using System.Collections.Generic;
using System.Collections.Generic;
using UnityEngine.UI;
using Newtonsoft.Json;
using System.Net;
using System.Net.Sockets;
using System.Text;
public class GridAcademy : Academy
{

public string[] players;
public int[] players;
public GameObject trueAgent;

Camera cam;
Camera agentCam;
public GameObject agentPref;
public GameObject goalPref;
public GameObject pitPref;
GameObject[] objects;
GameObject plane;
GameObject sN;
GameObject sS;
GameObject sE;
GameObject sW;
objects = new GameObject[3] {agentPref, goalPref, pitPref};
agentCam = GameObject.Find("agentCam").GetComponent<Camera>();
actorObjs = new List<GameObject>();
plane = GameObject.Find("Plane");
sN = GameObject.Find("sN");
sS = GameObject.Find("sS");
sW = GameObject.Find("sW");
sE = GameObject.Find("sE");
cam.transform.position = new Vector3(-((int)resetParameters["gridSize"] - 1) / 2f, (int)resetParameters["gridSize"] * 1.25f, -((int)resetParameters["gridSize"] - 1) / 2f);
cam.transform.position = new Vector3(-((int)resetParameters["gridSize"] - 1) / 2f,
(int)resetParameters["gridSize"] * 1.25f,
-((int)resetParameters["gridSize"] - 1) / 2f);
List<string> playersList = new List<string>();
actorObjs = new List<GameObject>();
List<int> playersList = new List<int>();
playersList.Add("pit");
playersList.Add(2);
playersList.Add("goal");
playersList.Add(1);
GameObject.Find("Plane").transform.localScale = new Vector3(gridSize / 10.0f, 1f, gridSize / 10.0f);
GameObject.Find("Plane").transform.position = new Vector3((gridSize - 1) / 2f, -0.5f, (gridSize - 1) / 2f);
GameObject.Find("sN").transform.localScale = new Vector3(1, 1, gridSize + 2);
GameObject.Find("sS").transform.localScale = new Vector3(1, 1, gridSize + 2);
GameObject.Find("sN").transform.position = new Vector3((gridSize - 1) / 2f, 0.0f, gridSize);
GameObject.Find("sS").transform.position = new Vector3((gridSize - 1) / 2f, 0.0f, -1);
GameObject.Find("sE").transform.localScale = new Vector3(1, 1, gridSize + 2);
GameObject.Find("sW").transform.localScale = new Vector3(1, 1, gridSize + 2);
GameObject.Find("sE").transform.position = new Vector3(gridSize, 0.0f, (gridSize - 1) / 2f);
GameObject.Find("sW").transform.position = new Vector3(-1, 0.0f, (gridSize - 1) / 2f);
Camera aCam = GameObject.Find("agentCam").GetComponent<Camera>();
aCam.orthographicSize = (gridSize) / 2f;
aCam.transform.position = new Vector3((gridSize - 1) / 2f, gridSize + 1f, (gridSize - 1) / 2f);
plane.transform.localScale = new Vector3(gridSize / 10.0f, 1f, gridSize / 10.0f);
plane.transform.position = new Vector3((gridSize - 1) / 2f, -0.5f, (gridSize - 1) / 2f);
sN.transform.localScale = new Vector3(1, 1, gridSize + 2);
sS.transform.localScale = new Vector3(1, 1, gridSize + 2);
sN.transform.position = new Vector3((gridSize - 1) / 2f, 0.0f, gridSize);
sS.transform.position = new Vector3((gridSize - 1) / 2f, 0.0f, -1);
sE.transform.localScale = new Vector3(1, 1, gridSize + 2);
sW.transform.localScale = new Vector3(1, 1, gridSize + 2);
sE.transform.position = new Vector3(gridSize, 0.0f, (gridSize - 1) / 2f);
sW.transform.position = new Vector3(-1, 0.0f, (gridSize - 1) / 2f);
agentCam.orthographicSize = (gridSize) / 2f;
agentCam.transform.position = new Vector3((gridSize - 1) / 2f, gridSize + 1f, (gridSize - 1) / 2f);
}

}
SetEnvironment();
actorObjs = new List<GameObject>();
actorObjs.Clear();
HashSet<int> numbers = new HashSet<int>();
while (numbers.Count < players.Length + 1)

{
int x = (numbersA[i]) / gridSize;
int y = (numbersA[i]) % gridSize;
GameObject actorObj = (GameObject)GameObject.Instantiate(Resources.Load(players[i]));
GameObject actorObj = Instantiate(objects[players[i]]);
actorObj.name = players[i];
actorObjs.Add(actorObj);
}
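The GridAcademy changes above cache scene objects (the plane, the four walls, the agent camera) and prefab references once, instead of calling GameObject.Find and Resources.Load during every reset; that caching is where most of the faster GridWorld reset comes from. A rough Python analogy of the pattern (purely illustrative, not the C# code):

    class GridAcademySketch:
        # Illustrative only: look expensive things up once, reuse them on every reset.
        def __init__(self, find_object, prefabs):
            self.plane = find_object("Plane")
            self.walls = {name: find_object(name) for name in ("sN", "sS", "sE", "sW")}
            self.agent_cam = find_object("agentCam")
            self.objects = prefabs        # analogous to [agentPref, goalPref, pitPref]
            self.actor_objs = []

        def reset(self, spawn_indices, instantiate):
            # Only the actors are re-created; all other references are reused.
            self.actor_objs.clear()
            self.actor_objs.extend(instantiate(self.objects[i]) for i in spawn_indices)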

59
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


}
/** Contains logic for coverting a camera component into a Texture2D. */
public Texture2D ObservationToTex(Camera camera, int width, int height)
public Texture2D ObservationToTex(Camera cam, int width, int height)
Camera cam = camera;
Rect oldRec = camera.rect;
Rect oldRec = cam.rect;
bool supportsAntialiasing = false;
bool needsRescale = false;
var antiAliasing = (supportsAntialiasing) ? Mathf.Max(1, QualitySettings.antiAliasing) : 1;
var finalRT =
RenderTexture.GetTemporary(width, height, depth, format, readWrite, antiAliasing);
var renderRT = (!needsRescale) ? finalRT :
RenderTexture.GetTemporary(width, height, depth, format, readWrite, antiAliasing);
var tempRT =
RenderTexture.GetTemporary(width, height, depth, format, readWrite);
var tex = new Texture2D(width, height, TextureFormat.RGB24, false);
var prevActiveRT = RenderTexture.active;

RenderTexture.active = renderRT;
cam.targetTexture = renderRT;
RenderTexture.active = tempRT;
cam.targetTexture = tempRT;
if (needsRescale)
{
RenderTexture.active = finalRT;
Graphics.Blit(renderRT, finalRT);
RenderTexture.ReleaseTemporary(renderRT);
}
RenderTexture.ReleaseTemporary(finalRT);
RenderTexture.ReleaseTemporary(tempRT);
}
/// Contains logic to convert the agent's cameras into observation list

var observation_matrix_list = new List<float[,,,]>();
int numImageObservations = brainParameters.cameraResolutions.Length;
var observationMatrixList = new List<float[,,,]>(numImageObservations);
for (int obs_number = 0; obs_number < brainParameters.cameraResolutions.Length; obs_number++)
for (int obs_number = 0; obs_number < numImageObservations; obs_number++)
{
var width = brainParameters.cameraResolutions[obs_number].width;
var height = brainParameters.cameraResolutions[obs_number].height;

pixels = 1;
else
pixels = 3;
float[,,,] observation_matrix = new float[agent_keys.Count
, height
, width
, pixels];
var observationMatrix = new float[agent_keys.Count, height,
width, pixels];
Texture2D tex = ObservationToTex(agent_obs, width, height);
var tex = ObservationToTex(agent_obs, width, height);
Color32[] cc = tex.GetPixels32();
int texHeight = tex.height;
Color c = tex.GetPixel(w, h);
Color32 currentPixel = cc[h * width + w];
observation_matrix[i, tex.height - h - 1, w, 0] = c.r;
observation_matrix[i, tex.height - h - 1, w, 1] = c.g;
observation_matrix[i, tex.height - h - 1, w, 2] = c.b;
observationMatrix[i, texHeight - h - 1, w, 0] = currentPixel.r;
observationMatrix[i, texHeight - h - 1, w, 1] = currentPixel.g;
observationMatrix[i, texHeight - h - 1, w, 2] = currentPixel.b;
observation_matrix[i, tex.height - h - 1, w, 0] = (c.r + c.g + c.b) / 3;
observationMatrix[i, texHeight - h - 1, w, 0] = (currentPixel.r + currentPixel.g + currentPixel.b) / 3;
UnityEngine.Object.DestroyImmediate(tex);
DestroyImmediate(tex);
observation_matrix_list.Add(observation_matrix);
observationMatrixList.Add(observationMatrix);
return observation_matrix_list;
return observationMatrixList;
}
}
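The ObservationToTex / GetObservationMatrixList rework reads the whole texture once with GetPixels32 and indexes into the resulting array, instead of calling GetPixel per channel per pixel, and preallocates the observation list; that bulk read accounts for most of the 3x speedup. A rough Python/numpy analogy of the two access patterns (illustrative only, not the C# code):

    import numpy as np

    height, width = 84, 84
    # Flat RGB buffer, row-major, like the Color32[] returned by GetPixels32().
    pixels = np.random.randint(0, 256, size=(height * width, 3), dtype=np.uint8)

    # Slow pattern: one lookup per pixel (analogous to GetPixel in a nested loop).
    obs_slow = np.zeros((height, width, 3), dtype=np.float32)
    for h in range(height):
        for w in range(width):
            obs_slow[height - h - 1, w] = pixels[h * width + w]

    # Fast pattern: read the buffer once and reshape (analogous to GetPixels32).
    obs_fast = pixels.reshape(height, width, 3)[::-1].astype(np.float32)

    assert np.array_equal(obs_slow, obs_fast)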

7
python/unitytrainers/__init__.py


from .buffer import *
from .models import *
from .trainer_controller import *
from .bc.models import *
from .bc.trainer import *
from .ppo.models import *
from .ppo.trainer import *

2
python/unitytrainers/bc/__init__.py


from .models import *
from .trainer import *

37
python/unitytrainers/bc/models.py


import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from unitytrainers.models import LearningModel
class BehavioralCloningModel(LearningModel):
def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
normalize=False, use_recurrent=False):
LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)
num_streams = 1
hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
hidden = hidden_streams[0]
self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
if self.use_recurrent:
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
if brain.action_space_type == "discrete":
self.action_probs = tf.nn.softmax(self.policy)
self.sample_action = tf.cast(tf.multinomial(self.policy, 1, name="action"), tf.int32)
self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="expert_action")
self.action_oh = tf.one_hot(self.true_action, self.a_size)
self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
self.action_percent = tf.reduce_mean(tf.cast(
tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
else:
self.sample_action = tf.identity(self.policy, name="action")
self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name="expert_action")
self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
self.update = optimizer.minimize(self.loss)
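For the discrete branch above, the imitation loss is the cross-entropy between the softmax over the policy logits and a one-hot encoding of the expert action. A small numpy sketch of that computation (names are illustrative, not the TensorFlow graph itself):

    import numpy as np

    def bc_discrete_loss(policy_logits, expert_actions):
        # Softmax over action logits.
        shifted = policy_logits - policy_logits.max(axis=1, keepdims=True)
        action_probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
        # One-hot encode the expert actions.
        action_oh = np.eye(policy_logits.shape[1])[expert_actions]
        # Summed negative log-likelihood, as in tf.reduce_sum(-tf.log(probs + 1e-10) * one_hot).
        return np.sum(-np.log(action_probs + 1e-10) * action_oh)

    # e.g. two samples with four possible actions, where the expert chose actions 0 and 2:
    loss = bc_discrete_loss(np.array([[2.0, 0.5, 0.1, 0.1],
                                      [0.1, 0.1, 3.0, 0.2]]), np.array([0, 2]))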

2
python/unitytrainers/ppo/__init__.py


from .models import *
from .trainer import *

2
python/tests/__init__.py


from unityagents import *
from unitytrainers import *

103
python/tests/test_bc.py


import mock
import pytest
import numpy as np
import tensorflow as tf
from unitytrainers.bc.models import BehavioralCloningModel
from unityagents import UnityEnvironment
def test_cc_bc_model():
c_action_c_state_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"stackedStates": 2,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"actionDescriptions": ["",""],
"actionSpaceType": 1,
"stateSpaceType": 1
}]
}'''.encode()
tf.reset_default_graph()
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
# End of mock
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = c_action_c_state_start
env = UnityEnvironment(' ')
model = BehavioralCloningModel(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.sample_action, model.policy]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.state_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]])}
sess.run(run_list, feed_dict=feed_dict)
env.close()
def test_dc_bc_model():
d_action_c_state_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"stackedStates": 2,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [{"width":30,"height":40,"blackAndWhite":false}],
"actionDescriptions": ["",""],
"actionSpaceType": 0,
"stateSpaceType": 1
}]
}'''.encode()
tf.reset_default_graph()
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = d_action_c_state_start
env = UnityEnvironment(' ')
model = BehavioralCloningModel(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.sample_action, model.policy]
feed_dict = {model.batch_size: 2,
model.dropout_rate: 1.0,
model.sequence_length: 1,
model.state_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.observation_in[0]: np.ones([2, 40, 30, 3])}
sess.run(run_list, feed_dict=feed_dict)
env.close()
if __name__ == '__main__':
pytest.main()

105
python/tests/test_ppo.py


import mock
import pytest
import numpy as np
import tensorflow as tf
from unitytrainers.ppo.models import PPOModel
from unityagents import UnityEnvironment
def test_ppo_model_continuous():
c_action_c_state_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"stackedStates": 2,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"actionDescriptions": ["",""],
"actionSpaceType": 1,
"stateSpaceType": 1
}]
}'''.encode()
tf.reset_default_graph()
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
# End of mock
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = c_action_c_state_start
env = UnityEnvironment(' ')
model = PPOModel(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.output, model.probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.state_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]])}
sess.run(run_list, feed_dict=feed_dict)
env.close()
def test_ppo_model_discrete():
d_action_c_state_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"stackedStates": 2,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [{"width":30,"height":40,"blackAndWhite":false}],
"actionDescriptions": ["",""],
"actionSpaceType": 0,
"stateSpaceType": 1
}]
}'''.encode()
tf.reset_default_graph()
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
# End of mock
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = d_action_c_state_start
env = UnityEnvironment(' ')
model = PPOModel(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.output, model.all_probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.state_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.observation_in[0]: np.ones([2, 40, 30, 3])
}
sess.run(run_list, feed_dict=feed_dict)
env.close()
if __name__ == '__main__':
pytest.main()

215
python/tests/test_unityagents.py


import json
import mock
import pytest
import struct
import numpy as np
from unityagents import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
BrainInfo, Curriculum
def append_length(partial_string):
return struct.pack("I", len(partial_string.encode())) + partial_string.encode()
dummy_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"stackedStates" : 2,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"actionDescriptions": ["",""],
"actionSpaceType": 1,
"stateSpaceType": 1
}]
}'''.encode()
dummy_reset = [
'CONFIG_REQUEST'.encode(),
append_length(
'''
{
"brain_name": "RealFakeBrain",
"agents": [1,2],
"states": [1,2,3,4,5,6,1,2,3,4,5,6],
"rewards": [1,2],
"actions": [1,2,3,4],
"memories": [],
"dones": [false, false],
"maxes": [false, false]
}'''),
'False'.encode()]
dummy_step = ['actions'.encode(),
append_length('''
{
"brain_name": "RealFakeBrain",
"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9,1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, false],
"maxes": [false, false, false]
}'''),
'False'.encode(),
'actions'.encode(),
append_length('''
{
"brain_name": "RealFakeBrain",
"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9,1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, true],
"maxes": [false, false, false]
}'''),
'True'.encode()]
dummy_curriculum = json.loads('''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : true,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20, 15],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}''')
bad_curriculum = json.loads('''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : false,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}''')
def test_handles_bad_filename():
with pytest.raises(UnityEnvironmentException):
UnityEnvironment(' ')
def test_initialization():
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
with pytest.raises(UnityActionException):
env.step([0])
assert env.brain_names[0] == 'RealFakeBrain'
env.close()
def test_reset():
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
brain = env.brains['RealFakeBrain']
mock_socket.recv.side_effect = dummy_reset
brain_info = env.reset()
env.close()
assert not env.global_done
assert isinstance(brain_info, dict)
assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
assert isinstance(brain_info['RealFakeBrain'].observations, list)
assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size * brain.stacked_states
def test_step():
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
brain = env.brains['RealFakeBrain']
mock_socket.recv.side_effect = dummy_reset
brain_info = env.reset()
mock_socket.recv.side_effect = dummy_step
brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
with pytest.raises(UnityActionException):
env.step([0])
brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
with pytest.raises(UnityActionException):
env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
env.close()
assert env.global_done
assert isinstance(brain_info, dict)
assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
assert isinstance(brain_info['RealFakeBrain'].observations, list)
assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size * brain.stacked_states
assert not brain_info['RealFakeBrain'].local_done[0]
assert brain_info['RealFakeBrain'].local_done[2]
def test_close():
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
assert env._loaded
env.close()
assert not env._loaded
mock_socket.close.assert_called_once()
def test_curriculum():
open_name = '%s.open' % __name__
with mock.patch('json.load') as mock_load:
with mock.patch(open_name, create=True) as mock_open:
mock_open.return_value = 0
mock_load.return_value = bad_curriculum
with pytest.raises(UnityEnvironmentException):
Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
mock_load.return_value = dummy_curriculum
with pytest.raises(UnityEnvironmentException):
Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1})
curriculum = Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
assert curriculum.get_lesson_number == 0
curriculum.set_lesson_number(1)
assert curriculum.get_lesson_number == 1
curriculum.increment_lesson(10)
assert curriculum.get_lesson_number == 1
curriculum.increment_lesson(30)
curriculum.increment_lesson(30)
assert curriculum.get_lesson_number == 1
assert curriculum.lesson_length == 3
curriculum.increment_lesson(30)
assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
assert curriculum.lesson_length == 0
assert curriculum.get_lesson_number == 2

205
python/tests/test_unitytrainers.py


import yaml
import mock
import pytest
from unitytrainers.trainer_controller import TrainerController
from unitytrainers.buffer import Buffer
from unitytrainers.models import *
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unityagents import UnityEnvironmentException
dummy_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"stackedStates" : 2,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"actionDescriptions": ["",""],
"actionSpaceType": 1,
"stateSpaceType": 1
}]
}'''.encode()
dummy_config = yaml.load('''
default:
trainer: ppo
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
gamma: 0.99
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
''')
dummy_bc_config = yaml.load('''
default:
trainer: imitation
brain_to_imitate: ExpertBrain
batches_per_epoch: 16
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
gamma: 0.99
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
''')
dummy_bad_config = yaml.load('''
default:
trainer: incorrect_trainer
brain_to_imitate: ExpertBrain
batches_per_epoch: 16
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
gamma: 0.99
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
''')
def test_initialization():
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1)
assert(tc.env.brain_names[0] == 'RealFakeBrain')
def test_load_config():
open_name = '%s.open' % __name__
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as mock_open:
mock_open.return_value = 0
mock_load.return_value = dummy_config
config = TrainerController._load_config("tests/test_unitytrainers.py")
assert(len(config) == 1)
assert(config['default']['trainer'] == "ppo")
def test_initialize_trainers():
open_name = '%s.open' % __name__
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as mock_open:
mock_open.return_value = 0
with mock.patch('subprocess.Popen'):
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1)
# Test for PPO trainer
mock_load.return_value = dummy_config
config = tc._load_config("tests/test_unitytrainers.py")
tf.reset_default_graph()
with tf.Session() as sess:
tc._initialize_trainers(config, sess)
assert(len(tc.trainers) == 1)
assert(isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
# Test for Behavior Cloning Trainer
mock_load.return_value = dummy_bc_config
config = tc._load_config("tests/test_unitytrainers.py")
tf.reset_default_graph()
with tf.Session() as sess:
tc._initialize_trainers(config, sess)
assert(isinstance(tc.trainers['RealFakeBrain'], BehavioralCloningTrainer))
# Test for proper exception when trainer name is incorrect
mock_load.return_value = dummy_bad_config
config = tc._load_config("tests/test_unitytrainers.py")
tf.reset_default_graph()
with tf.Session() as sess:
with pytest.raises(UnityEnvironmentException):
tc._initialize_trainers(config, sess)
def assert_array(a, b):
assert a.shape == b.shape
la = list(a.flatten())
lb = list(b.flatten())
for i in range(len(la)):
assert la[i] == lb[i]
def test_buffer():
b = Buffer()
for fake_agent_id in range(4):
for step in range(9):
b[fake_agent_id]['state'].append(
[100 * fake_agent_id + 10 * step + 1,
100 * fake_agent_id + 10 * step + 2,
100 * fake_agent_id + 10 * step + 3]
)
b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4,
100 * fake_agent_id + 10 * step + 5])
a = b[1]['state'].get_batch(batch_size=2, training_length=None, sequential=True)
assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=True)
assert_array(a, np.array([
[[231, 232, 233], [241, 242, 243], [251, 252, 253]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]]
]))
a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=False)
assert_array(a, np.array([
[[251, 252, 253], [261, 262, 263], [271, 272, 273]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]]
]))
b[4].reset_agent()
assert len(b[4]) == 0
b.append_update_buffer(3,
batch_size=None, training_length=2)
b.append_update_buffer(2,
batch_size=None, training_length=2)
assert len(b.update_buffer['action']) == 10
assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
if __name__ == '__main__':
pytest.main()

37
python/unitytrainers/bc/bc_models.py


import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from unitytrainers.models import LearningModel
class BehavioralCloningModel(LearningModel):
def __init__(self, h_size, lr, n_layers, m_size, normalize, use_recurrent, brain):
LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)
num_streams = 1
hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
hidden = hidden_streams[0]
self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
if self.use_recurrent:
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
if brain.action_space_type == "discrete":
self.action_probs = tf.nn.softmax(self.policy)
self.sample_action = tf.multinomial(self.policy, 1, name="action")
self.true_action = tf.placeholder(shape=[None], dtype=tf.int32)
self.action_oh = tf.one_hot(self.true_action, self.a_size)
self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
self.action_percent = tf.reduce_mean(tf.cast(
tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
else:
self.sample_action = tf.identity(self.policy, name="action")
self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32)
self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
self.update = optimizer.minimize(self.loss)

347
python/test_unityagents.py


import json
import mock
import pytest
import struct
from unitytrainers.buffer import Buffer
from unitytrainers.models import *
from unityagents import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
BrainInfo, Curriculum
def append_length(input):
return struct.pack("I", len(input.encode())) + input.encode()
dummy_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"actionDescriptions": ["",""],
"actionSpaceType": 1,
"stateSpaceType": 1
}]
}'''.encode()
dummy_reset = [
'CONFIG_REQUEST'.encode(),
append_length(
'''
{
"brain_name": "RealFakeBrain",
"agents": [1,2],
"states": [1,2,3,4,5,6],
"rewards": [1,2],
"actions": [1,2,3,4],
"memories": [],
"dones": [false, false]
}'''),
'False'.encode()]
dummy_step = ['actions'.encode(),
append_length('''
{
"brain_name": "RealFakeBrain",
"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, false]
}'''),
'False'.encode(),
'actions'.encode(),
append_length('''
{
"brain_name": "RealFakeBrain",
"agents": [1,2,3],
"states": [1,2,3,4,5,6,7,8,9],
"rewards": [1,2,3],
"actions": [1,2,3,4,5,6],
"memories": [],
"dones": [false, false, true]
}'''),
'True'.encode()]
def test_handles_bad_filename():
with pytest.raises(UnityEnvironmentException):
UnityEnvironment(' ')
def test_initialization():
with mock.patch('subprocess.Popen') as mock_subproc_popen:
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
with pytest.raises(UnityActionException):
env.step([0])
assert env.brain_names[0] == 'RealFakeBrain'
env.close()
def test_reset():
with mock.patch('subprocess.Popen') as mock_subproc_popen:
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
brain = env.brains['RealFakeBrain']
mock_socket.recv.side_effect = dummy_reset
brain_info = env.reset()
env.close()
assert not env.global_done
assert isinstance(brain_info, dict)
assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
assert isinstance(brain_info['RealFakeBrain'].observations, list)
assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size
def test_step():
with mock.patch('subprocess.Popen') as mock_subproc_popen:
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
brain = env.brains['RealFakeBrain']
mock_socket.recv.side_effect = dummy_reset
brain_info = env.reset()
mock_socket.recv.side_effect = dummy_step
brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
with pytest.raises(UnityActionException):
env.step([0])
brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
with pytest.raises(UnityActionException):
env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
env.close()
assert env.global_done
assert isinstance(brain_info, dict)
assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
assert isinstance(brain_info['RealFakeBrain'].observations, list)
assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size
assert not brain_info['RealFakeBrain'].local_done[0]
assert brain_info['RealFakeBrain'].local_done[2]
def test_close():
with mock.patch('subprocess.Popen') as mock_subproc_popen:
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = dummy_start
env = UnityEnvironment(' ')
assert env._loaded
env.close()
assert not env._loaded
mock_socket.close.assert_called_once()
dummy_curriculum = json.loads('''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : true,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20, 15],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}''')
bad_curriculum = json.loads('''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : false,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}''')
def test_curriculum():
open_name = '%s.open' % __name__
with mock.patch('json.load') as mock_load:
with mock.patch(open_name, create=True) as mock_open:
mock_open.return_value = 0
mock_load.return_value = bad_curriculum
with pytest.raises(UnityEnvironmentException):
curriculum = Curriculum('test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
mock_load.return_value = dummy_curriculum
with pytest.raises(UnityEnvironmentException):
curriculum = Curriculum('test_unityagents.py', {"param1": 1, "param2": 1})
curriculum = Curriculum('test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
assert curriculum.get_lesson_number == 0
curriculum.set_lesson_number(1)
assert curriculum.get_lesson_number == 1
curriculum.increment_lesson(10)
assert curriculum.get_lesson_number == 1
curriculum.increment_lesson(30)
curriculum.increment_lesson(30)
assert curriculum.get_lesson_number == 1
assert curriculum.lesson_length == 3
curriculum.increment_lesson(30)
assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
assert curriculum.lesson_length == 0
assert curriculum.get_lesson_number == 2
c_action_c_state_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"actionDescriptions": ["",""],
"actionSpaceType": 1,
"stateSpaceType": 1
}]
}'''.encode()
def test_ppo_model_continuous():
tf.reset_default_graph()
with mock.patch('subprocess.Popen') as mock_subproc_popen:
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
# End of mock
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = c_action_c_state_start
env = UnityEnvironment(' ')
model = create_agent_model(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.output, model.probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.state_in: np.array([[1, 2, 3], [3, 4, 5]]),
model.epsilon: np.random.randn(2, 2)
}
sess.run(run_list, feed_dict=feed_dict)
env.close()
d_action_c_state_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-2",
"brainParameters": [{
"stateSize": 3,
"actionSize": 2,
"memorySize": 0,
"cameraResolutions": [{"width":30,"height":40,"blackAndWhite":false}],
"actionDescriptions": ["",""],
"actionSpaceType": 0,
"stateSpaceType": 1
}]
}'''.encode()
def test_ppo_model_discrete():
tf.reset_default_graph()
with mock.patch('subprocess.Popen') as mock_subproc_popen:
with mock.patch('socket.socket') as mock_socket:
with mock.patch('glob.glob') as mock_glob:
# End of mock
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_glob.return_value = ['FakeLaunchPath']
mock_socket.return_value.accept.return_value = (mock_socket, 0)
mock_socket.recv.return_value.decode.return_value = d_action_c_state_start
env = UnityEnvironment(' ')
model = create_agent_model(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.output, model.probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.state_in: np.array([[1, 2, 3], [3, 4, 5]]),
model.observation_in[0]: np.ones([2, 40, 30, 3])
}
sess.run(run_list, feed_dict=feed_dict)
env.close()
def assert_array(a, b):
assert a.shape == b.shape
la = list(a.flatten())
lb = list(b.flatten())
for i in range(len(la)):
assert la[i] == lb[i]
def test_buffer():
b = Buffer()
for fake_agent_id in range(4):
for i in range(9):
b[fake_agent_id]['state'].append(
[100 * fake_agent_id + 10 * i + 1, 100 * fake_agent_id + 10 * i + 2, 100 * fake_agent_id + 10 * i + 3]
)
b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * i + 4, 100 * fake_agent_id + 10 * i + 5])
a = b[1]['state'].get_batch(batch_size=2, training_length=None, sequential=True)
assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=True)
assert_array(a, np.array([
[[231, 232, 233], [241, 242, 243], [251, 252, 253]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]]
]))
a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=False)
assert_array(a, np.array([
[[251, 252, 253], [261, 262, 263], [271, 272, 273]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]]
]))
b[4].reset_agent()
assert len(b[4]) == 0
b.append_update_buffer(3,
batch_size=None, training_length=2)
b.append_update_buffer(2,
batch_size=None, training_length=2)
assert len(b.update_buffer['action']) == 10
assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
if __name__ == '__main__':
pytest.main()

/python/unitytrainers/bc/bc_trainer.py → /python/unitytrainers/bc/trainer.py

/python/unitytrainers/ppo/ppo_models.py → /python/unitytrainers/ppo/models.py

/python/unitytrainers/ppo/ppo_trainer.py → /python/unitytrainers/ppo/trainer.py
