
Merge pull request #1934 from Unity-Technologies/develop-black

Black formatting
GitHub committed 6 years ago
Current commit 70d14910
74 files changed, with 6,023 insertions and 3,767 deletions
  1. .circleci/config.yml (9 changes)
  2. CONTRIBUTING.md (5 changes)
  3. gym-unity/gym_unity/envs/unity_env.py (131 changes)
  4. gym-unity/gym_unity/tests/test_gym.py (67 changes)
  5. gym-unity/setup.py (21 changes)
  6. ml-agents-envs/mlagents/envs/base_unity_environment.py (4 changes)
  7. ml-agents-envs/mlagents/envs/brain.py (141 changes)
  8. ml-agents-envs/mlagents/envs/communicator.py (1 change)
  9. ml-agents-envs/mlagents/envs/communicator_objects/agent_action_proto_pb2.py (212 changes)
  10. ml-agents-envs/mlagents/envs/communicator_objects/agent_info_proto_pb2.py (387 changes)
  11. ml-agents-envs/mlagents/envs/communicator_objects/brain_parameters_proto_pb2.py (298 changes)
  12. ml-agents-envs/mlagents/envs/communicator_objects/command_proto_pb2.py (61 changes)
  13. ml-agents-envs/mlagents/envs/communicator_objects/custom_action_pb2.py (72 changes)
  14. ml-agents-envs/mlagents/envs/communicator_objects/custom_observation_pb2.py (72 changes)
  15. ml-agents-envs/mlagents/envs/communicator_objects/custom_reset_parameters_pb2.py (72 changes)
  16. ml-agents-envs/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py (198 changes)
  17. ml-agents-envs/mlagents/envs/communicator_objects/engine_configuration_proto_pb2.py (223 changes)
  18. ml-agents-envs/mlagents/envs/communicator_objects/environment_parameters_proto_pb2.py (250 changes)
  19. ml-agents-envs/mlagents/envs/communicator_objects/header_pb2.py (123 changes)
  20. ml-agents-envs/mlagents/envs/communicator_objects/resolution_proto_pb2.py (148 changes)
  21. ml-agents-envs/mlagents/envs/communicator_objects/space_type_proto_pb2.py (62 changes)
  22. ml-agents-envs/mlagents/envs/communicator_objects/unity_input_pb2.py (148 changes)
  23. ml-agents-envs/mlagents/envs/communicator_objects/unity_message_pb2.py (182 changes)
  24. ml-agents-envs/mlagents/envs/communicator_objects/unity_output_pb2.py (148 changes)
  25. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_initialization_input_pb2.py (100 changes)
  26. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.py (225 changes)
  27. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_input_pb2.py (398 changes)
  28. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_output_pb2.py (326 changes)
  29. ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py (62 changes)
  30. ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2_grpc.py (55 changes)
  31. ml-agents-envs/mlagents/envs/environment.py (358 changes)
  32. ml-agents-envs/mlagents/envs/exception.py (39 changes)
  33. ml-agents-envs/mlagents/envs/mock_communicator.py (63 changes)
  34. ml-agents-envs/mlagents/envs/rpc_communicator.py (10 changes)
  35. ml-agents-envs/mlagents/envs/socket_communicator.py (17 changes)
  36. ml-agents-envs/mlagents/envs/subprocess_environment.py (88 changes)
  37. ml-agents-envs/mlagents/envs/tests/test_envs.py (124 changes)
  38. ml-agents-envs/mlagents/envs/tests/test_rpc_communicator.py (1 change)
  39. ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py (60 changes)
  40. ml-agents-envs/setup.py (38 changes)
  41. ml-agents/mlagents/trainers/__init__.py (2 changes)
  42. ml-agents/mlagents/trainers/barracuda.py (335 changes)
  43. ml-agents/mlagents/trainers/bc/models.py (100 changes)
  44. ml-agents/mlagents/trainers/bc/offline_trainer.py (58 changes)
  45. ml-agents/mlagents/trainers/bc/online_trainer.py (111 changes)
  46. ml-agents/mlagents/trainers/bc/policy.py (61 changes)
  47. ml-agents/mlagents/trainers/bc/trainer.py (53 changes)
  48. ml-agents/mlagents/trainers/buffer.py (73 changes)
  49. ml-agents/mlagents/trainers/curriculum.py (67 changes)
  50. ml-agents/mlagents/trainers/demo_loader.py (44 changes)
  51. ml-agents/mlagents/trainers/exception.py (5 changes)
  52. ml-agents/mlagents/trainers/learn.py (210 changes)
  53. ml-agents/mlagents/trainers/meta_curriculum.py (56 changes)
  54. ml-agents/mlagents/trainers/models.py (464 changes)
  55. ml-agents/mlagents/trainers/policy.py (83 changes)
  56. ml-agents/mlagents/trainers/ppo/models.py (205 changes)
  57. ml-agents/mlagents/trainers/ppo/policy.py (179 changes)
  58. ml-agents/mlagents/trainers/ppo/trainer.py (321 changes)
  59. ml-agents/mlagents/trainers/tensorflow_to_barracuda.py (877 changes)
  60. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (12 changes)
  61. ml-agents/mlagents/trainers/tests/test_bc.py (110 changes)
  62. ml-agents/mlagents/trainers/tests/test_buffer.py (62 changes)
  63. ml-agents/mlagents/trainers/tests/test_curriculum.py (31 changes)
  64. ml-agents/mlagents/trainers/tests/test_demo_loader.py (12 changes)
  65. ml-agents/mlagents/trainers/tests/test_learn.py (60 changes)
  66. ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (98 changes)
  67. ml-agents/mlagents/trainers/tests/test_policy.py (31 changes)
  68. ml-agents/mlagents/trainers/tests/test_ppo.py (391 changes)
  69. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (206 changes)
  70. ml-agents/mlagents/trainers/tests/test_trainer_metrics.py (47 changes)
  71. ml-agents/mlagents/trainers/trainer.py (117 changes)
  72. ml-agents/mlagents/trainers/trainer_controller.py (195 changes)
  73. ml-agents/mlagents/trainers/trainer_metrics.py (53 changes)
  74. ml-agents/setup.py (62 changes)

.circleci/config.yml (9 changes)


version: 2.1
version: 2.0
jobs:
build:

. venv/bin/activate
cd ml-agents-envs && pip install -e .
cd ../ml-agents && pip install -e .
pip install pytest-cov==2.6.1 codacy-coverage==1.3.11
pip install black pytest-cov==2.6.1 codacy-coverage==1.3.11
cd ../gym-unity && pip install -e .
- save_cache:

- run:
name: Run Tests for ml-agents and gym_unity
command: |

python-codacy-coverage -r coverage.xml
black --check ml-agents
black --check ml-agents-envs
black --check gym-unity
- store_test_results:
path: test-reports

CONTRIBUTING.md (5 changes)


## Style Guide
When performing changes to the codebase, ensure that you follow the style guide
of the file you're modifying. For Python, we follow
[PEP 8](https://www.python.org/dev/peps/pep-0008/).
For C#, we will soon be adding a formal style guide for our repository.
When performing changes to the codebase, please ensure that all Python code is reformatted using the [black](https://github.com/ambv/black) formatter. For C#, we will soon be adding requirements for style and formatting.
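As context for the requirement above, black can also be invoked from Python; a minimal sketch, assuming black is installed and using an invented sample string, of how the formatter normalizes quoting and spacing (the same normalization this PR applies repo-wide):

import black

# Hypothetical "before" snippet: single quotes and loose default spacing.
source = "def f(a = 1, b = 'x'):\n    return {'a': a, 'b': b}\n"
formatted = black.format_str(source, mode=black.FileMode())
print(formatted)  # double quotes, a=1 / b="x", otherwise unchanged semantics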

gym-unity/gym_unity/envs/unity_env.py (131 changes)


"""
Any error related to the gym wrapper of ml-agents.
"""
pass

https://github.com/openai/multiagent-particle-envs
"""
def __init__(self, environment_filename: str, worker_id=0, use_visual=False, uint8_visual=False, multiagent=False, flatten_branched=False):
def __init__(
self,
environment_filename: str,
worker_id=0,
use_visual=False,
uint8_visual=False,
multiagent=False,
flatten_branched=False,
):
"""
Environment initialization
:param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.

self._n_agents = None
self._multiagent = multiagent
self._flattener = None
self.game_over = False # Hidden flag used by Atari environments to determine if the game is over
self.game_over = (
False
) # Hidden flag used by Atari environments to determine if the game is over
"if it is wrapped in a gym.")
"if it is wrapped in a gym."
)
raise UnityGymException("`use_visual` was set to True, however there are no"
" visual observations as part of this environment.")
raise UnityGymException(
"`use_visual` was set to True, however there are no"
" visual observations as part of this environment."
)
logger.warning("`uint8_visual was set to true, but visual observations are not in use. "
"This setting will not have any effect.")
logger.warning(
"`uint8_visual was set to true, but visual observations are not in use. "
"This setting will not have any effect."
)
logger.warning("The environment contains more than one visual observation. "
"Please note that only the first will be provided in the observation.")
logger.warning(
"The environment contains more than one visual observation. "
"Please note that only the first will be provided in the observation."
)
"if it is wrapped in a gym.")
"if it is wrapped in a gym."
)
# Check for number of agents in scene.
initial_info = self._env.reset()[self.brain_name]

self._flattener = ActionFlattener(brain.vector_action_space_size)
self._action_space = self._flattener.action_space
else:
self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)
self._action_space = spaces.MultiDiscrete(
brain.vector_action_space_size
)
logger.warning("The environment has a non-discrete action space. It will "
"not be flattened.")
logger.warning(
"The environment has a non-discrete action space. It will "
"not be flattened."
)
high = np.array([1] * brain.vector_action_space_size[0])
self._action_space = spaces.Box(-high, high, dtype=np.float32)
high = np.array([np.inf] * brain.vector_observation_space_size)

depth = 1
else:
depth = 3
self._observation_space = spaces.Box(0, 1, dtype=np.float32,
shape=(brain.camera_resolutions[0]["height"],
brain.camera_resolutions[0]["width"],
depth))
self._observation_space = spaces.Box(
0,
1,
dtype=np.float32,
shape=(
brain.camera_resolutions[0]["height"],
brain.camera_resolutions[0]["width"],
depth,
),
)
else:
self._observation_space = spaces.Box(-high, high, dtype=np.float32)

# Use random actions for all other agents in environment.
if self._multiagent:
if not isinstance(action, list):
raise UnityGymException("The environment was expecting `action` to be a list.")
raise UnityGymException(
"The environment was expecting `action` to be a list."
)
"The environment was expecting a list of {} actions.".format(self._n_agents))
"The environment was expecting a list of {} actions.".format(
self._n_agents
)
)
else:
if self._flattener is not None:
# Action space is discrete and flattened - we expect a list of scalars

def _single_step(self, info):
if self.use_visual:
self.visual_obs = self._preprocess_single(info.visual_observations[0][0, :, :, :])
self.visual_obs = self._preprocess_single(
info.visual_observations[0][0, :, :, :]
)
return default_observation, info.rewards[0], info.local_done[0], {
"text_observation": info.text_observations[0],
"brain_info": info}
return (
default_observation,
info.rewards[0],
info.local_done[0],
{"text_observation": info.text_observations[0], "brain_info": info},
)
return (255.0*single_visual_obs).astype(np.uint8)
return (255.0 * single_visual_obs).astype(np.uint8)
else:
return single_visual_obs

default_observation = self.visual_obs
else:
default_observation = info.vector_observations
return list(default_observation), info.rewards, info.local_done, {
"text_observation": info.text_observations,
"brain_info": info}
return (
list(default_observation),
info.rewards,
info.local_done,
{"text_observation": info.text_observations, "brain_info": info},
)
return [(255.0*_visual_obs).astype(np.uint8) for _visual_obs in multiple_visual_obs]
return [
(255.0 * _visual_obs).astype(np.uint8)
for _visual_obs in multiple_visual_obs
]
def render(self, mode='rgb_array'):
def render(self, mode="rgb_array"):
return self.visual_obs
def close(self):

if not self._multiagent and n_agents > 1:
raise UnityGymException(
"The environment was launched as a single-agent environment, however"
"there is more than one agent in the scene.")
"there is more than one agent in the scene."
)
"there is only one agent in the scene.")
"there is only one agent in the scene."
)
raise UnityGymException("The number of agents in the environment has changed since "
"initialization. This is not supported.")
raise UnityGymException(
"The number of agents in the environment has changed since "
"initialization. This is not supported."
)
return {'render.modes': ['rgb_array']}
return {"render.modes": ["rgb_array"]}
return -float('inf'), float('inf')
return -float("inf"), float("inf")
@property
def spec(self):

def number_agents(self):
return self._n_agents
class ActionFlattener():
class ActionFlattener:
def __init__(self,branched_action_space):
def __init__(self, branched_action_space):
"""
Initialize the flattener.
:param branched_action_space: A List containing the sizes of each branch of the action

possible_vals = [range(_num) for _num in branched_action_space]
all_actions = [list(_action) for _action in itertools.product(*possible_vals)]
# Dict should be faster than List for large action spaces
action_lookup = {_scalar: _action for (_scalar, _action) in enumerate(all_actions)}
action_lookup = {
_scalar: _action for (_scalar, _action) in enumerate(all_actions)
}
return action_lookup
def lookup_action(self, action):
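The ActionFlattener above enumerates the Cartesian product of the discrete action branches and stores it in a dict keyed by a single flattened scalar. A standalone sketch of that lookup table, using the same [2, 2, 3] branch sizes as the test case further down:

import itertools

branched_action_space = [2, 2, 3]  # sizes of each discrete action branch
possible_vals = [range(n) for n in branched_action_space]
all_actions = [list(a) for a in itertools.product(*possible_vals)]
# Map each flattened scalar back to its branched action.
action_lookup = {scalar: action for scalar, action in enumerate(all_actions)}
assert len(action_lookup) == 12        # 2 * 2 * 3 flattened actions
assert action_lookup[0] == [0, 0, 0]
assert action_lookup[11] == [1, 1, 2]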

gym-unity/gym_unity/tests/test_gym.py (67 changes)


from gym_unity.envs import UnityEnv, UnityGymException
@mock.patch('gym_unity.envs.unity_env.UnityEnvironment')
@mock.patch("gym_unity.envs.unity_env.UnityEnvironment")
env = UnityEnv(' ', use_visual=False, multiagent=False)
env = UnityEnv(" ", use_visual=False, multiagent=False)
assert isinstance(env, UnityEnv)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()

assert isinstance(info, dict)
@mock.patch('gym_unity.envs.unity_env.UnityEnvironment')
@mock.patch("gym_unity.envs.unity_env.UnityEnvironment")
def test_multi_agent(mock_env):
mock_brain = create_mock_brainparams()
mock_braininfo = create_mock_vector_braininfo(num_agents=2)

UnityEnv(' ', multiagent=False)
UnityEnv(" ", multiagent=False)
env = UnityEnv(' ', use_visual=False, multiagent=True)
env = UnityEnv(" ", use_visual=False, multiagent=True)
assert isinstance(env.reset(), list)
actions = [env.action_space.sample() for i in range(env.number_agents)]
obs, rew, done, info = env.step(actions)

assert isinstance(info, dict)
@mock.patch('gym_unity.envs.unity_env.UnityEnvironment')
@mock.patch("gym_unity.envs.unity_env.UnityEnvironment")
mock_brain = create_mock_brainparams(vector_action_space_type='discrete', vector_action_space_size=[2,2,3])
mock_brain = create_mock_brainparams(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
env = UnityEnv(' ', use_visual=False, multiagent=False, flatten_branched=True)
env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=True)
assert env.action_space.n==12
assert env._flattener.lookup_action(0)==[0,0,0]
assert env._flattener.lookup_action(11)==[1,1,2]
assert env.action_space.n == 12
assert env._flattener.lookup_action(0) == [0, 0, 0]
assert env._flattener.lookup_action(11) == [1, 1, 2]
env = UnityEnv(' ', use_visual=False, multiagent=False, flatten_branched=False)
env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=False)
def create_mock_brainparams(number_visual_observations=0, num_stacked_vector_observations=1,
vector_action_space_type='continuous', vector_observation_space_size=3,
vector_action_space_size=None):
def create_mock_brainparams(
number_visual_observations=0,
num_stacked_vector_observations=1,
vector_action_space_type="continuous",
vector_observation_space_size=3,
vector_action_space_size=None,
):
"""
Creates a mock BrainParameters object with parameters.
"""

mock_brain = mock.Mock()
mock_brain.return_value.number_visual_observations = number_visual_observations
mock_brain.return_value.num_stacked_vector_observations = num_stacked_vector_observations
mock_brain.return_value.num_stacked_vector_observations = (
num_stacked_vector_observations
)
mock_brain.return_value.vector_observation_space_size = vector_observation_space_size
mock_brain.return_value.vector_observation_space_size = (
vector_observation_space_size
)
def create_mock_vector_braininfo(num_agents = 1):
def create_mock_vector_braininfo(num_agents=1):
"""
Creates a mock BrainInfo with vector observations. Imitates constant
vector observations, rewards, dones, and agents.

mock_braininfo = mock.Mock()
mock_braininfo.return_value.vector_observations = np.array([num_agents*[1, 2, 3,]])
mock_braininfo.return_value.rewards = num_agents*[1.0]
mock_braininfo.return_value.local_done = num_agents*[False]
mock_braininfo.return_value.text_observations = num_agents*['']
mock_braininfo.return_value.agents = range(0,num_agents)
mock_braininfo.return_value.vector_observations = np.array([num_agents * [1, 2, 3]])
mock_braininfo.return_value.rewards = num_agents * [1.0]
mock_braininfo.return_value.local_done = num_agents * [False]
mock_braininfo.return_value.text_observations = num_agents * [""]
mock_braininfo.return_value.agents = range(0, num_agents)
return mock_braininfo()

:Mock mock_brain: A mock Brain object that specifies the params of this environment.
:Mock mock_braininfo: A mock BrainInfo object that will be returned at each step and reset.
"""
mock_env.return_value.academy_name = 'MockAcademy'
mock_env.return_value.brains = {'MockBrain':mock_brain}
mock_env.return_value.external_brain_names = ['MockBrain']
mock_env.return_value.reset.return_value = {'MockBrain':mock_braininfo}
mock_env.return_value.step.return_value = {'MockBrain':mock_braininfo}
mock_env.return_value.academy_name = "MockAcademy"
mock_env.return_value.brains = {"MockBrain": mock_brain}
mock_env.return_value.external_brain_names = ["MockBrain"]
mock_env.return_value.reset.return_value = {"MockBrain": mock_braininfo}
mock_env.return_value.step.return_value = {"MockBrain": mock_braininfo}
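For reference, the wrapper these mocks stand in for is driven like any other gym environment. A usage sketch built from the calls exercised in the tests above; the binary path and the episode loop are illustrative only:

from gym_unity.envs import UnityEnv

env = UnityEnv("path/to/UnityEnvBinary", use_visual=False, multiagent=False)
obs = env.reset()                      # single-agent: a np.ndarray observation
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
env.close()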

gym-unity/setup.py (21 changes)


from setuptools import setup, find_packages
setup(name='gym_unity',
version='0.4.1',
description='Unity Machine Learning Agents Gym Interface',
license='Apache License 2.0',
author='Unity Technologies',
author_email='ML-Agents@unity3d.com',
url='https://github.com/Unity-Technologies/ml-agents',
packages=find_packages(),
install_requires=['gym', 'mlagents_envs==0.8.1']
)
setup(
name="gym_unity",
version="0.4.1",
description="Unity Machine Learning Agents Gym Interface",
license="Apache License 2.0",
author="Unity Technologies",
author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
install_requires=["gym", "mlagents_envs==0.8.1"],
)

ml-agents-envs/mlagents/envs/base_unity_environment.py (4 changes)


class BaseUnityEnvironment(ABC):
@abstractmethod
def step(self, vector_action=None, memory=None, text_action=None, value=None) -> AllBrainInfo:
def step(
self, vector_action=None, memory=None, text_action=None, value=None
) -> AllBrainInfo:
pass
@abstractmethod

ml-agents-envs/mlagents/envs/brain.py (141 changes)


class BrainInfo:
def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
reward=None, agents=None, local_done=None,
vector_action=None, text_action=None, max_reached=None, action_mask=None,
custom_observations=None):
def __init__(
self,
visual_observation,
vector_observation,
text_observations,
memory=None,
reward=None,
agents=None,
local_done=None,
vector_action=None,
text_action=None,
max_reached=None,
action_mask=None,
custom_observations=None,
):
"""
Describes experience at current step of all agents linked to a brain.
"""

def merge(self, other):
for i in range(len(self.visual_observations)):
self.visual_observations[i].extend(other.visual_observations[i])
self.vector_observations = np.append(self.vector_observations, other.vector_observations, axis=0)
self.vector_observations = np.append(
self.vector_observations, other.vector_observations, axis=0
)
self.memories = self.merge_memories(self.memories, other.memories, self.agents, other.agents)
self.memories = self.merge_memories(
self.memories, other.memories, self.agents, other.agents
)
self.rewards = safe_concat_lists(self.rewards, other.rewards)
self.local_done = safe_concat_lists(self.local_done, other.local_done)
self.max_reached = safe_concat_lists(self.max_reached, other.max_reached)

self.previous_text_actions = safe_concat_lists(
self.previous_text_actions, other.previous_text_actions
)
self.action_masks = safe_concat_np_ndarray(self.action_masks, other.action_masks)
self.custom_observations = safe_concat_lists(self.custom_observations, other.custom_observations)
self.action_masks = safe_concat_np_ndarray(
self.action_masks, other.action_masks
)
self.custom_observations = safe_concat_lists(
self.custom_observations, other.custom_observations
)
@staticmethod
def merge_memories(m1, m2, agents1, agents2):

m2 = np.zeros((len(agents2), m1.shape[1]))
elif m2.shape[1] > m1.shape[1]:
new_m1 = np.zeros((m1.shape[0], m2.shape[1]))
new_m1[0:m1.shape[0], 0:m1.shape[1]] = m1
new_m1[0 : m1.shape[0], 0 : m1.shape[1]] = m1
new_m2[0:m2.shape[0], 0:m2.shape[1]] = m2
new_m2[0 : m2.shape[0], 0 : m2.shape[1]] = m2
return np.append(m1, new_m2, axis=0)
return np.append(m1, m2, axis=0)
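The zero-padding in merge_memories above widens the narrower memory matrix so the two agent groups can be stacked row-wise. A small numpy illustration with made-up shapes:

import numpy as np

m1 = np.ones((2, 4))   # 2 agents, 4 memory units each
m2 = np.ones((3, 2))   # 3 agents, only 2 memory units each
new_m2 = np.zeros((m2.shape[0], m1.shape[1]))
new_m2[0 : m2.shape[0], 0 : m2.shape[1]] = m2   # pad m2 out to m1's width
merged = np.append(m1, new_m2, axis=0)
assert merged.shape == (5, 4)  # all agents now share the wider memory width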

"""
vis_obs = []
for i in range(brain_params.number_visual_observations):
obs = [BrainInfo.process_pixels(x.visual_observations[i],
brain_params.camera_resolutions[i]['blackAndWhite'])
for x in agent_info_list]
obs = [
BrainInfo.process_pixels(
x.visual_observations[i],
brain_params.camera_resolutions[i]["blackAndWhite"],
)
for x in agent_info_list
]
vis_obs += [obs]
if len(agent_info_list) == 0:
memory_size = 0

memory = np.zeros((0, 0))
else:
[x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
[
x.memories.extend([0] * (memory_size - len(x.memories)))
for x in agent_info_list
]
memory = np.array([list(x.memories) for x in agent_info_list])
total_num_actions = sum(brain_params.vector_action_space_size)
mask_actions = np.ones((len(agent_info_list), total_num_actions))

mask_actions[agent_index, :] = [
0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]
0 if agent_info.action_mask[k] else 1
for k in range(total_num_actions)
]
logger.warning("An agent had a NaN reward for brain " + brain_params.brain_name)
logger.warning(
"An agent had a NaN reward for brain " + brain_params.brain_name
)
logger.warning("An agent had a NaN observation for brain " + brain_params.brain_name)
logger.warning(
"An agent had a NaN observation for brain " + brain_params.brain_name
)
(0, brain_params.vector_observation_space_size * brain_params.num_stacked_vector_observations)
(
0,
brain_params.vector_observation_space_size
* brain_params.num_stacked_vector_observations,
)
)
else:
vector_obs = np.nan_to_num(

text_action=[list(x.stored_text_actions) for x in agent_info_list],
max_reached=[x.max_step_reached for x in agent_info_list],
custom_observations=[x.custom_observation for x in agent_info_list],
action_mask=mask_actions
action_mask=mask_actions,
)
return brain_info

class BrainParameters:
def __init__(self,
brain_name: str,
vector_observation_space_size: int,
num_stacked_vector_observations: int,
camera_resolutions: List[Dict],
vector_action_space_size: List[int],
vector_action_descriptions: List[str],
vector_action_space_type: int):
def __init__(
self,
brain_name: str,
vector_observation_space_size: int,
num_stacked_vector_observations: int,
camera_resolutions: List[Dict],
vector_action_space_size: List[int],
vector_action_descriptions: List[str],
vector_action_space_type: int,
):
"""
Contains all brain-specific parameters.
"""

self.camera_resolutions = camera_resolutions
self.vector_action_space_size = vector_action_space_size
self.vector_action_descriptions = vector_action_descriptions
self.vector_action_space_type = ["discrete", "continuous"][vector_action_space_type]
self.vector_action_space_type = ["discrete", "continuous"][
vector_action_space_type
]
return '''Unity brain name: {}
return """Unity brain name: {}
Vector Action descriptions: {}'''.format(self.brain_name,
str(self.number_visual_observations),
str(self.vector_observation_space_size),
str(self.num_stacked_vector_observations),
self.vector_action_space_type,
str(self.vector_action_space_size),
', '.join(self.vector_action_descriptions))
Vector Action descriptions: {}""".format(
self.brain_name,
str(self.number_visual_observations),
str(self.vector_observation_space_size),
str(self.num_stacked_vector_observations),
self.vector_action_space_type,
str(self.vector_action_space_size),
", ".join(self.vector_action_descriptions),
)
@staticmethod
def from_proto(brain_param_proto):

:return: BrainParameter object.
"""
resolution = [{
"height": x.height,
"width": x.width,
"blackAndWhite": x.gray_scale
} for x in brain_param_proto.camera_resolutions]
brain_params = BrainParameters(brain_param_proto.brain_name,
brain_param_proto.vector_observation_size,
brain_param_proto.num_stacked_vector_observations,
resolution,
list(brain_param_proto.vector_action_size),
list(brain_param_proto.vector_action_descriptions),
brain_param_proto.vector_action_space_type)
return brain_params
resolution = [
{"height": x.height, "width": x.width, "blackAndWhite": x.gray_scale}
for x in brain_param_proto.camera_resolutions
]
brain_params = BrainParameters(
brain_param_proto.brain_name,
brain_param_proto.vector_observation_size,
brain_param_proto.num_stacked_vector_observations,
resolution,
list(brain_param_proto.vector_action_size),
list(brain_param_proto.vector_action_descriptions),
brain_param_proto.vector_action_space_type,
)
return brain_params
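The constructor above maps the proto's integer space type onto a string via ["discrete", "continuous"][vector_action_space_type]. A hedged sketch of constructing the object directly; every parameter value here is invented for illustration:

params = BrainParameters(
    brain_name="MockBrain",
    vector_observation_space_size=3,
    num_stacked_vector_observations=1,
    camera_resolutions=[{"height": 84, "width": 84, "blackAndWhite": False}],
    vector_action_space_size=[2, 2, 3],
    vector_action_descriptions=["", "", ""],
    vector_action_space_type=0,
)
assert params.vector_action_space_type == "discrete"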

ml-agents-envs/mlagents/envs/communicator.py (1 change)


"""
Sends a shutdown signal to the unity environment, and closes the connection.
"""

ml-agents-envs/mlagents/envs/communicator_objects/agent_action_proto_pb2.py (212 changes)


# source: mlagents/envs/communicator_objects/agent_action_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import custom_action_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2
from mlagents.envs.communicator_objects import (
custom_action_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2,
)
name='mlagents/envs/communicator_objects/agent_action_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n;mlagents/envs/communicator_objects/agent_action_proto.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/custom_action.proto\"\x9c\x01\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\x10\n\x08memories\x18\x03 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x12\x39\n\rcustom_action\x18\x05 \x01(\x0b\x32\".communicator_objects.CustomActionB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/agent_action_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n;mlagents/envs/communicator_objects/agent_action_proto.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/custom_action.proto"\x9c\x01\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\x10\n\x08memories\x18\x03 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x12\x39\n\rcustom_action\x18\x05 \x01(\x0b\x32".communicator_objects.CustomActionB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2.DESCRIPTOR
],
)
name='AgentActionProto',
full_name='communicator_objects.AgentActionProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='vector_actions', full_name='communicator_objects.AgentActionProto.vector_actions', index=0,
number=1, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='text_actions', full_name='communicator_objects.AgentActionProto.text_actions', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='memories', full_name='communicator_objects.AgentActionProto.memories', index=2,
number=3, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='value', full_name='communicator_objects.AgentActionProto.value', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='custom_action', full_name='communicator_objects.AgentActionProto.custom_action', index=4,
number=5, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=142,
serialized_end=298,
name="AgentActionProto",
full_name="communicator_objects.AgentActionProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="vector_actions",
full_name="communicator_objects.AgentActionProto.vector_actions",
index=0,
number=1,
type=2,
cpp_type=6,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="text_actions",
full_name="communicator_objects.AgentActionProto.text_actions",
index=1,
number=2,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="memories",
full_name="communicator_objects.AgentActionProto.memories",
index=2,
number=3,
type=2,
cpp_type=6,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="value",
full_name="communicator_objects.AgentActionProto.value",
index=3,
number=4,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="custom_action",
full_name="communicator_objects.AgentActionProto.custom_action",
index=4,
number=5,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=142,
serialized_end=298,
_AGENTACTIONPROTO.fields_by_name['custom_action'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2._CUSTOMACTION
DESCRIPTOR.message_types_by_name['AgentActionProto'] = _AGENTACTIONPROTO
_AGENTACTIONPROTO.fields_by_name[
"custom_action"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2._CUSTOMACTION
)
DESCRIPTOR.message_types_by_name["AgentActionProto"] = _AGENTACTIONPROTO
AgentActionProto = _reflection.GeneratedProtocolMessageType('AgentActionProto', (_message.Message,), dict(
DESCRIPTOR = _AGENTACTIONPROTO,
__module__ = 'mlagents.envs.communicator_objects.agent_action_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.AgentActionProto)
))
AgentActionProto = _reflection.GeneratedProtocolMessageType(
"AgentActionProto",
(_message.Message,),
dict(
DESCRIPTOR=_AGENTACTIONPROTO,
__module__="mlagents.envs.communicator_objects.agent_action_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.AgentActionProto)
),
)
_sym_db.RegisterMessage(AgentActionProto)

ml-agents-envs/mlagents/envs/communicator_objects/agent_info_proto_pb2.py (387 changes)


# source: mlagents/envs/communicator_objects/agent_info_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import custom_observation_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2
from mlagents.envs.communicator_objects import (
custom_observation_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2,
)
name='mlagents/envs/communicator_objects/agent_info_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n9mlagents/envs/communicator_objects/agent_info_proto.proto\x12\x14\x63ommunicator_objects\x1a;mlagents/envs/communicator_objects/custom_observation.proto\"\xd7\x02\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x1b\n\x13visual_observations\x18\x02 \x03(\x0c\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12\x43\n\x12\x63ustom_observation\x18\x0c \x01(\x0b\x32\'.communicator_objects.CustomObservationB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/agent_info_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n9mlagents/envs/communicator_objects/agent_info_proto.proto\x12\x14\x63ommunicator_objects\x1a;mlagents/envs/communicator_objects/custom_observation.proto"\xd7\x02\n\x0e\x41gentInfoProto\x12"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x1b\n\x13visual_observations\x18\x02 \x03(\x0c\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12\x43\n\x12\x63ustom_observation\x18\x0c \x01(\x0b\x32\'.communicator_objects.CustomObservationB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2.DESCRIPTOR
],
)
name='AgentInfoProto',
full_name='communicator_objects.AgentInfoProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='stacked_vector_observation', full_name='communicator_objects.AgentInfoProto.stacked_vector_observation', index=0,
number=1, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='visual_observations', full_name='communicator_objects.AgentInfoProto.visual_observations', index=1,
number=2, type=12, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='text_observation', full_name='communicator_objects.AgentInfoProto.text_observation', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='stored_vector_actions', full_name='communicator_objects.AgentInfoProto.stored_vector_actions', index=3,
number=4, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='stored_text_actions', full_name='communicator_objects.AgentInfoProto.stored_text_actions', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='memories', full_name='communicator_objects.AgentInfoProto.memories', index=5,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='reward', full_name='communicator_objects.AgentInfoProto.reward', index=6,
number=7, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='done', full_name='communicator_objects.AgentInfoProto.done', index=7,
number=8, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='max_step_reached', full_name='communicator_objects.AgentInfoProto.max_step_reached', index=8,
number=9, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='id', full_name='communicator_objects.AgentInfoProto.id', index=9,
number=10, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=10,
number=11, type=8, cpp_type=7, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='custom_observation', full_name='communicator_objects.AgentInfoProto.custom_observation', index=11,
number=12, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=145,
serialized_end=488,
name="AgentInfoProto",
full_name="communicator_objects.AgentInfoProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="stacked_vector_observation",
full_name="communicator_objects.AgentInfoProto.stacked_vector_observation",
index=0,
number=1,
type=2,
cpp_type=6,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="visual_observations",
full_name="communicator_objects.AgentInfoProto.visual_observations",
index=1,
number=2,
type=12,
cpp_type=9,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="text_observation",
full_name="communicator_objects.AgentInfoProto.text_observation",
index=2,
number=3,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="stored_vector_actions",
full_name="communicator_objects.AgentInfoProto.stored_vector_actions",
index=3,
number=4,
type=2,
cpp_type=6,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="stored_text_actions",
full_name="communicator_objects.AgentInfoProto.stored_text_actions",
index=4,
number=5,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="memories",
full_name="communicator_objects.AgentInfoProto.memories",
index=5,
number=6,
type=2,
cpp_type=6,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="reward",
full_name="communicator_objects.AgentInfoProto.reward",
index=6,
number=7,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="done",
full_name="communicator_objects.AgentInfoProto.done",
index=7,
number=8,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="max_step_reached",
full_name="communicator_objects.AgentInfoProto.max_step_reached",
index=8,
number=9,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="id",
full_name="communicator_objects.AgentInfoProto.id",
index=9,
number=10,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="action_mask",
full_name="communicator_objects.AgentInfoProto.action_mask",
index=10,
number=11,
type=8,
cpp_type=7,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="custom_observation",
full_name="communicator_objects.AgentInfoProto.custom_observation",
index=11,
number=12,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=145,
serialized_end=488,
_AGENTINFOPROTO.fields_by_name['custom_observation'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2._CUSTOMOBSERVATION
DESCRIPTOR.message_types_by_name['AgentInfoProto'] = _AGENTINFOPROTO
_AGENTINFOPROTO.fields_by_name[
"custom_observation"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2._CUSTOMOBSERVATION
)
DESCRIPTOR.message_types_by_name["AgentInfoProto"] = _AGENTINFOPROTO
AgentInfoProto = _reflection.GeneratedProtocolMessageType('AgentInfoProto', (_message.Message,), dict(
DESCRIPTOR = _AGENTINFOPROTO,
__module__ = 'mlagents.envs.communicator_objects.agent_info_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.AgentInfoProto)
))
AgentInfoProto = _reflection.GeneratedProtocolMessageType(
"AgentInfoProto",
(_message.Message,),
dict(
DESCRIPTOR=_AGENTINFOPROTO,
__module__="mlagents.envs.communicator_objects.agent_info_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.AgentInfoProto)
),
)
_sym_db.RegisterMessage(AgentInfoProto)

ml-agents-envs/mlagents/envs/communicator_objects/brain_parameters_proto_pb2.py (298 changes)


# source: mlagents/envs/communicator_objects/brain_parameters_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import resolution_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2
from mlagents.envs.communicator_objects import space_type_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_space__type__proto__pb2
from mlagents.envs.communicator_objects import (
resolution_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2,
)
from mlagents.envs.communicator_objects import (
space_type_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_space__type__proto__pb2,
)
name='mlagents/envs/communicator_objects/brain_parameters_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n?mlagents/envs/communicator_objects/brain_parameters_proto.proto\x12\x14\x63ommunicator_objects\x1a\x39mlagents/envs/communicator_objects/resolution_proto.proto\x1a\x39mlagents/envs/communicator_objects/space_type_proto.proto\"\xd4\x02\n\x14\x42rainParametersProto\x12\x1f\n\x17vector_observation_size\x18\x01 \x01(\x05\x12\'\n\x1fnum_stacked_vector_observations\x18\x02 \x01(\x05\x12\x1a\n\x12vector_action_size\x18\x03 \x03(\x05\x12\x41\n\x12\x63\x61mera_resolutions\x18\x04 \x03(\x0b\x32%.communicator_objects.ResolutionProto\x12\"\n\x1avector_action_descriptions\x18\x05 \x03(\t\x12\x46\n\x18vector_action_space_type\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_space__type__proto__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/brain_parameters_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n?mlagents/envs/communicator_objects/brain_parameters_proto.proto\x12\x14\x63ommunicator_objects\x1a\x39mlagents/envs/communicator_objects/resolution_proto.proto\x1a\x39mlagents/envs/communicator_objects/space_type_proto.proto"\xd4\x02\n\x14\x42rainParametersProto\x12\x1f\n\x17vector_observation_size\x18\x01 \x01(\x05\x12\'\n\x1fnum_stacked_vector_observations\x18\x02 \x01(\x05\x12\x1a\n\x12vector_action_size\x18\x03 \x03(\x05\x12\x41\n\x12\x63\x61mera_resolutions\x18\x04 \x03(\x0b\x32%.communicator_objects.ResolutionProto\x12"\n\x1avector_action_descriptions\x18\x05 \x03(\t\x12\x46\n\x18vector_action_space_type\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_space__type__proto__pb2.DESCRIPTOR,
],
)
name='BrainParametersProto',
full_name='communicator_objects.BrainParametersProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='vector_observation_size', full_name='communicator_objects.BrainParametersProto.vector_observation_size', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_stacked_vector_observations', full_name='communicator_objects.BrainParametersProto.num_stacked_vector_observations', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='vector_action_size', full_name='communicator_objects.BrainParametersProto.vector_action_size', index=2,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='camera_resolutions', full_name='communicator_objects.BrainParametersProto.camera_resolutions', index=3,
number=4, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='vector_action_descriptions', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions', index=4,
number=5, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='vector_action_space_type', full_name='communicator_objects.BrainParametersProto.vector_action_space_type', index=5,
number=6, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='brain_name', full_name='communicator_objects.BrainParametersProto.brain_name', index=6,
number=7, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='is_training', full_name='communicator_objects.BrainParametersProto.is_training', index=7,
number=8, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=208,
serialized_end=548,
name="BrainParametersProto",
full_name="communicator_objects.BrainParametersProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="vector_observation_size",
full_name="communicator_objects.BrainParametersProto.vector_observation_size",
index=0,
number=1,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="num_stacked_vector_observations",
full_name="communicator_objects.BrainParametersProto.num_stacked_vector_observations",
index=1,
number=2,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="vector_action_size",
full_name="communicator_objects.BrainParametersProto.vector_action_size",
index=2,
number=3,
type=5,
cpp_type=1,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="camera_resolutions",
full_name="communicator_objects.BrainParametersProto.camera_resolutions",
index=3,
number=4,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="vector_action_descriptions",
full_name="communicator_objects.BrainParametersProto.vector_action_descriptions",
index=4,
number=5,
type=9,
cpp_type=9,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="vector_action_space_type",
full_name="communicator_objects.BrainParametersProto.vector_action_space_type",
index=5,
number=6,
type=14,
cpp_type=8,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="brain_name",
full_name="communicator_objects.BrainParametersProto.brain_name",
index=6,
number=7,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="is_training",
full_name="communicator_objects.BrainParametersProto.is_training",
index=7,
number=8,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=208,
serialized_end=548,
_BRAINPARAMETERSPROTO.fields_by_name['camera_resolutions'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2._RESOLUTIONPROTO
_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type'].enum_type = mlagents_dot_envs_dot_communicator__objects_dot_space__type__proto__pb2._SPACETYPEPROTO
DESCRIPTOR.message_types_by_name['BrainParametersProto'] = _BRAINPARAMETERSPROTO
_BRAINPARAMETERSPROTO.fields_by_name[
"camera_resolutions"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2._RESOLUTIONPROTO
)
_BRAINPARAMETERSPROTO.fields_by_name[
"vector_action_space_type"
].enum_type = (
mlagents_dot_envs_dot_communicator__objects_dot_space__type__proto__pb2._SPACETYPEPROTO
)
DESCRIPTOR.message_types_by_name["BrainParametersProto"] = _BRAINPARAMETERSPROTO
BrainParametersProto = _reflection.GeneratedProtocolMessageType('BrainParametersProto', (_message.Message,), dict(
DESCRIPTOR = _BRAINPARAMETERSPROTO,
__module__ = 'mlagents.envs.communicator_objects.brain_parameters_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.BrainParametersProto)
))
BrainParametersProto = _reflection.GeneratedProtocolMessageType(
"BrainParametersProto",
(_message.Message,),
dict(
DESCRIPTOR=_BRAINPARAMETERSPROTO,
__module__="mlagents.envs.communicator_objects.brain_parameters_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.BrainParametersProto)
),
)
_sym_db.RegisterMessage(BrainParametersProto)
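For context only (this is not part of the diff): the BrainParametersProto message whose descriptor is reformatted above is an ordinary protoc-generated class. A minimal usage sketch follows; the brain name, action descriptions, and camera resolution values are made up for illustration.
# --- illustrative usage sketch, not part of the generated file ---
from mlagents.envs.communicator_objects import brain_parameters_proto_pb2
from mlagents.envs.communicator_objects import space_type_proto_pb2

bp = brain_parameters_proto_pb2.BrainParametersProto()
bp.brain_name = "ExampleBrain"                         # hypothetical name
bp.is_training = True
bp.vector_action_descriptions.extend(["forward", "turn"])
bp.vector_action_space_type = space_type_proto_pb2.continuous
bp.camera_resolutions.add(width=84, height=84, gray_scale=False)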

61
ml-agents-envs/mlagents/envs/communicator_objects/command_proto_pb2.py


# source: mlagents/envs/communicator_objects/command_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/command_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n6mlagents/envs/communicator_objects/command_proto.proto\x12\x14\x63ommunicator_objects*-\n\x0c\x43ommandProto\x12\x08\n\x04STEP\x10\x00\x12\t\n\x05RESET\x10\x01\x12\x08\n\x04QUIT\x10\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/command_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
"\n6mlagents/envs/communicator_objects/command_proto.proto\x12\x14\x63ommunicator_objects*-\n\x0c\x43ommandProto\x12\x08\n\x04STEP\x10\x00\x12\t\n\x05RESET\x10\x01\x12\x08\n\x04QUIT\x10\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3"
),
name='CommandProto',
full_name='communicator_objects.CommandProto',
filename=None,
file=DESCRIPTOR,
values=[
_descriptor.EnumValueDescriptor(
name='STEP', index=0, number=0,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='RESET', index=1, number=1,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='QUIT', index=2, number=2,
serialized_options=None,
type=None),
],
containing_type=None,
serialized_options=None,
serialized_start=80,
serialized_end=125,
name="CommandProto",
full_name="communicator_objects.CommandProto",
filename=None,
file=DESCRIPTOR,
values=[
_descriptor.EnumValueDescriptor(
name="STEP", index=0, number=0, serialized_options=None, type=None
),
_descriptor.EnumValueDescriptor(
name="RESET", index=1, number=1, serialized_options=None, type=None
),
_descriptor.EnumValueDescriptor(
name="QUIT", index=2, number=2, serialized_options=None, type=None
),
],
containing_type=None,
serialized_options=None,
serialized_start=80,
serialized_end=125,
)
_sym_db.RegisterEnumDescriptor(_COMMANDPROTO)

QUIT = 2
DESCRIPTOR.enum_types_by_name['CommandProto'] = _COMMANDPROTO
DESCRIPTOR.enum_types_by_name["CommandProto"] = _COMMANDPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
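As a point of reference (not part of the diff): for a top-level proto3 enum like CommandProto, protoc also emits module-level constants (STEP = 0, RESET = 1, QUIT = 2, the last of which appears as a context line above) plus an EnumTypeWrapper. A minimal sketch:
# --- illustrative usage sketch, not part of the generated file ---
from mlagents.envs.communicator_objects import command_proto_pb2

assert command_proto_pb2.RESET == 1
print(command_proto_pb2.CommandProto.Name(command_proto_pb2.QUIT))  # -> "QUIT"
print(command_proto_pb2.CommandProto.Value("STEP"))                 # -> 0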

72
ml-agents-envs/mlagents/envs/communicator_objects/custom_action_pb2.py


# source: mlagents/envs/communicator_objects/custom_action.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/custom_action.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n6mlagents/envs/communicator_objects/custom_action.proto\x12\x14\x63ommunicator_objects\"\x0e\n\x0c\x43ustomActionB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/custom_action.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n6mlagents/envs/communicator_objects/custom_action.proto\x12\x14\x63ommunicator_objects"\x0e\n\x0c\x43ustomActionB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='CustomAction',
full_name='communicator_objects.CustomAction',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=80,
serialized_end=94,
name="CustomAction",
full_name="communicator_objects.CustomAction",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=80,
serialized_end=94,
DESCRIPTOR.message_types_by_name['CustomAction'] = _CUSTOMACTION
DESCRIPTOR.message_types_by_name["CustomAction"] = _CUSTOMACTION
CustomAction = _reflection.GeneratedProtocolMessageType('CustomAction', (_message.Message,), dict(
DESCRIPTOR = _CUSTOMACTION,
__module__ = 'mlagents.envs.communicator_objects.custom_action_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.CustomAction)
))
CustomAction = _reflection.GeneratedProtocolMessageType(
"CustomAction",
(_message.Message,),
dict(
DESCRIPTOR=_CUSTOMACTION,
__module__="mlagents.envs.communicator_objects.custom_action_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.CustomAction)
),
)
_sym_db.RegisterMessage(CustomAction)

72
ml-agents-envs/mlagents/envs/communicator_objects/custom_observation_pb2.py


# source: mlagents/envs/communicator_objects/custom_observation.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/custom_observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n;mlagents/envs/communicator_objects/custom_observation.proto\x12\x14\x63ommunicator_objects\"\x13\n\x11\x43ustomObservationB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/custom_observation.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n;mlagents/envs/communicator_objects/custom_observation.proto\x12\x14\x63ommunicator_objects"\x13\n\x11\x43ustomObservationB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='CustomObservation',
full_name='communicator_objects.CustomObservation',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=85,
serialized_end=104,
name="CustomObservation",
full_name="communicator_objects.CustomObservation",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=85,
serialized_end=104,
DESCRIPTOR.message_types_by_name['CustomObservation'] = _CUSTOMOBSERVATION
DESCRIPTOR.message_types_by_name["CustomObservation"] = _CUSTOMOBSERVATION
CustomObservation = _reflection.GeneratedProtocolMessageType('CustomObservation', (_message.Message,), dict(
DESCRIPTOR = _CUSTOMOBSERVATION,
__module__ = 'mlagents.envs.communicator_objects.custom_observation_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.CustomObservation)
))
CustomObservation = _reflection.GeneratedProtocolMessageType(
"CustomObservation",
(_message.Message,),
dict(
DESCRIPTOR=_CUSTOMOBSERVATION,
__module__="mlagents.envs.communicator_objects.custom_observation_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.CustomObservation)
),
)
_sym_db.RegisterMessage(CustomObservation)

72
ml-agents-envs/mlagents/envs/communicator_objects/custom_reset_parameters_pb2.py


# source: mlagents/envs/communicator_objects/custom_reset_parameters.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/custom_reset_parameters.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n@mlagents/envs/communicator_objects/custom_reset_parameters.proto\x12\x14\x63ommunicator_objects\"\x17\n\x15\x43ustomResetParametersB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/custom_reset_parameters.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n@mlagents/envs/communicator_objects/custom_reset_parameters.proto\x12\x14\x63ommunicator_objects"\x17\n\x15\x43ustomResetParametersB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='CustomResetParameters',
full_name='communicator_objects.CustomResetParameters',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=90,
serialized_end=113,
name="CustomResetParameters",
full_name="communicator_objects.CustomResetParameters",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=90,
serialized_end=113,
DESCRIPTOR.message_types_by_name['CustomResetParameters'] = _CUSTOMRESETPARAMETERS
DESCRIPTOR.message_types_by_name["CustomResetParameters"] = _CUSTOMRESETPARAMETERS
CustomResetParameters = _reflection.GeneratedProtocolMessageType('CustomResetParameters', (_message.Message,), dict(
DESCRIPTOR = _CUSTOMRESETPARAMETERS,
__module__ = 'mlagents.envs.communicator_objects.custom_reset_parameters_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.CustomResetParameters)
))
CustomResetParameters = _reflection.GeneratedProtocolMessageType(
"CustomResetParameters",
(_message.Message,),
dict(
DESCRIPTOR=_CUSTOMRESETPARAMETERS,
__module__="mlagents.envs.communicator_objects.custom_reset_parameters_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.CustomResetParameters)
),
)
_sym_db.RegisterMessage(CustomResetParameters)

198
ml-agents-envs/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py


# source: mlagents/envs/communicator_objects/demonstration_meta_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/demonstration_meta_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\nAmlagents/envs/communicator_objects/demonstration_meta_proto.proto\x12\x14\x63ommunicator_objects\"\x8d\x01\n\x16\x44\x65monstrationMetaProto\x12\x13\n\x0b\x61pi_version\x18\x01 \x01(\x05\x12\x1a\n\x12\x64\x65monstration_name\x18\x02 \x01(\t\x12\x14\n\x0cnumber_steps\x18\x03 \x01(\x05\x12\x17\n\x0fnumber_episodes\x18\x04 \x01(\x05\x12\x13\n\x0bmean_reward\x18\x05 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/demonstration_meta_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\nAmlagents/envs/communicator_objects/demonstration_meta_proto.proto\x12\x14\x63ommunicator_objects"\x8d\x01\n\x16\x44\x65monstrationMetaProto\x12\x13\n\x0b\x61pi_version\x18\x01 \x01(\x05\x12\x1a\n\x12\x64\x65monstration_name\x18\x02 \x01(\t\x12\x14\n\x0cnumber_steps\x18\x03 \x01(\x05\x12\x17\n\x0fnumber_episodes\x18\x04 \x01(\x05\x12\x13\n\x0bmean_reward\x18\x05 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='DemonstrationMetaProto',
full_name='communicator_objects.DemonstrationMetaProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='api_version', full_name='communicator_objects.DemonstrationMetaProto.api_version', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='demonstration_name', full_name='communicator_objects.DemonstrationMetaProto.demonstration_name', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='number_steps', full_name='communicator_objects.DemonstrationMetaProto.number_steps', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='number_episodes', full_name='communicator_objects.DemonstrationMetaProto.number_episodes', index=3,
number=4, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='mean_reward', full_name='communicator_objects.DemonstrationMetaProto.mean_reward', index=4,
number=5, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=92,
serialized_end=233,
name="DemonstrationMetaProto",
full_name="communicator_objects.DemonstrationMetaProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="api_version",
full_name="communicator_objects.DemonstrationMetaProto.api_version",
index=0,
number=1,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="demonstration_name",
full_name="communicator_objects.DemonstrationMetaProto.demonstration_name",
index=1,
number=2,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="number_steps",
full_name="communicator_objects.DemonstrationMetaProto.number_steps",
index=2,
number=3,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="number_episodes",
full_name="communicator_objects.DemonstrationMetaProto.number_episodes",
index=3,
number=4,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="mean_reward",
full_name="communicator_objects.DemonstrationMetaProto.mean_reward",
index=4,
number=5,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=92,
serialized_end=233,
DESCRIPTOR.message_types_by_name['DemonstrationMetaProto'] = _DEMONSTRATIONMETAPROTO
DESCRIPTOR.message_types_by_name["DemonstrationMetaProto"] = _DEMONSTRATIONMETAPROTO
DemonstrationMetaProto = _reflection.GeneratedProtocolMessageType('DemonstrationMetaProto', (_message.Message,), dict(
DESCRIPTOR = _DEMONSTRATIONMETAPROTO,
__module__ = 'mlagents.envs.communicator_objects.demonstration_meta_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.DemonstrationMetaProto)
))
DemonstrationMetaProto = _reflection.GeneratedProtocolMessageType(
"DemonstrationMetaProto",
(_message.Message,),
dict(
DESCRIPTOR=_DEMONSTRATIONMETAPROTO,
__module__="mlagents.envs.communicator_objects.demonstration_meta_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.DemonstrationMetaProto)
),
)
_sym_db.RegisterMessage(DemonstrationMetaProto)
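For context only (not part of the diff): DemonstrationMetaProto carries the five scalar fields listed in the descriptor above and round-trips through the usual protobuf serialization API. A minimal sketch with made-up values:
# --- illustrative usage sketch, not part of the generated file ---
from mlagents.envs.communicator_objects import demonstration_meta_proto_pb2

meta = demonstration_meta_proto_pb2.DemonstrationMetaProto(
    api_version=1,                 # hypothetical values for illustration
    demonstration_name="demo",
    number_steps=1000,
    number_episodes=10,
    mean_reward=0.5,
)
payload = meta.SerializeToString()
restored = demonstration_meta_proto_pb2.DemonstrationMetaProto.FromString(payload)
assert restored.mean_reward == meta.mean_reward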

223
ml-agents-envs/mlagents/envs/communicator_objects/engine_configuration_proto_pb2.py


# source: mlagents/envs/communicator_objects/engine_configuration_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/engine_configuration_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\nCmlagents/envs/communicator_objects/engine_configuration_proto.proto\x12\x14\x63ommunicator_objects\"\x95\x01\n\x18\x45ngineConfigurationProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x15\n\rquality_level\x18\x03 \x01(\x05\x12\x12\n\ntime_scale\x18\x04 \x01(\x02\x12\x19\n\x11target_frame_rate\x18\x05 \x01(\x05\x12\x14\n\x0cshow_monitor\x18\x06 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/engine_configuration_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\nCmlagents/envs/communicator_objects/engine_configuration_proto.proto\x12\x14\x63ommunicator_objects"\x95\x01\n\x18\x45ngineConfigurationProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x15\n\rquality_level\x18\x03 \x01(\x05\x12\x12\n\ntime_scale\x18\x04 \x01(\x02\x12\x19\n\x11target_frame_rate\x18\x05 \x01(\x05\x12\x14\n\x0cshow_monitor\x18\x06 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='EngineConfigurationProto',
full_name='communicator_objects.EngineConfigurationProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='width', full_name='communicator_objects.EngineConfigurationProto.width', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='height', full_name='communicator_objects.EngineConfigurationProto.height', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='quality_level', full_name='communicator_objects.EngineConfigurationProto.quality_level', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='time_scale', full_name='communicator_objects.EngineConfigurationProto.time_scale', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='target_frame_rate', full_name='communicator_objects.EngineConfigurationProto.target_frame_rate', index=4,
number=5, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='show_monitor', full_name='communicator_objects.EngineConfigurationProto.show_monitor', index=5,
number=6, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=94,
serialized_end=243,
name="EngineConfigurationProto",
full_name="communicator_objects.EngineConfigurationProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="width",
full_name="communicator_objects.EngineConfigurationProto.width",
index=0,
number=1,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="height",
full_name="communicator_objects.EngineConfigurationProto.height",
index=1,
number=2,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="quality_level",
full_name="communicator_objects.EngineConfigurationProto.quality_level",
index=2,
number=3,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="time_scale",
full_name="communicator_objects.EngineConfigurationProto.time_scale",
index=3,
number=4,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="target_frame_rate",
full_name="communicator_objects.EngineConfigurationProto.target_frame_rate",
index=4,
number=5,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="show_monitor",
full_name="communicator_objects.EngineConfigurationProto.show_monitor",
index=5,
number=6,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=94,
serialized_end=243,
DESCRIPTOR.message_types_by_name['EngineConfigurationProto'] = _ENGINECONFIGURATIONPROTO
DESCRIPTOR.message_types_by_name["EngineConfigurationProto"] = _ENGINECONFIGURATIONPROTO
EngineConfigurationProto = _reflection.GeneratedProtocolMessageType('EngineConfigurationProto', (_message.Message,), dict(
DESCRIPTOR = _ENGINECONFIGURATIONPROTO,
__module__ = 'mlagents.envs.communicator_objects.engine_configuration_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.EngineConfigurationProto)
))
EngineConfigurationProto = _reflection.GeneratedProtocolMessageType(
"EngineConfigurationProto",
(_message.Message,),
dict(
DESCRIPTOR=_ENGINECONFIGURATIONPROTO,
__module__="mlagents.envs.communicator_objects.engine_configuration_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.EngineConfigurationProto)
),
)
_sym_db.RegisterMessage(EngineConfigurationProto)
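For context only (not part of the diff): EngineConfigurationProto bundles the engine-side display and timing settings declared above. A minimal construction sketch with made-up values:
# --- illustrative usage sketch, not part of the generated file ---
from mlagents.envs.communicator_objects import engine_configuration_proto_pb2

cfg = engine_configuration_proto_pb2.EngineConfigurationProto(
    width=84, height=84, quality_level=1,       # hypothetical settings
    time_scale=20.0, target_frame_rate=-1, show_monitor=False,
)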

250
ml-agents-envs/mlagents/envs/communicator_objects/environment_parameters_proto_pb2.py


# source: mlagents/envs/communicator_objects/environment_parameters_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import custom_reset_parameters_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_custom__reset__parameters__pb2
from mlagents.envs.communicator_objects import (
custom_reset_parameters_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_custom__reset__parameters__pb2,
)
name='mlagents/envs/communicator_objects/environment_parameters_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\nEmlagents/envs/communicator_objects/environment_parameters_proto.proto\x12\x14\x63ommunicator_objects\x1a@mlagents/envs/communicator_objects/custom_reset_parameters.proto\"\x83\x02\n\x1a\x45nvironmentParametersProto\x12_\n\x10\x66loat_parameters\x18\x01 \x03(\x0b\x32\x45.communicator_objects.EnvironmentParametersProto.FloatParametersEntry\x12L\n\x17\x63ustom_reset_parameters\x18\x02 \x01(\x0b\x32+.communicator_objects.CustomResetParameters\x1a\x36\n\x14\x46loatParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_custom__reset__parameters__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/environment_parameters_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\nEmlagents/envs/communicator_objects/environment_parameters_proto.proto\x12\x14\x63ommunicator_objects\x1a@mlagents/envs/communicator_objects/custom_reset_parameters.proto"\x83\x02\n\x1a\x45nvironmentParametersProto\x12_\n\x10\x66loat_parameters\x18\x01 \x03(\x0b\x32\x45.communicator_objects.EnvironmentParametersProto.FloatParametersEntry\x12L\n\x17\x63ustom_reset_parameters\x18\x02 \x01(\x0b\x32+.communicator_objects.CustomResetParameters\x1a\x36\n\x14\x46loatParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_custom__reset__parameters__pb2.DESCRIPTOR
],
)
name='FloatParametersEntry',
full_name='communicator_objects.EnvironmentParametersProto.FloatParametersEntry',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='key', full_name='communicator_objects.EnvironmentParametersProto.FloatParametersEntry.key', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='value', full_name='communicator_objects.EnvironmentParametersProto.FloatParametersEntry.value', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=_b('8\001'),
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=367,
serialized_end=421,
name="FloatParametersEntry",
full_name="communicator_objects.EnvironmentParametersProto.FloatParametersEntry",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="key",
full_name="communicator_objects.EnvironmentParametersProto.FloatParametersEntry.key",
index=0,
number=1,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="value",
full_name="communicator_objects.EnvironmentParametersProto.FloatParametersEntry.value",
index=1,
number=2,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=_b("8\001"),
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=367,
serialized_end=421,
name='EnvironmentParametersProto',
full_name='communicator_objects.EnvironmentParametersProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='float_parameters', full_name='communicator_objects.EnvironmentParametersProto.float_parameters', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='custom_reset_parameters', full_name='communicator_objects.EnvironmentParametersProto.custom_reset_parameters', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[_ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY, ],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=162,
serialized_end=421,
name="EnvironmentParametersProto",
full_name="communicator_objects.EnvironmentParametersProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="float_parameters",
full_name="communicator_objects.EnvironmentParametersProto.float_parameters",
index=0,
number=1,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="custom_reset_parameters",
full_name="communicator_objects.EnvironmentParametersProto.custom_reset_parameters",
index=1,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[_ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=162,
serialized_end=421,
_ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY.containing_type = _ENVIRONMENTPARAMETERSPROTO
_ENVIRONMENTPARAMETERSPROTO.fields_by_name['float_parameters'].message_type = _ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY
_ENVIRONMENTPARAMETERSPROTO.fields_by_name['custom_reset_parameters'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_custom__reset__parameters__pb2._CUSTOMRESETPARAMETERS
DESCRIPTOR.message_types_by_name['EnvironmentParametersProto'] = _ENVIRONMENTPARAMETERSPROTO
_ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY.containing_type = (
_ENVIRONMENTPARAMETERSPROTO
)
_ENVIRONMENTPARAMETERSPROTO.fields_by_name[
"float_parameters"
].message_type = _ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY
_ENVIRONMENTPARAMETERSPROTO.fields_by_name[
"custom_reset_parameters"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_custom__reset__parameters__pb2._CUSTOMRESETPARAMETERS
)
DESCRIPTOR.message_types_by_name[
"EnvironmentParametersProto"
] = _ENVIRONMENTPARAMETERSPROTO
EnvironmentParametersProto = _reflection.GeneratedProtocolMessageType('EnvironmentParametersProto', (_message.Message,), dict(
FloatParametersEntry = _reflection.GeneratedProtocolMessageType('FloatParametersEntry', (_message.Message,), dict(
DESCRIPTOR = _ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY,
__module__ = 'mlagents.envs.communicator_objects.environment_parameters_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.EnvironmentParametersProto.FloatParametersEntry)
))
,
DESCRIPTOR = _ENVIRONMENTPARAMETERSPROTO,
__module__ = 'mlagents.envs.communicator_objects.environment_parameters_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.EnvironmentParametersProto)
))
EnvironmentParametersProto = _reflection.GeneratedProtocolMessageType(
"EnvironmentParametersProto",
(_message.Message,),
dict(
FloatParametersEntry=_reflection.GeneratedProtocolMessageType(
"FloatParametersEntry",
(_message.Message,),
dict(
DESCRIPTOR=_ENVIRONMENTPARAMETERSPROTO_FLOATPARAMETERSENTRY,
__module__="mlagents.envs.communicator_objects.environment_parameters_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.EnvironmentParametersProto.FloatParametersEntry)
),
),
DESCRIPTOR=_ENVIRONMENTPARAMETERSPROTO,
__module__="mlagents.envs.communicator_objects.environment_parameters_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.EnvironmentParametersProto)
),
)
_sym_db.RegisterMessage(EnvironmentParametersProto)
_sym_db.RegisterMessage(EnvironmentParametersProto.FloatParametersEntry)
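For context only (not part of the diff): the nested FloatParametersEntry descriptor above is the synthesized entry type for a map<string, float> field, so float_parameters behaves like a dict on the generated message. A minimal sketch; the key and value are made up:
# --- illustrative usage sketch, not part of the generated file ---
from mlagents.envs.communicator_objects import environment_parameters_proto_pb2

params = environment_parameters_proto_pb2.EnvironmentParametersProto()
params.float_parameters["gravity"] = -9.81    # map<string, float> entry; key is hypothetical
params.custom_reset_parameters.SetInParent()  # mark the (empty) submessage as present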

123
ml-agents-envs/mlagents/envs/communicator_objects/header_pb2.py


# source: mlagents/envs/communicator_objects/header.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/header.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n/mlagents/envs/communicator_objects/header.proto\x12\x14\x63ommunicator_objects\")\n\x06Header\x12\x0e\n\x06status\x18\x01 \x01(\x05\x12\x0f\n\x07message\x18\x02 \x01(\tB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/header.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n/mlagents/envs/communicator_objects/header.proto\x12\x14\x63ommunicator_objects")\n\x06Header\x12\x0e\n\x06status\x18\x01 \x01(\x05\x12\x0f\n\x07message\x18\x02 \x01(\tB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='Header',
full_name='communicator_objects.Header',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='status', full_name='communicator_objects.Header.status', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='message', full_name='communicator_objects.Header.message', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=73,
serialized_end=114,
name="Header",
full_name="communicator_objects.Header",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="status",
full_name="communicator_objects.Header.status",
index=0,
number=1,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="message",
full_name="communicator_objects.Header.message",
index=1,
number=2,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=73,
serialized_end=114,
DESCRIPTOR.message_types_by_name['Header'] = _HEADER
DESCRIPTOR.message_types_by_name["Header"] = _HEADER
Header = _reflection.GeneratedProtocolMessageType('Header', (_message.Message,), dict(
DESCRIPTOR = _HEADER,
__module__ = 'mlagents.envs.communicator_objects.header_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.Header)
))
Header = _reflection.GeneratedProtocolMessageType(
"Header",
(_message.Message,),
dict(
DESCRIPTOR=_HEADER,
__module__="mlagents.envs.communicator_objects.header_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.Header)
),
)
_sym_db.RegisterMessage(Header)

148
ml-agents-envs/mlagents/envs/communicator_objects/resolution_proto_pb2.py


# source: mlagents/envs/communicator_objects/resolution_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/resolution_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n9mlagents/envs/communicator_objects/resolution_proto.proto\x12\x14\x63ommunicator_objects\"D\n\x0fResolutionProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x12\n\ngray_scale\x18\x03 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/resolution_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n9mlagents/envs/communicator_objects/resolution_proto.proto\x12\x14\x63ommunicator_objects"D\n\x0fResolutionProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x12\n\ngray_scale\x18\x03 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='ResolutionProto',
full_name='communicator_objects.ResolutionProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='width', full_name='communicator_objects.ResolutionProto.width', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='height', full_name='communicator_objects.ResolutionProto.height', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='gray_scale', full_name='communicator_objects.ResolutionProto.gray_scale', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=83,
serialized_end=151,
name="ResolutionProto",
full_name="communicator_objects.ResolutionProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="width",
full_name="communicator_objects.ResolutionProto.width",
index=0,
number=1,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="height",
full_name="communicator_objects.ResolutionProto.height",
index=1,
number=2,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="gray_scale",
full_name="communicator_objects.ResolutionProto.gray_scale",
index=2,
number=3,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=83,
serialized_end=151,
DESCRIPTOR.message_types_by_name['ResolutionProto'] = _RESOLUTIONPROTO
DESCRIPTOR.message_types_by_name["ResolutionProto"] = _RESOLUTIONPROTO
ResolutionProto = _reflection.GeneratedProtocolMessageType('ResolutionProto', (_message.Message,), dict(
DESCRIPTOR = _RESOLUTIONPROTO,
__module__ = 'mlagents.envs.communicator_objects.resolution_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.ResolutionProto)
))
ResolutionProto = _reflection.GeneratedProtocolMessageType(
"ResolutionProto",
(_message.Message,),
dict(
DESCRIPTOR=_RESOLUTIONPROTO,
__module__="mlagents.envs.communicator_objects.resolution_proto_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.ResolutionProto)
),
)
_sym_db.RegisterMessage(ResolutionProto)

62
ml-agents-envs/mlagents/envs/communicator_objects/space_type_proto_pb2.py


# source: mlagents/envs/communicator_objects/space_type_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import resolution_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2
from mlagents.envs.communicator_objects import (
resolution_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2,
)
name='mlagents/envs/communicator_objects/space_type_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n9mlagents/envs/communicator_objects/space_type_proto.proto\x12\x14\x63ommunicator_objects\x1a\x39mlagents/envs/communicator_objects/resolution_proto.proto*.\n\x0eSpaceTypeProto\x12\x0c\n\x08\x64iscrete\x10\x00\x12\x0e\n\ncontinuous\x10\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/space_type_proto.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
"\n9mlagents/envs/communicator_objects/space_type_proto.proto\x12\x14\x63ommunicator_objects\x1a\x39mlagents/envs/communicator_objects/resolution_proto.proto*.\n\x0eSpaceTypeProto\x12\x0c\n\x08\x64iscrete\x10\x00\x12\x0e\n\ncontinuous\x10\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3"
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR
],
)
name='SpaceTypeProto',
full_name='communicator_objects.SpaceTypeProto',
filename=None,
file=DESCRIPTOR,
values=[
_descriptor.EnumValueDescriptor(
name='discrete', index=0, number=0,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='continuous', index=1, number=1,
serialized_options=None,
type=None),
],
containing_type=None,
serialized_options=None,
serialized_start=142,
serialized_end=188,
name="SpaceTypeProto",
full_name="communicator_objects.SpaceTypeProto",
filename=None,
file=DESCRIPTOR,
values=[
_descriptor.EnumValueDescriptor(
name="discrete", index=0, number=0, serialized_options=None, type=None
),
_descriptor.EnumValueDescriptor(
name="continuous", index=1, number=1, serialized_options=None, type=None
),
],
containing_type=None,
serialized_options=None,
serialized_start=142,
serialized_end=188,
)
_sym_db.RegisterEnumDescriptor(_SPACETYPEPROTO)

DESCRIPTOR.enum_types_by_name['SpaceTypeProto'] = _SPACETYPEPROTO
DESCRIPTOR.enum_types_by_name["SpaceTypeProto"] = _SPACETYPEPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
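For context only (not part of the diff): SpaceTypeProto is another top-level enum, so the module exposes the lower-cased constants discrete and continuous alongside the EnumTypeWrapper. A minimal sketch:
# --- illustrative usage sketch, not part of the generated file ---
from mlagents.envs.communicator_objects import space_type_proto_pb2

assert space_type_proto_pb2.discrete == 0
assert space_type_proto_pb2.continuous == 1
print(space_type_proto_pb2.SpaceTypeProto.Name(1))  # -> "continuous"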

148
ml-agents-envs/mlagents/envs/communicator_objects/unity_input_pb2.py


# source: mlagents/envs/communicator_objects/unity_input.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import unity_rl_input_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__input__pb2
from mlagents.envs.communicator_objects import unity_rl_initialization_input_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__input__pb2
from mlagents.envs.communicator_objects import (
unity_rl_input_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__input__pb2,
)
from mlagents.envs.communicator_objects import (
unity_rl_initialization_input_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__input__pb2,
)
name='mlagents/envs/communicator_objects/unity_input.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n4mlagents/envs/communicator_objects/unity_input.proto\x12\x14\x63ommunicator_objects\x1a\x37mlagents/envs/communicator_objects/unity_rl_input.proto\x1a\x46mlagents/envs/communicator_objects/unity_rl_initialization_input.proto\"\x95\x01\n\nUnityInput\x12\x34\n\x08rl_input\x18\x01 \x01(\x0b\x32\".communicator_objects.UnityRLInput\x12Q\n\x17rl_initialization_input\x18\x02 \x01(\x0b\x32\x30.communicator_objects.UnityRLInitializationInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__input__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__input__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/unity_input.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n4mlagents/envs/communicator_objects/unity_input.proto\x12\x14\x63ommunicator_objects\x1a\x37mlagents/envs/communicator_objects/unity_rl_input.proto\x1a\x46mlagents/envs/communicator_objects/unity_rl_initialization_input.proto"\x95\x01\n\nUnityInput\x12\x34\n\x08rl_input\x18\x01 \x01(\x0b\x32".communicator_objects.UnityRLInput\x12Q\n\x17rl_initialization_input\x18\x02 \x01(\x0b\x32\x30.communicator_objects.UnityRLInitializationInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__input__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__input__pb2.DESCRIPTOR,
],
)
name='UnityInput',
full_name='communicator_objects.UnityInput',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='rl_input', full_name='communicator_objects.UnityInput.rl_input', index=0,
number=1, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='rl_initialization_input', full_name='communicator_objects.UnityInput.rl_initialization_input', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=208,
serialized_end=357,
name="UnityInput",
full_name="communicator_objects.UnityInput",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="rl_input",
full_name="communicator_objects.UnityInput.rl_input",
index=0,
number=1,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="rl_initialization_input",
full_name="communicator_objects.UnityInput.rl_initialization_input",
index=1,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=208,
serialized_end=357,
_UNITYINPUT.fields_by_name['rl_input'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__input__pb2._UNITYRLINPUT
_UNITYINPUT.fields_by_name['rl_initialization_input'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__input__pb2._UNITYRLINITIALIZATIONINPUT
DESCRIPTOR.message_types_by_name['UnityInput'] = _UNITYINPUT
_UNITYINPUT.fields_by_name[
"rl_input"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__input__pb2._UNITYRLINPUT
)
_UNITYINPUT.fields_by_name[
"rl_initialization_input"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__input__pb2._UNITYRLINITIALIZATIONINPUT
)
DESCRIPTOR.message_types_by_name["UnityInput"] = _UNITYINPUT
UnityInput = _reflection.GeneratedProtocolMessageType('UnityInput', (_message.Message,), dict(
DESCRIPTOR = _UNITYINPUT,
__module__ = 'mlagents.envs.communicator_objects.unity_input_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityInput)
))
UnityInput = _reflection.GeneratedProtocolMessageType(
"UnityInput",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYINPUT,
__module__="mlagents.envs.communicator_objects.unity_input_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityInput)
),
)
_sym_db.RegisterMessage(UnityInput)

182
ml-agents-envs/mlagents/envs/communicator_objects/unity_message_pb2.py


# source: mlagents/envs/communicator_objects/unity_message.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import unity_output_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__output__pb2
from mlagents.envs.communicator_objects import unity_input_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__input__pb2
from mlagents.envs.communicator_objects import header_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_header__pb2
from mlagents.envs.communicator_objects import (
unity_output_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__output__pb2,
)
from mlagents.envs.communicator_objects import (
unity_input_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__input__pb2,
)
from mlagents.envs.communicator_objects import (
header_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_header__pb2,
)
name='mlagents/envs/communicator_objects/unity_message.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n6mlagents/envs/communicator_objects/unity_message.proto\x12\x14\x63ommunicator_objects\x1a\x35mlagents/envs/communicator_objects/unity_output.proto\x1a\x34mlagents/envs/communicator_objects/unity_input.proto\x1a/mlagents/envs/communicator_objects/header.proto\"\xac\x01\n\x0cUnityMessage\x12,\n\x06header\x18\x01 \x01(\x0b\x32\x1c.communicator_objects.Header\x12\x37\n\x0cunity_output\x18\x02 \x01(\x0b\x32!.communicator_objects.UnityOutput\x12\x35\n\x0bunity_input\x18\x03 \x01(\x0b\x32 .communicator_objects.UnityInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__output__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_unity__input__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_header__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/unity_message.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n6mlagents/envs/communicator_objects/unity_message.proto\x12\x14\x63ommunicator_objects\x1a\x35mlagents/envs/communicator_objects/unity_output.proto\x1a\x34mlagents/envs/communicator_objects/unity_input.proto\x1a/mlagents/envs/communicator_objects/header.proto"\xac\x01\n\x0cUnityMessage\x12,\n\x06header\x18\x01 \x01(\x0b\x32\x1c.communicator_objects.Header\x12\x37\n\x0cunity_output\x18\x02 \x01(\x0b\x32!.communicator_objects.UnityOutput\x12\x35\n\x0bunity_input\x18\x03 \x01(\x0b\x32 .communicator_objects.UnityInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_unity__output__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_unity__input__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_header__pb2.DESCRIPTOR,
],
)
name='UnityMessage',
full_name='communicator_objects.UnityMessage',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='header', full_name='communicator_objects.UnityMessage.header', index=0,
number=1, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='unity_output', full_name='communicator_objects.UnityMessage.unity_output', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='unity_input', full_name='communicator_objects.UnityMessage.unity_input', index=2,
number=3, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=239,
serialized_end=411,
name="UnityMessage",
full_name="communicator_objects.UnityMessage",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="header",
full_name="communicator_objects.UnityMessage.header",
index=0,
number=1,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="unity_output",
full_name="communicator_objects.UnityMessage.unity_output",
index=1,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="unity_input",
full_name="communicator_objects.UnityMessage.unity_input",
index=2,
number=3,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=239,
serialized_end=411,
_UNITYMESSAGE.fields_by_name['header'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_header__pb2._HEADER
_UNITYMESSAGE.fields_by_name['unity_output'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_unity__output__pb2._UNITYOUTPUT
_UNITYMESSAGE.fields_by_name['unity_input'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_unity__input__pb2._UNITYINPUT
DESCRIPTOR.message_types_by_name['UnityMessage'] = _UNITYMESSAGE
_UNITYMESSAGE.fields_by_name[
"header"
].message_type = mlagents_dot_envs_dot_communicator__objects_dot_header__pb2._HEADER
_UNITYMESSAGE.fields_by_name[
"unity_output"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_unity__output__pb2._UNITYOUTPUT
)
_UNITYMESSAGE.fields_by_name[
"unity_input"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_unity__input__pb2._UNITYINPUT
)
DESCRIPTOR.message_types_by_name["UnityMessage"] = _UNITYMESSAGE
UnityMessage = _reflection.GeneratedProtocolMessageType('UnityMessage', (_message.Message,), dict(
DESCRIPTOR = _UNITYMESSAGE,
__module__ = 'mlagents.envs.communicator_objects.unity_message_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityMessage)
))
UnityMessage = _reflection.GeneratedProtocolMessageType(
"UnityMessage",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYMESSAGE,
__module__="mlagents.envs.communicator_objects.unity_message_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityMessage)
),
)
_sym_db.RegisterMessage(UnityMessage)
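For orientation, a minimal sketch (not part of this diff) of how the UnityMessage class registered above is typically used; it relies only on the field names declared in the FieldDescriptors (header, unity_output, unity_input) and the standard protobuf serialization API.

from mlagents.envs.communicator_objects import unity_message_pb2

msg = unity_message_pb2.UnityMessage()            # fields: header, unity_output, unity_input
wire = msg.SerializeToString()                     # protobuf wire-format bytes
roundtrip = unity_message_pb2.UnityMessage.FromString(wire)
assert roundtrip == msg                            # lossless round trip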

148
ml-agents-envs/mlagents/envs/communicator_objects/unity_output_pb2.py


# source: mlagents/envs/communicator_objects/unity_output.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import unity_rl_output_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__output__pb2
from mlagents.envs.communicator_objects import unity_rl_initialization_output_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__output__pb2
from mlagents.envs.communicator_objects import (
unity_rl_output_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__output__pb2,
)
from mlagents.envs.communicator_objects import (
unity_rl_initialization_output_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__output__pb2,
)
name='mlagents/envs/communicator_objects/unity_output.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n5mlagents/envs/communicator_objects/unity_output.proto\x12\x14\x63ommunicator_objects\x1a\x38mlagents/envs/communicator_objects/unity_rl_output.proto\x1aGmlagents/envs/communicator_objects/unity_rl_initialization_output.proto\"\x9a\x01\n\x0bUnityOutput\x12\x36\n\trl_output\x18\x01 \x01(\x0b\x32#.communicator_objects.UnityRLOutput\x12S\n\x18rl_initialization_output\x18\x02 \x01(\x0b\x32\x31.communicator_objects.UnityRLInitializationOutputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__output__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__output__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/unity_output.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n5mlagents/envs/communicator_objects/unity_output.proto\x12\x14\x63ommunicator_objects\x1a\x38mlagents/envs/communicator_objects/unity_rl_output.proto\x1aGmlagents/envs/communicator_objects/unity_rl_initialization_output.proto"\x9a\x01\n\x0bUnityOutput\x12\x36\n\trl_output\x18\x01 \x01(\x0b\x32#.communicator_objects.UnityRLOutput\x12S\n\x18rl_initialization_output\x18\x02 \x01(\x0b\x32\x31.communicator_objects.UnityRLInitializationOutputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__output__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__output__pb2.DESCRIPTOR,
],
)
name='UnityOutput',
full_name='communicator_objects.UnityOutput',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='rl_output', full_name='communicator_objects.UnityOutput.rl_output', index=0,
number=1, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='rl_initialization_output', full_name='communicator_objects.UnityOutput.rl_initialization_output', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=211,
serialized_end=365,
name="UnityOutput",
full_name="communicator_objects.UnityOutput",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="rl_output",
full_name="communicator_objects.UnityOutput.rl_output",
index=0,
number=1,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="rl_initialization_output",
full_name="communicator_objects.UnityOutput.rl_initialization_output",
index=1,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=211,
serialized_end=365,
_UNITYOUTPUT.fields_by_name['rl_output'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__output__pb2._UNITYRLOUTPUT
_UNITYOUTPUT.fields_by_name['rl_initialization_output'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__output__pb2._UNITYRLINITIALIZATIONOUTPUT
DESCRIPTOR.message_types_by_name['UnityOutput'] = _UNITYOUTPUT
_UNITYOUTPUT.fields_by_name[
"rl_output"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__output__pb2._UNITYRLOUTPUT
)
_UNITYOUTPUT.fields_by_name[
"rl_initialization_output"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_unity__rl__initialization__output__pb2._UNITYRLINITIALIZATIONOUTPUT
)
DESCRIPTOR.message_types_by_name["UnityOutput"] = _UNITYOUTPUT
UnityOutput = _reflection.GeneratedProtocolMessageType('UnityOutput', (_message.Message,), dict(
DESCRIPTOR = _UNITYOUTPUT,
__module__ = 'mlagents.envs.communicator_objects.unity_output_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityOutput)
))
UnityOutput = _reflection.GeneratedProtocolMessageType(
"UnityOutput",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYOUTPUT,
__module__="mlagents.envs.communicator_objects.unity_output_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityOutput)
),
)
_sym_db.RegisterMessage(UnityOutput)

100
ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_initialization_input_pb2.py


# source: mlagents/envs/communicator_objects/unity_rl_initialization_input.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
name='mlagents/envs/communicator_objects/unity_rl_initialization_input.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\nFmlagents/envs/communicator_objects/unity_rl_initialization_input.proto\x12\x14\x63ommunicator_objects\"*\n\x1aUnityRLInitializationInput\x12\x0c\n\x04seed\x18\x01 \x01(\x05\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
name="mlagents/envs/communicator_objects/unity_rl_initialization_input.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\nFmlagents/envs/communicator_objects/unity_rl_initialization_input.proto\x12\x14\x63ommunicator_objects"*\n\x1aUnityRLInitializationInput\x12\x0c\n\x04seed\x18\x01 \x01(\x05\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
name='UnityRLInitializationInput',
full_name='communicator_objects.UnityRLInitializationInput',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='seed', full_name='communicator_objects.UnityRLInitializationInput.seed', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=96,
serialized_end=138,
name="UnityRLInitializationInput",
full_name="communicator_objects.UnityRLInitializationInput",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="seed",
full_name="communicator_objects.UnityRLInitializationInput.seed",
index=0,
number=1,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
)
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=96,
serialized_end=138,
DESCRIPTOR.message_types_by_name['UnityRLInitializationInput'] = _UNITYRLINITIALIZATIONINPUT
DESCRIPTOR.message_types_by_name[
"UnityRLInitializationInput"
] = _UNITYRLINITIALIZATIONINPUT
UnityRLInitializationInput = _reflection.GeneratedProtocolMessageType('UnityRLInitializationInput', (_message.Message,), dict(
DESCRIPTOR = _UNITYRLINITIALIZATIONINPUT,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_initialization_input_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationInput)
))
UnityRLInitializationInput = _reflection.GeneratedProtocolMessageType(
"UnityRLInitializationInput",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYRLINITIALIZATIONINPUT,
__module__="mlagents.envs.communicator_objects.unity_rl_initialization_input_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationInput)
),
)
_sym_db.RegisterMessage(UnityRLInitializationInput)

225
ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.py


# source: mlagents/envs/communicator_objects/unity_rl_initialization_output.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import brain_parameters_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_brain__parameters__proto__pb2
from mlagents.envs.communicator_objects import environment_parameters_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2
from mlagents.envs.communicator_objects import (
brain_parameters_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_brain__parameters__proto__pb2,
)
from mlagents.envs.communicator_objects import (
environment_parameters_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2,
)
name='mlagents/envs/communicator_objects/unity_rl_initialization_output.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\nGmlagents/envs/communicator_objects/unity_rl_initialization_output.proto\x12\x14\x63ommunicator_objects\x1a?mlagents/envs/communicator_objects/brain_parameters_proto.proto\x1a\x45mlagents/envs/communicator_objects/environment_parameters_proto.proto\"\xe6\x01\n\x1bUnityRLInitializationOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x10\n\x08log_path\x18\x03 \x01(\t\x12\x44\n\x10\x62rain_parameters\x18\x05 \x03(\x0b\x32*.communicator_objects.BrainParametersProto\x12P\n\x16\x65nvironment_parameters\x18\x06 \x01(\x0b\x32\x30.communicator_objects.EnvironmentParametersProtoB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_brain__parameters__proto__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/unity_rl_initialization_output.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\nGmlagents/envs/communicator_objects/unity_rl_initialization_output.proto\x12\x14\x63ommunicator_objects\x1a?mlagents/envs/communicator_objects/brain_parameters_proto.proto\x1a\x45mlagents/envs/communicator_objects/environment_parameters_proto.proto"\xe6\x01\n\x1bUnityRLInitializationOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x10\n\x08log_path\x18\x03 \x01(\t\x12\x44\n\x10\x62rain_parameters\x18\x05 \x03(\x0b\x32*.communicator_objects.BrainParametersProto\x12P\n\x16\x65nvironment_parameters\x18\x06 \x01(\x0b\x32\x30.communicator_objects.EnvironmentParametersProtoB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_brain__parameters__proto__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2.DESCRIPTOR,
],
)
name='UnityRLInitializationOutput',
full_name='communicator_objects.UnityRLInitializationOutput',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='name', full_name='communicator_objects.UnityRLInitializationOutput.name', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='version', full_name='communicator_objects.UnityRLInitializationOutput.version', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='log_path', full_name='communicator_objects.UnityRLInitializationOutput.log_path', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='brain_parameters', full_name='communicator_objects.UnityRLInitializationOutput.brain_parameters', index=3,
number=5, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='environment_parameters', full_name='communicator_objects.UnityRLInitializationOutput.environment_parameters', index=4,
number=6, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=234,
serialized_end=464,
name="UnityRLInitializationOutput",
full_name="communicator_objects.UnityRLInitializationOutput",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="name",
full_name="communicator_objects.UnityRLInitializationOutput.name",
index=0,
number=1,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="version",
full_name="communicator_objects.UnityRLInitializationOutput.version",
index=1,
number=2,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="log_path",
full_name="communicator_objects.UnityRLInitializationOutput.log_path",
index=2,
number=3,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="brain_parameters",
full_name="communicator_objects.UnityRLInitializationOutput.brain_parameters",
index=3,
number=5,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="environment_parameters",
full_name="communicator_objects.UnityRLInitializationOutput.environment_parameters",
index=4,
number=6,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=234,
serialized_end=464,
_UNITYRLINITIALIZATIONOUTPUT.fields_by_name['brain_parameters'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_brain__parameters__proto__pb2._BRAINPARAMETERSPROTO
_UNITYRLINITIALIZATIONOUTPUT.fields_by_name['environment_parameters'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2._ENVIRONMENTPARAMETERSPROTO
DESCRIPTOR.message_types_by_name['UnityRLInitializationOutput'] = _UNITYRLINITIALIZATIONOUTPUT
_UNITYRLINITIALIZATIONOUTPUT.fields_by_name[
"brain_parameters"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_brain__parameters__proto__pb2._BRAINPARAMETERSPROTO
)
_UNITYRLINITIALIZATIONOUTPUT.fields_by_name[
"environment_parameters"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2._ENVIRONMENTPARAMETERSPROTO
)
DESCRIPTOR.message_types_by_name[
"UnityRLInitializationOutput"
] = _UNITYRLINITIALIZATIONOUTPUT
UnityRLInitializationOutput = _reflection.GeneratedProtocolMessageType('UnityRLInitializationOutput', (_message.Message,), dict(
DESCRIPTOR = _UNITYRLINITIALIZATIONOUTPUT,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_initialization_output_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationOutput)
))
UnityRLInitializationOutput = _reflection.GeneratedProtocolMessageType(
"UnityRLInitializationOutput",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYRLINITIALIZATIONOUTPUT,
__module__="mlagents.envs.communicator_objects.unity_rl_initialization_output_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationOutput)
),
)
_sym_db.RegisterMessage(UnityRLInitializationOutput)

398
ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_input_pb2.py


# source: mlagents/envs/communicator_objects/unity_rl_input.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import agent_action_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_agent__action__proto__pb2
from mlagents.envs.communicator_objects import environment_parameters_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2
from mlagents.envs.communicator_objects import command_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_command__proto__pb2
from mlagents.envs.communicator_objects import (
agent_action_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_agent__action__proto__pb2,
)
from mlagents.envs.communicator_objects import (
environment_parameters_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2,
)
from mlagents.envs.communicator_objects import (
command_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_command__proto__pb2,
)
name='mlagents/envs/communicator_objects/unity_rl_input.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n7mlagents/envs/communicator_objects/unity_rl_input.proto\x12\x14\x63ommunicator_objects\x1a;mlagents/envs/communicator_objects/agent_action_proto.proto\x1a\x45mlagents/envs/communicator_objects/environment_parameters_proto.proto\x1a\x36mlagents/envs/communicator_objects/command_proto.proto\"\xb4\x03\n\x0cUnityRLInput\x12K\n\ragent_actions\x18\x01 \x03(\x0b\x32\x34.communicator_objects.UnityRLInput.AgentActionsEntry\x12P\n\x16\x65nvironment_parameters\x18\x02 \x01(\x0b\x32\x30.communicator_objects.EnvironmentParametersProto\x12\x13\n\x0bis_training\x18\x03 \x01(\x08\x12\x33\n\x07\x63ommand\x18\x04 \x01(\x0e\x32\".communicator_objects.CommandProto\x1aM\n\x14ListAgentActionProto\x12\x35\n\x05value\x18\x01 \x03(\x0b\x32&.communicator_objects.AgentActionProto\x1al\n\x11\x41gentActionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.communicator_objects.UnityRLInput.ListAgentActionProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_agent__action__proto__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_command__proto__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/unity_rl_input.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n7mlagents/envs/communicator_objects/unity_rl_input.proto\x12\x14\x63ommunicator_objects\x1a;mlagents/envs/communicator_objects/agent_action_proto.proto\x1a\x45mlagents/envs/communicator_objects/environment_parameters_proto.proto\x1a\x36mlagents/envs/communicator_objects/command_proto.proto"\xb4\x03\n\x0cUnityRLInput\x12K\n\ragent_actions\x18\x01 \x03(\x0b\x32\x34.communicator_objects.UnityRLInput.AgentActionsEntry\x12P\n\x16\x65nvironment_parameters\x18\x02 \x01(\x0b\x32\x30.communicator_objects.EnvironmentParametersProto\x12\x13\n\x0bis_training\x18\x03 \x01(\x08\x12\x33\n\x07\x63ommand\x18\x04 \x01(\x0e\x32".communicator_objects.CommandProto\x1aM\n\x14ListAgentActionProto\x12\x35\n\x05value\x18\x01 \x03(\x0b\x32&.communicator_objects.AgentActionProto\x1al\n\x11\x41gentActionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.communicator_objects.UnityRLInput.ListAgentActionProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_agent__action__proto__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2.DESCRIPTOR,
mlagents_dot_envs_dot_communicator__objects_dot_command__proto__pb2.DESCRIPTOR,
],
)
name='ListAgentActionProto',
full_name='communicator_objects.UnityRLInput.ListAgentActionProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='value', full_name='communicator_objects.UnityRLInput.ListAgentActionProto.value', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=519,
serialized_end=596,
name="ListAgentActionProto",
full_name="communicator_objects.UnityRLInput.ListAgentActionProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="value",
full_name="communicator_objects.UnityRLInput.ListAgentActionProto.value",
index=0,
number=1,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
)
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=519,
serialized_end=596,
name='AgentActionsEntry',
full_name='communicator_objects.UnityRLInput.AgentActionsEntry',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='key', full_name='communicator_objects.UnityRLInput.AgentActionsEntry.key', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='value', full_name='communicator_objects.UnityRLInput.AgentActionsEntry.value', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=_b('8\001'),
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=598,
serialized_end=706,
name="AgentActionsEntry",
full_name="communicator_objects.UnityRLInput.AgentActionsEntry",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="key",
full_name="communicator_objects.UnityRLInput.AgentActionsEntry.key",
index=0,
number=1,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="value",
full_name="communicator_objects.UnityRLInput.AgentActionsEntry.value",
index=1,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=_b("8\001"),
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=598,
serialized_end=706,
name='UnityRLInput',
full_name='communicator_objects.UnityRLInput',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='agent_actions', full_name='communicator_objects.UnityRLInput.agent_actions', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='environment_parameters', full_name='communicator_objects.UnityRLInput.environment_parameters', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='is_training', full_name='communicator_objects.UnityRLInput.is_training', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='command', full_name='communicator_objects.UnityRLInput.command', index=3,
number=4, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[_UNITYRLINPUT_LISTAGENTACTIONPROTO, _UNITYRLINPUT_AGENTACTIONSENTRY, ],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=270,
serialized_end=706,
name="UnityRLInput",
full_name="communicator_objects.UnityRLInput",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="agent_actions",
full_name="communicator_objects.UnityRLInput.agent_actions",
index=0,
number=1,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="environment_parameters",
full_name="communicator_objects.UnityRLInput.environment_parameters",
index=1,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="is_training",
full_name="communicator_objects.UnityRLInput.is_training",
index=2,
number=3,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="command",
full_name="communicator_objects.UnityRLInput.command",
index=3,
number=4,
type=14,
cpp_type=8,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[_UNITYRLINPUT_LISTAGENTACTIONPROTO, _UNITYRLINPUT_AGENTACTIONSENTRY],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=270,
serialized_end=706,
_UNITYRLINPUT_LISTAGENTACTIONPROTO.fields_by_name['value'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_agent__action__proto__pb2._AGENTACTIONPROTO
_UNITYRLINPUT_LISTAGENTACTIONPROTO.fields_by_name[
"value"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_agent__action__proto__pb2._AGENTACTIONPROTO
)
_UNITYRLINPUT_AGENTACTIONSENTRY.fields_by_name['value'].message_type = _UNITYRLINPUT_LISTAGENTACTIONPROTO
_UNITYRLINPUT_AGENTACTIONSENTRY.fields_by_name[
"value"
].message_type = _UNITYRLINPUT_LISTAGENTACTIONPROTO
_UNITYRLINPUT.fields_by_name['agent_actions'].message_type = _UNITYRLINPUT_AGENTACTIONSENTRY
_UNITYRLINPUT.fields_by_name['environment_parameters'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2._ENVIRONMENTPARAMETERSPROTO
_UNITYRLINPUT.fields_by_name['command'].enum_type = mlagents_dot_envs_dot_communicator__objects_dot_command__proto__pb2._COMMANDPROTO
DESCRIPTOR.message_types_by_name['UnityRLInput'] = _UNITYRLINPUT
_UNITYRLINPUT.fields_by_name[
"agent_actions"
].message_type = _UNITYRLINPUT_AGENTACTIONSENTRY
_UNITYRLINPUT.fields_by_name[
"environment_parameters"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_environment__parameters__proto__pb2._ENVIRONMENTPARAMETERSPROTO
)
_UNITYRLINPUT.fields_by_name[
"command"
].enum_type = (
mlagents_dot_envs_dot_communicator__objects_dot_command__proto__pb2._COMMANDPROTO
)
DESCRIPTOR.message_types_by_name["UnityRLInput"] = _UNITYRLINPUT
UnityRLInput = _reflection.GeneratedProtocolMessageType('UnityRLInput', (_message.Message,), dict(
ListAgentActionProto = _reflection.GeneratedProtocolMessageType('ListAgentActionProto', (_message.Message,), dict(
DESCRIPTOR = _UNITYRLINPUT_LISTAGENTACTIONPROTO,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_input_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.ListAgentActionProto)
))
,
AgentActionsEntry = _reflection.GeneratedProtocolMessageType('AgentActionsEntry', (_message.Message,), dict(
DESCRIPTOR = _UNITYRLINPUT_AGENTACTIONSENTRY,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_input_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.AgentActionsEntry)
))
,
DESCRIPTOR = _UNITYRLINPUT,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_input_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput)
))
UnityRLInput = _reflection.GeneratedProtocolMessageType(
"UnityRLInput",
(_message.Message,),
dict(
ListAgentActionProto=_reflection.GeneratedProtocolMessageType(
"ListAgentActionProto",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYRLINPUT_LISTAGENTACTIONPROTO,
__module__="mlagents.envs.communicator_objects.unity_rl_input_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.ListAgentActionProto)
),
),
AgentActionsEntry=_reflection.GeneratedProtocolMessageType(
"AgentActionsEntry",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYRLINPUT_AGENTACTIONSENTRY,
__module__="mlagents.envs.communicator_objects.unity_rl_input_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.AgentActionsEntry)
),
),
DESCRIPTOR=_UNITYRLINPUT,
__module__="mlagents.envs.communicator_objects.unity_rl_input_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput)
),
)
_sym_db.RegisterMessage(UnityRLInput)
_sym_db.RegisterMessage(UnityRLInput.ListAgentActionProto)
_sym_db.RegisterMessage(UnityRLInput.AgentActionsEntry)
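A small hedged sketch (not part of this diff) of how the nested map declared above is populated: agent_actions is a protobuf map<string, ListAgentActionProto>, so entries are created on first access; the brain name "SomeBrain" is purely illustrative and only names shown in the descriptors are assumed.

from mlagents.envs.communicator_objects import unity_rl_input_pb2
from mlagents.envs.communicator_objects import agent_action_proto_pb2

rl_input = unity_rl_input_pb2.UnityRLInput(is_training=True)
# Map entries are created on access; 'value' is the repeated AgentActionProto list.
rl_input.agent_actions["SomeBrain"].value.extend([agent_action_proto_pb2.AgentActionProto()])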

326
ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_output_pb2.py


# source: mlagents/envs/communicator_objects/unity_rl_output.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import agent_info_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_agent__info__proto__pb2
from mlagents.envs.communicator_objects import (
agent_info_proto_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_agent__info__proto__pb2,
)
name='mlagents/envs/communicator_objects/unity_rl_output.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n8mlagents/envs/communicator_objects/unity_rl_output.proto\x12\x14\x63ommunicator_objects\x1a\x39mlagents/envs/communicator_objects/agent_info_proto.proto\"\xa3\x02\n\rUnityRLOutput\x12\x13\n\x0bglobal_done\x18\x01 \x01(\x08\x12G\n\nagentInfos\x18\x02 \x03(\x0b\x32\x33.communicator_objects.UnityRLOutput.AgentInfosEntry\x1aI\n\x12ListAgentInfoProto\x12\x33\n\x05value\x18\x01 \x03(\x0b\x32$.communicator_objects.AgentInfoProto\x1ai\n\x0f\x41gentInfosEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x45\n\x05value\x18\x02 \x01(\x0b\x32\x36.communicator_objects.UnityRLOutput.ListAgentInfoProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_agent__info__proto__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/unity_rl_output.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n8mlagents/envs/communicator_objects/unity_rl_output.proto\x12\x14\x63ommunicator_objects\x1a\x39mlagents/envs/communicator_objects/agent_info_proto.proto"\xa3\x02\n\rUnityRLOutput\x12\x13\n\x0bglobal_done\x18\x01 \x01(\x08\x12G\n\nagentInfos\x18\x02 \x03(\x0b\x32\x33.communicator_objects.UnityRLOutput.AgentInfosEntry\x1aI\n\x12ListAgentInfoProto\x12\x33\n\x05value\x18\x01 \x03(\x0b\x32$.communicator_objects.AgentInfoProto\x1ai\n\x0f\x41gentInfosEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x45\n\x05value\x18\x02 \x01(\x0b\x32\x36.communicator_objects.UnityRLOutput.ListAgentInfoProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_agent__info__proto__pb2.DESCRIPTOR
],
)
name='ListAgentInfoProto',
full_name='communicator_objects.UnityRLOutput.ListAgentInfoProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='value', full_name='communicator_objects.UnityRLOutput.ListAgentInfoProto.value', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=253,
serialized_end=326,
name="ListAgentInfoProto",
full_name="communicator_objects.UnityRLOutput.ListAgentInfoProto",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="value",
full_name="communicator_objects.UnityRLOutput.ListAgentInfoProto.value",
index=0,
number=1,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
)
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=253,
serialized_end=326,
name='AgentInfosEntry',
full_name='communicator_objects.UnityRLOutput.AgentInfosEntry',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='key', full_name='communicator_objects.UnityRLOutput.AgentInfosEntry.key', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='value', full_name='communicator_objects.UnityRLOutput.AgentInfosEntry.value', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=_b('8\001'),
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=328,
serialized_end=433,
name="AgentInfosEntry",
full_name="communicator_objects.UnityRLOutput.AgentInfosEntry",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="key",
full_name="communicator_objects.UnityRLOutput.AgentInfosEntry.key",
index=0,
number=1,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=_b("").decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="value",
full_name="communicator_objects.UnityRLOutput.AgentInfosEntry.value",
index=1,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=_b("8\001"),
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=328,
serialized_end=433,
name='UnityRLOutput',
full_name='communicator_objects.UnityRLOutput',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='global_done', full_name='communicator_objects.UnityRLOutput.global_done', index=0,
number=1, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='agentInfos', full_name='communicator_objects.UnityRLOutput.agentInfos', index=1,
number=2, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[_UNITYRLOUTPUT_LISTAGENTINFOPROTO, _UNITYRLOUTPUT_AGENTINFOSENTRY, ],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=142,
serialized_end=433,
name="UnityRLOutput",
full_name="communicator_objects.UnityRLOutput",
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name="global_done",
full_name="communicator_objects.UnityRLOutput.global_done",
index=0,
number=1,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
_descriptor.FieldDescriptor(
name="agentInfos",
full_name="communicator_objects.UnityRLOutput.agentInfos",
index=1,
number=2,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
),
],
extensions=[],
nested_types=[_UNITYRLOUTPUT_LISTAGENTINFOPROTO, _UNITYRLOUTPUT_AGENTINFOSENTRY],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=142,
serialized_end=433,
_UNITYRLOUTPUT_LISTAGENTINFOPROTO.fields_by_name['value'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_agent__info__proto__pb2._AGENTINFOPROTO
_UNITYRLOUTPUT_LISTAGENTINFOPROTO.fields_by_name[
"value"
].message_type = (
mlagents_dot_envs_dot_communicator__objects_dot_agent__info__proto__pb2._AGENTINFOPROTO
)
_UNITYRLOUTPUT_AGENTINFOSENTRY.fields_by_name['value'].message_type = _UNITYRLOUTPUT_LISTAGENTINFOPROTO
_UNITYRLOUTPUT_AGENTINFOSENTRY.fields_by_name[
"value"
].message_type = _UNITYRLOUTPUT_LISTAGENTINFOPROTO
_UNITYRLOUTPUT.fields_by_name['agentInfos'].message_type = _UNITYRLOUTPUT_AGENTINFOSENTRY
DESCRIPTOR.message_types_by_name['UnityRLOutput'] = _UNITYRLOUTPUT
_UNITYRLOUTPUT.fields_by_name[
"agentInfos"
].message_type = _UNITYRLOUTPUT_AGENTINFOSENTRY
DESCRIPTOR.message_types_by_name["UnityRLOutput"] = _UNITYRLOUTPUT
UnityRLOutput = _reflection.GeneratedProtocolMessageType('UnityRLOutput', (_message.Message,), dict(
ListAgentInfoProto = _reflection.GeneratedProtocolMessageType('ListAgentInfoProto', (_message.Message,), dict(
DESCRIPTOR = _UNITYRLOUTPUT_LISTAGENTINFOPROTO,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_output_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.ListAgentInfoProto)
))
,
AgentInfosEntry = _reflection.GeneratedProtocolMessageType('AgentInfosEntry', (_message.Message,), dict(
DESCRIPTOR = _UNITYRLOUTPUT_AGENTINFOSENTRY,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_output_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.AgentInfosEntry)
))
,
DESCRIPTOR = _UNITYRLOUTPUT,
__module__ = 'mlagents.envs.communicator_objects.unity_rl_output_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput)
))
UnityRLOutput = _reflection.GeneratedProtocolMessageType(
"UnityRLOutput",
(_message.Message,),
dict(
ListAgentInfoProto=_reflection.GeneratedProtocolMessageType(
"ListAgentInfoProto",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYRLOUTPUT_LISTAGENTINFOPROTO,
__module__="mlagents.envs.communicator_objects.unity_rl_output_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.ListAgentInfoProto)
),
),
AgentInfosEntry=_reflection.GeneratedProtocolMessageType(
"AgentInfosEntry",
(_message.Message,),
dict(
DESCRIPTOR=_UNITYRLOUTPUT_AGENTINFOSENTRY,
__module__="mlagents.envs.communicator_objects.unity_rl_output_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.AgentInfosEntry)
),
),
DESCRIPTOR=_UNITYRLOUTPUT,
__module__="mlagents.envs.communicator_objects.unity_rl_output_pb2"
# @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput)
),
)
_sym_db.RegisterMessage(UnityRLOutput)
_sym_db.RegisterMessage(UnityRLOutput.ListAgentInfoProto)
_sym_db.RegisterMessage(UnityRLOutput.AgentInfosEntry)

62
ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py


# source: mlagents/envs/communicator_objects/unity_to_external.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
from mlagents.envs.communicator_objects import unity_message_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2
from mlagents.envs.communicator_objects import (
unity_message_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2,
)
name='mlagents/envs/communicator_objects/unity_to_external.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n:mlagents/envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/unity_message.proto2g\n\x0fUnityToExternal\x12T\n\x08\x45xchange\x12\".communicator_objects.UnityMessage\x1a\".communicator_objects.UnityMessage\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])
name="mlagents/envs/communicator_objects/unity_to_external.proto",
package="communicator_objects",
syntax="proto3",
serialized_options=_b("\252\002\034MLAgents.CommunicatorObjects"),
serialized_pb=_b(
'\n:mlagents/envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/unity_message.proto2g\n\x0fUnityToExternal\x12T\n\x08\x45xchange\x12".communicator_objects.UnityMessage\x1a".communicator_objects.UnityMessage"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3'
),
dependencies=[
mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR
],
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

_UNITYTOEXTERNAL = _descriptor.ServiceDescriptor(
name='UnityToExternal',
full_name='communicator_objects.UnityToExternal',
file=DESCRIPTOR,
index=0,
serialized_options=None,
serialized_start=140,
serialized_end=243,
methods=[
_descriptor.MethodDescriptor(
name='Exchange',
full_name='communicator_objects.UnityToExternal.Exchange',
name="UnityToExternal",
full_name="communicator_objects.UnityToExternal",
file=DESCRIPTOR,
containing_service=None,
input_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
output_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
),
])
serialized_start=140,
serialized_end=243,
methods=[
_descriptor.MethodDescriptor(
name="Exchange",
full_name="communicator_objects.UnityToExternal.Exchange",
index=0,
containing_service=None,
input_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
output_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
serialized_options=None,
)
],
)
DESCRIPTOR.services_by_name['UnityToExternal'] = _UNITYTOEXTERNAL
DESCRIPTOR.services_by_name["UnityToExternal"] = _UNITYTOEXTERNAL
# @@protoc_insertion_point(module_scope)

55
ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2_grpc.py


# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
import grpc
from mlagents.envs.communicator_objects import unity_message_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2
from mlagents.envs.communicator_objects import (
unity_message_pb2 as mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2,
)
# missing associated documentation comment in .proto file
pass
# missing associated documentation comment in .proto file
pass
def __init__(self, channel):
"""Constructor.
def __init__(self, channel):
"""Constructor.
self.Exchange = channel.unary_unary(
'/communicator_objects.UnityToExternal/Exchange',
request_serializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
response_deserializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
self.Exchange = channel.unary_unary(
"/communicator_objects.UnityToExternal/Exchange",
request_serializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
response_deserializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
# missing associated documentation comment in .proto file
pass
# missing associated documentation comment in .proto file
pass
def Exchange(self, request, context):
"""Sends the academy parameters
def Exchange(self, request, context):
"""Sends the academy parameters
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details("Method not implemented!")
raise NotImplementedError("Method not implemented!")
rpc_method_handlers = {
'Exchange': grpc.unary_unary_rpc_method_handler(
servicer.Exchange,
request_deserializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
response_serializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'communicator_objects.UnityToExternal', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
rpc_method_handlers = {
"Exchange": grpc.unary_unary_rpc_method_handler(
servicer.Exchange,
request_deserializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
response_serializer=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
)
}
generic_handler = grpc.method_handlers_generic_handler(
"communicator_objects.UnityToExternal", rpc_method_handlers
)
server.add_generic_rpc_handlers((generic_handler,))
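A minimal client-side sketch (not part of this diff), assuming the stub name the gRPC Python plugin conventionally emits for this service (UnityToExternalStub) and the UnityMessage type from unity_message_pb2; Exchange is the unary-unary RPC wired up in the stub constructor above.

import grpc
from mlagents.envs.communicator_objects import unity_message_pb2
from mlagents.envs.communicator_objects import unity_to_external_pb2_grpc

channel = grpc.insecure_channel("localhost:5005")          # assumed local port
stub = unity_to_external_pb2_grpc.UnityToExternalStub(channel)
reply = stub.Exchange(unity_message_pb2.UnityMessage())    # round-trips a UnityMessage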

358
ml-agents-envs/mlagents/envs/environment.py


from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from .brain import AllBrainInfo, BrainInfo, BrainParameters
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from .exception import (
UnityEnvironmentException,
UnityActionException,
UnityTimeOutException,
)
from .communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto, \
EnvironmentParametersProto, UnityRLInitializationInput, UnityRLInitializationOutput, \
UnityInput, UnityOutput, CustomResetParameters, CustomAction
from .communicator_objects import (
UnityRLInput,
UnityRLOutput,
AgentActionProto,
EnvironmentParametersProto,
UnityRLInitializationInput,
UnityRLInitializationOutput,
UnityInput,
UnityOutput,
CustomResetParameters,
CustomAction,
)
from .rpc_communicator import RpcCommunicator
from sys import platform

SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
SINGLE_BRAIN_TEXT_TYPES = (str, list, np.ndarray)
def __init__(self,
file_name: Optional[str] = None,
worker_id: int = 0,
base_port: int = 5005,
seed: int = 0,
docker_training: bool = False,
no_graphics: bool = False,
timeout_wait: int = 30):
def __init__(
self,
file_name: Optional[str] = None,
worker_id: int = 0,
base_port: int = 5005,
seed: int = 0,
docker_training: bool = False,
no_graphics: bool = False,
timeout_wait: int = 30,
):
"""
Starts a new unity environment and establishes a connection with the environment.
Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
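A brief usage sketch (not part of this diff), assuming a hypothetical local build named "3DBall"; only parameters shown in the signature above are used, and worker_id simply offsets the base port as the next hunk shows.

from mlagents.envs.environment import UnityEnvironment

# "3DBall" is an assumed executable name; the connection uses port 5005 + worker_id.
env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)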

self.port = base_port + worker_id
self._buffer_size = 12000
self._version_ = "API-8"
self._loaded = False # If true, this means the environment was successfully loaded
self.proc1 = None # The process that is started. If None, no process was started
self._loaded = (
False
) # If true, this means the environment was successfully loaded
self.proc1 = (
None
) # The process that is started. If None, no process was started
self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
# If the environment name is None, a new environment will not be launched

raise UnityEnvironmentException(
"If the environment name is None, "
"the worker-id must be 0 in order to connect with the Editor.")
"the worker-id must be 0 in order to connect with the Editor."
)
logger.info("Start training by pressing the Play button in the Unity Editor.")
logger.info(
"Start training by pressing the Play button in the Unity Editor."
)
rl_init_parameters_in = UnityRLInitializationInput(
seed=seed
)
rl_init_parameters_in = UnityRLInitializationInput(seed=seed)
try:
aca_params = self.send_academy_parameters(rl_init_parameters_in)
except UnityTimeOutException:

raise UnityEnvironmentException(
"The API number is not compatible between Unity and python. Python API : {0}, Unity API : "
"{1}.\nPlease go to https://github.com/Unity-Technologies/ml-agents to download the latest version "
"of ML-Agents.".format(self._version_, self._unity_version))
"of ML-Agents.".format(self._version_, self._unity_version)
)
self._n_agents = {}
self._global_done = None
self._academy_name = aca_params.name

self._external_brain_names = []
for brain_param in aca_params.brain_parameters:
self._brain_names += [brain_param.brain_name]
self._brains[brain_param.brain_name] = BrainParameters.from_proto(brain_param)
self._brains[brain_param.brain_name] = BrainParameters.from_proto(
brain_param
)
logger.info("\n'{0}' started successfully!\n{1}".format(self._academy_name, str(self)))
logger.info(
"\n'{0}' started successfully!\n{1}".format(self._academy_name, str(self))
)
logger.warning(" No Learning Brains set to train found in the Unity Environment. "
"You will not be able to pass actions to your agent(s).")
logger.warning(
" No Learning Brains set to train found in the Unity Environment. "
"You will not be able to pass actions to your agent(s)."
)
@property
def logfile_path(self):

def executable_launcher(self, file_name, docker_training, no_graphics):
cwd = os.getcwd()
file_name = (file_name.strip()
.replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86',
''))
file_name = (
file_name.strip()
.replace(".app", "")
.replace(".exe", "")
.replace(".x86_64", "")
.replace(".x86", "")
)
logger.debug('The true file name is {}'.format(true_filename))
logger.debug("The true file name is {}".format(true_filename))
candidates = glob.glob(os.path.join(cwd, file_name) + '.x86_64')
candidates = glob.glob(os.path.join(cwd, file_name) + ".x86_64")
candidates = glob.glob(os.path.join(cwd, file_name) + '.x86')
candidates = glob.glob(os.path.join(cwd, file_name) + ".x86")
candidates = glob.glob(file_name + '.x86_64')
candidates = glob.glob(file_name + ".x86_64")
candidates = glob.glob(file_name + '.x86')
candidates = glob.glob(file_name + ".x86")
elif platform == 'darwin':
elif platform == "darwin":
os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', true_filename))
os.path.join(
cwd, file_name + ".app", "Contents", "MacOS", true_filename
)
)
os.path.join(file_name + '.app', 'Contents', 'MacOS', true_filename))
os.path.join(file_name + ".app", "Contents", "MacOS", true_filename)
)
os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', '*'))
os.path.join(cwd, file_name + ".app", "Contents", "MacOS", "*")
)
candidates = glob.glob(os.path.join(file_name + '.app', 'Contents', 'MacOS', '*'))
candidates = glob.glob(
os.path.join(file_name + ".app", "Contents", "MacOS", "*")
)
elif platform == 'win32':
candidates = glob.glob(os.path.join(cwd, file_name + '.exe'))
elif platform == "win32":
candidates = glob.glob(os.path.join(cwd, file_name + ".exe"))
candidates = glob.glob(file_name + '.exe')
candidates = glob.glob(file_name + ".exe")
raise UnityEnvironmentException("Couldn't launch the {0} environment. "
"Provided filename does not match any environments."
.format(true_filename))
raise UnityEnvironmentException(
"Couldn't launch the {0} environment. "
"Provided filename does not match any environments.".format(
true_filename
)
)
else:
logger.debug("This is the launch string {}".format(launch_string))
# Launch Unity environment

[launch_string, '-nographics', '-batchmode',
'--port', str(self.port)])
[
launch_string,
"-nographics",
"-batchmode",
"--port",
str(self.port),
]
)
[launch_string, '--port', str(self.port)])
[launch_string, "--port", str(self.port)]
)
else:
"""
Comments for future maintenance:

launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
we created with `xvfb`.
"""
docker_ls = ("exec xvfb-run --auto-servernum"
" --server-args='-screen 0 640x480x24'"
" {0} --port {1}").format(launch_string, str(self.port))
self.proc1 = subprocess.Popen(docker_ls,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True)
docker_ls = (
"exec xvfb-run --auto-servernum"
" --server-args='-screen 0 640x480x24'"
" {0} --port {1}"
).format(launch_string, str(self.port))
self.proc1 = subprocess.Popen(
docker_ls,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True,
)
return '''Unity Academy name: {0}
return (
"""Unity Academy name: {0}
Reset Parameters :\n\t\t{3}'''.format(self._academy_name, str(self._num_brains),
str(self._num_external_brains),
"\n\t\t".join(
[str(k) + " -> " + str(self._resetParameters[k])
for k in self._resetParameters])) + '\n' + \
'\n'.join([str(self._brains[b]) for b in self._brains])
Reset Parameters :\n\t\t{3}""".format(
self._academy_name,
str(self._num_brains),
str(self._num_external_brains),
"\n\t\t".join(
[
str(k) + " -> " + str(self._resetParameters[k])
for k in self._resetParameters
]
),
)
+ "\n"
+ "\n".join([str(self._brains[b]) for b in self._brains])
)
def reset(self, config=None, train_mode=True, custom_reset_parameters=None) -> AllBrainInfo:
def reset(
self, config=None, train_mode=True, custom_reset_parameters=None
) -> AllBrainInfo:
"""
Sends a signal to reset the unity environment.
:return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment.

elif config:
logger.info("Academy reset with parameters: {0}"
.format(', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))
logger.info(
"Academy reset with parameters: {0}".format(
", ".join([str(x) + " -> " + str(config[x]) for x in config])
)
)
"The value for parameter '{0}'' must be an Integer or a Float.".format(k))
"The value for parameter '{0}'' must be an Integer or a Float.".format(
k
)
)
"The parameter '{0}' is not a valid parameter.".format(k))
"The parameter '{0}' is not a valid parameter.".format(k)
)
if self._loaded:
outputs = self.communicator.exchange(

else:
raise UnityEnvironmentException("No Unity environment is loaded.")
def step(self, vector_action=None, memory=None, text_action=None, value=None, custom_action=None) -> AllBrainInfo:
def step(
self,
vector_action=None,
memory=None,
text_action=None,
value=None,
custom_action=None,
) -> AllBrainInfo:
"""
Provides the environment with an action, moves the environment dynamics forward accordingly,
and returns observation, state, and reward information to the agent.

elif self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names a keys, "
"and vector_actions as values".format(self._num_brains))
"and vector_actions as values".format(self._num_brains)
)
"step cannot take a vector_action input")
"step cannot take a vector_action input"
)
if isinstance(memory, self.SINGLE_BRAIN_ACTION_TYPES):
if self._num_external_brains == 1:

"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and memories as values".format(self._num_brains))
"and memories as values".format(self._num_brains)
)
"step cannot take a memory input")
"step cannot take a memory input"
)
if isinstance(text_action, self.SINGLE_BRAIN_TEXT_TYPES):
if self._num_external_brains == 1:

"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and text_actions as values".format(self._num_brains))
"and text_actions as values".format(self._num_brains)
)
"step cannot take a value input")
"step cannot take a value input"
)
if isinstance(value, self.SINGLE_BRAIN_ACTION_TYPES):
if self._num_external_brains == 1:

"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and state/action value estimates as values".format(self._num_brains))
"and state/action value estimates as values".format(
self._num_brains
)
)
"step cannot take a value input")
"step cannot take a value input"
)
if isinstance(custom_action, CustomAction):
if self._num_external_brains == 1:

"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and CustomAction instances as values".format(self._num_brains))
"and CustomAction instances as values".format(self._num_brains)
)
"step cannot take a custom_action input")
"step cannot take a custom_action input"
)
for brain_name in list(vector_action.keys()) + list(memory.keys()) + list(
text_action.keys()):
for brain_name in (
list(vector_action.keys())
+ list(memory.keys())
+ list(text_action.keys())
):
"in the environment".format(brain_name))
"in the environment".format(brain_name)
)
vector_action[brain_name] = [0.0] * n_agent * len(
self._brains[brain_name].vector_action_space_size)
vector_action[brain_name] = (
[0.0]
* n_agent
* len(self._brains[brain_name].vector_action_space_size)
)
vector_action[brain_name] = [0.0] * n_agent * \
self._brains[
brain_name].vector_action_space_size[0]
vector_action[brain_name] = (
[0.0]
* n_agent
* self._brains[brain_name].vector_action_space_size[0]
)
else:
vector_action[brain_name] = self._flatten(vector_action[brain_name])
if brain_name not in memory:

if custom_action[brain_name] is None:
custom_action[brain_name] = [None] * n_agent
if isinstance(custom_action[brain_name], CustomAction):
custom_action[brain_name] = [custom_action[brain_name]] * n_agent
custom_action[brain_name] = [
custom_action[brain_name]
] * n_agent
number_text_actions = len(text_action[brain_name])
if not ((number_text_actions == n_agent) or number_text_actions == 0):

"The brain {0} expected {1} text_action but was given {2}".format(
brain_name, n_agent, number_text_actions))
brain_name, n_agent, number_text_actions
)
)
discrete_check = self._brains[brain_name].vector_action_space_type == "discrete"
discrete_check = (
self._brains[brain_name].vector_action_space_type == "discrete"
)
self._brains[brain_name].vector_action_space_size)
self._brains[brain_name].vector_action_space_size
)
continuous_check = self._brains[brain_name].vector_action_space_type == "continuous"
continuous_check = (
self._brains[brain_name].vector_action_space_type == "continuous"
)
expected_continuous_size = self._brains[brain_name].vector_action_space_size[
0] * n_agent
expected_continuous_size = (
self._brains[brain_name].vector_action_space_size[0] * n_agent
)
if not ((discrete_check and len(
vector_action[brain_name]) == expected_discrete_size) or
(continuous_check and len(
vector_action[brain_name]) == expected_continuous_size)):
if not (
(
discrete_check
and len(vector_action[brain_name]) == expected_discrete_size
)
or (
continuous_check
and len(vector_action[brain_name]) == expected_continuous_size
)
):
"The brain {0} expected {1} {2} action(s), but was provided: {3}"
.format(brain_name, str(expected_discrete_size)
if discrete_check
else str(expected_continuous_size),
self._brains[brain_name].vector_action_space_type,
str(vector_action[brain_name])))
"The brain {0} expected {1} {2} action(s), but was provided: {3}".format(
brain_name,
str(expected_discrete_size)
if discrete_check
else str(expected_continuous_size),
self._brains[brain_name].vector_action_space_type,
str(vector_action[brain_name]),
)
)
self._generate_step_input(vector_action, memory, text_action, value, custom_action))
self._generate_step_input(
vector_action, memory, text_action, value, custom_action
)
)
if outputs is None:
raise KeyboardInterrupt
rl_output = outputs.rl_output

raise UnityEnvironmentException("No Unity environment is loaded.")
elif self._global_done:
raise UnityActionException(
"The episode is completed. Reset the environment with 'reset()'")
"The episode is completed. Reset the environment with 'reset()'"
)
"Reset the environment with 'reset()'")
"Reset the environment with 'reset()'"
)
def close(self):
"""

global_done = output.global_done
for brain_name in output.agentInfos:
agent_info_list = output.agentInfos[brain_name].value
_data[brain_name] = BrainInfo.from_agent_proto(agent_info_list,
self.brains[brain_name])
_data[brain_name] = BrainInfo.from_agent_proto(
agent_info_list, self.brains[brain_name]
)
def _generate_step_input(self, vector_action, memory, text_action, value, custom_action) -> UnityRLInput:
def _generate_step_input(
self, vector_action, memory, text_action, value, custom_action
) -> UnityRLInput:
rl_in = UnityRLInput()
for b in vector_action:
n_agents = self._n_agents[b]

_m_s = len(memory[b]) // n_agents
for i in range(n_agents):
action = AgentActionProto(
vector_actions=vector_action[b][i * _a_s: (i + 1) * _a_s],
memories=memory[b][i * _m_s: (i + 1) * _m_s],
vector_actions=vector_action[b][i * _a_s : (i + 1) * _a_s],
memories=memory[b][i * _m_s : (i + 1) * _m_s],
custom_action=custom_action[b][i]
custom_action=custom_action[b][i],
)
if b in value:
if value[b] is not None:

return self.wrap_unity_input(rl_in)
def _generate_reset_input(self, training, config, custom_reset_parameters) -> UnityRLInput:
def _generate_reset_input(
self, training, config, custom_reset_parameters
) -> UnityRLInput:
rl_in = UnityRLInput()
rl_in.is_training = training
rl_in.environment_parameters.CopyFrom(EnvironmentParametersProto())

rl_in.environment_parameters.custom_reset_parameters.CopyFrom(custom_reset_parameters)
rl_in.environment_parameters.custom_reset_parameters.CopyFrom(
custom_reset_parameters
)
def send_academy_parameters(self,
init_parameters: UnityRLInitializationInput) -> UnityRLInitializationOutput:
def send_academy_parameters(
self, init_parameters: UnityRLInitializationInput
) -> UnityRLInitializationOutput:
inputs = UnityInput()
inputs.rl_initialization_input.CopyFrom(init_parameters)
return self.communicator.initialize(inputs).rl_initialization_output
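For orientation while reading the reformatted environment.py above, here is a minimal usage sketch of the UnityEnvironment API as it appears in this diff (constructor, reset, step, close). It assumes a single external brain with a continuous action space; the build name, brain handling, and action values are hypothetical placeholders and are not part of this change.

from mlagents.envs import UnityEnvironment

# Hypothetical build name; the constructor strips .app/.exe/.x86_64 suffixes itself.
env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)
brain_name = env.external_brain_names[0]
brain_info = env.reset(train_mode=True)[brain_name]
for _ in range(10):
    n_agents = len(brain_info.agents)
    action_size = env.brains[brain_name].vector_action_space_size[0]
    # With exactly one external brain, step() accepts a flat list of actions.
    brain_info = env.step([0.0] * action_size * n_agents)[brain_name]
env.close()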

39
ml-agents-envs/mlagents/envs/exception.py


import logging
pass

"""
def __init__(self, message, log_file_path = None):
def __init__(self, message, log_file_path=None):
unity_error = '\n'
unity_error = "\n"
l=l.strip()
if (l == 'Exception') or (l=='Error'):
l = l.strip()
if (l == "Exception") or (l == "Error"):
unity_error += '----------------------\n'
if (l == ''):
unity_error += "----------------------\n"
if l == "":
unity_error += l + '\n'
unity_error += l + "\n"
logger.error("An error might have occured in the environment. "
"You can check the logfile for more information at {}".format(log_file_path))
logger.error(
"An error might have occured in the environment. "
"You can check the logfile for more information at {}".format(
log_file_path
)
)
logger.error("An error might have occured in the environment. "
"No UnitySDK.log file could be found.")
logger.error(
"An error might have occured in the environment. "
"No UnitySDK.log file could be found."
)
super(UnityTimeOutException, self).__init__(message)

MESSAGE_TEMPLATE = (
"Couldn't start socket communication because worker number {} is still in use. "
"You may need to manually close a previously opened environment "
"or use a different worker number.")
"or use a different worker number."
)
def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))

63
ml-agents-envs/mlagents/envs/mock_communicator.py


from .communicator import Communicator
from .communicator_objects import UnityOutput, UnityInput, \
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput, \
AgentInfoProto, UnityRLOutput
from .communicator_objects import (
UnityOutput,
UnityInput,
ResolutionProto,
BrainParametersProto,
UnityRLInitializationOutput,
AgentInfoProto,
UnityRLOutput,
)
def __init__(self, discrete_action=False, visual_inputs=0, stack=True, num_agents=3,
brain_name="RealFakeBrain", vec_obs_size=3):
def __init__(
self,
discrete_action=False,
visual_inputs=0,
stack=True,
num_agents=3,
brain_name="RealFakeBrain",
vec_obs_size=3,
):
"""
Python side of the grpc communication. Python is the client and Unity the server

self.num_stacks = 1
def initialize(self, inputs: UnityInput) -> UnityOutput:
resolutions = [ResolutionProto(
width=30,
height=40,
gray_scale=False) for i in range(self.visual_inputs)]
resolutions = [
ResolutionProto(width=30, height=40, gray_scale=False)
for i in range(self.visual_inputs)
]
bp = BrainParametersProto(
vector_observation_size=self.vec_obs_size,
num_stacked_vector_observations=self.num_stacks,

vector_action_space_type=int(not self.is_discrete),
brain_name=self.brain_name,
is_training=True
is_training=True,
name="RealFakeAcademy",
version="API-8",
log_path="",
brain_parameters=[bp]
name="RealFakeAcademy", version="API-8", log_path="", brain_parameters=[bp]
return UnityOutput(
rl_initialization_output=rl_init
)
return UnityOutput(rl_initialization_output=rl_init)
def exchange(self, inputs: UnityInput) -> UnityOutput:
dict_agent_info = {}

memories=[],
done=(i == 2),
max_step_reached=False,
id=i
))
dict_agent_info["RealFakeBrain"] = \
UnityRLOutput.ListAgentInfoProto(value=list_agent_info)
id=i,
)
)
dict_agent_info["RealFakeBrain"] = UnityRLOutput.ListAgentInfoProto(
value=list_agent_info
)
global_done = (fake_brain.value[0].vector_actions[0] == -1)
global_done = fake_brain.value[0].vector_actions[0] == -1
result = UnityRLOutput(
global_done=global_done,
agentInfos=dict_agent_info
)
return UnityOutput(
rl_output=result
)
result = UnityRLOutput(global_done=global_done, agentInfos=dict_agent_info)
return UnityOutput(rl_output=result)
def close(self):
"""

10
ml-agents-envs/mlagents/envs/rpc_communicator.py


from concurrent.futures import ThreadPoolExecutor
from .communicator import Communicator
from .communicator_objects import UnityToExternalServicer, add_UnityToExternalServicer_to_server
from .communicator_objects import (
UnityToExternalServicer,
add_UnityToExternalServicer_to_server,
)
from .communicator_objects import UnityMessage, UnityInput, UnityOutput
from .exception import UnityTimeOutException, UnityWorkerInUseException

add_UnityToExternalServicer_to_server(self.unity_to_external, self.server)
# Using unspecified address, which means that grpc is communicating on all IPs
# This is so that the docker container can connect.
self.server.add_insecure_port('[::]:' + str(self.port))
self.server.add_insecure_port("[::]:" + str(self.port))
self.server.start()
self.is_open = True
except:

"\t The environment does not need user interaction to launch\n"
"\t The Academy's Broadcast Hub is configured correctly\n"
"\t The Agents are linked to the appropriate Brains\n"
"\t The environment and the Python interface have compatible versions.")
"\t The environment and the Python interface have compatible versions."
)
aca_param = self.unity_to_external.parent_conn.recv().unity_output
message = UnityMessage()
message.header.status = 200

17
ml-agents-envs/mlagents/envs/socket_communicator.py


class SocketCommunicator(Communicator):
def __init__(self, worker_id=0,
base_port=5005):
def __init__(self, worker_id=0, base_port=5005):
"""
Python side of the socket communication

self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self._socket.bind(("localhost", self.port))
except:
raise UnityTimeOutException("Couldn't start socket communication because worker number {} is still in use. "
"You may need to manually close a previously opened environment "
"or use a different worker number.".format(str(self.worker_id)))
raise UnityTimeOutException(
"Couldn't start socket communication because worker number {} is still in use. "
"You may need to manually close a previously opened environment "
"or use a different worker number.".format(str(self.worker_id))
)
except :
except:
"\t The environment and the Python interface have compatible versions.")
"\t The environment and the Python interface have compatible versions."
)
message = UnityMessage()
message.header.status = 200
message.unity_input.CopyFrom(inputs)

if self._socket is not None:
self._conn.close()
self._conn = None

88
ml-agents-envs/mlagents/envs/subprocess_environment.py


def close(self):
try:
self.conn.send(EnvironmentCommand('close'))
self.conn.send(EnvironmentCommand("close"))
except (BrokenPipeError, EOFError):
pass
self.process.join()

env_factory: Callable[[int], UnityEnvironment] = cloudpickle.loads(pickled_env_factory)
env_factory: Callable[[int], UnityEnvironment] = cloudpickle.loads(
pickled_env_factory
)
parent_conn.send(
EnvironmentResponse(cmd_name, worker_id, payload)
)
parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))
if cmd.name == 'step':
if cmd.name == "step":
_send_response('step', all_brain_info)
elif cmd.name == 'external_brains':
_send_response('external_brains', env.external_brains)
elif cmd.name == 'reset_parameters':
_send_response('reset_parameters', env.reset_parameters)
elif cmd.name == 'reset':
_send_response("step", all_brain_info)
elif cmd.name == "external_brains":
_send_response("external_brains", env.external_brains)
elif cmd.name == "reset_parameters":
_send_response("reset_parameters", env.reset_parameters)
elif cmd.name == "reset":
_send_response('reset', all_brain_info)
elif cmd.name == 'global_done':
_send_response('global_done', env.global_done)
elif cmd.name == 'close':
_send_response("reset", all_brain_info)
elif cmd.name == "global_done":
_send_response("global_done", env.global_done)
elif cmd.name == "close":
print('UnityEnvironment worker: keyboard interrupt')
print("UnityEnvironment worker: keyboard interrupt")
def __init__(self,
env_factory: Callable[[int], BaseUnityEnvironment],
n_env: int = 1):
def __init__(
self, env_factory: Callable[[int], BaseUnityEnvironment], n_env: int = 1
):
self.envs = []
self.env_agent_counts = {}
self.waiting = False

@staticmethod
def create_worker(
worker_id: int,
env_factory: Callable[[int], BaseUnityEnvironment]
worker_id: int, env_factory: Callable[[int], BaseUnityEnvironment]
) -> UnityEnvWorker:
parent_conn, child_conn = Pipe()

child_process = Process(target=worker, args=(child_conn, pickled_env_factory, worker_id))
child_process = Process(
target=worker, args=(child_conn, pickled_env_factory, worker_id)
)
def step_async(self, vector_action, memory=None, text_action=None, value=None) -> None:
def step_async(
self, vector_action, memory=None, text_action=None, value=None
) -> None:
'Tried to take an environment step before previous step has completed.'
"Tried to take an environment step before previous step has completed."
)
agent_counts_cum = {}

start_ind = agent_counts_cum[brain_name][worker_id - 1]
end_ind = agent_counts_cum[brain_name][worker_id]
if vector_action.get(brain_name) is not None:
env_actions[brain_name] = vector_action[brain_name][start_ind:end_ind]
env_actions[brain_name] = vector_action[brain_name][
start_ind:end_ind
]
env_text_action[brain_name] = text_action[brain_name][start_ind:end_ind]
env_text_action[brain_name] = text_action[brain_name][
start_ind:end_ind
]
env.send('step', (env_actions, env_memory, env_text_action, env_value))
env.send("step", (env_actions, env_memory, env_text_action, env_value))
raise UnityEnvironmentException('Tried to await an environment step, but no async step was taken.')
raise UnityEnvironmentException(
"Tried to await an environment step, but no async step was taken."
)
steps = [self.envs[i].recv() for i in range(len(self.envs))]
self._get_agent_counts(map(lambda s: s.payload, steps))

def step(self, vector_action=None, memory=None, text_action=None, value=None) -> AllBrainInfo:
def step(
self, vector_action=None, memory=None, text_action=None, value=None
) -> AllBrainInfo:
self._broadcast_message('reset', (config, train_mode))
self._broadcast_message("reset", (config, train_mode))
reset_results = [self.envs[i].recv() for i in range(len(self.envs))]
self._get_agent_counts(map(lambda r: r.payload, reset_results))

def global_done(self):
self._broadcast_message('global_done')
self._broadcast_message("global_done")
dones: List[EnvironmentResponse] = [
self.envs[i].recv().payload for i in range(len(self.envs))
]

def external_brains(self):
self.envs[0].send('external_brains')
self.envs[0].send("external_brains")
self.envs[0].send('reset_parameters')
self.envs[0].send("reset_parameters")
return self.envs[0].recv().payload
def close(self):

all_brain_info: AllBrainInfo = env_step.payload
for brain_name, brain_info in all_brain_info.items():
for i in range(len(brain_info.agents)):
brain_info.agents[i] = str(env_step.worker_id) + '-' + str(brain_info.agents[i])
brain_info.agents[i] = (
str(env_step.worker_id) + "-" + str(brain_info.agents[i])
)
if accumulated_brain_info:
accumulated_brain_info[brain_name].merge(brain_info)
if not accumulated_brain_info:

def _broadcast_message(self, name: str, payload = None):
def _broadcast_message(self, name: str, payload=None):
env.send(name, payload)
env.send(name, payload)
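To tie the worker command protocol above together, here is a hedged sketch of driving SubprocessUnityEnvironment with an environment factory. The build name is hypothetical, external_brains is assumed to be a property (as in BaseUnityEnvironment), and reset is assumed to mirror the single-environment signature; only the class name, factory shape, and n_env argument come directly from this diff.

from mlagents.envs import UnityEnvironment
from mlagents.envs.subprocess_environment import SubprocessUnityEnvironment

def make_env(worker_id: int) -> UnityEnvironment:
    # Hypothetical build name; worker_id keeps each subprocess on its own port.
    return UnityEnvironment(file_name="3DBall", worker_id=worker_id)

envs = SubprocessUnityEnvironment(make_env, n_env=2)
first_info = envs.reset(train_mode=True)  # merged AllBrainInfo across both workers
brains = envs.external_brains             # assumed @property, served by worker 0
envs.close()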

124
ml-agents-envs/mlagents/envs/tests/test_envs.py


import numpy as np
from mlagents.envs import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
BrainInfo
from mlagents.envs import (
UnityEnvironment,
UnityEnvironmentException,
UnityActionException,
BrainInfo,
)
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
UnityEnvironment(' ')
UnityEnvironment(" ")
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
assert env.brain_names[0] == 'RealFakeBrain'
assert env.brain_names[0] == "RealFakeBrain"
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
brain = env.brains['RealFakeBrain']
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
brain = env.brains["RealFakeBrain"]
assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
assert len(brain_info['RealFakeBrain'].vector_observations) == \
len(brain_info['RealFakeBrain'].agents)
assert len(brain_info['RealFakeBrain'].vector_observations[0]) == \
brain.vector_observation_space_size * brain.num_stacked_vector_observations
assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
assert isinstance(brain_info["RealFakeBrain"].vector_observations, np.ndarray)
assert (
len(brain_info["RealFakeBrain"].visual_observations)
== brain.number_visual_observations
)
assert len(brain_info["RealFakeBrain"].vector_observations) == len(
brain_info["RealFakeBrain"].agents
)
assert (
len(brain_info["RealFakeBrain"].vector_observations[0])
== brain.vector_observation_space_size * brain.num_stacked_vector_observations
)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
brain = env.brains['RealFakeBrain']
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
brain = env.brains["RealFakeBrain"]
brain_info = env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
brain_info = env.step(
[0]
* brain.vector_action_space_size[0]
* len(brain_info["RealFakeBrain"].agents)
)
brain_info = env.step([-1] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
brain_info = env.step(
[-1]
* brain.vector_action_space_size[0]
* len(brain_info["RealFakeBrain"].agents)
)
env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
env.step(
[0]
* brain.vector_action_space_size[0]
* len(brain_info["RealFakeBrain"].agents)
)
assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
assert len(brain_info['RealFakeBrain'].vector_observations) == \
len(brain_info['RealFakeBrain'].agents)
assert len(brain_info['RealFakeBrain'].vector_observations[0]) == \
brain.vector_observation_space_size * brain.num_stacked_vector_observations
assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
assert isinstance(brain_info["RealFakeBrain"].vector_observations, np.ndarray)
assert (
len(brain_info["RealFakeBrain"].visual_observations)
== brain.number_visual_observations
)
assert len(brain_info["RealFakeBrain"].vector_observations) == len(
brain_info["RealFakeBrain"].agents
)
assert (
len(brain_info["RealFakeBrain"].vector_observations[0])
== brain.vector_observation_space_size * brain.num_stacked_vector_observations
)
print("\n\n\n\n\n\n\n" + str(brain_info['RealFakeBrain'].local_done))
assert not brain_info['RealFakeBrain'].local_done[0]
assert brain_info['RealFakeBrain'].local_done[2]
print("\n\n\n\n\n\n\n" + str(brain_info["RealFakeBrain"].local_done))
assert not brain_info["RealFakeBrain"].local_done[0]
assert brain_info["RealFakeBrain"].local_done[2]
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
comm = MockCommunicator(
discrete_action=False, visual_inputs=0)
comm = MockCommunicator(discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
env = UnityEnvironment(" ")
assert env._loaded
env.close()
assert not env._loaded

if __name__ == '__main__':
if __name__ == "__main__":
pytest.main()

1
ml-agents-envs/mlagents/envs/tests/test_rpc_communicator.py


second_comm = RpcCommunicator(worker_id=1)
first_comm.close()
second_comm.close()

60
ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py


SubprocessUnityEnvironment.create_worker = MagicMock()
env = SubprocessUnityEnvironment(mock_env_factory, 2)
# Creates two processes
self.assertEqual(env.create_worker.call_args_list, [
mock.call(0, mock_env_factory),
mock.call(1, mock_env_factory)
])
self.assertEqual(
env.create_worker.call_args_list,
[mock.call(0, mock_env_factory), mock.call(1, mock_env_factory)],
)
self.assertEqual(len(env.envs), 2)
def test_step_async_fails_when_waiting(self):

@staticmethod
def test_step_async_splits_input_by_agent_count():
env = SubprocessUnityEnvironment(mock_env_factory, 0)
env.env_agent_counts = {
'MockBrain': [1, 3, 5]
}
env.envs = [
MockEnvWorker(0),
MockEnvWorker(1),
MockEnvWorker(2),
]
env.env_agent_counts = {"MockBrain": [1, 3, 5]}
env.envs = [MockEnvWorker(0), MockEnvWorker(1), MockEnvWorker(2)]
env_1_actions = ([[3.0, 4.0]] * 3)
env_2_actions = ([[5.0, 6.0]] * 5)
vector_action = {
'MockBrain': env_0_actions + env_1_actions + env_2_actions
}
env_1_actions = [[3.0, 4.0]] * 3
env_2_actions = [[5.0, 6.0]] * 5
vector_action = {"MockBrain": env_0_actions + env_1_actions + env_2_actions}
env.envs[0].send.assert_called_with('step', ({'MockBrain': env_0_actions}, {}, {}, {}))
env.envs[1].send.assert_called_with('step', ({'MockBrain': env_1_actions}, {}, {}, {}))
env.envs[2].send.assert_called_with('step', ({'MockBrain': env_2_actions}, {}, {}, {}))
env.envs[0].send.assert_called_with(
"step", ({"MockBrain": env_0_actions}, {}, {}, {})
)
env.envs[1].send.assert_called_with(
"step", ({"MockBrain": env_1_actions}, {}, {}, {})
)
env.envs[2].send.assert_called_with(
"step", ({"MockBrain": env_2_actions}, {}, {}, {})
)
def test_step_async_sets_waiting(self):
env = SubprocessUnityEnvironment(mock_env_factory, 0)

def test_step_await_combines_brain_info(self):
all_brain_info_env0 = {
'MockBrain': BrainInfo([], [[1.0, 2.0], [1.0, 2.0]], [], agents=[1, 2], memory=np.zeros((0,0)))
"MockBrain": BrainInfo(
[], [[1.0, 2.0], [1.0, 2.0]], [], agents=[1, 2], memory=np.zeros((0, 0))
)
'MockBrain': BrainInfo([], [[3.0, 4.0]], [], agents=[3], memory=np.zeros((0,0)))
"MockBrain": BrainInfo(
[], [[3.0, 4.0]], [], agents=[3], memory=np.zeros((0, 0))
)
env_worker_0.recv.return_value = EnvironmentResponse('step', 0, all_brain_info_env0)
env_worker_0.recv.return_value = EnvironmentResponse(
"step", 0, all_brain_info_env0
)
env_worker_1.recv.return_value = EnvironmentResponse('step', 1, all_brain_info_env1)
env_worker_1.recv.return_value = EnvironmentResponse(
"step", 1, all_brain_info_env1
)
combined_braininfo = env.step_await()['MockBrain']
combined_braininfo = env.step_await()["MockBrain"]
[[1.0, 2.0], [1.0, 2.0], [3.0, 4.0]]
[[1.0, 2.0], [1.0, 2.0], [3.0, 4.0]],
self.assertEqual(combined_braininfo.agents, ['0-1', '0-2', '1-3'])
self.assertEqual(combined_braininfo.agents, ["0-1", "0-2", "1-3"])

38
ml-agents-envs/setup.py


here = path.abspath(path.dirname(__file__))
setup(
name='mlagents_envs',
version='0.8.1',
description='Unity Machine Learning Agents Interface',
url='https://github.com/Unity-Technologies/ml-agents',
author='Unity Technologies',
author_email='ML-Agents@unity3d.com',
name="mlagents_envs",
version="0.8.1",
description="Unity Machine Learning Agents Interface",
url="https://github.com/Unity-Technologies/ml-agents",
author="Unity Technologies",
author_email="ML-Agents@unity3d.com",
'Intended Audience :: Developers',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3.6'
"Intended Audience :: Developers",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.6",
packages=['mlagents.envs', 'mlagents.envs.communicator_objects'], # Required
packages=["mlagents.envs", "mlagents.envs.communicator_objects"], # Required
'Pillow>=4.2.1,<=5.4.1',
'numpy>=1.13.3,<=1.16.1',
'pytest>=3.2.2,<4.0.0',
'protobuf>=3.6,<3.7',
'grpcio>=1.11.0,<1.12.0',
'cloudpickle==0.8.1'],
"Pillow>=4.2.1,<=5.4.1",
"numpy>=1.13.3,<=1.16.1",
"pytest>=3.2.2,<4.0.0",
"protobuf>=3.6,<3.7",
"grpcio>=1.11.0,<1.12.0",
"cloudpickle==0.8.1",
],
python_requires=">=3.5,<3.8",
)

2
ml-agents/mlagents/trainers/__init__.py


from .ppo.trainer import *
from .ppo.policy import *
from .exception import *
from .demo_loader import *
from .demo_loader import *

335
ml-agents/mlagents/trainers/barracuda.py


from collections import defaultdict
import numpy as np
import json
import struct # convert from Python values and C structs
import struct # convert from Python values and C structs
import re
import argparse
import os.path

self.globals = []
self.memories = []
def __init__(self, **entries): self.__dict__.update(entries)
def __init__(self, **entries):
self.__dict__.update(entries)
parser.add_argument('source_file', help=help)
parser.add_argument('target_file', help='output Barracuda binary file')
parser.add_argument('-trim', '--trim-unused-by-output')
parser.add_argument('--print-layers', action='store_true')
parser.add_argument('--print-source-json', action='store_true')
parser.add_argument('-json', '--print-barracuda-json', action='store_true')
parser.add_argument('--print-layer-links', action='store_true')
parser.add_argument('--print-patterns', action='store_true')
parser.add_argument('--print-tensors', action='store_true')
parser.add_argument('--verbose', action='store_true')
parser.add_argument("source_file", help=help)
parser.add_argument("target_file", help="output Barracuda binary file")
parser.add_argument("-trim", "--trim-unused-by-output")
parser.add_argument("--print-layers", action="store_true")
parser.add_argument("--print-source-json", action="store_true")
parser.add_argument("-json", "--print-barracuda-json", action="store_true")
parser.add_argument("--print-layer-links", action="store_true")
parser.add_argument("--print-patterns", action="store_true")
parser.add_argument("--print-tensors", action="store_true")
parser.add_argument("--verbose", action="store_true")
args.compress_f16 = False # TEMP: disabled, until properly implemented parser.add_argument('-f16', '--compress-f16', action='store_true')
output_extension = '.bc' if not args.compress_f16 else '.f16.bc'
args.compress_f16 = (
False
) # TEMP: disabled, until properly implemented parser.add_argument('-f16', '--compress-f16', action='store_true')
output_extension = ".bc" if not args.compress_f16 else ".f16.bc"
print('File', args.source_file, 'does not exist.')
print("File", args.source_file, "does not exist.")
return os.path.splitext(os.path.basename(filename))[0] + newExtenstion;
return os.path.splitext(os.path.basename(filename))[0] + newExtenstion
args.target_file = os.path.join(args.target_file, replaceFilenameExtension(args.source_file, output_extension))
args.target_file = os.path.join(
args.target_file,
replaceFilenameExtension(args.source_file, output_extension),
)
# Fuse training time BatchNorm tensors into Scale & Bias
def fuse_batchnorm_weights(gamma, beta, mean, var, epsilon):

bias = beta - gamma * mean / np.sqrt(var + epsilon)
return [scale, bias]
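The scale assignment is collapsed out of the hunk above; for reference, a self-contained sketch of the standard BatchNorm folding this helper performs (the scale line is an assumption based on the conventional formula, not shown in this diff):

def fuse_batchnorm_weights(gamma, beta, mean, var, epsilon):
    # y = gamma * (x - mean) / sqrt(var + eps) + beta  ==  scale * x + bias
    scale = gamma / np.sqrt(var + epsilon)
    bias = beta - gamma * mean / np.sqrt(var + epsilon)
    return [scale, bias]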
if hasattr(model, 'layers'):
if hasattr(model, "layers"):
model = model.layers
inputs_and_memories = set(list(inputs) + list(memories[1::3]))

ready.add(l.name)
return missing
# Class to represent a graph
# Class to represent a graph
class Graph:
def __init__(self,vertices):
self.graph = defaultdict(list) #dictionary containing adjacency List
self.V = vertices #No. of vertices
# function to add an edge to graph
def addEdge(self,u,v):
self.graph[u].append(v)
# A recursive function used by topologicalSort
def topologicalSortUtil(self,v,visited,stack):
# Mark the current node as visited.
class Graph:
def __init__(self, vertices):
self.graph = defaultdict(list) # dictionary containing adjacency List
self.V = vertices # No. of vertices
# function to add an edge to graph
def addEdge(self, u, v):
self.graph[u].append(v)
# A recursive function used by topologicalSort
def topologicalSortUtil(self, v, visited, stack):
# Mark the current node as visited.
# Recur for all the vertices adjacent to this vertex
for i in self.graph[v]:
if visited[i] == False:
self.topologicalSortUtil(i,visited,stack)
# Push current vertex to stack which stores result
stack.insert(0,v)
# Recur for all the vertices adjacent to this vertex
for i in self.graph[v]:
if visited[i] == False:
self.topologicalSortUtil(i, visited, stack)
# The function to do Topological Sort. It uses recursive
# topologicalSortUtil()
def topologicalSort(self):
# Mark all the vertices as not visited
visited = [False]*self.V
stack =[]
# Call the recursive helper function to store Topological
# Sort starting from all vertices one by one
for i in range(self.V):
if visited[i] == False:
self.topologicalSortUtil(i,visited,stack)
#print(stack)
# Push current vertex to stack which stores result
stack.insert(0, v)
# The function to do Topological Sort. It uses recursive
# topologicalSortUtil()
def topologicalSort(self):
# Mark all the vertices as not visited
visited = [False] * self.V
stack = []
# Call the recursive helper function to store Topological
# Sort starting from all vertices one by one
for i in range(self.V):
if visited[i] == False:
self.topologicalSortUtil(i, visited, stack)
# print(stack)
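As a quick sanity check on the Graph helper reformatted above, a small usage sketch; the vertex indices are arbitrary, and the return of the stack is an assumption since the tail of topologicalSort is collapsed in this hunk:

g = Graph(4)
g.addEdge(0, 1)
g.addEdge(0, 2)
g.addEdge(1, 3)
g.addEdge(2, 3)
order = g.topologicalSort()  # assumed to return the stack, e.g. [0, 2, 1, 3]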
if (len(find_missing_inputs(model, inputs_and_memories)) == 0):
if len(find_missing_inputs(model, inputs_and_memories)) == 0:
return model
g = Graph(len(model))

for l in model:
layers[l.name] = id;
layers[l.name] = id
id += 1
for layer in model:

print("SORTED:", sorted_layer_indices)
new_model = [model[idx] for idx in sorted_layer_indices]
assert(len(find_missing_inputs(new_model, inputs_and_memories)) == 0)
assert len(find_missing_inputs(new_model, inputs_and_memories)) == 0
if hasattr(model, 'layers'):
if hasattr(model, "layers"):
def flatten(items,enter=lambda x:isinstance(x, list)):
def flatten(items, enter=lambda x: isinstance(x, list)):
# http://stackoverflow.com/a/40857703
# https://github.com/ctmakro/canton/blob/master/canton/misc.py
"""Yield items from any nested iterable; see REF."""

yield x
def trim_model(model, outputs):
layers = {l.name:l for l in model}
layers = {l.name: l for l in model}
connected = {o for o in outputs}
while len(outputs) > 0:
outputs = set(flatten([layers[o].inputs for o in outputs if o in layers]))

connected.add(o)
trimmed = [l.name for l in model if l.name not in connected]
return str(arr)[1:-1] # array to string without brackets
return str(arr)[1:-1] # array to string without brackets
print("TRIMMED:", array_without_brackets(trimmed))
return [l for l in model if l.name in connected]

print("Trimming model given outputs to preserve:", preserve_outputs)
model = trim_model(model, preserve_outputs)
else:
print("WARNING: Trim couldn't find any layers to match:", criteria_regexp_string)
print(
"WARNING: Trim couldn't find any layers to match:", criteria_regexp_string
)
compress_classes = {
'Dense'
}
compress_classes = {"Dense"}
if (l.class_name in compress_classes):
print("Compressing %s layer '%s' weights to float16" % (l.class_name, l.name))
if l.class_name in compress_classes:
print(
"Compressing %s layer '%s' weights to float16" % (l.class_name, l.name)
)
if isinstance(o, np.ndarray): # skip binary data packed inside ndarray
if isinstance(o, np.ndarray): # skip binary data packed inside ndarray
if getattr(o, '__dict__', None):
if getattr(o, "__dict__", None):
s = json.dumps(model.layers, cls=StructEncoder, separators=(', ',':'))
s = json.dumps(model.layers, cls=StructEncoder, separators=(", ", ":"))
s = s.replace(']}, {', ']},\n{')
s = s.replace(':[{', ':[\n\t{')
s = s.replace('}, {', '},\n\t{')
s = s.replace("]}, {", "]},\n{")
s = s.replace(":[{", ":[\n\t{")
s = s.replace("}, {", "},\n\t{")
return str(arr)[1:-1] # array to string without brackets
return str(arr)[1:-1] # array to string without brackets
if print_layer_links:
for l in model.layers:

if model.globals:
if isinstance(model.globals, dict):
model.globals = {x.name:x.shape for x in model.globals}
model.globals = {x.name: x.shape for x in model.globals}
ins = {i:model.inputs[i] for i in l.inputs if i in model.inputs}
ins = {i: model.inputs[i] for i in l.inputs if i in model.inputs}
else:
ins = [i for i in l.inputs if i in model.inputs]
if ins:

print("OUT:", array_without_brackets(model.outputs))
if (print_tensors):
if print_tensors:
def __init__(self, scope=''):
def __init__(self, scope=""):
if attr == '_':
if attr == "_":
return self.layers[-1].name if len(self.layer) > 0 else self.scope
raise AttributeError(attr)

i = 1
while name in self.names_taken:
name = self.layers[-1].op + '_' + str(i)
name = self.layers[-1].op + "_" + str(i)
self.layers[-1].name = self.scope + ('/' if self.scope else '') + name
self.layers[-1].name = self.scope + ("/" if self.scope else "") + name
def concat(self, a, b, out=''):
self.layers += [Struct(name=out, op='Concat', input=[a, b])]
def concat(self, a, b, out=""):
self.layers += [Struct(name=out, op="Concat", input=[a, b])]
def mad(self, x, kernel, bias, out=''):
self.layers += [Struct(name=out, op='Dense', input=[x, kernel, bias])]
def mad(self, x, kernel, bias, out=""):
self.layers += [Struct(name=out, op="Dense", input=[x, kernel, bias])]
def mul(self, a, b, out=''):
self.layers += [Struct(name=out, op='Mul', input=[a, b])]
def mul(self, a, b, out=""):
self.layers += [Struct(name=out, op="Mul", input=[a, b])]
def add(self, a, b, out=''):
self.layers += [Struct(name=out, op='Add', input=[a, b])]
def add(self, a, b, out=""):
self.layers += [Struct(name=out, op="Add", input=[a, b])]
def sub(self, a, b, out=''):
self.layers += [Struct(name=out, op='Sub', input=[a, b])]
def sub(self, a, b, out=""):
self.layers += [Struct(name=out, op="Sub", input=[a, b])]
def sigmoid(self, x, out=''):
self.layers += [Struct(name=out, op='Sigmoid', input=[x])]
def sigmoid(self, x, out=""):
self.layers += [Struct(name=out, op="Sigmoid", input=[x])]
def tanh(self, x, out=''):
self.layers += [Struct(name=out, op='Tanh', input=[x])]
def tanh(self, x, out=""):
self.layers += [Struct(name=out, op="Tanh", input=[x])]
def rnn(name, input, state, kernel, bias, new_state, number_of_gates = 2):
''' - Ht = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
'''
def rnn(name, input, state, kernel, bias, new_state, number_of_gates=2):
""" - Ht = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
"""
nn.tanh(
nn.mad(kernel=kernel, bias=bias,
x=nn.concat(input, state)),
out=new_state);
return nn.layers;
nn.tanh(nn.mad(kernel=kernel, bias=bias, x=nn.concat(input, state)), out=new_state)
return nn.layers
def gru(name, input, state, kernel_r, kernel_u, kernel_c, bias_r, bias_u, bias_c, new_state, number_of_gates = 2):
''' - zt = f(Xt*Wz + Ht_1*Rz + Wbz + Rbz)
def gru(
name,
input,
state,
kernel_r,
kernel_u,
kernel_c,
bias_r,
bias_u,
bias_c,
new_state,
number_of_gates=2,
):
""" - zt = f(Xt*Wz + Ht_1*Rz + Wbz + Rbz)
'''
"""
nn = Build(name)
inputs = nn.concat(input, state)

c = nn.tanh(nn.mad(kernel=kernel_c, bias=bias_c,
x=nn.concat(input, r_state)))
c = nn.tanh(nn.mad(kernel=kernel_c, bias=bias_c, x=nn.concat(input, r_state)))
# new_h = u' * state + (1 - u') * c'
# = u' * state + c' - u' * c'

# - u' * c'
nn.sub(nn._, nn.mul(u, c),
out=new_state)
nn.sub(nn._, nn.mul(u, c), out=new_state)
return nn.layers
return nn.layers;
def lstm(name, input, state_c, state_h, kernel_i, kernel_j, kernel_f, kernel_o, bias_i, bias_j, bias_f, bias_o, new_state_c, new_state_h):
''' Full:
def lstm(
name,
input,
state_c,
state_h,
kernel_i,
kernel_j,
kernel_f,
kernel_o,
bias_i,
bias_j,
bias_f,
bias_o,
new_state_c,
new_state_h,
):
""" Full:
- it = f(Xt*Wi + Ht_1*Ri + Pi . Ct_1 + Wbi + Rbi)
- ft = f(Xt*Wf + Ht_1*Rf + Pf . Ct_1 + Wbf + Rbf)
- ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)

'''
"""
''' No peephole:
""" No peephole:
- it = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
- ft = f(Xt*Wf + Ht_1*Rf + Wbf + Rbf)
- ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)

'''
"""
j = nn.tanh(nn.mad(inputs, kernel_j, bias_j))
j = nn.tanh(nn.mad(inputs, kernel_j, bias_j))
nn.add(
nn.mul(state_c, f), nn.mul(i, j),
out=new_state_c)
nn.add(nn.mul(state_c, f), nn.mul(i, j), out=new_state_c)
# new_h =
nn.mul(o, nn.tanh(new_state_c),
out=new_state_h)
# new_h =
nn.mul(o, nn.tanh(new_state_c), out=new_state_h)
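For readers checking the equations in the rnn/gru/lstm docstrings above, a hedged NumPy reference of the no-peephole LSTM cell they describe. As in the builder, the Wi/Ri pairs are folded into single kernels applied to concat(input, state); all names here are illustrative, not part of the diff.

import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_cell_reference(x, h, c, kernel_i, kernel_j, kernel_f, kernel_o,
                        bias_i, bias_j, bias_f, bias_o):
    xh = np.concatenate([x, h], axis=-1)
    i = _sigmoid(xh @ kernel_i + bias_i)   # it = f(Xt*Wi + Ht_1*Ri + b)
    j = np.tanh(xh @ kernel_j + bias_j)    # ct = g(Xt*Wc + Ht_1*Rc + b)
    f = _sigmoid(xh @ kernel_f + bias_f)   # ft = f(Xt*Wf + Ht_1*Rf + b)
    o = _sigmoid(xh @ kernel_o + bias_o)   # ot = f(Xt*Wo + Ht_1*Ro + b)
    new_c = f * c + i * j                  # Ct = ft . Ct_1 + it . ct
    new_h = o * np.tanh(new_c)             # Ht = ot . h(Ct)
    return new_c, new_h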
# Serialize
class BarracudaWriter:

self.f = open(filename, 'wb+')
self.f = open(filename, "wb+")
def __enter__(self):
return self

def write_str(self, s):
self.write_int32(len(s))
self.f.write(s.encode('ascii'))
self.f.write(s.encode("ascii"))
self.f.write(struct.pack('<f', d))
self.f.write(struct.pack("<f", d))
self.f.write(struct.pack('<i', d))
self.f.write(struct.pack("<i", d))
self.f.write(struct.pack('<q', d))
self.f.write(struct.pack("<q", d))
def write_shape(self, s):
self.write_int32(len(s))

def close(self):
self.f.close()
#VERSION = 0xBA22AC0DA000 + BARRACUDA_VERSION
# VERSION = 0xBA22AC0DA000 + BARRACUDA_VERSION
w.write_int64(BARRACUDA_VERSION)
# inputs

w.write_str_array(model.outputs)
# memories
w.write_int32(len(model.memories)//3)
for mem_shape, mem_in, mem_out in zip(model.memories[0::3], model.memories[1::3], model.memories[2::3]):
w.write_int32(len(model.memories) // 3)
for mem_shape, mem_in, mem_out in zip(
model.memories[0::3], model.memories[1::3], model.memories[2::3]
):
w.write_shape(mem_shape)
w.write_str(mem_in)
w.write_str(mem_out)

w.write_int32(len(model.layers))
for l in model.layers:
assert(not l.name in l.inputs)
assert not l.name in l.inputs
w.write_int32(0) #dummy
w.write_int32(0) #dummy
w.write_int32(0) # dummy
w.write_int32(0) # dummy
w.write_shape(l.pads)
w.write_shape(l.strides)
w.write_shape(l.pool_size)

w.write_int32(0) #dummy
w.write_int32(0) # dummy
assert(len(x.shape) == 4)
assert(x.data.nbytes % 4 == 0)
length = x.data.nbytes >> 2 # length is measured in float32s (at least for now)
assert len(x.shape) == 4
assert x.data.nbytes % 4 == 0
length = (
x.data.nbytes >> 2
) # length is measured in float32s (at least for now)
w.write_str(x.name)
w.write_shape(x.shape)

for x in all_tensors:
w.write_array(x.data)

100
ml-agents/mlagents/trainers/bc/models.py


class BehavioralCloningModel(LearningModel):
def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
normalize=False, use_recurrent=False, seed=0):
def __init__(
self,
brain,
h_size=128,
lr=1e-4,
n_layers=2,
m_size=128,
normalize=False,
use_recurrent=False,
seed=0,
):
self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
self.dropout_rate = tf.placeholder(
dtype=tf.float32, shape=[], name="dropout_rate"
)
tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
self.sequence_length)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
tf.Variable(
self.m_size, name="memory_size", trainable=False, dtype=tf.int32
)
self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
hidden_reg, self.memory_out = self.create_recurrent_encoder(
hidden_reg, self.memory_in, self.sequence_length
)
self.memory_out = tf.identity(self.memory_out, name="recurrent_out")
if brain.vector_action_space_type == "discrete":
policy_branches = []

size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
kernel_initializer=c_layers.variance_scaling_initializer(
factor=0.01
),
)
)
[tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
[tf.nn.softmax(branch) for branch in policy_branches],
axis=1,
name="action_probs",
)
self.action_masks = tf.placeholder(
shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
)
tf.concat(policy_branches, axis=1), self.action_masks, self.act_size)
tf.identity(normalized_logits, name='action')
tf.concat(policy_branches, axis=1), self.action_masks, self.act_size
)
tf.identity(normalized_logits, name="action")
self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")
self.action_oh = tf.concat([
tf.one_hot(self.true_action[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
self.action_percent = tf.reduce_mean(tf.cast(
tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
self.true_action = tf.placeholder(
shape=[None, len(policy_branches)],
dtype=tf.int32,
name="teacher_action",
)
self.action_oh = tf.concat(
[
tf.one_hot(self.true_action[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
self.loss = tf.reduce_sum(
-tf.log(self.action_probs + 1e-10) * self.action_oh
)
self.action_percent = tf.reduce_mean(
tf.cast(
tf.equal(
tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32),
self.sample_action,
),
tf.float32,
)
)
self.policy = tf.layers.dense(hidden_reg, self.act_size[0], activation=None, use_bias=False, name='pre_action',
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
self.policy = tf.layers.dense(
hidden_reg,
self.act_size[0],
activation=None,
use_bias=False,
name="pre_action",
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01),
)
self.true_action = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action")
self.true_action = tf.placeholder(
shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action"
)
self.loss = tf.reduce_sum(tf.squared_difference(self.clipped_true_action, self.sample_action))
self.loss = tf.reduce_sum(
tf.squared_difference(self.clipped_true_action, self.sample_action)
)
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
self.update = optimizer.minimize(self.loss)

58
ml-agents/mlagents/trainers/bc/offline_trainer.py


class OfflineBCTrainer(BCTrainer):
"""The OfflineBCTrainer is an implementation of Offline Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed,
run_id):
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""
Responsible for collecting experiences and training the behavioral cloning model.
:param trainer_parameters: The parameters for the trainer (dictionary).

:param run_id: The identifier of the current run
"""
super(OfflineBCTrainer, self).__init__(
brain, trainer_parameters, training, load, seed, run_id)
brain, trainer_parameters, training, load, seed, run_id
)
self.param_keys = ['batch_size', 'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path',
'demo_path']
self.param_keys = [
"batch_size",
"summary_freq",
"max_steps",
"batches_per_epoch",
"use_recurrent",
"hidden_units",
"learning_rate",
"num_layers",
"sequence_length",
"memory_size",
"model_path",
"demo_path",
]
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
1)
self.batches_per_epoch = trainer_parameters["batches_per_epoch"]
self.n_sequences = max(
int(trainer_parameters["batch_size"] / self.policy.sequence_length), 1
)
trainer_parameters['demo_path'],
self.policy.sequence_length)
trainer_parameters["demo_path"], self.policy.sequence_length
)
policy_brain.pop('brain_name')
expert_brain.pop('brain_name')
policy_brain.pop("brain_name")
expert_brain.pop("brain_name")
raise UnityTrainerException("The provided demonstration is not compatible with the "
"brain being used for performance evaluation.")
raise UnityTrainerException(
"The provided demonstration is not compatible with the "
"brain being used for performance evaluation."
)
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(
['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
return """Hyperparameters for the Imitation Trainer of brain {0}: \n{1}""".format(
self.brain_name,
"\n".join(
[
"\t{0}:\t{1}".format(x, self.trainer_parameters[x])
for x in self.param_keys
]
),
)
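The param_keys listed above are read straight from trainer_parameters; a hedged sketch of the dictionary OfflineBCTrainer expects follows. Only the keys come from this diff, the values are placeholders rather than defaults shipped with this change.

# Hypothetical values; only the keys are taken from param_keys above.
trainer_parameters = {
    "batch_size": 64,
    "summary_freq": 1000,
    "max_steps": 5.0e4,
    "batches_per_epoch": 10,
    "use_recurrent": False,
    "hidden_units": 128,
    "learning_rate": 3.0e-4,
    "num_layers": 2,
    "sequence_length": 32,
    "memory_size": 256,
    "model_path": "./models/run_id/BrainName",
    "demo_path": "./demos/Expert.demo",
}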

111
ml-agents/mlagents/trainers/bc/online_trainer.py


class OnlineBCTrainer(BCTrainer):
"""The OnlineBCTrainer is an implementation of Online Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed,
run_id):
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""
Responsible for collecting experiences and training the behavioral cloning model.
:param trainer_parameters: The parameters for the trainer (dictionary).

:param run_id: The identifier of the current run
"""
super(OnlineBCTrainer, self).__init__(brain, trainer_parameters, training, load, seed,
run_id)
super(OnlineBCTrainer, self).__init__(
brain, trainer_parameters, training, load, seed, run_id
)
self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path']
self.param_keys = [
"brain_to_imitate",
"batch_size",
"time_horizon",
"summary_freq",
"max_steps",
"batches_per_epoch",
"use_recurrent",
"hidden_units",
"learning_rate",
"num_layers",
"sequence_length",
"memory_size",
"model_path",
]
self.brain_to_imitate = trainer_parameters['brain_to_imitate']
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
1)
self.brain_to_imitate = trainer_parameters["brain_to_imitate"]
self.batches_per_epoch = trainer_parameters["batches_per_epoch"]
self.n_sequences = max(
int(trainer_parameters["batch_size"] / self.policy.sequence_length), 1
)
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(
['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
return """Hyperparameters for the Imitation Trainer of brain {0}: \n{1}""".format(
self.brain_name,
"\n".join(
[
"\t{0}:\t{1}".format(x, self.trainer_parameters[x])
for x in self.param_keys
]
),
)
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
def add_experiences(
self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take_action_outputs
):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).

idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
info_teacher_record, info_teacher_reset = (
next_info_teacher_record, next_info_teacher_reset = \
next_info_teacher.text_observations[idx]. \
lower().split(",")
)
next_info_teacher_record, next_info_teacher_reset = (
next_info_teacher.text_observations[idx].lower().split(",")
)
if next_info_teacher_reset == "true":
self.demonstration_buffer.reset_update_buffer()
else:

for i in range(self.policy.vis_obs_size):
self.demonstration_buffer[agent_id]['visual_obs%d' % i] \
.append(stored_info_teacher.visual_observations[i][idx])
self.demonstration_buffer[agent_id][
"visual_obs%d" % i
].append(stored_info_teacher.visual_observations[i][idx])
self.demonstration_buffer[agent_id]['vector_obs'] \
.append(stored_info_teacher.vector_observations[idx])
self.demonstration_buffer[agent_id]["vector_obs"].append(
stored_info_teacher.vector_observations[idx]
)
(len(stored_info_teacher.agents),
self.policy.m_size))
self.demonstration_buffer[agent_id]['memory'].append(
stored_info_teacher.memories[idx])
self.demonstration_buffer[agent_id]['actions'].append(
next_info_teacher.previous_vector_actions[next_idx])
(
len(stored_info_teacher.agents),
self.policy.m_size,
)
)
self.demonstration_buffer[agent_id]["memory"].append(
stored_info_teacher.memories[idx]
)
self.demonstration_buffer[agent_id]["actions"].append(
next_info_teacher.previous_vector_actions[next_idx]
)
super(OnlineBCTrainer, self).add_experiences(curr_info, next_info, take_action_outputs)
super(OnlineBCTrainer, self).add_experiences(
curr_info, next_info, take_action_outputs
)
def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
"""

"""
info_teacher = next_info[self.brain_to_imitate]
for l in range(len(info_teacher.agents)):
teacher_action_list = len(self.demonstration_buffer[info_teacher.agents[l]]['actions'])
horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
teacher_filled = len(self.demonstration_buffer[info_teacher.agents[l]]['actions']) > 0
teacher_action_list = len(
self.demonstration_buffer[info_teacher.agents[l]]["actions"]
)
horizon_reached = (
teacher_action_list > self.trainer_parameters["time_horizon"]
)
teacher_filled = (
len(self.demonstration_buffer[info_teacher.agents[l]]["actions"]) > 0
)
agent_id, batch_size=None, training_length=self.policy.sequence_length)
agent_id,
batch_size=None,
training_length=self.policy.sequence_length,
)
self.demonstration_buffer[agent_id].reset_agent()
super(OnlineBCTrainer, self).process_experiences(current_info, next_info)

61
ml-agents/mlagents/trainers/bc/policy.py


with self.graph.as_default():
self.model = BehavioralCloningModel(
h_size=int(trainer_parameters['hidden_units']),
lr=float(trainer_parameters['learning_rate']),
n_layers=int(trainer_parameters['num_layers']),
h_size=int(trainer_parameters["hidden_units"]),
lr=float(trainer_parameters["learning_rate"]),
n_layers=int(trainer_parameters["num_layers"]),
use_recurrent=trainer_parameters['use_recurrent'],
use_recurrent=trainer_parameters["use_recurrent"],
seed=seed)
seed=seed,
)
if load:
self._load_graph()

self.inference_dict = {'action': self.model.sample_action}
self.update_dict = {'policy_loss': self.model.loss,
'update_batch': self.model.update}
self.inference_dict = {"action": self.model.sample_action}
self.update_dict = {
"policy_loss": self.model.loss,
"update_batch": self.model.update,
}
self.inference_dict['memory_out'] = self.model.memory_out
self.inference_dict["memory_out"] = self.model.memory_out
self.evaluate_rate = 1.0
self.update_rate = 0.5

:param brain_info: BrainInfo input to network.
:return: Results of evaluation.
"""
feed_dict = {self.model.dropout_rate: self.evaluate_rate,
self.model.sequence_length: 1}
feed_dict = {
self.model.dropout_rate: self.evaluate_rate,
self.model.sequence_length: 1,
}
feed_dict = self._fill_eval_dict(feed_dict, brain_info)
if self.use_recurrent:

:return: Results of update.
"""
feed_dict = {self.model.dropout_rate: self.update_rate,
self.model.batch_size: num_sequences,
self.model.sequence_length: self.sequence_length}
feed_dict = {
self.model.dropout_rate: self.update_rate,
self.model.batch_size: num_sequences,
self.model.sequence_length: self.sequence_length,
}
feed_dict[self.model.true_action] = mini_batch['actions']. \
reshape([-1, self.brain.vector_action_space_size[0]])
feed_dict[self.model.true_action] = mini_batch["actions"].reshape(
[-1, self.brain.vector_action_space_size[0]]
)
feed_dict[self.model.true_action] = mini_batch['actions'].reshape(
[-1, len(self.brain.vector_action_space_size)])
feed_dict[self.model.true_action] = mini_batch["actions"].reshape(
[-1, len(self.brain.vector_action_space_size)]
)
(num_sequences, sum(self.brain.vector_action_space_size)))
(num_sequences, sum(self.brain.vector_action_space_size))
)
apparent_obs_size = self.brain.vector_observation_space_size * \
self.brain.num_stacked_vector_observations
feed_dict[self.model.vector_in] = mini_batch['vector_obs'] \
.reshape([-1,apparent_obs_size])
apparent_obs_size = (
self.brain.vector_observation_space_size
* self.brain.num_stacked_vector_observations
)
feed_dict[self.model.vector_in] = mini_batch["vector_obs"].reshape(
[-1, apparent_obs_size]
)
visual_obs = mini_batch['visual_obs%d' % i]
visual_obs = mini_batch["visual_obs%d" % i]
feed_dict[self.model.visual_in[i]] = visual_obs
if self.use_recurrent:
feed_dict[self.model.memory_in] = np.zeros([num_sequences, self.m_size])

53
ml-agents/mlagents/trainers/bc/trainer.py


class BCTrainer(Trainer):
"""The BCTrainer is an implementation of Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed,
run_id):
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""
Responsible for collecting experiences and training the behavioral cloning model.
:param trainer_parameters: The parameters for the trainer (dictionary).

:param run_id: The identifier of the current run
"""
super(BCTrainer, self).__init__(brain, trainer_parameters, training,
run_id)
super(BCTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.stats = {'Losses/Cloning Loss': [], 'Environment/Episode Length': [],
'Environment/Cumulative Reward': []}
self.stats = {
"Losses/Cloning Loss": [],
"Environment/Episode Length": [],
"Environment/Cumulative Reward": [],
}
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.batches_per_epoch = trainer_parameters["batches_per_epoch"]
self.demonstration_buffer = Buffer()
self.evaluation_buffer = Buffer()

Returns the maximum number of steps. Is used to know when the trainer should be stopped.
:return: The maximum number of steps of the trainer
"""
return float(self.trainer_parameters['max_steps'])
return float(self.trainer_parameters["max_steps"])
@property
def get_step(self):

Returns the last reward the trainer has had
:return: the new last reward
"""
if len(self.stats['Environment/Cumulative Reward']) > 0:
return np.mean(self.stats['Environment/Cumulative Reward'])
if len(self.stats["Environment/Cumulative Reward"]) > 0:
return np.mean(self.stats["Environment/Cumulative Reward"])
else:
return 0

self.policy.increment_step()
return
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
def add_experiences(
self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take_action_outputs
):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).

for l in range(len(info_student.agents)):
if info_student.local_done[l]:
agent_id = info_student.agents[l]
self.stats['Environment/Cumulative Reward'].append(
self.cumulative_rewards.get(agent_id, 0))
self.stats['Environment/Episode Length'].append(
self.episode_steps.get(agent_id, 0))
self.stats["Environment/Cumulative Reward"].append(
self.cumulative_rewards.get(agent_id, 0)
)
self.stats["Environment/Episode Length"].append(
self.episode_steps.get(agent_id, 0)
)
self.cumulative_rewards[agent_id] = 0
self.episode_steps[agent_id] = 0

Returns whether or not the trainer has enough elements to run update model
:return: A boolean corresponding to whether or not update_model() can be run
"""
return len(self.demonstration_buffer.update_buffer['actions']) > self.n_sequences
return (
len(self.demonstration_buffer.update_buffer["actions"]) > self.n_sequences
)
def update_policy(self):
"""

batch_losses = []
num_batches = min(len(self.demonstration_buffer.update_buffer['actions']) //
self.n_sequences, self.batches_per_epoch)
num_batches = min(
len(self.demonstration_buffer.update_buffer["actions"]) // self.n_sequences,
self.batches_per_epoch,
)
for i in range(num_batches):
update_buffer = self.demonstration_buffer.update_buffer
start = i * self.n_sequences

loss = run_out['policy_loss']
loss = run_out["policy_loss"]
self.stats['Losses/Cloning Loss'].append(np.mean(batch_losses))
self.stats["Losses/Cloning Loss"].append(np.mean(batch_losses))
self.stats['Losses/Cloning Loss'].append(0)
self.stats["Losses/Cloning Loss"].append(0)

73
ml-agents/mlagents/trainers/buffer.py


"""
Related to errors with the Buffer.
"""
pass

# The number of overlapping sequences of length training_length that can be taken from a
# list of len(self) elements is len(self) - training_length + 1, which must be at least batch_size
if (len(self) - training_length + 1) < batch_size:
raise BufferException("The batch size and training length requested for get_batch where"
" too large given the current number of data points.")
raise BufferException(
"The batch size and training length requested for get_batch where"
" too large given the current number of data points."
)
tmp_list += [np.array(self[end - training_length:end])]
tmp_list += [np.array(self[end - training_length : end])]
return np.array(tmp_list)
if sequential:
# The sequences will not have overlapping elements (this involves padding)

# retrieve the maximum number of elements
batch_size = len(self) // training_length + 1 * (leftover != 0)
batch_size = len(self) // training_length + 1 * (
leftover != 0
)
if batch_size > (len(self) // training_length + 1 * (leftover != 0)):
raise BufferException("The batch size and training length requested for get_batch where"
" too large given the current number of data points.")
if batch_size > (
len(self) // training_length + 1 * (leftover != 0)
):
raise BufferException(
"The batch size and training length requested for get_batch where"
" too large given the current number of data points."
)
for end in range(len(self), len(self) % training_length, -training_length)[:batch_size]:
tmp_list += [np.array(self[end - training_length:end])]
for end in range(
len(self), len(self) % training_length, -training_length
)[:batch_size]:
tmp_list += [np.array(self[end - training_length : end])]
tmp_list += [np.array([padding] * (training_length - leftover) + self[:leftover])]
tmp_list += [
np.array(
[padding] * (training_length - leftover)
+ self[:leftover]
)
]
tmp_list.reverse()
return np.array(tmp_list)
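
For readers following the get_batch logic above, here is a minimal standalone sketch (a hypothetical helper, not part of the Buffer API) of the same backward, non-overlapping slicing with left-padding of the oldest partial sequence:

import numpy as np

def slice_sequences(data, training_length, padding=0):
    # Walk backwards in steps of training_length, then pad the leftover head.
    leftover = len(data) % training_length
    chunks = []
    for end in range(len(data), leftover, -training_length):
        chunks.append(np.array(data[end - training_length:end]))
    if leftover != 0:
        chunks.append(np.array([padding] * (training_length - leftover) + list(data[:leftover])))
    chunks.reverse()
    return np.array(chunks)

# slice_sequences(list(range(10)), 4) -> shape (3, 4); the first row is left-padded with zeros.
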

super(Buffer.AgentBuffer, self).__init__()
def __str__(self):
return ", ".join(["'{0}' : {1}".format(k, str(self[k])) for k in self.keys()])
return ", ".join(
["'{0}' : {1}".format(k, str(self[k])) for k in self.keys()]
)
def reset_agent(self):
"""

if key_list is None:
key_list = list(self.keys())
if not self.check_length(key_list):
raise BufferException("Unable to shuffle if the fields are not of same length")
raise BufferException(
"Unable to shuffle if the fields are not of same length"
)
s = np.arange(len(self[key_list[0]]))
np.random.shuffle(s)
for key in key_list:

super(Buffer, self).__init__()
def __str__(self):
return "update buffer :\n\t{0}\nlocal_buffers :\n{1}".format(str(self.update_buffer),
'\n'.join(
['\tagent {0} :{1}'.format(k, str(self[k])) for
k in self.keys()]))
return "update buffer :\n\t{0}\nlocal_buffers :\n{1}".format(
str(self.update_buffer),
"\n".join(
["\tagent {0} :{1}".format(k, str(self[k])) for k in self.keys()]
),
)
def __getitem__(self, key):
if key not in self.keys():

for k in agent_ids:
self[k].reset_agent()
def append_update_buffer(self, agent_id, key_list=None, batch_size=None, training_length=None):
def append_update_buffer(
self, agent_id, key_list=None, batch_size=None, training_length=None
):
"""
Appends the buffer of an agent to the update buffer.
:param agent_id: The id of the agent which data will be appended

if key_list is None:
key_list = self[agent_id].keys()
if not self[agent_id].check_length(key_list):
raise BufferException("The length of the fields {0} for agent {1} where not of same length"
.format(key_list, agent_id))
raise BufferException(
"The length of the fields {0} for agent {1} where not of same length".format(
key_list, agent_id
)
)
self[agent_id][field_key].get_batch(batch_size=batch_size, training_length=training_length)
self[agent_id][field_key].get_batch(
batch_size=batch_size, training_length=training_length
)
def append_all_agent_batch_to_update_buffer(self, key_list=None, batch_size=None, training_length=None):
def append_all_agent_batch_to_update_buffer(
self, key_list=None, batch_size=None, training_length=None
):
"""
Appends the buffer of all agents to the update buffer.
:param key_list: The fields that must be added. If None: all fields will be appended.

67
ml-agents/mlagents/trainers/curriculum.py


import logging
logger = logging.getLogger('mlagents.trainers')
logger = logging.getLogger("mlagents.trainers")
class Curriculum(object):

self._lesson_num = 0
# The name of the brain should be the basename of the file without the
# extension.
self._brain_name = os.path.basename(location).split('.')[0]
self._brain_name = os.path.basename(location).split(".")[0]
raise CurriculumError(
'The file {0} could not be found.'.format(location))
raise CurriculumError("The file {0} could not be found.".format(location))
raise CurriculumError('There was an error decoding {}'
.format(location))
raise CurriculumError("There was an error decoding {}".format(location))
for key in ['parameters', 'measure', 'thresholds',
'min_lesson_length', 'signal_smoothing']:
for key in [
"parameters",
"measure",
"thresholds",
"min_lesson_length",
"signal_smoothing",
]:
raise CurriculumError("{0} does not contain a "
"{1} field."
.format(location, key))
raise CurriculumError(
"{0} does not contain a " "{1} field.".format(location, key)
)
self.measure = self.data['measure']
self.min_lesson_length = self.data['min_lesson_length']
self.max_lesson_num = len(self.data['thresholds'])
self.measure = self.data["measure"]
self.min_lesson_length = self.data["min_lesson_length"]
self.max_lesson_num = len(self.data["thresholds"])
parameters = self.data['parameters']
parameters = self.data["parameters"]
'The parameter {0} in Curriculum {1} is not present in '
'the Environment'.format(key, location))
"The parameter {0} in Curriculum {1} is not present in "
"the Environment".format(key, location)
)
'The parameter {0} in Curriculum {1} must have {2} values '
'but {3} were found'.format(key, location,
self.max_lesson_num + 1,
len(parameters[key])))
"The parameter {0} in Curriculum {1} must have {2} values "
"but {3} were found".format(
key, location, self.max_lesson_num + 1, len(parameters[key])
)
)
@property
def lesson_num(self):

"""
if not self.data or not measure_val or math.isnan(measure_val):
return False
if self.data['signal_smoothing']:
if self.data["signal_smoothing"]:
if measure_val > self.data['thresholds'][self.lesson_num]:
if measure_val > self.data["thresholds"][self.lesson_num]:
parameters = self.data['parameters']
parameters = self.data["parameters"]
logger.info('{0} lesson changed. Now in lesson {1}: {2}'
.format(self._brain_name,
self.lesson_num,
', '.join([str(x) + ' -> ' + str(config[x])
for x in config])))
logger.info(
"{0} lesson changed. Now in lesson {1}: {2}".format(
self._brain_name,
self.lesson_num,
", ".join([str(x) + " -> " + str(config[x]) for x in config]),
)
)
return True
return False

lesson = self.lesson_num
lesson = max(0, min(lesson, self.max_lesson_num))
config = {}
parameters = self.data['parameters']
parameters = self.data["parameters"]
for key in parameters:
config[key] = parameters[key][lesson]
return config
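
To make the required fields above concrete, a hypothetical curriculum file of the shape this loader accepts might look like the following (values purely illustrative; two thresholds imply three lessons, so each parameter needs max_lesson_num + 1 = 3 values):

example_curriculum = {
    "measure": "reward",
    "thresholds": [0.5, 0.7],
    "min_lesson_length": 100,
    "signal_smoothing": True,
    "parameters": {
        "wall_height": [1.0, 2.0, 4.0],  # one value per lesson; parameter name is illustrative
    },
}

get_config() would then return {"wall_height": 1.0} for lesson 0, and increment_lesson advances once the (optionally smoothed) measure exceeds the current threshold.
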

44
ml-agents/mlagents/trainers/demo_loader.py


current_brain_info = brain_infos[idx]
next_brain_info = brain_infos[idx + 1]
demo_buffer[0].last_brain_info = current_brain_info
demo_buffer[0]['done'].append(next_brain_info.local_done[0])
demo_buffer[0]['rewards'].append(next_brain_info.rewards[0])
demo_buffer[0]["done"].append(next_brain_info.local_done[0])
demo_buffer[0]["rewards"].append(next_brain_info.rewards[0])
demo_buffer[0]['visual_obs%d' % i] \
.append(current_brain_info.visual_observations[i][0])
demo_buffer[0]["visual_obs%d" % i].append(
current_brain_info.visual_observations[i][0]
)
demo_buffer[0]['vector_obs'] \
.append(current_brain_info.vector_observations[0])
demo_buffer[0]['actions'].append(next_brain_info.previous_vector_actions[0])
demo_buffer[0]["vector_obs"].append(
current_brain_info.vector_observations[0]
)
demo_buffer[0]["actions"].append(next_brain_info.previous_vector_actions[0])
demo_buffer.append_update_buffer(0, batch_size=None,
training_length=sequence_length)
demo_buffer.append_update_buffer(
0, batch_size=None, training_length=sequence_length
)
demo_buffer.append_update_buffer(0, batch_size=None,
training_length=sequence_length)
demo_buffer.append_update_buffer(
0, batch_size=None, training_length=sequence_length
)
return demo_buffer

INITIAL_POS = 33
if not os.path.isfile(file_path):
raise FileNotFoundError("The demonstration file {} does not exist.".format(file_path))
raise FileNotFoundError(
"The demonstration file {} does not exist.".format(file_path)
)
if file_extension != '.demo':
raise ValueError("The file is not a '.demo' file. Please provide a file with the "
"correct extension.")
if file_extension != ".demo":
raise ValueError(
"The file is not a '.demo' file. Please provide a file with the "
"correct extension."
)
brain_params = None
brain_infos = []

next_pos, pos = _DecodeVarint32(data, pos)
if obs_decoded == 0:
meta_data_proto = DemonstrationMetaProto()
meta_data_proto.ParseFromString(data[pos:pos + next_pos])
meta_data_proto.ParseFromString(data[pos : pos + next_pos])
brain_param_proto.ParseFromString(data[pos:pos + next_pos])
brain_param_proto.ParseFromString(data[pos : pos + next_pos])
agent_info.ParseFromString(data[pos:pos + next_pos])
agent_info.ParseFromString(data[pos : pos + next_pos])
brain_info = BrainInfo.from_agent_proto([agent_info], brain_params)
brain_infos.append(brain_info)
if len(brain_infos) == total_expected:
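
The demo-file parsing above relies on a varint length prefix before each protobuf message; a rough sketch of that framing, using the same _DecodeVarint32 helper the loader already calls (the iteration scheme here is an assumption, not the loader's exact loop):

from google.protobuf.internal.decoder import _DecodeVarint32

def iter_delimited(data, pos):
    # Yield (start, size) for each length-prefixed message in the byte blob.
    while pos < len(data):
        size, pos = _DecodeVarint32(data, pos)
        yield pos, size
        pos += size

Each data[start:start + size] slice can then be handed to ParseFromString, mirroring the meta_data_proto, brain_param_proto, and agent_info calls above.
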

5
ml-agents/mlagents/trainers/exception.py


Contains exceptions for the trainers package.
"""
class MetaCurriculumError(TrainerError):
"""

210
ml-agents/mlagents/trainers/learn.py


:param run_options: Command line arguments for training.
"""
# Docker Parameters
docker_target_name = (run_options['--docker-target-name']
if run_options['--docker-target-name'] != 'None' else None)
docker_target_name = (
run_options["--docker-target-name"]
if run_options["--docker-target-name"] != "None"
else None
)
env_path = (run_options['--env']
if run_options['--env'] != 'None' else None)
run_id = run_options['--run-id']
load_model = run_options['--load']
train_model = run_options['--train']
save_freq = int(run_options['--save-freq'])
keep_checkpoints = int(run_options['--keep-checkpoints'])
base_port = int(run_options['--base-port'])
num_envs = int(run_options['--num-envs'])
curriculum_folder = (run_options['--curriculum']
if run_options['--curriculum'] != 'None' else None)
lesson = int(run_options['--lesson'])
fast_simulation = not bool(run_options['--slow'])
no_graphics = run_options['--no-graphics']
trainer_config_path = run_options['<trainer-config-path>']
env_path = run_options["--env"] if run_options["--env"] != "None" else None
run_id = run_options["--run-id"]
load_model = run_options["--load"]
train_model = run_options["--train"]
save_freq = int(run_options["--save-freq"])
keep_checkpoints = int(run_options["--keep-checkpoints"])
base_port = int(run_options["--base-port"])
num_envs = int(run_options["--num-envs"])
curriculum_folder = (
run_options["--curriculum"] if run_options["--curriculum"] != "None" else None
)
lesson = int(run_options["--lesson"])
fast_simulation = not bool(run_options["--slow"])
no_graphics = run_options["--no-graphics"]
trainer_config_path = run_options["<trainer-config-path>"]
model_path = './models/{run_id}-{sub_id}'.format(run_id=run_id, sub_id=sub_id)
summaries_dir = './summaries'
model_path = "./models/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
summaries_dir = "./summaries"
trainer_config_path = \
'/{docker_target_name}/{trainer_config_path}'.format(
trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
docker_target_name=docker_target_name,
trainer_config_path=trainer_config_path,
)
if curriculum_folder is not None:
curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
trainer_config_path=trainer_config_path)
if curriculum_folder is not None:
curriculum_folder = \
'/{docker_target_name}/{curriculum_folder}'.format(
docker_target_name=docker_target_name,
curriculum_folder=curriculum_folder)
model_path = '/{docker_target_name}/models/{run_id}-{sub_id}'.format(
docker_target_name=docker_target_name,
run_id=run_id,
sub_id=sub_id)
summaries_dir = '/{docker_target_name}/summaries'.format(
docker_target_name=docker_target_name)
curriculum_folder=curriculum_folder,
)
model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id
)
summaries_dir = "/{docker_target_name}/summaries".format(
docker_target_name=docker_target_name
)
trainer_config = load_config(trainer_config_path)
env_factory = create_environment_factory(

run_seed,
base_port + (sub_id * num_envs)
base_port + (sub_id * num_envs),
tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
save_freq, maybe_meta_curriculum,
load_model, train_model,
keep_checkpoints, lesson, env.external_brains,
run_seed, fast_simulation)
tc = TrainerController(
model_path,
summaries_dir,
run_id + "-" + str(sub_id),
save_freq,
maybe_meta_curriculum,
load_model,
train_model,
keep_checkpoints,
lesson,
env.external_brains,
run_seed,
fast_simulation,
)
# Signal that environment has been launched.
process_queue.put(True)

def try_create_meta_curriculum(curriculum_folder: Optional[str], env: BaseUnityEnvironment) -> Optional[MetaCurriculum]:
def try_create_meta_curriculum(
curriculum_folder: Optional[str], env: BaseUnityEnvironment
) -> Optional[MetaCurriculum]:
if curriculum_folder is None:
return None
else:

if brain_name not in env.external_brains.keys():
raise MetaCurriculumError('One of the curricula '
'defined in ' +
curriculum_folder + ' '
'does not have a corresponding '
'Brain. Check that the '
'curriculum file has the same '
'name as the Brain '
'whose curriculum it defines.')
raise MetaCurriculumError(
"One of the curricula "
"defined in " + curriculum_folder + " "
"does not have a corresponding "
"Brain. Check that the "
"curriculum file has the same "
"name as the Brain "
"whose curriculum it defines."
)
for f in glob.glob('/{docker_target_name}/*'.format(
docker_target_name=docker_target_name)):
for f in glob.glob(
"/{docker_target_name}/*".format(docker_target_name=docker_target_name)
):
shutil.copytree(f,
'/ml-agents/{b}'.format(b=b))
shutil.copytree(f, "/ml-agents/{b}".format(b=b))
src_f = '/{docker_target_name}/{b}'.format(
docker_target_name=docker_target_name, b=b)
dst_f = '/ml-agents/{b}'.format(b=b)
src_f = "/{docker_target_name}/{b}".format(
docker_target_name=docker_target_name, b=b
)
dst_f = "/ml-agents/{b}".format(b=b)
logging.getLogger('mlagents.trainers').info(e)
env_path = '/ml-agents/{env_path}'.format(env_path=env_path)
logging.getLogger("mlagents.trainers").info(e)
env_path = "/ml-agents/{env_path}".format(env_path=env_path)
return env_path

trainer_config = yaml.load(data_file)
return trainer_config
except IOError:
raise UnityEnvironmentException('Parameter file could not be found '
'at {}.'
.format(trainer_config_path))
raise UnityEnvironmentException(
"Parameter file could not be found " "at {}.".format(trainer_config_path)
)
raise UnityEnvironmentException('There was an error decoding '
'Trainer Config from this path : {}'
.format(trainer_config_path))
raise UnityEnvironmentException(
"There was an error decoding "
"Trainer Config from this path : {}".format(trainer_config_path)
)
env_path: str,
docker_target_name: str,
no_graphics: bool,
seed: Optional[int],
start_port: int
env_path: str,
docker_target_name: str,
no_graphics: bool,
seed: Optional[int],
start_port: int,
env_path = (env_path.strip()
.replace('.app', '')
.replace('.exe', '')
.replace('.x86_64', '')
.replace('.x86', ''))
env_path = (
env_path.strip()
.replace(".app", "")
.replace(".exe", "")
.replace(".x86_64", "")
.replace(".x86", "")
)
"""
"""
Comments for future maintenance:
Some OS/VM instances (e.g. COS GCP Image) mount filesystems
with COS flag which prevents execution of the Unity scene,

# Navigate in docker path and find env_path and copy it.
env_path = prepare_for_docker_run(docker_target_name,
env_path)
# Navigate in docker path and find env_path and copy it.
env_path = prepare_for_docker_run(docker_target_name, env_path)
seed_count = 10000
seed_pool = [np.random.randint(0, seed_count) for _ in range(seed_count)]

seed=env_seed,
docker_training=docker_training,
no_graphics=no_graphics,
base_port=start_port
base_port=start_port,
print('''
print(
"""

`
¬`
''')
"""
)
print('\n\n\tUnity Technologies\n')
print("\n\n\tUnity Technologies\n")
_USAGE = '''
_USAGE = """
Usage:
mlagents-learn <trainer-config-path> [options]
mlagents-learn --help

--docker-target-name=<dt> Docker volume to store training-specific files [default: None].
--no-graphics Whether to run the environment in no-graphics mode [default: False].
--debug Whether to run ML-Agents in debug mode with detailed logging [default: False].
'''
"""
trainer_logger = logging.getLogger('mlagents.trainers')
env_logger = logging.getLogger('mlagents.envs')
trainer_logger = logging.getLogger("mlagents.trainers")
env_logger = logging.getLogger("mlagents.envs")
if options['--debug']:
trainer_logger.setLevel('DEBUG')
env_logger.setLevel('DEBUG')
num_runs = int(options['--num-runs'])
seed = int(options['--seed'])
if options["--debug"]:
trainer_logger.setLevel("DEBUG")
env_logger.setLevel("DEBUG")
num_runs = int(options["--num-runs"])
seed = int(options["--seed"])
if options['--env'] == 'None' and num_runs > 1:
raise TrainerError('It is not possible to launch more than one concurrent training session '
'when training from the editor.')
if options["--env"] == "None" and num_runs > 1:
raise TrainerError(
"It is not possible to launch more than one concurrent training session "
"when training from the editor."
)
jobs = []
run_seed = seed

job.join()
except KeyboardInterrupt:
pass
# For python debugger to directly run this script
if __name__ == "__main__":

56
ml-agents/mlagents/trainers/meta_curriculum.py


import logging
logger = logging.getLogger('mlagents.trainers')
logger = logging.getLogger("mlagents.trainers")
class MetaCurriculum(object):

try:
for curriculum_filename in os.listdir(curriculum_folder):
brain_name = curriculum_filename.split('.')[0]
curriculum_filepath = \
os.path.join(curriculum_folder, curriculum_filename)
curriculum = Curriculum(curriculum_filepath,
default_reset_parameters)
brain_name = curriculum_filename.split(".")[0]
curriculum_filepath = os.path.join(
curriculum_folder, curriculum_filename
)
curriculum = Curriculum(curriculum_filepath, default_reset_parameters)
if any([(parameter in curriculum.get_config().keys())
for parameter in used_reset_parameters]):
logger.warning('Two or more curriculums will '
'attempt to change the same reset '
'parameter. The result will be '
'non-deterministic.')
if any(
[
(parameter in curriculum.get_config().keys())
for parameter in used_reset_parameters
]
):
logger.warning(
"Two or more curriculums will "
"attempt to change the same reset "
"parameter. The result will be "
"non-deterministic."
)
raise MetaCurriculumError(curriculum_folder + ' is not a '
'directory. Refer to the ML-Agents '
'curriculum learning docs.')
raise MetaCurriculumError(
curriculum_folder + " is not a "
"directory. Refer to the ML-Agents "
"curriculum learning docs."
)
@property
def brains_to_curriculums(self):

Whether the curriculum of the specified brain should attempt to
increment its lesson.
"""
return reward_buff_size >= (self.brains_to_curriculums[brain_name]
.min_lesson_length)
return reward_buff_size >= (
self.brains_to_curriculums[brain_name].min_lesson_length
)
def increment_lessons(self, measure_vals, reward_buff_sizes=None):
"""Attempts to increments all the lessons of all the curriculums in this

for brain_name, buff_size in reward_buff_sizes.items():
if self._lesson_ready_to_increment(brain_name, buff_size):
measure_val = measure_vals[brain_name]
ret[brain_name] = (self.brains_to_curriculums[brain_name]
.increment_lesson(measure_val))
ret[brain_name] = self.brains_to_curriculums[
brain_name
].increment_lesson(measure_val)
ret[brain_name] = (self.brains_to_curriculums[brain_name]
.increment_lesson(measure_val))
ret[brain_name] = self.brains_to_curriculums[
brain_name
].increment_lesson(measure_val)
def set_all_curriculums_to_lesson_num(self, lesson_num):
"""Sets all the curriculums in this meta curriculum to a specified

"""
for _, curriculum in self.brains_to_curriculums.items():
curriculum.lesson_num = lesson_num
def get_config(self):
"""Get the combined configuration of all curriculums in this

464
ml-agents/mlagents/trainers/models.py


self.vector_in = None
self.global_step, self.increment_step = self.create_global_steps()
self.visual_in = []
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name='masks')
self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name="batch_size")
self.sequence_length = tf.placeholder(
shape=None, dtype=tf.int32, name="sequence_length"
)
self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name="masks")
self.mask = tf.cast(self.mask_input, tf.int32)
self.use_recurrent = use_recurrent
if self.use_recurrent:

self.normalize = normalize
self.act_size = brain.vector_action_space_size
self.vec_obs_size = brain.vector_observation_space_size * \
brain.num_stacked_vector_observations
self.vec_obs_size = (
brain.vector_observation_space_size * brain.num_stacked_vector_observations
)
tf.Variable(int(brain.vector_action_space_type == 'continuous'),
name='is_continuous_control', trainable=False, dtype=tf.int32)
tf.Variable(self._version_number_, name='version_number', trainable=False, dtype=tf.int32)
tf.Variable(
int(brain.vector_action_space_type == "continuous"),
name="is_continuous_control",
trainable=False,
dtype=tf.int32,
)
tf.Variable(
self._version_number_,
name="version_number",
trainable=False,
dtype=tf.int32,
)
if brain.vector_action_space_type == 'continuous':
tf.Variable(self.act_size[0], name="action_output_shape", trainable=False, dtype=tf.int32)
if brain.vector_action_space_type == "continuous":
tf.Variable(
self.act_size[0],
name="action_output_shape",
trainable=False,
dtype=tf.int32,
)
tf.Variable(sum(self.act_size), name="action_output_shape", trainable=False, dtype=tf.int32)
tf.Variable(
sum(self.act_size),
name="action_output_shape",
trainable=False,
dtype=tf.int32,
)
global_step = tf.Variable(0, name="global_step", trainable=False, dtype=tf.int32)
global_step = tf.Variable(
0, name="global_step", trainable=False, dtype=tf.int32
)
increment_step = tf.assign(global_step, tf.add(global_step, 1))
return global_step, increment_step

:param name: Desired name of input op.
:return: input op.
"""
o_size_h = camera_parameters['height']
o_size_w = camera_parameters['width']
bw = camera_parameters['blackAndWhite']
o_size_h = camera_parameters["height"]
o_size_w = camera_parameters["width"]
bw = camera_parameters["blackAndWhite"]
if bw:
c_channels = 1

visual_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32,
name=name)
visual_in = tf.placeholder(
shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32, name=name
)
def create_vector_input(self, name='vector_observation'):
def create_vector_input(self, name="vector_observation"):
"""
Creates ops for vector observation input.
:param name: Name of the placeholder op.

self.vector_in = tf.placeholder(shape=[None, self.vec_obs_size], dtype=tf.float32,
name=name)
self.vector_in = tf.placeholder(
shape=[None, self.vec_obs_size], dtype=tf.float32, name=name
)
self.running_mean = tf.get_variable("running_mean", [self.vec_obs_size],
trainable=False, dtype=tf.float32,
initializer=tf.zeros_initializer())
self.running_variance = tf.get_variable("running_variance", [self.vec_obs_size],
trainable=False,
dtype=tf.float32,
initializer=tf.ones_initializer())
self.update_mean, self.update_variance = self.create_normalizer_update(self.vector_in)
self.running_mean = tf.get_variable(
"running_mean",
[self.vec_obs_size],
trainable=False,
dtype=tf.float32,
initializer=tf.zeros_initializer(),
)
self.running_variance = tf.get_variable(
"running_variance",
[self.vec_obs_size],
trainable=False,
dtype=tf.float32,
initializer=tf.ones_initializer(),
)
self.update_mean, self.update_variance = self.create_normalizer_update(
self.vector_in
)
self.normalized_state = tf.clip_by_value((self.vector_in - self.running_mean) / tf.sqrt(
self.running_variance / (tf.cast(self.global_step, tf.float32) + 1)), -5, 5,
name="normalized_state")
self.normalized_state = tf.clip_by_value(
(self.vector_in - self.running_mean)
/ tf.sqrt(
self.running_variance / (tf.cast(self.global_step, tf.float32) + 1)
),
-5,
5,
name="normalized_state",
)
return self.normalized_state
else:
return self.vector_in

new_mean = self.running_mean + (mean_current_observation - self.running_mean) / \
tf.cast(tf.add(self.global_step, 1), tf.float32)
new_variance = self.running_variance + (mean_current_observation - new_mean) * \
(mean_current_observation - self.running_mean)
new_mean = self.running_mean + (
mean_current_observation - self.running_mean
) / tf.cast(tf.add(self.global_step, 1), tf.float32)
new_variance = self.running_variance + (mean_current_observation - new_mean) * (
mean_current_observation - self.running_mean
)
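
As a plain-NumPy restatement of the normalizer update ops above (a hypothetical helper outside the TensorFlow graph, shown only to make the arithmetic explicit):

import numpy as np

def update_normalizer(running_mean, running_variance, obs_batch, step):
    # Incremental mean plus a Welford-style variance accumulator over batch means.
    mean_obs = obs_batch.mean(axis=0)
    new_mean = running_mean + (mean_obs - running_mean) / (step + 1)
    new_variance = running_variance + (mean_obs - new_mean) * (mean_obs - running_mean)
    return new_mean, new_variance

The normalized observation then subtracts running_mean and divides by sqrt(running_variance / (step + 1)), clipped to [-5, 5], as in the normalized_state expression above.
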
def create_vector_observation_encoder(observation_input, h_size, activation, num_layers, scope,
reuse):
def create_vector_observation_encoder(
observation_input, h_size, activation, num_layers, scope, reuse
):
"""
Builds a set of hidden state encoders.
:param reuse: Whether to re-use the weights within the same scope.

with tf.variable_scope(scope):
hidden = observation_input
for i in range(num_layers):
hidden = tf.layers.dense(hidden, h_size, activation=activation, reuse=reuse,
name="hidden_{}".format(i),
kernel_initializer=c_layers.variance_scaling_initializer(
1.0))
hidden = tf.layers.dense(
hidden,
h_size,
activation=activation,
reuse=reuse,
name="hidden_{}".format(i),
kernel_initializer=c_layers.variance_scaling_initializer(1.0),
)
def create_visual_observation_encoder(self, image_input, h_size, activation, num_layers, scope,
reuse):
def create_visual_observation_encoder(
self, image_input, h_size, activation, num_layers, scope, reuse
):
"""
Builds a set of visual (CNN) encoders.
:param reuse: Whether to re-use the weights within the same scope.

:return: List of hidden layer tensors.
"""
with tf.variable_scope(scope):
conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8], strides=[4, 4],
activation=tf.nn.elu, reuse=reuse, name="conv_1")
conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],
activation=tf.nn.elu, reuse=reuse, name="conv_2")
conv1 = tf.layers.conv2d(
image_input,
16,
kernel_size=[8, 8],
strides=[4, 4],
activation=tf.nn.elu,
reuse=reuse,
name="conv_1",
)
conv2 = tf.layers.conv2d(
conv1,
32,
kernel_size=[4, 4],
strides=[2, 2],
activation=tf.nn.elu,
reuse=reuse,
name="conv_2",
)
with tf.variable_scope(scope + '/' + 'flat_encoding'):
hidden_flat = self.create_vector_observation_encoder(hidden, h_size, activation,
num_layers, scope, reuse)
with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = self.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat
@staticmethod

:return: The action output dimension [batch_size, num_branches] and the concatenated normalized logits
"""
action_idx = [0] + list(np.cumsum(action_size))
branches_logits = [all_logits[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]
branch_masks = [action_masks[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]
raw_probs = [tf.multiply(tf.nn.softmax(branches_logits[k]) + 1.0e-10, branch_masks[k])
for k in range(len(action_size))]
branches_logits = [
all_logits[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
branch_masks = [
action_masks[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
raw_probs = [
tf.multiply(tf.nn.softmax(branches_logits[k]) + 1.0e-10, branch_masks[k])
for k in range(len(action_size))
]
for k in range(len(action_size))]
output = tf.concat([tf.multinomial(tf.log(normalized_probs[k]), 1) for k in range(len(action_size))], axis=1)
return output, tf.concat([tf.log(normalized_probs[k] + 1.0e-10) for k in range(len(action_size))], axis=1)
for k in range(len(action_size))
]
output = tf.concat(
[
tf.multinomial(tf.log(normalized_probs[k]), 1)
for k in range(len(action_size))
],
axis=1,
)
return (
output,
tf.concat(
[
tf.log(normalized_probs[k] + 1.0e-10)
for k in range(len(action_size))
],
axis=1,
),
)
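
In plain NumPy, the per-branch masked softmax that these ops construct could be sketched as follows (hypothetical helper; names chosen to mirror the variables above):

import numpy as np

def masked_branch_probs(all_logits, action_masks, action_size):
    # Split logits and masks per action branch, softmax each branch,
    # zero out masked actions, then renormalize.
    action_idx = [0] + list(np.cumsum(action_size))
    normalized = []
    for i in range(len(action_size)):
        logits = all_logits[:, action_idx[i]:action_idx[i + 1]]
        mask = action_masks[:, action_idx[i]:action_idx[i + 1]]
        exp = np.exp(logits - logits.max(axis=1, keepdims=True))
        raw = (exp / exp.sum(axis=1, keepdims=True) + 1.0e-10) * mask
        normalized.append(raw / raw.sum(axis=1, keepdims=True))
    return normalized
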
def create_observation_streams(self, num_streams, h_size, num_layers):
"""

self.visual_in = []
for i in range(brain.number_visual_observations):
visual_input = self.create_visual_input(brain.camera_resolutions[i],
name="visual_observation_" + str(i))
visual_input = self.create_visual_input(
brain.camera_resolutions[i], name="visual_observation_" + str(i)
)
self.visual_in.append(visual_input)
vector_observation_input = self.create_vector_input()

hidden_state, hidden_visual = None, None
if self.vis_obs_size > 0:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_visual_observation_encoder(self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}"
.format(i, j), False)
encoded_visual = self.create_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
hidden_state = self.create_vector_observation_encoder(vector_observation_input,
h_size, activation_fn,
num_layers,
"main_graph_{}".format(i),
False)
hidden_state = self.create_vector_observation_encoder(
vector_observation_input,
h_size,
activation_fn,
num_layers,
"main_graph_{}".format(i),
False,
)
if hidden_state is not None and hidden_visual is not None:
final_hidden = tf.concat([hidden_visual, hidden_state], axis=1)
elif hidden_state is None and hidden_visual is not None:

else:
raise Exception("No valid network configuration possible. "
"There are no states or observations in this brain")
raise Exception(
"No valid network configuration possible. "
"There are no states or observations in this brain"
)
def create_recurrent_encoder(input_state, memory_in, sequence_length, name='lstm'):
def create_recurrent_encoder(input_state, memory_in, sequence_length, name="lstm"):
"""
Builds a recurrent encoder for either state or observations (LSTM).
:param sequence_length: Length of sequence to unroll.

_half_point = int(m_size / 2)
with tf.variable_scope(name):
rnn_cell = tf.contrib.rnn.BasicLSTMCell(_half_point)
lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:, :_half_point],
memory_in[:, _half_point:])
recurrent_output, lstm_state_out = tf.nn.dynamic_rnn(rnn_cell, lstm_input_state,
initial_state=lstm_vector_in)
lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(
memory_in[:, :_half_point], memory_in[:, _half_point:]
)
recurrent_output, lstm_state_out = tf.nn.dynamic_rnn(
rnn_cell, lstm_input_state, initial_state=lstm_vector_in
)
recurrent_output = tf.reshape(recurrent_output, shape=[-1, _half_point])
return recurrent_output, tf.concat([lstm_state_out.c, lstm_state_out.h], axis=1)

hidden_streams = self.create_observation_streams(2, h_size, num_layers)
if self.use_recurrent:
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
name='recurrent_in')
self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
hidden_streams[0], self.memory_in[:, :_half_point], self.sequence_length,
name='lstm_policy')
hidden_streams[0],
self.memory_in[:, :_half_point],
self.sequence_length,
name="lstm_policy",
)
hidden_streams[1], self.memory_in[:, _half_point:], self.sequence_length,
name='lstm_value')
self.memory_out = tf.concat([memory_policy_out, memory_value_out], axis=1,
name='recurrent_out')
hidden_streams[1],
self.memory_in[:, _half_point:],
self.sequence_length,
name="lstm_value",
)
self.memory_out = tf.concat(
[memory_policy_out, memory_value_out], axis=1, name="recurrent_out"
)
mu = tf.layers.dense(hidden_policy, self.act_size[0], activation=None,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
mu = tf.layers.dense(
hidden_policy,
self.act_size[0],
activation=None,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01),
)
log_sigma_sq = tf.get_variable("log_sigma_squared", [self.act_size[0]], dtype=tf.float32,
initializer=tf.zeros_initializer())
log_sigma_sq = tf.get_variable(
"log_sigma_squared",
[self.act_size[0]],
dtype=tf.float32,
initializer=tf.zeros_initializer(),
)
self.epsilon = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name='epsilon')
self.epsilon = tf.placeholder(
shape=[None, self.act_size[0]], dtype=tf.float32, name="epsilon"
)
self.output = tf.identity(output_post, name='action')
self.output = tf.identity(output_post, name="action")
all_probs = - 0.5 * tf.square(tf.stop_gradient(self.output_pre) - mu) / sigma_sq \
- 0.5 * tf.log(2.0 * np.pi) - 0.5 * log_sigma_sq
all_probs = (
-0.5 * tf.square(tf.stop_gradient(self.output_pre) - mu) / sigma_sq
- 0.5 * tf.log(2.0 * np.pi)
- 0.5 * log_sigma_sq
)
self.all_log_probs = tf.identity(all_probs, name='action_probs')
self.all_log_probs = tf.identity(all_probs, name="action_probs")
self.entropy = 0.5 * tf.reduce_mean(tf.log(2 * np.pi * np.e) + log_sigma_sq)

self.all_old_log_probs = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32,
name='old_probabilities')
self.all_old_log_probs = tf.placeholder(
shape=[None, self.act_size[0]], dtype=tf.float32, name="old_probabilities"
)
self.log_probs = tf.reduce_sum((tf.identity(self.all_log_probs)), axis=1, keepdims=True)
self.old_log_probs = tf.reduce_sum((tf.identity(self.all_old_log_probs)), axis=1,
keepdims=True)
self.log_probs = tf.reduce_sum(
(tf.identity(self.all_log_probs)), axis=1, keepdims=True
)
self.old_log_probs = tf.reduce_sum(
(tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
)
def create_dc_actor_critic(self, h_size, num_layers):
"""

hidden = hidden_streams[0]
if self.use_recurrent:
self.prev_action = tf.placeholder(shape=[None, len(self.act_size)], dtype=tf.int32,
name='prev_action')
prev_action_oh = tf.concat([
tf.one_hot(self.prev_action[:, i], self.act_size[i]) for i in
range(len(self.act_size))], axis=1)
self.prev_action = tf.placeholder(
shape=[None, len(self.act_size)], dtype=tf.int32, name="prev_action"
)
prev_action_oh = tf.concat(
[
tf.one_hot(self.prev_action[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
name='recurrent_in')
hidden, memory_out = self.create_recurrent_encoder(hidden, self.memory_in,
self.sequence_length)
self.memory_out = tf.identity(memory_out, name='recurrent_out')
self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
hidden, memory_out = self.create_recurrent_encoder(
hidden, self.memory_in, self.sequence_length
)
self.memory_out = tf.identity(memory_out, name="recurrent_out")
policy_branches.append(tf.layers.dense(hidden, size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
policy_branches.append(
tf.layers.dense(
hidden,
size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(
factor=0.01
),
)
)
self.all_log_probs = tf.concat([branch for branch in policy_branches], axis=1, name="action_probs")
self.all_log_probs = tf.concat(
[branch for branch in policy_branches], axis=1, name="action_probs"
)
self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
self.action_masks = tf.placeholder(
shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
)
self.all_log_probs, self.action_masks, self.act_size)
self.all_log_probs, self.action_masks, self.act_size
)
self.normalized_logits = tf.identity(normalized_logits, name='action')
self.normalized_logits = tf.identity(normalized_logits, name="action")
shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder")
self.action_oh = tf.concat([
tf.one_hot(self.action_holder[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
)
self.action_oh = tf.concat(
[
tf.one_hot(self.action_holder[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
shape=[None, sum(self.act_size)], dtype=tf.float32, name='old_probabilities')
shape=[None, sum(self.act_size)], dtype=tf.float32, name="old_probabilities"
)
self.all_old_log_probs, self.action_masks, self.act_size)
self.all_old_log_probs, self.action_masks, self.act_size
)
self.entropy = tf.reduce_sum((tf.stack([
tf.nn.softmax_cross_entropy_with_logits_v2(
labels=tf.nn.softmax(self.all_log_probs[:, action_idx[i]:action_idx[i + 1]]),
logits=self.all_log_probs[:, action_idx[i]:action_idx[i + 1]])
for i in range(len(self.act_size))], axis=1)), axis=1)
self.entropy = tf.reduce_sum(
(
tf.stack(
[
tf.nn.softmax_cross_entropy_with_logits_v2(
labels=tf.nn.softmax(
self.all_log_probs[:, action_idx[i] : action_idx[i + 1]]
),
logits=self.all_log_probs[
:, action_idx[i] : action_idx[i + 1]
],
)
for i in range(len(self.act_size))
],
axis=1,
)
),
axis=1,
)
self.log_probs = tf.reduce_sum((tf.stack([
-tf.nn.softmax_cross_entropy_with_logits_v2(
labels=self.action_oh[:, action_idx[i]:action_idx[i + 1]],
logits=normalized_logits[:, action_idx[i]:action_idx[i + 1]]
)
for i in range(len(self.act_size))], axis=1)), axis=1, keepdims=True)
self.old_log_probs = tf.reduce_sum((tf.stack([
-tf.nn.softmax_cross_entropy_with_logits_v2(
labels=self.action_oh[:, action_idx[i]:action_idx[i + 1]],
logits=old_normalized_logits[:, action_idx[i]:action_idx[i + 1]]
)
for i in range(len(self.act_size))], axis=1)), axis=1, keepdims=True)
self.log_probs = tf.reduce_sum(
(
tf.stack(
[
-tf.nn.softmax_cross_entropy_with_logits_v2(
labels=self.action_oh[:, action_idx[i] : action_idx[i + 1]],
logits=normalized_logits[
:, action_idx[i] : action_idx[i + 1]
],
)
for i in range(len(self.act_size))
],
axis=1,
)
),
axis=1,
keepdims=True,
)
self.old_log_probs = tf.reduce_sum(
(
tf.stack(
[
-tf.nn.softmax_cross_entropy_with_logits_v2(
labels=self.action_oh[:, action_idx[i] : action_idx[i + 1]],
logits=old_normalized_logits[
:, action_idx[i] : action_idx[i + 1]
],
)
for i in range(len(self.act_size))
],
axis=1,
)
),
axis=1,
keepdims=True,
)

83
ml-agents/mlagents/trainers/policy.py


"""
Related to errors with the Trainer.
"""
pass

functions to interact with it to perform evaluation and updating.
"""
possible_output_nodes = ['action', 'value_estimate',
'action_probs', 'recurrent_out', 'memory_size',
'version_number', 'is_continuous_control',
'action_output_shape']
possible_output_nodes = [
"action",
"value_estimate",
"action_probs",
"recurrent_out",
"memory_size",
"version_number",
"is_continuous_control",
"action_output_shape",
]
def __init__(self, seed, brain, trainer_parameters):
"""

self.seed = seed
self.brain = brain
self.use_recurrent = trainer_parameters["use_recurrent"]
self.use_continuous_act = (brain.vector_action_space_type == "continuous")
self.use_continuous_act = brain.vector_action_space_type == "continuous"
self.model_path = trainer_parameters["model_path"]
self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
self.graph = tf.Graph()

self.m_size = trainer_parameters["memory_size"]
self.sequence_length = trainer_parameters["sequence_length"]
if self.m_size == 0:
raise UnityPolicyException("The memory size for brain {0} is 0 even "
"though the trainer uses recurrent."
.format(brain.brain_name))
raise UnityPolicyException(
"The memory size for brain {0} is 0 even "
"though the trainer uses recurrent.".format(brain.brain_name)
)
raise UnityPolicyException("The memory size for brain {0} is {1} "
"but it must be divisible by 4."
.format(brain.brain_name, self.m_size))
raise UnityPolicyException(
"The memory size for brain {0} is {1} "
"but it must be divisible by 4.".format(
brain.brain_name, self.m_size
)
)
def _initialize_graph(self):
with self.graph.as_default():

def _load_graph(self):
with self.graph.as_default():
self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
logger.info('Loading Model for brain {}'.format(self.brain.brain_name))
logger.info("Loading Model for brain {}".format(self.brain.brain_name))
logger.info('The model {0} could not be found. Make '
'sure you specified the right '
'--run-id'
.format(self.model_path))
logger.info(
"The model {0} could not be found. Make "
"sure you specified the right "
"--run-id".format(self.model_path)
)
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
def evaluate(self, brain_info: BrainInfo):

run_out = self.evaluate(brain_info)
return ActionInfo(
action=run_out.get('action'),
memory=run_out.get('memory_out'),
action=run_out.get("action"),
memory=run_out.get("memory_out"),
value=run_out.get('value'),
outputs=run_out
value=run_out.get("value"),
outputs=run_out,
)
def update(self, mini_batch, num_sequences):

:return:
"""
with self.graph.as_default():
last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
last_checkpoint = self.model_path + "/model-" + str(steps) + ".cptk"
tf.train.write_graph(self.graph, self.model_path,
'raw_graph_def.pb', as_text=False)
tf.train.write_graph(
self.graph, self.model_path, "raw_graph_def.pb", as_text=False
)
def export_model(self):
"""

with self.graph.as_default():
target_nodes = ','.join(self._process_graph())
target_nodes = ",".join(self._process_graph())
input_graph=self.model_path + '/raw_graph_def.pb',
input_graph=self.model_path + "/raw_graph_def.pb",
output_graph=(self.model_path + '/frozen_graph_def.pb'),
clear_devices=True, initializer_nodes='', input_saver='',
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0')
output_graph=(self.model_path + "/frozen_graph_def.pb"),
clear_devices=True,
initializer_nodes="",
input_saver="",
restore_op_name="save/restore_all",
filename_tensor_name="save/Const:0",
)
tf2bc.convert(self.model_path + '/frozen_graph_def.pb', self.model_path + '.nn')
logger.info('Exported ' + self.model_path + '.nn file')
tf2bc.convert(self.model_path + "/frozen_graph_def.pb", self.model_path + ".nn")
logger.info("Exported " + self.model_path + ".nn file")
def _process_graph(self):
"""

all_nodes = [x.name for x in self.graph.as_graph_def().node]
nodes = [x for x in all_nodes if x in self.possible_output_nodes]
logger.info('List of nodes to export for brain :' + self.brain.brain_name)
logger.info("List of nodes to export for brain :" + self.brain.brain_name)
logger.info('\t' + n)
logger.info("\t" + n)
return nodes
@property

205
ml-agents/mlagents/trainers/ppo/models.py


class PPOModel(LearningModel):
def __init__(self, brain, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=5e6,
normalize=False, use_recurrent=False, num_layers=2, m_size=None, use_curiosity=False,
curiosity_strength=0.01, curiosity_enc_size=128, seed=0):
def __init__(
self,
brain,
lr=1e-4,
h_size=128,
epsilon=0.2,
beta=1e-3,
max_step=5e6,
normalize=False,
use_recurrent=False,
num_layers=2,
m_size=None,
use_curiosity=False,
curiosity_strength=0.01,
curiosity_enc_size=128,
seed=0,
):
"""
Takes a Unity environment and model-specific hyper-parameters and returns the
appropriate PPO agent model for the environment.

self.use_curiosity = use_curiosity
if num_layers < 1:
num_layers = 1
self.last_reward, self.new_reward, self.update_reward = self.create_reward_encoder()
self.last_reward, self.new_reward, self.update_reward = (
self.create_reward_encoder()
)
if brain.vector_action_space_type == "continuous":
self.create_cc_actor_critic(h_size, num_layers)
self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy

encoded_state, encoded_next_state = self.create_curiosity_encoders()
self.create_inverse_model(encoded_state, encoded_next_state)
self.create_forward_model(encoded_state, encoded_next_state)
self.create_ppo_optimizer(self.log_probs, self.old_log_probs, self.value,
self.entropy, beta, epsilon, lr, max_step)
self.create_ppo_optimizer(
self.log_probs,
self.old_log_probs,
self.value,
self.entropy,
beta,
epsilon,
lr,
max_step,
)
last_reward = tf.Variable(0, name="last_reward", trainable=False, dtype=tf.float32)
new_reward = tf.placeholder(shape=[], dtype=tf.float32, name='new_reward')
last_reward = tf.Variable(
0, name="last_reward", trainable=False, dtype=tf.float32
)
new_reward = tf.placeholder(shape=[], dtype=tf.float32, name="new_reward")
update_reward = tf.assign(last_reward, new_reward)
return last_reward, new_reward, update_reward

next_visual_encoders = []
for i in range(self.vis_obs_size):
# Create input ops for next (t+1) visual observations.
next_visual_input = self.create_visual_input(self.brain.camera_resolutions[i],
name="next_visual_observation_" + str(i))
next_visual_input = self.create_visual_input(
self.brain.camera_resolutions[i],
name="next_visual_observation_" + str(i),
)
encoded_visual = self.create_visual_observation_encoder(self.visual_in[i], self.curiosity_enc_size,
self.swish, 1, "stream_{}_visual_obs_encoder"
.format(i), False)
encoded_visual = self.create_visual_observation_encoder(
self.visual_in[i],
self.curiosity_enc_size,
self.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
False,
)
encoded_next_visual = self.create_visual_observation_encoder(self.next_visual_in[i],
self.curiosity_enc_size,
self.swish, 1,
"stream_{}_visual_obs_encoder".format(i),
True)
encoded_next_visual = self.create_visual_observation_encoder(
self.next_visual_in[i],
self.curiosity_enc_size,
self.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
True,
)
visual_encoders.append(encoded_visual)
next_visual_encoders.append(encoded_next_visual)

if self.vec_obs_size > 0:
# Create the encoder ops for current and next vector input. Note that these encoders are siamese.
# Create input op for next (t+1) vector observation.
self.next_vector_in = tf.placeholder(shape=[None, self.vec_obs_size], dtype=tf.float32,
name='next_vector_observation')
self.next_vector_in = tf.placeholder(
shape=[None, self.vec_obs_size],
dtype=tf.float32,
name="next_vector_observation",
)
encoded_vector_obs = self.create_vector_observation_encoder(self.vector_in,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
False)
encoded_next_vector_obs = self.create_vector_observation_encoder(self.next_vector_in,
self.curiosity_enc_size,
self.swish, 2,
"vector_obs_encoder",
True)
encoded_vector_obs = self.create_vector_observation_encoder(
self.vector_in,
self.curiosity_enc_size,
self.swish,
2,
"vector_obs_encoder",
False,
)
encoded_next_vector_obs = self.create_vector_observation_encoder(
self.next_vector_in,
self.curiosity_enc_size,
self.swish,
2,
"vector_obs_encoder",
True,
)
encoded_state_list.append(encoded_vector_obs)
encoded_next_state_list.append(encoded_next_vector_obs)

hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
if self.brain.vector_action_space_type == "continuous":
pred_action = tf.layers.dense(hidden, self.act_size[0], activation=None)
squared_difference = tf.reduce_sum(tf.squared_difference(pred_action, self.selected_actions), axis=1)
self.inverse_loss = tf.reduce_mean(tf.dynamic_partition(squared_difference, self.mask, 2)[1])
squared_difference = tf.reduce_sum(
tf.squared_difference(pred_action, self.selected_actions), axis=1
)
self.inverse_loss = tf.reduce_mean(
tf.dynamic_partition(squared_difference, self.mask, 2)[1]
)
[tf.layers.dense(hidden, self.act_size[i], activation=tf.nn.softmax)
for i in range(len(self.act_size))], axis=1)
cross_entropy = tf.reduce_sum(-tf.log(pred_action + 1e-10) * self.selected_actions, axis=1)
self.inverse_loss = tf.reduce_mean(tf.dynamic_partition(cross_entropy, self.mask, 2)[1])
[
tf.layers.dense(hidden, self.act_size[i], activation=tf.nn.softmax)
for i in range(len(self.act_size))
],
axis=1,
)
cross_entropy = tf.reduce_sum(
-tf.log(pred_action + 1e-10) * self.selected_actions, axis=1
)
self.inverse_loss = tf.reduce_mean(
tf.dynamic_partition(cross_entropy, self.mask, 2)[1]
)
def create_forward_model(self, encoded_state, encoded_next_state):
"""

combined_input = tf.concat([encoded_state, self.selected_actions], axis=1)
hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
# We compare against the concatenation of all observation streams, hence `self.vis_obs_size + int(self.vec_obs_size > 0)`.
pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.vis_obs_size + int(self.vec_obs_size > 0)),
activation=None)
pred_next_state = tf.layers.dense(
hidden,
self.curiosity_enc_size * (self.vis_obs_size + int(self.vec_obs_size > 0)),
activation=None,
)
squared_difference = 0.5 * tf.reduce_sum(tf.squared_difference(pred_next_state, encoded_next_state), axis=1)
self.intrinsic_reward = tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)
self.forward_loss = tf.reduce_mean(tf.dynamic_partition(squared_difference, self.mask, 2)[1])
squared_difference = 0.5 * tf.reduce_sum(
tf.squared_difference(pred_next_state, encoded_next_state), axis=1
)
self.intrinsic_reward = tf.clip_by_value(
self.curiosity_strength * squared_difference, 0, 1
)
self.forward_loss = tf.reduce_mean(
tf.dynamic_partition(squared_difference, self.mask, 2)[1]
)
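The intrinsic reward assembled above is the scaled, clipped half squared error between the forward model's prediction and the actual next-state encoding. A NumPy sketch of that per-agent arithmetic (array names and the strength value are illustrative):

import numpy as np

def intrinsic_reward(pred_next_state, encoded_next_state, curiosity_strength=0.1):
    # 0.5 * ||prediction - target||^2 per agent, scaled by the curiosity
    # strength and clipped to [0, 1], mirroring the ops above.
    squared_difference = 0.5 * np.sum(
        np.square(pred_next_state - encoded_next_state), axis=1
    )
    return np.clip(curiosity_strength * squared_difference, 0.0, 1.0)

pred = np.array([[0.2, 0.4], [1.0, 1.0]])
target = np.array([[0.0, 0.0], [1.0, 0.0]])
print(intrinsic_reward(pred, target))  # -> [0.01 0.05]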
def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
def create_ppo_optimizer(
self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step
):
"""
Creates training-specific Tensorflow ops for PPO models.
:param probs: Current policy probabilities

:param lr: Learning rate
:param max_step: Total number of training steps.
"""
self.returns_holder = tf.placeholder(shape=[None], dtype=tf.float32, name='discounted_rewards')
self.advantage = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='advantages')
self.learning_rate = tf.train.polynomial_decay(lr, self.global_step, max_step, 1e-10, power=1.0)
self.returns_holder = tf.placeholder(
shape=[None], dtype=tf.float32, name="discounted_rewards"
)
self.advantage = tf.placeholder(
shape=[None, 1], dtype=tf.float32, name="advantages"
)
self.learning_rate = tf.train.polynomial_decay(
lr, self.global_step, max_step, 1e-10, power=1.0
)
self.old_value = tf.placeholder(shape=[None], dtype=tf.float32, name='old_value_estimates')
self.old_value = tf.placeholder(
shape=[None], dtype=tf.float32, name="old_value_estimates"
)
decay_epsilon = tf.train.polynomial_decay(epsilon, self.global_step, max_step, 0.1, power=1.0)
decay_beta = tf.train.polynomial_decay(beta, self.global_step, max_step, 1e-5, power=1.0)
decay_epsilon = tf.train.polynomial_decay(
epsilon, self.global_step, max_step, 0.1, power=1.0
)
decay_beta = tf.train.polynomial_decay(
beta, self.global_step, max_step, 1e-5, power=1.0
)
clipped_value_estimate = self.old_value + tf.clip_by_value(tf.reduce_sum(value, axis=1) - self.old_value,
- decay_epsilon, decay_epsilon)
clipped_value_estimate = self.old_value + tf.clip_by_value(
tf.reduce_sum(value, axis=1) - self.old_value, -decay_epsilon, decay_epsilon
)
v_opt_a = tf.squared_difference(self.returns_holder, tf.reduce_sum(value, axis=1))
v_opt_a = tf.squared_difference(
self.returns_holder, tf.reduce_sum(value, axis=1)
)
self.value_loss = tf.reduce_mean(tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.mask, 2)[1])
self.value_loss = tf.reduce_mean(
tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.mask, 2)[1]
)
# Here we calculate PPO policy loss. In continuous control this is done independently for each action gaussian
# and then averaged together. This provides significantly better performance than treating the probability

p_opt_b = tf.clip_by_value(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * self.advantage
self.policy_loss = -tf.reduce_mean(tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.mask, 2)[1])
p_opt_b = (
tf.clip_by_value(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon)
* self.advantage
)
self.policy_loss = -tf.reduce_mean(
tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.mask, 2)[1]
)
self.loss = self.policy_loss + 0.5 * self.value_loss - decay_beta * tf.reduce_mean(
tf.dynamic_partition(entropy, self.mask, 2)[1])
self.loss = (
self.policy_loss
+ 0.5 * self.value_loss
- decay_beta
* tf.reduce_mean(tf.dynamic_partition(entropy, self.mask, 2)[1])
)
if self.use_curiosity:
self.loss += 10 * (0.2 * self.forward_loss + 0.8 * self.inverse_loss)
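Put together, the ops above implement the clipped PPO objective: a clipped value loss, a clipped policy surrogate, and an entropy bonus weighted by the decayed beta. A hedged NumPy sketch of the per-batch arithmetic (not the graph ops themselves, and without the episode masking used above):

import numpy as np

def ppo_losses(returns, value, old_value, r_theta, advantage, entropy,
               epsilon=0.2, beta=5e-3):
    # Clipped value loss: max of unclipped and clipped squared errors.
    clipped_value = old_value + np.clip(value - old_value, -epsilon, epsilon)
    v_opt_a = np.square(returns - value)
    v_opt_b = np.square(returns - clipped_value)
    value_loss = np.mean(np.maximum(v_opt_a, v_opt_b))

    # Clipped policy (surrogate) loss: min of unclipped and clipped objectives.
    p_opt_a = r_theta * advantage
    p_opt_b = np.clip(r_theta, 1.0 - epsilon, 1.0 + epsilon) * advantage
    policy_loss = -np.mean(np.minimum(p_opt_a, p_opt_b))

    # Total loss with entropy regularization, as in self.loss above.
    return policy_loss + 0.5 * value_loss - beta * np.mean(entropy)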

179
ml-agents/mlagents/trainers/ppo/policy.py


"""
super().__init__(seed, brain, trainer_params)
self.has_updated = False
self.use_curiosity = bool(trainer_params['use_curiosity'])
self.use_curiosity = bool(trainer_params["use_curiosity"])
self.model = PPOModel(brain,
lr=float(trainer_params['learning_rate']),
h_size=int(trainer_params['hidden_units']),
epsilon=float(trainer_params['epsilon']),
beta=float(trainer_params['beta']),
max_step=float(trainer_params['max_steps']),
normalize=trainer_params['normalize'],
use_recurrent=trainer_params['use_recurrent'],
num_layers=int(trainer_params['num_layers']),
m_size=self.m_size,
use_curiosity=bool(trainer_params['use_curiosity']),
curiosity_strength=float(trainer_params['curiosity_strength']),
curiosity_enc_size=float(trainer_params['curiosity_enc_size']),
seed=seed)
self.model = PPOModel(
brain,
lr=float(trainer_params["learning_rate"]),
h_size=int(trainer_params["hidden_units"]),
epsilon=float(trainer_params["epsilon"]),
beta=float(trainer_params["beta"]),
max_step=float(trainer_params["max_steps"]),
normalize=trainer_params["normalize"],
use_recurrent=trainer_params["use_recurrent"],
num_layers=int(trainer_params["num_layers"]),
m_size=self.m_size,
use_curiosity=bool(trainer_params["use_curiosity"]),
curiosity_strength=float(trainer_params["curiosity_strength"]),
curiosity_enc_size=float(trainer_params["curiosity_enc_size"]),
seed=seed,
)
if load:
self._load_graph()

self.inference_dict = {'action': self.model.output, 'log_probs': self.model.all_log_probs,
'value': self.model.value, 'entropy': self.model.entropy,
'learning_rate': self.model.learning_rate}
self.inference_dict = {
"action": self.model.output,
"log_probs": self.model.all_log_probs,
"value": self.model.value,
"entropy": self.model.entropy,
"learning_rate": self.model.learning_rate,
}
self.inference_dict['pre_action'] = self.model.output_pre
self.inference_dict["pre_action"] = self.model.output_pre
self.inference_dict['memory_out'] = self.model.memory_out
if is_training and self.use_vec_obs and trainer_params['normalize']:
self.inference_dict['update_mean'] = self.model.update_mean
self.inference_dict['update_variance'] = self.model.update_variance
self.inference_dict["memory_out"] = self.model.memory_out
if is_training and self.use_vec_obs and trainer_params["normalize"]:
self.inference_dict["update_mean"] = self.model.update_mean
self.inference_dict["update_variance"] = self.model.update_variance
self.update_dict = {'value_loss': self.model.value_loss,
'policy_loss': self.model.policy_loss,
'update_batch': self.model.update_batch}
self.update_dict = {
"value_loss": self.model.value_loss,
"policy_loss": self.model.policy_loss,
"update_batch": self.model.update_batch,
}
self.update_dict['forward_loss'] = self.model.forward_loss
self.update_dict['inverse_loss'] = self.model.inverse_loss
self.update_dict["forward_loss"] = self.model.forward_loss
self.update_dict["inverse_loss"] = self.model.inverse_loss
def evaluate(self, brain_info):
"""

"""
feed_dict = {self.model.batch_size: len(brain_info.vector_observations),
self.model.sequence_length: 1}
feed_dict = {
self.model.batch_size: len(brain_info.vector_observations),
self.model.sequence_length: 1,
}
feed_dict[self.model.prev_action] = brain_info.previous_vector_actions.reshape(
[-1, len(self.model.act_size)])
feed_dict[
self.model.prev_action
] = brain_info.previous_vector_actions.reshape(
[-1, len(self.model.act_size)]
)
size=(len(brain_info.vector_observations), self.model.act_size[0]))
size=(len(brain_info.vector_observations), self.model.act_size[0])
)
run_out['random_normal_epsilon'] = epsilon
run_out["random_normal_epsilon"] = epsilon
return run_out
def update(self, mini_batch, num_sequences):

:param mini_batch: Experience batch.
:return: Output from update process.
"""
feed_dict = {self.model.batch_size: num_sequences,
self.model.sequence_length: self.sequence_length,
self.model.mask_input: mini_batch['masks'].flatten(),
self.model.returns_holder: mini_batch['discounted_returns'].flatten(),
self.model.old_value: mini_batch['value_estimates'].flatten(),
self.model.advantage: mini_batch['advantages'].reshape([-1, 1]),
self.model.all_old_log_probs: mini_batch['action_probs'].reshape(
[-1, sum(self.model.act_size)])}
feed_dict = {
self.model.batch_size: num_sequences,
self.model.sequence_length: self.sequence_length,
self.model.mask_input: mini_batch["masks"].flatten(),
self.model.returns_holder: mini_batch["discounted_returns"].flatten(),
self.model.old_value: mini_batch["value_estimates"].flatten(),
self.model.advantage: mini_batch["advantages"].reshape([-1, 1]),
self.model.all_old_log_probs: mini_batch["action_probs"].reshape(
[-1, sum(self.model.act_size)]
),
}
feed_dict[self.model.output_pre] = mini_batch['actions_pre'].reshape(
[-1, self.model.act_size[0]])
feed_dict[self.model.epsilon] = mini_batch['random_normal_epsilon'].reshape(
[-1, self.model.act_size[0]])
feed_dict[self.model.output_pre] = mini_batch["actions_pre"].reshape(
[-1, self.model.act_size[0]]
)
feed_dict[self.model.epsilon] = mini_batch["random_normal_epsilon"].reshape(
[-1, self.model.act_size[0]]
)
feed_dict[self.model.action_holder] = mini_batch['actions'].reshape(
[-1, len(self.model.act_size)])
feed_dict[self.model.action_holder] = mini_batch["actions"].reshape(
[-1, len(self.model.act_size)]
)
feed_dict[self.model.prev_action] = mini_batch['prev_action'].reshape(
[-1, len(self.model.act_size)])
feed_dict[self.model.action_masks] = mini_batch['action_mask'].reshape(
[-1, sum(self.brain.vector_action_space_size)])
feed_dict[self.model.prev_action] = mini_batch["prev_action"].reshape(
[-1, len(self.model.act_size)]
)
feed_dict[self.model.action_masks] = mini_batch["action_mask"].reshape(
[-1, sum(self.brain.vector_action_space_size)]
)
feed_dict[self.model.vector_in] = mini_batch['vector_obs'].reshape(
[-1, self.vec_obs_size])
feed_dict[self.model.vector_in] = mini_batch["vector_obs"].reshape(
[-1, self.vec_obs_size]
)
feed_dict[self.model.next_vector_in] = mini_batch['next_vector_in'].reshape(
[-1, self.vec_obs_size])
feed_dict[self.model.next_vector_in] = mini_batch[
"next_vector_in"
].reshape([-1, self.vec_obs_size])
_obs = mini_batch['visual_obs%d' % i]
_obs = mini_batch["visual_obs%d" % i]
if self.sequence_length > 1 and self.use_recurrent:
(_batch, _seq, _w, _h, _c) = _obs.shape
feed_dict[self.model.visual_in[i]] = _obs.reshape([-1, _w, _h, _c])

for i, _ in enumerate(self.model.visual_in):
_obs = mini_batch['next_visual_obs%d' % i]
_obs = mini_batch["next_visual_obs%d" % i]
feed_dict[self.model.next_visual_in[i]] = _obs.reshape([-1, _w, _h, _c])
feed_dict[self.model.next_visual_in[i]] = _obs.reshape(
[-1, _w, _h, _c]
)
mem_in = mini_batch['memory'][:, 0, :]
mem_in = mini_batch["memory"][:, 0, :]
feed_dict[self.model.memory_in] = mem_in
self.has_updated = True
run_out = self._execute_model(feed_dict, self.update_dict)

if len(curr_info.agents) == 0:
return []
feed_dict = {self.model.batch_size: len(next_info.vector_observations),
self.model.sequence_length: 1}
feed_dict = {
self.model.batch_size: len(next_info.vector_observations),
self.model.sequence_length: 1,
}
feed_dict[self.model.selected_actions] = next_info.previous_vector_actions
feed_dict[
self.model.selected_actions
] = next_info.previous_vector_actions
feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i]
feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[
i
]
if self.use_vec_obs:
feed_dict[self.model.vector_in] = curr_info.vector_observations
feed_dict[self.model.next_vector_in] = next_info.vector_observations

feed_dict[self.model.memory_in] = curr_info.memories
intrinsic_rewards = self.sess.run(self.model.intrinsic_reward,
feed_dict=feed_dict) * float(self.has_updated)
intrinsic_rewards = self.sess.run(
self.model.intrinsic_reward, feed_dict=feed_dict
) * float(self.has_updated)
return intrinsic_rewards
else:
return None

"""
feed_dict = {self.model.batch_size: 1, self.model.sequence_length: 1}
for i in range(len(brain_info.visual_observations)):
feed_dict[self.model.visual_in[i]] = [brain_info.visual_observations[i][idx]]
feed_dict[self.model.visual_in[i]] = [
brain_info.visual_observations[i][idx]
]
if self.use_vec_obs:
feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]]
if self.use_recurrent:

if not self.use_continuous_act and self.use_recurrent:
feed_dict[self.model.prev_action] = brain_info.previous_vector_actions[idx].reshape(
[-1, len(self.model.act_size)])
feed_dict[self.model.prev_action] = brain_info.previous_vector_actions[
idx
].reshape([-1, len(self.model.act_size)])
value_estimate = self.sess.run(self.model.value, feed_dict)
return value_estimate

Updates reward value for policy.
:param new_reward: New reward to save.
"""
self.sess.run(self.model.update_reward,
feed_dict={self.model.new_reward: new_reward})
self.sess.run(
self.model.update_reward, feed_dict={self.model.new_reward: new_reward}
)

321
ml-agents/mlagents/trainers/ppo/trainer.py


class PPOTrainer(Trainer):
"""The PPOTrainer is an implementation of the PPO algorithm."""
def __init__(self, brain, reward_buff_cap, trainer_parameters, training,
load, seed, run_id):
def __init__(
self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id
):
"""
Responsible for collecting experiences and training PPO model.
:param trainer_parameters: The parameters for the trainer (dictionary).

:param run_id: The identifier of the current run
"""
super(PPOTrainer, self).__init__(brain, trainer_parameters,
training, run_id)
self.param_keys = ['batch_size', 'beta', 'buffer_size', 'epsilon', 'gamma', 'hidden_units', 'lambd',
'learning_rate', 'max_steps', 'normalize', 'num_epoch', 'num_layers',
'time_horizon', 'sequence_length', 'summary_freq', 'use_recurrent',
'summary_path', 'memory_size', 'use_curiosity', 'curiosity_strength',
'curiosity_enc_size', 'model_path']
super(PPOTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.param_keys = [
"batch_size",
"beta",
"buffer_size",
"epsilon",
"gamma",
"hidden_units",
"lambd",
"learning_rate",
"max_steps",
"normalize",
"num_epoch",
"num_layers",
"time_horizon",
"sequence_length",
"summary_freq",
"use_recurrent",
"summary_path",
"memory_size",
"use_curiosity",
"curiosity_strength",
"curiosity_enc_size",
"model_path",
]
self.use_curiosity = bool(trainer_parameters['use_curiosity'])
self.use_curiosity = bool(trainer_parameters["use_curiosity"])
self.policy = PPOPolicy(seed, brain, trainer_parameters,
self.is_training, load)
self.policy = PPOPolicy(seed, brain, trainer_parameters, self.is_training, load)
stats = {'Environment/Cumulative Reward': [], 'Environment/Episode Length': [],
'Policy/Value Estimate': [], 'Policy/Entropy': [], 'Losses/Value Loss': [],
'Losses/Policy Loss': [], 'Policy/Learning Rate': []}
stats = {
"Environment/Cumulative Reward": [],
"Environment/Episode Length": [],
"Policy/Value Estimate": [],
"Policy/Entropy": [],
"Losses/Value Loss": [],
"Losses/Policy Loss": [],
"Policy/Learning Rate": [],
}
stats['Losses/Forward Loss'] = []
stats['Losses/Inverse Loss'] = []
stats['Policy/Curiosity Reward'] = []
stats["Losses/Forward Loss"] = []
stats["Losses/Inverse Loss"] = []
stats["Policy/Curiosity Reward"] = []
self.intrinsic_rewards = {}
self.stats = stats

self.episode_steps = {}
def __str__(self):
return '''Hyperparameters for the PPO Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
return """Hyperparameters for the PPO Trainer of brain {0}: \n{1}""".format(
self.brain_name,
"\n".join(
[
"\t{0}:\t{1}".format(x, self.trainer_parameters[x])
for x in self.param_keys
]
),
)
@property
def parameters(self):

Returns the maximum number of steps, which is used to know when the trainer should be stopped.
:return: The maximum number of steps of the trainer
"""
return float(self.trainer_parameters['max_steps'])
return float(self.trainer_parameters["max_steps"])
@property
def get_step(self):

"""
Increments the step count of the trainer and updates the last reward
"""
if len(self.stats['Environment/Cumulative Reward']) > 0:
mean_reward = np.mean(self.stats['Environment/Cumulative Reward'])
if len(self.stats["Environment/Cumulative Reward"]) > 0:
mean_reward = np.mean(self.stats["Environment/Cumulative Reward"])
self.policy.update_reward(mean_reward)
self.policy.increment_step()
self.step = self.policy.get_current_step()

agent_brain_info = next_info
agent_index = agent_brain_info.agents.index(agent_id)
for i in range(len(next_info.visual_observations)):
visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index])
vector_observations.append(agent_brain_info.vector_observations[agent_index])
visual_observations[i].append(
agent_brain_info.visual_observations[i][agent_index]
)
vector_observations.append(
agent_brain_info.vector_observations[agent_index]
)
text_observations.append(agent_brain_info.text_observations[agent_index])
if self.policy.use_recurrent:
if len(agent_brain_info.memories) > 0:

local_dones.append(agent_brain_info.local_done[agent_index])
max_reacheds.append(agent_brain_info.max_reached[agent_index])
agents.append(agent_brain_info.agents[agent_index])
prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index])
prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index])
prev_vector_actions.append(
agent_brain_info.previous_vector_actions[agent_index]
)
prev_text_actions.append(
agent_brain_info.previous_text_actions[agent_index]
)
curr_info = BrainInfo(visual_observations, vector_observations, text_observations,
memories, rewards, agents, local_dones, prev_vector_actions,
prev_text_actions, max_reacheds, action_masks)
curr_info = BrainInfo(
visual_observations,
vector_observations,
text_observations,
memories,
rewards,
agents,
local_dones,
prev_vector_actions,
prev_text_actions,
max_reacheds,
action_masks,
)
def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainInfo, take_action_outputs):
def add_experiences(
self,
curr_all_info: AllBrainInfo,
next_all_info: AllBrainInfo,
take_action_outputs,
):
"""
Adds experiences to each agent's experience history.
:param curr_all_info: Dictionary of all current brains and corresponding BrainInfo.

self.trainer_metrics.start_experience_collection_timer()
if take_action_outputs:
self.stats['Policy/Value Estimate'].append(take_action_outputs['value'].mean())
self.stats['Policy/Entropy'].append(take_action_outputs['entropy'].mean())
self.stats['Policy/Learning Rate'].append(take_action_outputs['learning_rate'])
self.stats["Policy/Value Estimate"].append(
take_action_outputs["value"].mean()
)
self.stats["Policy/Entropy"].append(take_action_outputs["entropy"].mean())
self.stats["Policy/Learning Rate"].append(
take_action_outputs["learning_rate"]
)
curr_info = curr_all_info[self.brain_name]
next_info = next_all_info[self.brain_name]

self.training_buffer[agent_id].last_take_action_outputs = take_action_outputs
self.training_buffer[
agent_id
].last_take_action_outputs = take_action_outputs
if curr_info.agents != next_info.agents:
curr_to_use = self.construct_curr_info(next_info)

for agent_id in next_info.agents:
stored_info = self.training_buffer[agent_id].last_brain_info
stored_take_action_outputs = self.training_buffer[agent_id].last_take_action_outputs
stored_take_action_outputs = self.training_buffer[
agent_id
].last_take_action_outputs
self.training_buffer[agent_id]['visual_obs%d' % i].append(
stored_info.visual_observations[i][idx])
self.training_buffer[agent_id]['next_visual_obs%d' % i].append(
next_info.visual_observations[i][next_idx])
self.training_buffer[agent_id]["visual_obs%d" % i].append(
stored_info.visual_observations[i][idx]
)
self.training_buffer[agent_id]["next_visual_obs%d" % i].append(
next_info.visual_observations[i][next_idx]
)
self.training_buffer[agent_id]['vector_obs'].append(stored_info.vector_observations[idx])
self.training_buffer[agent_id]['next_vector_in'].append(
next_info.vector_observations[next_idx])
self.training_buffer[agent_id]["vector_obs"].append(
stored_info.vector_observations[idx]
)
self.training_buffer[agent_id]["next_vector_in"].append(
next_info.vector_observations[next_idx]
)
stored_info.memories = np.zeros((len(stored_info.agents), self.policy.m_size))
self.training_buffer[agent_id]['memory'].append(stored_info.memories[idx])
actions = stored_take_action_outputs['action']
stored_info.memories = np.zeros(
(len(stored_info.agents), self.policy.m_size)
)
self.training_buffer[agent_id]["memory"].append(
stored_info.memories[idx]
)
actions = stored_take_action_outputs["action"]
actions_pre = stored_take_action_outputs['pre_action']
self.training_buffer[agent_id]['actions_pre'].append(actions_pre[idx])
epsilons = stored_take_action_outputs['random_normal_epsilon']
self.training_buffer[agent_id]['random_normal_epsilon'].append(
epsilons[idx])
actions_pre = stored_take_action_outputs["pre_action"]
self.training_buffer[agent_id]["actions_pre"].append(
actions_pre[idx]
)
epsilons = stored_take_action_outputs["random_normal_epsilon"]
self.training_buffer[agent_id]["random_normal_epsilon"].append(
epsilons[idx]
)
self.training_buffer[agent_id]['action_mask'].append(
stored_info.action_masks[idx], padding_value=1)
a_dist = stored_take_action_outputs['log_probs']
value = stored_take_action_outputs['value']
self.training_buffer[agent_id]['actions'].append(actions[idx])
self.training_buffer[agent_id]['prev_action'].append(stored_info.previous_vector_actions[idx])
self.training_buffer[agent_id]['masks'].append(1.0)
self.training_buffer[agent_id]["action_mask"].append(
stored_info.action_masks[idx], padding_value=1
)
a_dist = stored_take_action_outputs["log_probs"]
value = stored_take_action_outputs["value"]
self.training_buffer[agent_id]["actions"].append(actions[idx])
self.training_buffer[agent_id]["prev_action"].append(
stored_info.previous_vector_actions[idx]
)
self.training_buffer[agent_id]["masks"].append(1.0)
self.training_buffer[agent_id]['rewards'].append(next_info.rewards[next_idx] +
intrinsic_rewards[next_idx])
self.training_buffer[agent_id]["rewards"].append(
next_info.rewards[next_idx] + intrinsic_rewards[next_idx]
)
self.training_buffer[agent_id]['rewards'].append(next_info.rewards[next_idx])
self.training_buffer[agent_id]['action_probs'].append(a_dist[idx])
self.training_buffer[agent_id]['value_estimates'].append(value[idx][0])
self.training_buffer[agent_id]["rewards"].append(
next_info.rewards[next_idx]
)
self.training_buffer[agent_id]["action_probs"].append(a_dist[idx])
self.training_buffer[agent_id]["value_estimates"].append(
value[idx][0]
)
if agent_id not in self.cumulative_rewards:
self.cumulative_rewards[agent_id] = 0
self.cumulative_rewards[agent_id] += next_info.rewards[next_idx]

self.trainer_metrics.start_experience_collection_timer()
info = new_info[self.brain_name]
for l in range(len(info.agents)):
agent_actions = self.training_buffer[info.agents[l]]['actions']
if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
and len(agent_actions) > 0):
agent_actions = self.training_buffer[info.agents[l]]["actions"]
if (
info.local_done[l]
or len(agent_actions) > self.trainer_parameters["time_horizon"]
) and len(agent_actions) > 0:
bootstrapping_info = self.training_buffer[agent_id].last_brain_info
bootstrapping_info = self.training_buffer[
agent_id
].last_brain_info
idx = bootstrapping_info.agents.index(agent_id)
else:
bootstrapping_info = info

self.training_buffer[agent_id]['advantages'].set(
self.training_buffer[agent_id]["advantages"].set(
rewards=self.training_buffer[agent_id]['rewards'].get_batch(),
value_estimates=self.training_buffer[agent_id]['value_estimates'].get_batch(),
rewards=self.training_buffer[agent_id]["rewards"].get_batch(),
value_estimates=self.training_buffer[agent_id][
"value_estimates"
].get_batch(),
gamma=self.trainer_parameters['gamma'],
lambd=self.trainer_parameters['lambd']))
self.training_buffer[agent_id]['discounted_returns'].set(
self.training_buffer[agent_id]['advantages'].get_batch()
+ self.training_buffer[agent_id]['value_estimates'].get_batch())
gamma=self.trainer_parameters["gamma"],
lambd=self.trainer_parameters["lambd"],
)
)
self.training_buffer[agent_id]["discounted_returns"].set(
self.training_buffer[agent_id]["advantages"].get_batch()
+ self.training_buffer[agent_id]["value_estimates"].get_batch()
)
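The "advantages" buffer set above is computed with generalized advantage estimation over each agent's rewards and value estimates, and "discounted_returns" is then simply advantages plus value estimates. A minimal NumPy sketch of the usual GAE recursion, assumed here as a stand-in for the project's own helper:

import numpy as np

def gae(rewards, value_estimates, gamma=0.99, lambd=0.95, value_next=0.0):
    # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t), accumulated backwards
    # with decay gamma * lambd.
    values = np.append(value_estimates, value_next)
    deltas = rewards + gamma * values[1:] - values[:-1]
    advantages = np.zeros_like(deltas)
    running = 0.0
    for t in reversed(range(len(deltas))):
        running = deltas[t] + gamma * lambd * running
        advantages[t] = running
    return advantages

rewards = np.array([0.0, 0.0, 1.0])
values = np.array([0.5, 0.6, 0.7])
adv = gae(rewards, values)
returns = adv + values  # corresponds to the 'discounted_returns' buffer above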
self.training_buffer.append_update_buffer(agent_id, batch_size=None,
training_length=self.policy.sequence_length)
self.training_buffer.append_update_buffer(
agent_id,
batch_size=None,
training_length=self.policy.sequence_length,
)
self.cumulative_returns_since_policy_update.append(self.
cumulative_rewards.get(agent_id, 0))
self.stats['Environment/Cumulative Reward'].append(
self.cumulative_rewards.get(agent_id, 0))
self.reward_buffer.appendleft(self.cumulative_rewards.get(agent_id, 0))
self.stats['Environment/Episode Length'].append(
self.episode_steps.get(agent_id, 0))
self.cumulative_returns_since_policy_update.append(
self.cumulative_rewards.get(agent_id, 0)
)
self.stats["Environment/Cumulative Reward"].append(
self.cumulative_rewards.get(agent_id, 0)
)
self.reward_buffer.appendleft(
self.cumulative_rewards.get(agent_id, 0)
)
self.stats["Environment/Episode Length"].append(
self.episode_steps.get(agent_id, 0)
)
self.stats['Policy/Curiosity Reward'].append(
self.intrinsic_rewards.get(agent_id, 0))
self.stats["Policy/Curiosity Reward"].append(
self.intrinsic_rewards.get(agent_id, 0)
)
self.intrinsic_rewards[agent_id] = 0
self.trainer_metrics.end_experience_collection_timer()

Returns whether or not the trainer has enough elements to run a model update
:return: A boolean corresponding to whether or not update_model() can be run
"""
size_of_buffer = len(self.training_buffer.update_buffer['actions'])
return size_of_buffer > max(int(self.trainer_parameters['buffer_size'] / self.policy.sequence_length), 1)
size_of_buffer = len(self.training_buffer.update_buffer["actions"])
return size_of_buffer > max(
int(self.trainer_parameters["buffer_size"] / self.policy.sequence_length), 1
)
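As a worked example of the readiness check above: assuming buffer_size is 2048 and the policy's sequence_length is 16 (illustrative values, not defaults taken from this diff), update_policy() becomes eligible once the update buffer holds more than max(2048 / 16, 1) = 128 entries.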
def update_policy(self):
"""

number_experiences=len(self.training_buffer.update_buffer['actions']),
mean_return=float(np.mean(self.cumulative_returns_since_policy_update)))
n_sequences = max(int(self.trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
number_experiences=len(self.training_buffer.update_buffer["actions"]),
mean_return=float(np.mean(self.cumulative_returns_since_policy_update)),
)
n_sequences = max(
int(self.trainer_parameters["batch_size"] / self.policy.sequence_length), 1
)
advantages = self.training_buffer.update_buffer['advantages'].get_batch()
self.training_buffer.update_buffer['advantages'].set(
(advantages - advantages.mean()) / (advantages.std() + 1e-10))
num_epoch = self.trainer_parameters['num_epoch']
advantages = self.training_buffer.update_buffer["advantages"].get_batch()
self.training_buffer.update_buffer["advantages"].set(
(advantages - advantages.mean()) / (advantages.std() + 1e-10)
)
num_epoch = self.trainer_parameters["num_epoch"]
for l in range(len(self.training_buffer.update_buffer['actions']) // n_sequences):
for l in range(
len(self.training_buffer.update_buffer["actions"]) // n_sequences
):
run_out = self.policy.update(buffer.make_mini_batch(start, end), n_sequences)
value_total.append(run_out['value_loss'])
policy_total.append(np.abs(run_out['policy_loss']))
run_out = self.policy.update(
buffer.make_mini_batch(start, end), n_sequences
)
value_total.append(run_out["value_loss"])
policy_total.append(np.abs(run_out["policy_loss"]))
inverse_total.append(run_out['inverse_loss'])
forward_total.append(run_out['forward_loss'])
self.stats['Losses/Value Loss'].append(np.mean(value_total))
self.stats['Losses/Policy Loss'].append(np.mean(policy_total))
inverse_total.append(run_out["inverse_loss"])
forward_total.append(run_out["forward_loss"])
self.stats["Losses/Value Loss"].append(np.mean(value_total))
self.stats["Losses/Policy Loss"].append(np.mean(policy_total))
self.stats['Losses/Forward Loss'].append(np.mean(forward_total))
self.stats['Losses/Inverse Loss'].append(np.mean(inverse_total))
self.stats["Losses/Forward Loss"].append(np.mean(forward_total))
self.stats["Losses/Inverse Loss"].append(np.mean(inverse_total))
def discount_rewards(r, gamma=0.99, value_next=0.0):
"""

877
ml-agents/mlagents/trainers/tensorflow_to_barracuda.py
File diff too large to display
View file

12
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


def test_barracuda_converter():
path_prefix = os.path.dirname(os.path.abspath(__file__))
tmpfile = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) + '.nn')
tmpfile = os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) + ".nn"
)
tf2bc.convert(path_prefix+'/BasicLearning.pb', tmpfile)
tf2bc.convert(path_prefix + "/BasicLearning.pb", tmpfile)
assert (os.path.isfile(tmpfile))
assert os.path.isfile(tmpfile)
# 100 bytes is high enough to prove that conversion was successful
assert (os.path.getsize(tmpfile) > 100)
# 100 bytes is high enough to prove that conversion was successful
assert os.path.getsize(tmpfile) > 100
# cleanup
os.remove(tmpfile)

110
ml-agents/mlagents/trainers/tests/test_bc.py


@pytest.fixture
def dummy_config():
return yaml.load(
'''
"""
hidden_units: 128
learning_rate: 3.0e-4
num_layers: 2

''')
"""
)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
trainer_parameters['model_path'] = model_path
trainer_parameters['keep_checkpoints'] = 3
trainer_parameters["model_path"] = model_path
trainer_parameters["keep_checkpoints"] = 3
assert run_out['action'].shape == (3, 2)
assert run_out["action"].shape == (3, 2)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]])}
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=True, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=True, visual_inputs=0
)
env = UnityEnvironment(" ")
feed_dict = {model.batch_size: 2,
model.dropout_rate: 1.0,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([2, 2])}
feed_dict = {
model.batch_size: 2,
model.dropout_rate: 1.0,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([2, 2]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=True, visual_inputs=2)
env = UnityEnvironment(' ')
discrete_action=True, visual_inputs=2
)
env = UnityEnvironment(" ")
feed_dict = {model.batch_size: 2,
model.dropout_rate: 1.0,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2, 2])}
feed_dict = {
model.batch_size: 2,
model.dropout_rate: 1.0,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2, 2]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=2)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=2
)
env = UnityEnvironment(" ")
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3])}
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
}
if __name__ == '__main__':
if __name__ == "__main__":
pytest.main()

62
ml-agents/mlagents/trainers/tests/test_buffer.py


b = Buffer()
for fake_agent_id in range(4):
for step in range(9):
b[fake_agent_id]['vector_observation'].append(
[100 * fake_agent_id + 10 * step + 1,
100 * fake_agent_id + 10 * step + 2,
100 * fake_agent_id + 10 * step + 3]
b[fake_agent_id]["vector_observation"].append(
[
100 * fake_agent_id + 10 * step + 1,
100 * fake_agent_id + 10 * step + 2,
100 * fake_agent_id + 10 * step + 3,
]
)
b[fake_agent_id]["action"].append(
[
100 * fake_agent_id + 10 * step + 4,
100 * fake_agent_id + 10 * step + 5,
]
b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4,
100 * fake_agent_id + 10 * step + 5])
a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=1, sequential=True)
a = b[1]["vector_observation"].get_batch(
batch_size=2, training_length=1, sequential=True
)
a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True)
assert_array(a, np.array([
[[231, 232, 233], [241, 242, 243], [251, 252, 253]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]]
]))
a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=False)
assert_array(a, np.array([
[[251, 252, 253], [261, 262, 263], [271, 272, 273]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]]
]))
a = b[2]["vector_observation"].get_batch(
batch_size=2, training_length=3, sequential=True
)
assert_array(
a,
np.array(
[
[[231, 232, 233], [241, 242, 243], [251, 252, 253]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]],
]
),
)
a = b[2]["vector_observation"].get_batch(
batch_size=2, training_length=3, sequential=False
)
assert_array(
a,
np.array(
[
[[251, 252, 253], [261, 262, 263], [271, 272, 273]],
[[261, 262, 263], [271, 272, 273], [281, 282, 283]],
]
),
)
assert len(b.update_buffer['action']) == 10
assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
assert len(b.update_buffer["action"]) == 10
assert np.array(b.update_buffer["action"]).shape == (10, 2, 2)
assert c['action'].shape == (1, 2, 2)
assert c["action"].shape == (1, 2, 2)

31
ml-agents/mlagents/trainers/tests/test_curriculum.py


from mlagents.trainers import Curriculum
dummy_curriculum_json_str = '''
dummy_curriculum_json_str = """
{
"measure" : "reward",
"thresholds" : [10, 20, 50],

"param3" : [0.2, 0.3, 0.7, 0.9]
}
}
'''
"""
bad_curriculum_json_str = '''
bad_curriculum_json_str = """
{
"measure" : "reward",
"thresholds" : [10, 20, 50],

"param3" : [0.2, 0.3, 0.7, 0.9]
}
}
'''
"""
return 'TestBrain.json'
return "TestBrain.json"
@pytest.fixture

@patch('builtins.open', new_callable=mock_open, read_data=dummy_curriculum_json_str)
@patch("builtins.open", new_callable=mock_open, read_data=dummy_curriculum_json_str)
assert curriculum._brain_name == 'TestBrain'
assert curriculum._brain_name == "TestBrain"
assert curriculum.measure == 'reward'
assert curriculum.measure == "reward"
@patch('builtins.open', new_callable=mock_open, read_data=bad_curriculum_json_str)
def test_init_curriculum_bad_curriculum_raises_error(mock_file, location, default_reset_parameters):
@patch("builtins.open", new_callable=mock_open, read_data=bad_curriculum_json_str)
def test_init_curriculum_bad_curriculum_raises_error(
mock_file, location, default_reset_parameters
):
@patch('builtins.open', new_callable=mock_open, read_data=dummy_curriculum_json_str)
@patch("builtins.open", new_callable=mock_open, read_data=dummy_curriculum_json_str)
def test_increment_lesson(mock_file, location, default_reset_parameters):
curriculum = Curriculum(location, default_reset_parameters)
assert curriculum.lesson_num == 0

assert curriculum.lesson_num == 3
@patch('builtins.open', new_callable=mock_open, read_data=dummy_curriculum_json_str)
@patch("builtins.open", new_callable=mock_open, read_data=dummy_curriculum_json_str)
curriculum = Curriculum('TestBrain.json', {"param1": 1, "param2": 1, "param3": 1})
curriculum = Curriculum("TestBrain.json", {"param1": 1, "param2": 1, "param3": 1})
assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
assert curriculum.get_config() == {"param1": 0.3, "param2": 20, "param3": 0.7}
assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}

12
ml-agents/mlagents/trainers/tests/test_demo_loader.py


def test_load_demo():
path_prefix = os.path.dirname(os.path.abspath(__file__))
brain_parameters, brain_infos, total_expected = load_demonstration(path_prefix+'/test.demo')
assert (brain_parameters.brain_name == "Ball3DBrain")
assert (brain_parameters.vector_observation_space_size == 8)
assert (len(brain_infos) == total_expected)
brain_parameters, brain_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
assert brain_parameters.brain_name == "Ball3DBrain"
assert brain_parameters.vector_observation_space_size == 8
assert len(brain_infos) == total_expected
assert (len(demo_buffer.update_buffer['actions']) == total_expected - 1)
assert len(demo_buffer.update_buffer["actions"]) == total_expected - 1

60
ml-agents/mlagents/trainers/tests/test_learn.py


@pytest.fixture
def basic_options():
return {
'--docker-target-name': 'None',
'--env': 'None',
'--run-id': 'ppo',
'--load': False,
'--train': False,
'--save-freq': '50000',
'--keep-checkpoints': '5',
'--base-port': '5005',
'--num-envs': '1',
'--curriculum': 'None',
'--lesson': '0',
'--slow': False,
'--no-graphics': False,
'<trainer-config-path>': 'basic_path',
'--debug': False,
"--docker-target-name": "None",
"--env": "None",
"--run-id": "ppo",
"--load": False,
"--train": False,
"--save-freq": "50000",
"--keep-checkpoints": "5",
"--base-port": "5005",
"--num-envs": "1",
"--curriculum": "None",
"--lesson": "0",
"--slow": False,
"--no-graphics": False,
"<trainer-config-path>": "basic_path",
"--debug": False,
@patch('mlagents.trainers.learn.SubprocessUnityEnvironment')
@patch('mlagents.trainers.learn.create_environment_factory')
@patch('mlagents.trainers.learn.load_config')
@patch("mlagents.trainers.learn.SubprocessUnityEnvironment")
@patch("mlagents.trainers.learn.create_environment_factory")
@patch("mlagents.trainers.learn.load_config")
mock_env.academy_name = 'TestAcademyName'
mock_env.academy_name = "TestAcademyName"
create_environment_factory.return_value = mock_env
trainer_config_mock = MagicMock()
load_config.return_value = trainer_config_mock

with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, 0, basic_options(), MagicMock())
mock_init.assert_called_once_with(
'./models/ppo-0',
'./summaries',
'ppo-0',
"./models/ppo-0",
"./summaries",
"ppo-0",
50000,
None,
False,

subproc_env_mock.return_value.external_brains,
0,
True
True,
@patch('mlagents.trainers.learn.SubprocessUnityEnvironment')
@patch('mlagents.trainers.learn.create_environment_factory')
@patch('mlagents.trainers.learn.load_config')
@patch("mlagents.trainers.learn.SubprocessUnityEnvironment")
@patch("mlagents.trainers.learn.create_environment_factory")
@patch("mlagents.trainers.learn.load_config")
mock_env.academy_name = 'TestAcademyName'
mock_env.academy_name = "TestAcademyName"
options_with_docker_target['--docker-target-name'] = 'dockertarget'
options_with_docker_target["--docker-target-name"] = "dockertarget"
mock_init = MagicMock(return_value=None)
with patch.object(TrainerController, "__init__", mock_init):

assert(mock_init.call_args[0][0] == '/dockertarget/models/ppo-0')
assert(mock_init.call_args[0][1] == '/dockertarget/summaries')
assert mock_init.call_args[0][0] == "/dockertarget/models/ppo-0"
assert mock_init.call_args[0][1] == "/dockertarget/summaries"

98
ml-agents/mlagents/trainers/tests/test_meta_curriculum.py


"""This class allows us to test MetaCurriculum objects without calling
MetaCurriculum's __init__ function.
"""
def __init__(self, brains_to_curriculums):
self._brains_to_curriculums = brains_to_curriculums

return {'param1' : 1, 'param2' : 2, 'param3' : 3}
return {"param1": 1, "param2": 2, "param3": 3}
return {'param4' : 4, 'param5' : 5, 'param6' : 6}
return {"param4": 4, "param5": 5, "param6": 6}
return {'Brain1' : 0.2, 'Brain2' : 0.3}
return {"Brain1": 0.2, "Brain2": 0.3}
return {'Brain1' : 7, 'Brain2' : 8}
return {"Brain1": 7, "Brain2": 8}
@patch('mlagents.trainers.Curriculum.get_config', return_value={})
@patch('mlagents.trainers.Curriculum.__init__', return_value=None)
@patch('os.listdir', return_value=['Brain1.json', 'Brain2.json'])
def test_init_meta_curriculum_happy_path(listdir, mock_curriculum_init,
mock_curriculum_get_config,
default_reset_parameters):
meta_curriculum = MetaCurriculum('test/', default_reset_parameters)
@patch("mlagents.trainers.Curriculum.get_config", return_value={})
@patch("mlagents.trainers.Curriculum.__init__", return_value=None)
@patch("os.listdir", return_value=["Brain1.json", "Brain2.json"])
def test_init_meta_curriculum_happy_path(
listdir, mock_curriculum_init, mock_curriculum_get_config, default_reset_parameters
):
meta_curriculum = MetaCurriculum("test/", default_reset_parameters)
assert 'Brain1' in meta_curriculum.brains_to_curriculums
assert 'Brain2' in meta_curriculum.brains_to_curriculums
assert "Brain1" in meta_curriculum.brains_to_curriculums
assert "Brain2" in meta_curriculum.brains_to_curriculums
calls = [call('test/Brain1.json', default_reset_parameters),
call('test/Brain2.json', default_reset_parameters)]
calls = [
call("test/Brain1.json", default_reset_parameters),
call("test/Brain2.json", default_reset_parameters),
]
@patch('os.listdir', side_effect=NotADirectoryError())
@patch("os.listdir", side_effect=NotADirectoryError())
MetaCurriculum('test/', default_reset_parameters)
MetaCurriculum("test/", default_reset_parameters)
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
'Brain2' : curriculum_b})
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}
)
meta_curriculum.lesson_nums = {'Brain1' : 1, 'Brain2' : 3}
meta_curriculum.lesson_nums = {"Brain1": 1, "Brain2": 3}
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
'Brain2' : curriculum_b})
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}
)
meta_curriculum.increment_lessons(measure_vals)

@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_increment_lessons_with_reward_buff_sizes(curriculum_a, curriculum_b,
measure_vals,
reward_buff_sizes):
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
def test_increment_lessons_with_reward_buff_sizes(
curriculum_a, curriculum_b, measure_vals, reward_buff_sizes
):
meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
'Brain2' : curriculum_b})
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}
)
meta_curriculum.increment_lessons(measure_vals,
reward_buff_sizes=reward_buff_sizes)
meta_curriculum.increment_lessons(measure_vals, reward_buff_sizes=reward_buff_sizes)
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
'Brain2' : curriculum_b})
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}
)
meta_curriculum.set_all_curriculums_to_lesson_num(2)

@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_get_config(curriculum_a, curriculum_b, default_reset_parameters,
more_reset_parameters):
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
def test_get_config(
curriculum_a, curriculum_b, default_reset_parameters, more_reset_parameters
):
meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
'Brain2' : curriculum_b})
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}
)
assert meta_curriculum.get_config() == default_reset_parameters

31
ml-agents/mlagents/trainers/tests/test_policy.py


from mlagents.trainers.policy import *
from unittest.mock import MagicMock
return {
"use_recurrent": False,
"model_path": "my/path"
}
return {"use_recurrent": False, "model_path": "my/path"}
def test_take_action_returns_empty_with_no_agents():

result = policy.get_action(no_agent_brain_info)
assert(result == ActionInfo([], [], [], None, None))
assert result == ActionInfo([], [], [], None, None)
def test_take_action_returns_nones_on_missing_values():

brain_info_with_agents = BrainInfo([], [], [], agents=['an-agent-id'])
brain_info_with_agents = BrainInfo([], [], [], agents=["an-agent-id"])
assert(result == ActionInfo(None, None, None, None, {}))
assert result == ActionInfo(None, None, None, None, {})
def test_take_action_returns_action_info_when_available():

'action': np.array([1.0]),
'memory_out': np.array([2.5]),
'value': np.array([1.1])
"action": np.array([1.0]),
"memory_out": np.array([2.5]),
"value": np.array([1.1]),
brain_info_with_agents = BrainInfo([], [], [], agents=['an-agent-id'])
brain_info_with_agents = BrainInfo([], [], [], agents=["an-agent-id"])
policy_eval_out['action'],
policy_eval_out['memory_out'],
policy_eval_out["action"],
policy_eval_out["memory_out"],
policy_eval_out['value'],
policy_eval_out
policy_eval_out["value"],
policy_eval_out,
assert (result == expected)
assert result == expected

391
ml-agents/mlagents/trainers/tests/test_ppo.py


@pytest.fixture
def dummy_config():
return yaml.load(
'''
"""
trainer: ppo
batch_size: 32
beta: 5.0e-3

use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
"""
)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
trainer_parameters['model_path'] = model_path
trainer_parameters['keep_checkpoints'] = 3
policy = PPOPolicy(0, env.brains[env.brain_names[0]], trainer_parameters, False, False)
trainer_parameters["model_path"] = model_path
trainer_parameters["keep_checkpoints"] = 3
policy = PPOPolicy(
0, env.brains[env.brain_names[0]], trainer_parameters, False, False
)
assert run_out['action'].shape == (3, 2)
assert run_out["action"].shape == (3, 2)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
run_list = [model.output, model.log_probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]],),
model.epsilon: np.array([[0, 1], [2, 3]])}
run_list = [
model.output,
model.log_probs,
model.value,
model.entropy,
model.learning_rate,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.epsilon: np.array([[0, 1], [2, 3]]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=2)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=2
)
env = UnityEnvironment(" ")
run_list = [model.output, model.log_probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.epsilon: np.array([[0, 1], [2, 3]])}
run_list = [
model.output,
model.log_probs,
model.value,
model.entropy,
model.learning_rate,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.epsilon: np.array([[0, 1], [2, 3]]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=True, visual_inputs=2)
env = UnityEnvironment(' ')
discrete_action=True, visual_inputs=2
)
env = UnityEnvironment(" ")
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2, 2],)
}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2, 2]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=True, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=True, visual_inputs=0
)
env = UnityEnvironment(" ")
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([2, 2])}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([2, 2]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=True, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=True, visual_inputs=0
)
env = UnityEnvironment(" ")
model = PPOModel(env.brains["RealFakeBrain"], use_recurrent=True, m_size=memory_size)
model = PPOModel(
env.brains["RealFakeBrain"], use_recurrent=True, m_size=memory_size
)
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate, model.memory_out]
feed_dict = {model.batch_size: 1,
model.sequence_length: 2,
model.prev_action: [[0], [0]],
model.memory_in: np.zeros((1, memory_size)),
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([1, 2])}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
model.memory_out,
]
feed_dict = {
model.batch_size: 1,
model.sequence_length: 2,
model.prev_action: [[0], [0]],
model.memory_in: np.zeros((1, memory_size)),
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([1, 2]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
model = PPOModel(env.brains["RealFakeBrain"], use_recurrent=True, m_size=memory_size)
model = PPOModel(
env.brains["RealFakeBrain"], use_recurrent=True, m_size=memory_size
)
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate, model.memory_out]
feed_dict = {model.batch_size: 1,
model.sequence_length: 2,
model.memory_in: np.zeros((1, memory_size)),
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.epsilon: np.array([[0, 1]])}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
model.memory_out,
]
feed_dict = {
model.batch_size: 1,
model.sequence_length: 2,
model.memory_in: np.zeros((1, memory_size)),
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.epsilon: np.array([[0, 1]]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=True, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=True, visual_inputs=0
)
env = UnityEnvironment(" ")
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate, model.intrinsic_reward]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.action_holder: [[0], [0]],
model.action_masks: np.ones([2,2])}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
model.intrinsic_reward,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array(
[[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]
),
model.action_holder: [[0], [0]],
model.action_masks: np.ones([2, 2]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=0)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate, model.intrinsic_reward]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.output: [[0.0, 0.0], [0.0, 0.0]],
model.epsilon: np.array([[0, 1], [2, 3]])}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
model.intrinsic_reward,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array(
[[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]
),
model.output: [[0.0, 0.0], [0.0, 0.0]],
model.epsilon: np.array([[0, 1], [2, 3]]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=True, visual_inputs=2)
env = UnityEnvironment(' ')
discrete_action=True, visual_inputs=2
)
env = UnityEnvironment(" ")
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate, model.intrinsic_reward]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.action_holder: [[0], [0]],
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
model.next_visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2, 2])
}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
model.intrinsic_reward,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array(
[[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]
),
model.action_holder: [[0], [0]],
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
model.next_visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2, 2]),
}
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
discrete_action=False, visual_inputs=2)
env = UnityEnvironment(' ')
discrete_action=False, visual_inputs=2
)
env = UnityEnvironment(" ")
run_list = [model.output, model.all_log_probs, model.value, model.entropy,
model.learning_rate, model.intrinsic_reward]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.output: [[0.0, 0.0], [0.0, 0.0]],
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
model.next_visual_in[1]: np.ones([2, 40, 30, 3]),
model.epsilon: np.array([[0, 1], [2, 3]])
}
run_list = [
model.output,
model.all_log_probs,
model.value,
model.entropy,
model.learning_rate,
model.intrinsic_reward,
]
feed_dict = {
model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array(
[[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]
),
model.output: [[0.0, 0.0], [0.0, 0.0]],
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
model.next_visual_in[1]: np.ones([2, 40, 30, 3]),
model.epsilon: np.array([[0, 1], [2, 3]]),
}
sess.run(run_list, feed_dict=feed_dict)
env.close()

np.testing.assert_array_almost_equal(returns, np.array([0.729, 0.81, 0.9, 1.0]))
if __name__ == '__main__':
if __name__ == "__main__":
pytest.main()

206
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


@pytest.fixture
def dummy_config():
return yaml.load(
'''
"""
default:
trainer: ppo
batch_size: 32

use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
"""
)
'''
"""
default:
trainer: online_bc
brain_to_imitate: ExpertBrain

use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
"""
)
'''
"""
demo_path: '''
+ os.path.dirname(os.path.abspath(__file__)) + '''/test.demo
demo_path: """
+ os.path.dirname(os.path.abspath(__file__))
+ """/test.demo
batches_per_epoch: 16
batch_size: 32
beta: 5.0e-3

use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
"""
)
base['testbrain'] = {}
base['testbrain']['normalize'] = False
base["testbrain"] = {}
base["testbrain"]["normalize"] = False
'''
"""
default:
trainer: incorrect_trainer
brain_to_imitate: ExpertBrain

summary_freq: 1000
use_recurrent: false
memory_size: 8
''')
"""
)
model_path='test_model_path',
summaries_dir='test_summaries_dir',
run_id='test_run_id',
model_path="test_model_path",
summaries_dir="test_summaries_dir",
run_id="test_run_id",
save_freq=100,
meta_curriculum=None,
load=True,

external_brains={'testbrain': brain_info},
external_brains={"testbrain": brain_info},
fast_simulation=True
fast_simulation=True,
@patch('numpy.random.seed')
@patch('tensorflow.set_random_seed')
@patch("numpy.random.seed")
@patch("tensorflow.set_random_seed")
TrainerController('', '', '1', 1, None, True, False, False, None, {}, seed, True)
TrainerController("", "", "1", 1, None, True, False, False, None, {}, seed, True)
def assert_bc_trainer_constructed(trainer_cls, input_config, tc, expected_brain_info, expected_config):
def assert_bc_trainer_constructed(
trainer_cls, input_config, tc, expected_brain_info, expected_config
):
assert(brain == expected_brain_info)
assert(trainer_params == expected_config)
assert(training == tc.train_model)
assert(load == tc.load_model)
assert(seed == tc.seed)
assert(run_id == tc.run_id)
assert brain == expected_brain_info
assert trainer_params == expected_config
assert training == tc.train_model
assert load == tc.load_model
assert seed == tc.seed
assert run_id == tc.run_id
assert('testbrain' in tc.trainers)
assert(isinstance(tc.trainers['testbrain'], trainer_cls))
assert "testbrain" in tc.trainers
assert isinstance(tc.trainers["testbrain"], trainer_cls)
def assert_ppo_trainer_constructed(input_config, tc, expected_brain_info,
expected_config, expected_reward_buff_cap=0):
def mock_constructor(self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id):
self.trainer_metrics = TrainerMetrics('', '')
assert(brain == expected_brain_info)
assert(trainer_parameters == expected_config)
assert(reward_buff_cap == expected_reward_buff_cap)
assert(training == tc.train_model)
assert(load == tc.load_model)
assert(seed == tc.seed)
assert(run_id == tc.run_id)
def assert_ppo_trainer_constructed(
input_config, tc, expected_brain_info, expected_config, expected_reward_buff_cap=0
):
def mock_constructor(
self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id
):
self.trainer_metrics = TrainerMetrics("", "")
assert brain == expected_brain_info
assert trainer_parameters == expected_config
assert reward_buff_cap == expected_reward_buff_cap
assert training == tc.train_model
assert load == tc.load_model
assert seed == tc.seed
assert run_id == tc.run_id
assert('testbrain' in tc.trainers)
assert(isinstance(tc.trainers['testbrain'], PPOTrainer))
assert "testbrain" in tc.trainers
assert isinstance(tc.trainers["testbrain"], PPOTrainer)
@patch('mlagents.envs.BrainInfo')
@patch("mlagents.envs.BrainInfo")
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
expected_config = full_config["default"]
expected_config["summary_path"] = tc.summaries_dir + "/test_run_id_testbrain"
expected_config["model_path"] = tc.model_path + "/testbrain"
expected_config["keep_checkpoints"] = tc.keep_checkpoints
assert_bc_trainer_constructed(OfflineBCTrainer, full_config, tc, brain_info_mock, expected_config)
assert_bc_trainer_constructed(
OfflineBCTrainer, full_config, tc, brain_info_mock, expected_config
)
@patch('mlagents.envs.BrainInfo')
@patch("mlagents.envs.BrainInfo")
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
expected_config = full_config["default"]
expected_config["summary_path"] = tc.summaries_dir + "/test_run_id_testbrain"
expected_config["model_path"] = tc.model_path + "/testbrain"
expected_config["keep_checkpoints"] = tc.keep_checkpoints
expected_config['normalize'] = False
expected_config["normalize"] = False
assert_bc_trainer_constructed(OfflineBCTrainer, full_config, tc, brain_info_mock, expected_config)
assert_bc_trainer_constructed(
OfflineBCTrainer, full_config, tc, brain_info_mock, expected_config
)
@patch('mlagents.envs.BrainInfo')
@patch("mlagents.envs.BrainInfo")
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
expected_config = full_config["default"]
expected_config["summary_path"] = tc.summaries_dir + "/test_run_id_testbrain"
expected_config["model_path"] = tc.model_path + "/testbrain"
expected_config["keep_checkpoints"] = tc.keep_checkpoints
assert_bc_trainer_constructed(OnlineBCTrainer, full_config, tc, brain_info_mock, expected_config)
assert_bc_trainer_constructed(
OnlineBCTrainer, full_config, tc, brain_info_mock, expected_config
)
@patch('mlagents.envs.BrainInfo')
@patch("mlagents.envs.BrainInfo")
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
expected_config = full_config["default"]
expected_config["summary_path"] = tc.summaries_dir + "/test_run_id_testbrain"
expected_config["model_path"] = tc.model_path + "/testbrain"
expected_config["keep_checkpoints"] = tc.keep_checkpoints
@patch('mlagents.envs.BrainInfo')
@patch("mlagents.envs.BrainInfo")
def test_initialize_invalid_trainer_raises_exception(BrainInfoMock):
brain_info_mock = BrainInfoMock()
tc = basic_trainer_controller(brain_info_mock)

tc.initialize_trainers(bad_config)
assert(1 == 0, "Initialize trainers with bad config did not raise an exception.")
assert (
1 == 0,
"Initialize trainers with bad config did not raise an exception.",
)
except UnityEnvironmentException:
pass

trainer_mock.get_step = 0
trainer_mock.get_max_steps = 5
trainer_mock.parameters = {'some': 'parameter'}
trainer_mock.parameters = {"some": "parameter"}
tc.trainers = {'testbrain': trainer_mock}
tc.trainers = {"testbrain": trainer_mock}
tc.trainers['testbrain'].get_step += 1
if tc.trainers['testbrain'].get_step > 10:
tc.trainers["testbrain"].get_step += 1
if tc.trainers["testbrain"].get_step > 10:
tc.take_step.side_effect = take_step_sideeffect
tc._export_graph = MagicMock()

@patch('tensorflow.reset_default_graph')
@patch("tensorflow.reset_default_graph")
def test_start_learning_trains_forever_if_no_train_model(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
tc.train_model = False

tf_reset_graph.assert_called_once()
tc.initialize_trainers.assert_called_once_with(trainer_config)
env_mock.reset.assert_called_once()
assert (tc.take_step.call_count == 11)
assert tc.take_step.call_count == 11
@patch('tensorflow.reset_default_graph')
@patch("tensorflow.reset_default_graph")
def test_start_learning_trains_until_max_steps_then_saves(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
trainer_config = dummy_config()

tf_reset_graph.assert_called_once()
tc.initialize_trainers.assert_called_once_with(trainer_config)
env_mock.reset.assert_called_once()
assert(tc.take_step.call_count == trainer_mock.get_max_steps + 1)
assert tc.take_step.call_count == trainer_mock.get_max_steps + 1
env_mock.close.assert_called_once()
tc._save_model.assert_called_once_with(steps=6)

tc.lesson = 5
tc.start_learning(env_mock, trainer_config)
meta_curriculum_mock.set_all_curriculums_to_lesson_num.assert_called_once_with(tc.lesson)
meta_curriculum_mock.set_all_curriculums_to_lesson_num.assert_called_once_with(
tc.lesson
)
def trainer_controller_with_take_step_mocks():

trainer_mock.parameters = {'some': 'parameter'}
trainer_mock.parameters = {"some": "parameter"}
tc.trainers = {'testbrain': trainer_mock}
tc.trainers = {"testbrain": trainer_mock}
return tc, trainer_mock

env_mock.reset = MagicMock(return_value=brain_info_mock)
env_mock.global_done = True
trainer_mock.get_action = MagicMock(return_value = ActionInfo(None, None, None, None, None))
trainer_mock.get_action = MagicMock(
return_value=ActionInfo(None, None, None, None, None)
)
tc.take_step(env_mock, brain_info_mock)
env_mock.reset.assert_called_once()

env_mock.global_done = False
action_output_mock = ActionInfo(
'action',
'memory',
'actiontext',
'value',
{'some': 'output'}
"action", "memory", "actiontext", "value", {"some": "output"}
)
trainer_mock.get_action = MagicMock(return_value=action_output_mock)

env_mock.step.assert_called_once_with(
vector_action={'testbrain': action_output_mock.action},
memory={'testbrain': action_output_mock.memory},
text_action={'testbrain': action_output_mock.text},
value={'testbrain': action_output_mock.value}
vector_action={"testbrain": action_output_mock.action},
memory={"testbrain": action_output_mock.memory},
text_action={"testbrain": action_output_mock.text},
value={"testbrain": action_output_mock.value},
trainer_mock.process_experiences.assert_called_once_with(curr_info_mock, env_step_output_mock)
trainer_mock.process_experiences.assert_called_once_with(
curr_info_mock, env_step_output_mock
)
trainer_mock.update_policy.assert_called_once()
trainer_mock.write_summary.assert_called_once()
trainer_mock.increment_step_and_update_last_reward.assert_called_once()

47
ml-agents/mlagents/trainers/tests/test_trainer_metrics.py


import unittest.mock as mock
from mlagents.trainers import TrainerMetrics
class TestTrainerMetrics:
class TestTrainerMetrics:
field_names = ['Brain name', 'Time to update policy',
'Time since start of training',
'Time for last experience collection',
'Number of experiences used for training', 'Mean return']
field_names = [
"Brain name",
"Time to update policy",
"Time since start of training",
"Time for last experience collection",
"Number of experiences used for training",
"Mean return",
]
@mock.patch('mlagents.trainers.trainer_metrics.time', mock.MagicMock(return_value=42))
@mock.patch(
"mlagents.trainers.trainer_metrics.time", mock.MagicMock(return_value=42)
)
mock_path = 'fake'
mock_brain_name = 'fake'
trainer_metrics = TrainerMetrics(path=mock_path,
brain_name=mock_brain_name)
mock_path = "fake"
mock_brain_name = "fake"
trainer_metrics = TrainerMetrics(path=mock_path, brain_name=mock_brain_name)
@mock.patch('mlagents.trainers.trainer_metrics.time', mock.MagicMock(return_value=42))
@mock.patch(
"mlagents.trainers.trainer_metrics.time", mock.MagicMock(return_value=42)
)
mock_path = 'fake'
mock_brain_name = 'fake'
fake_buffer_length = 350
mock_path = "fake"
mock_brain_name = "fake"
fake_buffer_length = 350
trainer_metrics = TrainerMetrics(path=mock_path,
brain_name=mock_brain_name)
trainer_metrics = TrainerMetrics(path=mock_path, brain_name=mock_brain_name)
trainer_metrics.start_policy_update_timer(number_experiences=fake_buffer_length,
mean_return=fake_mean_return)
trainer_metrics.start_policy_update_timer(
number_experiences=fake_buffer_length, mean_return=fake_mean_return
)
fake_row = [mock_brain_name, 0,0, 0, 350, '0.300']
fake_row = [mock_brain_name, 0, 0, 0, 350, "0.300"]

117
ml-agents/mlagents/trainers/trainer.py


LOGGER = logging.getLogger("mlagents.trainers")
pass

self.brain_name = brain.brain_name
self.run_id = run_id
self.trainer_parameters = trainer_parameters
self.summary_path = trainer_parameters['summary_path']
self.summary_path = trainer_parameters["summary_path"]
self.trainer_metrics = TrainerMetrics(path=self.summary_path + '.csv',
brain_name=self.brain_name)
self.trainer_metrics = TrainerMetrics(
path=self.summary_path + ".csv", brain_name=self.brain_name
)
return '''{} Trainer'''.format(self.__class__)
return """{} Trainer""".format(self.__class__)
def check_param_keys(self):
for k in self.param_keys:

"brain {2}.".format(k, self.__class__, self.brain_name))
"brain {2}.".format(k, self.__class__, self.brain_name)
)
@property
def parameters(self):

raise UnityTrainerException(
"The parameters property was not implemented.")
raise UnityTrainerException("The parameters property was not implemented.")
@property
def graph_scope(self):

raise UnityTrainerException(
"The graph_scope property was not implemented.")
raise UnityTrainerException("The graph_scope property was not implemented.")
@property
def get_max_steps(self):

"""
raise UnityTrainerException(
"The get_max_steps property was not implemented.")
raise UnityTrainerException("The get_max_steps property was not implemented.")
@property
def get_step(self):

"""
raise UnityTrainerException(
"The get_step property was not implemented.")
raise UnityTrainerException("The get_step property was not implemented.")
@property
def get_last_reward(self):

"""
raise UnityTrainerException(
"The get_last_reward property was not implemented.")
raise UnityTrainerException("The get_last_reward property was not implemented.")
def increment_step_and_update_last_reward(self):
"""

"The increment_step_and_update_last_reward method was not implemented.")
"The increment_step_and_update_last_reward method was not implemented."
)
def get_action(self, curr_info: BrainInfo) -> ActionInfo:
"""

self.trainer_metrics.end_experience_collection_timer()
return action
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
def add_experiences(
self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take_action_outputs
):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo.

raise UnityTrainerException(
"The add_experiences method was not implemented.")
raise UnityTrainerException("The add_experiences method was not implemented.")
def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
"""

:param next_info: Dictionary of all next-step brains and corresponding BrainInfo.
"""
raise UnityTrainerException(
"The process_experiences method was not implemented.")
"The process_experiences method was not implemented."
)
def end_episode(self):
"""

raise UnityTrainerException(
"The end_episode method was not implemented.")
raise UnityTrainerException("The end_episode method was not implemented.")
def is_ready_update(self):
"""

raise UnityTrainerException(
"The is_ready_update method was not implemented.")
raise UnityTrainerException("The is_ready_update method was not implemented.")
raise UnityTrainerException(
"The update_model method was not implemented.")
raise UnityTrainerException("The update_model method was not implemented.")
def save_model(self):
"""

:param lesson_num: Current lesson number in curriculum.
:param global_step: The number of steps the simulation has been going for
"""
if global_step % self.trainer_parameters['summary_freq'] == 0 and global_step != 0:
is_training = "Training." if self.is_training and self.get_step <= self.get_max_steps else "Not Training."
if len(self.stats['Environment/Cumulative Reward']) > 0:
mean_reward = np.mean(
self.stats['Environment/Cumulative Reward'])
LOGGER.info(" {}: {}: Step: {}. "
"Time Elapsed: {:0.3f} s "
"Mean "
"Reward: {"
":0.3f}. Std of Reward: {:0.3f}. {}"
.format(self.run_id, self.brain_name,
min(self.get_step, self.get_max_steps),
delta_train_start,
mean_reward, np.std(
self.stats['Environment/Cumulative Reward']),
is_training))
if (
global_step % self.trainer_parameters["summary_freq"] == 0
and global_step != 0
):
is_training = (
"Training."
if self.is_training and self.get_step <= self.get_max_steps
else "Not Training."
)
if len(self.stats["Environment/Cumulative Reward"]) > 0:
mean_reward = np.mean(self.stats["Environment/Cumulative Reward"])
LOGGER.info(
" {}: {}: Step: {}. "
"Time Elapsed: {:0.3f} s "
"Mean "
"Reward: {"
":0.3f}. Std of Reward: {:0.3f}. {}".format(
self.run_id,
self.brain_name,
min(self.get_step, self.get_max_steps),
delta_train_start,
mean_reward,
np.std(self.stats["Environment/Cumulative Reward"]),
is_training,
)
)
LOGGER.info(" {}: {}: Step: {}. No episode was completed since last summary. {}"
.format(self.run_id, self.brain_name, self.get_step, is_training))
LOGGER.info(
" {}: {}: Step: {}. No episode was completed since last summary. {}".format(
self.run_id, self.brain_name, self.get_step, is_training
)
)
summary.value.add(tag='{}'.format(
key), simple_value=stat_mean)
summary.value.add(tag="{}".format(key), simple_value=stat_mean)
summary.value.add(tag='Environment/Lesson', simple_value=lesson_num)
summary.value.add(tag="Environment/Lesson", simple_value=lesson_num)
self.summary_writer.add_summary(summary, self.get_step)
self.summary_writer.flush()

"""
try:
with tf.Session() as sess:
s_op = tf.summary.text(key, tf.convert_to_tensor(
([[str(x), str(input_dict[x])] for x in input_dict])))
s_op = tf.summary.text(
key,
tf.convert_to_tensor(
([[str(x), str(input_dict[x])] for x in input_dict])
),
)
"Cannot write text summary for Tensorboard. Tensorflow version must be r1.2 or above.")
"Cannot write text summary for Tensorboard. Tensorflow version must be r1.2 or above."
)
pass

195
ml-agents/mlagents/trainers/trainer_controller.py


class TrainerController(object):
def __init__(self,
model_path: str,
summaries_dir: str,
run_id: str,
save_freq: int,
meta_curriculum: Optional[MetaCurriculum],
load: bool,
train: bool,
keep_checkpoints: int,
lesson: Optional[int],
external_brains: Dict[str, BrainParameters],
training_seed: int,
fast_simulation: bool):
def __init__(
self,
model_path: str,
summaries_dir: str,
run_id: str,
save_freq: int,
meta_curriculum: Optional[MetaCurriculum],
load: bool,
train: bool,
keep_checkpoints: int,
lesson: Optional[int],
external_brains: Dict[str, BrainParameters],
training_seed: int,
fast_simulation: bool,
):
"""
:param model_path: Path to save the model.
:param summaries_dir: Folder to save training summaries.

self.summaries_dir = summaries_dir
self.external_brains = external_brains
self.external_brain_names = external_brains.keys()
self.logger = logging.getLogger('mlagents.envs')
self.logger = logging.getLogger("mlagents.envs")
self.run_id = run_id
self.save_freq = save_freq
self.lesson = lesson

def _get_measure_vals(self):
if self.meta_curriculum:
brain_names_to_measure_vals = {}
for brain_name, curriculum \
in self.meta_curriculum.brains_to_curriculums.items():
if curriculum.measure == 'progress':
measure_val = (self.trainers[brain_name].get_step /
self.trainers[brain_name].get_max_steps)
for (
brain_name,
curriculum,
) in self.meta_curriculum.brains_to_curriculums.items():
if curriculum.measure == "progress":
measure_val = (
self.trainers[brain_name].get_step
/ self.trainers[brain_name].get_max_steps
)
elif curriculum.measure == 'reward':
measure_val = np.mean(self.trainers[brain_name]
.reward_buffer)
elif curriculum.measure == "reward":
measure_val = np.mean(self.trainers[brain_name].reward_buffer)
brain_names_to_measure_vals[brain_name] = measure_val
return brain_names_to_measure_vals
else:

"""
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
self.logger.info('Saved Model')
self.logger.info("Saved Model")
self.logger.info('Learning was interrupted. Please wait '
'while the graph is generated.')
self.logger.info(
"Learning was interrupted. Please wait " "while the graph is generated."
)
self._save_model(steps)
def _write_training_metrics(self):

"""
trainer_parameters_dict = {}
for brain_name in self.external_brains:
trainer_parameters = trainer_config['default'].copy()
trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
basedir=self.summaries_dir,
name=str(self.run_id) + '_' + brain_name)
trainer_parameters['model_path'] = '{basedir}/{name}'.format(
basedir=self.model_path,
name=brain_name)
trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
trainer_parameters = trainer_config["default"].copy()
trainer_parameters["summary_path"] = "{basedir}/{name}".format(
basedir=self.summaries_dir, name=str(self.run_id) + "_" + brain_name
)
trainer_parameters["model_path"] = "{basedir}/{name}".format(
basedir=self.model_path, name=brain_name
)
trainer_parameters["keep_checkpoints"] = self.keep_checkpoints
if brain_name in trainer_config:
_brain_key = brain_name
while not isinstance(trainer_config[_brain_key], dict):

trainer_parameters_dict[brain_name] = trainer_parameters.copy()
for brain_name in self.external_brains:
if trainer_parameters_dict[brain_name]['trainer'] == 'offline_bc':
if trainer_parameters_dict[brain_name]["trainer"] == "offline_bc":
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)
elif trainer_parameters_dict[brain_name]['trainer'] == 'online_bc':
trainer_parameters_dict[brain_name],
self.train_model,
self.load_model,
self.seed,
self.run_id,
)
elif trainer_parameters_dict[brain_name]["trainer"] == "online_bc":
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)
elif trainer_parameters_dict[brain_name]['trainer'] == 'ppo':
trainer_parameters_dict[brain_name],
self.train_model,
self.load_model,
self.seed,
self.run_id,
)
elif trainer_parameters_dict[brain_name]["trainer"] == "ppo":
self.meta_curriculum
.brains_to_curriculums[brain_name]
.min_lesson_length if self.meta_curriculum else 0,
self.meta_curriculum.brains_to_curriculums[
brain_name
].min_lesson_length
if self.meta_curriculum
else 0,
self.train_model, self.load_model, self.seed,
self.run_id)
self.trainer_metrics[brain_name] = self.trainers[brain_name].trainer_metrics
self.train_model,
self.load_model,
self.seed,
self.run_id,
)
self.trainer_metrics[brain_name] = self.trainers[
brain_name
].trainer_metrics
raise UnityEnvironmentException('The trainer config contains '
'an unknown trainer type for '
'brain {}'
.format(brain_name))
raise UnityEnvironmentException(
"The trainer config contains "
"an unknown trainer type for "
"brain {}".format(brain_name)
)
@staticmethod
def _create_model_path(model_path):

except Exception:
raise UnityEnvironmentException('The folder {} containing the '
'generated model could not be '
'accessed. Please make sure the '
'permissions are set correctly.'
.format(model_path))
raise UnityEnvironmentException(
"The folder {} containing the "
"generated model could not be "
"accessed. Please make sure the "
"permissions are set correctly.".format(model_path)
)
def _reset_env(self, env: BaseUnityEnvironment):
"""Resets the environment.

environment.
"""
if self.meta_curriculum is not None:
return env.reset(train_mode=self.fast_simulation, config=self.meta_curriculum.get_config())
return env.reset(
train_mode=self.fast_simulation,
config=self.meta_curriculum.get_config(),
)
else:
return env.reset(train_mode=self.fast_simulation)

if self.train_model:
for brain_name, trainer in self.trainers.items():
trainer.write_tensorboard_text('Hyperparameters',
trainer.parameters)
trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
while any([t.get_step <= t.get_max_steps \
for k, t in self.trainers.items()]) \
or not self.train_model:
while (
any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()])
or not self.train_model
):
if self.global_step % self.save_freq == 0 and self.global_step != 0 \
and self.train_model:
if (
self.global_step % self.save_freq == 0
and self.global_step != 0
and self.train_model
):
# Save Tensorflow model
self._save_model(steps=self.global_step)
curr_info = new_info

def take_step(self, env: BaseUnityEnvironment, curr_info: AllBrainInfo):
if self.meta_curriculum:
# Get the sizes of the reward buffers.
reward_buff_sizes = {k: len(t.reward_buffer)
for (k, t) in self.trainers.items()}
reward_buff_sizes = {
k: len(t.reward_buffer) for (k, t) in self.trainers.items()
}
lessons_incremented = \
self.meta_curriculum.increment_lessons(
self._get_measure_vals(),
reward_buff_sizes=reward_buff_sizes)
lessons_incremented = self.meta_curriculum.increment_lessons(
self._get_measure_vals(), reward_buff_sizes=reward_buff_sizes
)
if (self.meta_curriculum
and any(lessons_incremented.values())):
if self.meta_curriculum and any(lessons_incremented.values()):
curr_info = self._reset_env(env)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()

vector_action=take_action_vector,
memory=take_action_memories,
text_action=take_action_text,
value=take_action_value
value=take_action_value,
trainer.add_experiences(curr_info, new_info,
take_action_outputs[brain_name])
trainer.add_experiences(
curr_info, new_info, take_action_outputs[brain_name]
)
if trainer.is_ready_update() and self.train_model \
and trainer.get_step <= trainer.get_max_steps:
if (
trainer.is_ready_update()
and self.train_model
and trainer.get_step <= trainer.get_max_steps
):
# Perform gradient descent with experience buffer
trainer.update_policy()

trainer.write_summary(
self.global_step,
delta_train_start, lesson_num=self.meta_curriculum
.brains_to_curriculums[brain_name]
.lesson_num)
delta_train_start,
lesson_num=self.meta_curriculum.brains_to_curriculums[
brain_name
].lesson_num,
)
if self.train_model \
and trainer.get_step <= trainer.get_max_steps:
if self.train_model and trainer.get_step <= trainer.get_max_steps:
trainer.increment_step_and_update_last_reward()
return new_info

53
ml-agents/mlagents/trainers/trainer_metrics.py


from time import time
LOGGER = logging.getLogger("mlagents.trainers")
FIELD_NAMES = ['Brain name', 'Time to update policy',
'Time since start of training', 'Time for last experience collection',
'Number of experiences used for training', 'Mean return']
FIELD_NAMES = [
"Brain name",
"Time to update policy",
"Time since start of training",
"Time for last experience collection",
"Number of experiences used for training",
"Mean return",
]
class TrainerMetrics:
"""

def __init__(self, path: str, brain_name: str):
"""
:str path: Fully qualified path where CSV is stored.

def _add_row(self, delta_train_start):
row = [self.brain_name]
row.extend(format(c, '.3f') if isinstance(c, float) else c
for c in [self.delta_policy_update, delta_train_start,
self.delta_last_experience_collection,
self.last_buffer_length, self.last_mean_return])
row.extend(
format(c, ".3f") if isinstance(c, float) else c
for c in [
self.delta_policy_update,
delta_train_start,
self.delta_last_experience_collection,
self.last_buffer_length,
self.last_mean_return,
]
)
self.delta_last_experience_collection = None
self.rows.append(row)

else:
self.delta_policy_update = 0
delta_train_start = time() - self.time_training_start
LOGGER.debug(" Policy Update Training Metrics for {}: "
"\n\t\tTime to update Policy: {:0.3f} s \n"
"\t\tTime elapsed since training: {:0.3f} s \n"
"\t\tTime for experience collection: {:0.3f} s \n"
"\t\tBuffer Length: {} \n"
"\t\tReturns : {:0.3f}\n"
.format(self.brain_name, self.delta_policy_update,
delta_train_start, self.delta_last_experience_collection,
self.last_buffer_length, self.last_mean_return))
LOGGER.debug(
" Policy Update Training Metrics for {}: "
"\n\t\tTime to update Policy: {:0.3f} s \n"
"\t\tTime elapsed since training: {:0.3f} s \n"
"\t\tTime for experience collection: {:0.3f} s \n"
"\t\tBuffer Length: {} \n"
"\t\tReturns : {:0.3f}\n".format(
self.brain_name,
self.delta_policy_update,
delta_train_start,
self.delta_last_experience_collection,
self.last_buffer_length,
self.last_mean_return,
)
)
self._add_row(delta_train_start)
def write_training_metrics(self):

with open(self.path, 'w') as file:
with open(self.path, "w") as file:
writer = csv.writer(file)
writer.writerow(FIELD_NAMES)
for row in self.rows:

62
ml-agents/setup.py


here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
with open(path.join(here, "README.md"), encoding="utf-8") as f:
name='mlagents',
version='0.8.1',
description='Unity Machine Learning Agents',
name="mlagents",
version="0.8.1",
description="Unity Machine Learning Agents",
long_description_content_type='text/markdown',
url='https://github.com/Unity-Technologies/ml-agents',
author='Unity Technologies',
author_email='ML-Agents@unity3d.com',
long_description_content_type="text/markdown",
url="https://github.com/Unity-Technologies/ml-agents",
author="Unity Technologies",
author_email="ML-Agents@unity3d.com",
'Intended Audience :: Developers',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3.6'
"Intended Audience :: Developers",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.6",
packages=['mlagents.trainers', 'mlagents.trainers.bc', 'mlagents.trainers.ppo'], # Required
packages=["mlagents.trainers"], # Required
'mlagents_envs==0.8.1',
'tensorflow>=1.7,<1.8',
'Pillow>=4.2.1',
'matplotlib',
'numpy>=1.13.3,<=1.14.5',
'jupyter',
'pytest>=3.2.2,<4.0.0',
'docopt',
'pyyaml',
'protobuf>=3.6,<3.7',
'grpcio>=1.11.0,<1.12.0',
'pypiwin32==223;platform_system=="Windows"'],
"mlagents_envs==0.8.1",
"tensorflow>=1.7,<1.8",
"Pillow>=4.2.1",
"matplotlib",
"numpy>=1.13.3,<=1.14.5",
"jupyter",
"pytest>=3.2.2,<4.0.0",
"docopt",
"pyyaml",
"protobuf>=3.6,<3.7",
"grpcio>=1.11.0,<1.12.0",
'pypiwin32==223;platform_system=="Windows"',
],
entry_points={
'console_scripts': [
'mlagents-learn=mlagents.trainers.learn:main',
],
},
entry_points={"console_scripts": ["mlagents-learn=mlagents.trainers.learn:main"]},
)
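
Note: the reformatting in the hunks above is mechanical. Black normalizes string literals to double quotes, adds trailing commas, and wraps calls, dicts, and argument lists that exceed its line-length limit. The PR itself only records the resulting diffs, not the command that produced them, so the following is a minimal illustrative sketch of reproducing one such hunk through Black's library API; it assumes a black release new enough to expose format_str and FileMode as public names.

import black

# Old-style source, as it appears on the left-hand side of a hunk above.
# Black only parses this text, so the undefined names (model, np) are fine.
src = (
    "feed_dict = {model.batch_size: 1,\n"
    "             model.sequence_length: 2,\n"
    "             model.memory_in: np.zeros((1, memory_size)),\n"
    "             model.epsilon: np.array([[0, 1]])}\n"
)

# format_str applies the same rules as the black CLI: double quotes,
# trailing commas, and wrapping at the default 88-character line length.
formatted = black.format_str(src, mode=black.FileMode())
print(formatted)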