浏览代码

Fixed imports, all tests are passing.

/develop-generalizationTraining-TrainerController
Deric Pang 6 年前
当前提交
634280a6
共有 58 个文件被更改,包括 1108 次插入108 次删除
  1. 2
      docs/Learning-Environment-Executable.md
  2. 2
      mlagents/trainers/buffer.py
  3. 2
      mlagents/trainers/curriculum.py
  4. 2
      mlagents/trainers/exception.py
  5. 6
      mlagents/trainers/meta_curriculum.py
  6. 2
      mlagents/trainers/models.py
  7. 6
      mlagents/trainers/trainer.py
  8. 16
      mlagents/trainers/trainer_controller.py
  9. 4
      mlagents/trainers/ppo/models.py
  10. 10
      mlagents/trainers/ppo/trainer.py
  11. 2
      mlagents/trainers/bc/models.py
  12. 10
      mlagents/trainers/bc/trainer.py
  13. 3
      mlagents/envs/__init__.py
  14. 55
      mlagents/envs/brain.py
  15. 4
      mlagents/envs/communicator.py
  16. 4
      mlagents/envs/environment.py
  17. 2
      mlagents/envs/exception.py
  18. 6
      mlagents/envs/rpc_communicator.py
  19. 4
      mlagents/envs/socket_communicator.py
  20. 2
      mlagents/envs/notebooks/getting-started.ipynb
  21. 6
      mlagents/envs/communicator_objects/brain_parameters_proto_pb2.py
  22. 2
      mlagents/envs/communicator_objects/brain_type_proto_pb2.py
  23. 2
      mlagents/envs/communicator_objects/space_type_proto_pb2.py
  24. 4
      mlagents/envs/communicator_objects/unity_input_pb2.py
  25. 6
      mlagents/envs/communicator_objects/unity_message_pb2.py
  26. 4
      mlagents/envs/communicator_objects/unity_output_pb2.py
  27. 4
      mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.py
  28. 6
      mlagents/envs/communicator_objects/unity_rl_input_pb2.py
  29. 2
      mlagents/envs/communicator_objects/unity_rl_output_pb2.py
  30. 2
      mlagents/envs/communicator_objects/unity_to_external_pb2.py
  31. 2
      mlagents/envs/communicator_objects/unity_to_external_pb2_grpc.py
  32. 5
      tests/mock_communicator.py
  33. 4
      tests/trainers/test_curriculum.py
  34. 36
      tests/trainers/test_trainer_controller.py
  35. 0
      mlagents/__init__.py
  36. 110
      mlagents/learn.py
  37. 203
      setup.py
  38. 0
      tests/__init__.py
  39. 0
      tests/envs/__init__.py
  40. 95
      tests/envs/test_envs.py
  41. 0
      tests/trainers/__init__.py
  42. 107
      tests/trainers/test_bc.py
  43. 56
      tests/trainers/test_buffer.py
  44. 109
      tests/trainers/test_meta_curriculum.py
  45. 285
      tests/trainers/test_ppo.py
  46. 22
      mlagents/setup.py
  47. 0
      /requirements.txt
  48. 0
      /mlagents/trainers
  49. 0
      /mlagents/envs
  50. 0
      /tests/mock_communicator.py
  51. 0
      /tests/trainers/test_curriculum.py
  52. 0
      /tests/trainers/test_trainer_controller.py

2
docs/Learning-Environment-Executable.md


If you want to use the [Python API](Python-API.md) to interact with your executable, you can pass the name of the executable with the argument 'file_name' of the `UnityEnvironment`. For instance:
```python
from unityagents import UnityEnvironment
from mlagents.envs import UnityEnvironment
env = UnityEnvironment(file_name=<env_name>)
```

2
mlagents/trainers/buffer.py


import numpy as np
from unityagents.exception import UnityException
from mlagents.envs.exception import UnityException
class BufferException(UnityException):

2
mlagents/trainers/curriculum.py


import logging
logger = logging.getLogger('unitytrainers')
logger = logging.getLogger('mlagents.trainers')
class Curriculum(object):

2
mlagents/trainers/exception.py


"""
Contains exceptions for the unitytrainers package.
Contains exceptions for the trainers package.
"""
class TrainerError(Exception):

6
mlagents/trainers/meta_curriculum.py


"""Contains the MetaCurriculum class."""
import os
from unitytrainers.curriculum import Curriculum
from unitytrainers.exception import MetaCurriculumError
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.exception import MetaCurriculumError
logger = logging.getLogger('unitytrainers')
logger = logging.getLogger('mlagents.trainers')
class MetaCurriculum(object):

2
mlagents/trainers/models.py


import tensorflow as tf
import tensorflow.contrib.layers as c_layers
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class LearningModel(object):

6
mlagents/trainers/trainer.py


import tensorflow as tf
import numpy as np
from unityagents import UnityException, AllBrainInfo
from mlagents.envs import UnityException, AllBrainInfo
logger = logging.getLogger("unitytrainers")
logger = logging.getLogger("mlagents.trainers")
class UnityTrainerException(UnityException):

class Trainer(object):
"""This class is the abstract class for the unitytrainers"""
"""This class is the abstract class for the mlagents.trainers"""
def __init__(self, sess, env, brain_name, trainer_parameters, training, run_id):
"""

16
mlagents/trainers/trainer_controller.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning
"""Launches unitytrainers for each External Brains in a Unity Environment."""
"""Launches trainers for each External Brains in a Unity Environment."""
import os
import logging

import numpy as np
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from unityagents.environment import UnityEnvironment
from unityagents.exception import UnityEnvironmentException
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.exception import UnityEnvironmentException
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unitytrainers.meta_curriculum import MetaCurriculum
from unitytrainers.exception import MetaCurriculumError
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import MetaCurriculumError
class TrainerController(object):

self.summaries_dir = '/{docker_target_name}/summaries'.format(
docker_target_name=docker_target_name)
self.logger = logging.getLogger("unityagents")
self.logger = logging.getLogger("mlagents.envs")
self.run_id = run_id
self.save_freq = save_freq
self.lesson = lesson

4
mlagents/trainers/ppo/models.py


import numpy as np
import tensorflow as tf
from unitytrainers.models import LearningModel
from mlagents.trainers.models import LearningModel
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class PPOModel(LearningModel):

10
mlagents/trainers/ppo/trainer.py


import numpy as np
import tensorflow as tf
from unityagents import AllBrainInfo, BrainInfo
from unitytrainers.buffer import Buffer
from unitytrainers.ppo.models import PPOModel
from unitytrainers.trainer import UnityTrainerException, Trainer
from mlagents.envs import AllBrainInfo, BrainInfo
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class PPOTrainer(Trainer):

2
mlagents/trainers/bc/models.py


import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from unitytrainers.models import LearningModel
from mlagents.trainers.models import LearningModel
class BehavioralCloningModel(LearningModel):

10
mlagents/trainers/bc/trainer.py


import numpy as np
import tensorflow as tf
from unityagents import AllBrainInfo
from unitytrainers.bc.models import BehavioralCloningModel
from unitytrainers.buffer import Buffer
from unitytrainers.trainer import UnityTrainerException, Trainer
from mlagents.envs import AllBrainInfo
from mlagents.trainers.bc.models import BehavioralCloningModel
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class BehavioralCloningTrainer(Trainer):

3
mlagents/envs/__init__.py


from .environment import *
from .brain import *
from .exception import *

55
mlagents/envs/brain.py


from typing import Dict
class BrainInfo:
    """Snapshot of the experience of all agents linked to one brain at a step."""

    def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
                 reward=None, agents=None, local_done=None,
                 vector_action=None, text_action=None, max_reached=None):
        """
        Describes experience at current step of all agents linked to a brain.
        """
        # Observations gathered this step.
        self.visual_observations = visual_observation
        self.vector_observations = vector_observation
        self.text_observations = text_observations
        # Recurrent memories carried between steps, if any.
        self.memories = memory
        # Per-agent outcome of the step.
        self.agents = agents
        self.rewards = reward
        self.local_done = local_done
        self.max_reached = max_reached
        # Actions that produced this step's observations.
        self.previous_vector_actions = vector_action
        self.previous_text_actions = text_action


# Mapping from brain name to the BrainInfo of the agents that brain controls.
AllBrainInfo = Dict[str, BrainInfo]
class BrainParameters:
    def __init__(self, brain_name, brain_param):
        """
        Contains all brain-specific parameters.
        :param brain_name: Name of brain.
        :param brain_param: Dictionary of brain parameters.
        """
        self.brain_name = brain_name
        # Size of one (unstacked) vector observation per agent.
        self.vector_observation_space_size = brain_param["vectorObservationSize"]
        # How many consecutive vector observations are stacked into one input.
        self.num_stacked_vector_observations = brain_param["numStackedVectorObservations"]
        # One camera-resolution entry per visual observation.
        self.number_visual_observations = len(brain_param["cameraResolutions"])
        self.camera_resolutions = brain_param["cameraResolutions"]
        self.vector_action_space_size = brain_param["vectorActionSize"]
        self.vector_action_descriptions = brain_param["vectorActionDescriptions"]
        # 0 -> "discrete", 1 -> "continuous" (index into the literal list).
        self.vector_action_space_type = ["discrete", "continuous"][brain_param["vectorActionSpaceType"]]

    def __str__(self):
        # Human-readable summary used when printing the brain configuration.
        return '''Unity brain name: {}
Number of Visual Observations (per agent): {}
Vector Observation space size (per agent): {}
Number of stacked Vector Observation: {}
Vector Action space type: {}
Vector Action space size (per agent): {}
Vector Action descriptions: {}'''.format(self.brain_name,
                                         str(self.number_visual_observations),
                                         str(self.vector_observation_space_size),
                                         str(self.num_stacked_vector_observations),
                                         self.vector_action_space_type,
                                         str(self.vector_action_space_size),
                                         ', '.join(self.vector_action_descriptions))

4
mlagents/envs/communicator.py


import logging
from communicator_objects import UnityOutput, UnityInput
from .communicator_objects import UnityOutput, UnityInput
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class Communicator(object):

4
mlagents/envs/environment.py


from .brain import BrainInfo, BrainParameters, AllBrainInfo
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\
from .communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\
EnvironmentParametersProto, UnityRLInitializationInput, UnityRLInitializationOutput,\
UnityInput, UnityOutput

from PIL import Image
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class UnityEnvironment(object):

2
mlagents/envs/exception.py


import logging
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class UnityException(Exception):
"""

6
mlagents/envs/rpc_communicator.py


from concurrent.futures import ThreadPoolExecutor
from .communicator import Communicator
from communicator_objects import UnityToExternalServicer, add_UnityToExternalServicer_to_server
from communicator_objects import UnityMessage, UnityInput, UnityOutput
from .communicator_objects import UnityToExternalServicer, add_UnityToExternalServicer_to_server
from .communicator_objects import UnityMessage, UnityInput, UnityOutput
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class UnityToExternalServicerImplementation(UnityToExternalServicer):

4
mlagents/envs/socket_communicator.py


import struct
from .communicator import Communicator
from communicator_objects import UnityMessage, UnityOutput, UnityInput
from .communicator_objects import UnityMessage, UnityOutput, UnityInput
logger = logging.getLogger("unityagents")
logger = logging.getLogger("mlagents.envs")
class SocketCommunicator(Communicator):

2
mlagents/envs/notebooks/getting-started.ipynb


"import numpy as np\n",
"import sys\n",
"\n",
"from unityagents import UnityEnvironment\n",
"from mlagents.envs import UnityEnvironment\n",
"\n",
"%matplotlib inline\n",
"\n",

6
mlagents/envs/communicator_objects/brain_parameters_proto_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import resolution_proto_pb2 as communicator__objects_dot_resolution__proto__pb2
from communicator_objects import brain_type_proto_pb2 as communicator__objects_dot_brain__type__proto__pb2
from communicator_objects import space_type_proto_pb2 as communicator__objects_dot_space__type__proto__pb2
from . import resolution_proto_pb2 as communicator__objects_dot_resolution__proto__pb2
from . import brain_type_proto_pb2 as communicator__objects_dot_brain__type__proto__pb2
from . import space_type_proto_pb2 as communicator__objects_dot_space__type__proto__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

2
mlagents/envs/communicator_objects/brain_type_proto_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import resolution_proto_pb2 as communicator__objects_dot_resolution__proto__pb2
from . import resolution_proto_pb2 as communicator__objects_dot_resolution__proto__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

2
mlagents/envs/communicator_objects/space_type_proto_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import resolution_proto_pb2 as communicator__objects_dot_resolution__proto__pb2
from . import resolution_proto_pb2 as communicator__objects_dot_resolution__proto__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

4
mlagents/envs/communicator_objects/unity_input_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import unity_rl_input_pb2 as communicator__objects_dot_unity__rl__input__pb2
from communicator_objects import unity_rl_initialization_input_pb2 as communicator__objects_dot_unity__rl__initialization__input__pb2
from . import unity_rl_input_pb2 as communicator__objects_dot_unity__rl__input__pb2
from . import unity_rl_initialization_input_pb2 as communicator__objects_dot_unity__rl__initialization__input__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

6
mlagents/envs/communicator_objects/unity_message_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import unity_output_pb2 as communicator__objects_dot_unity__output__pb2
from communicator_objects import unity_input_pb2 as communicator__objects_dot_unity__input__pb2
from communicator_objects import header_pb2 as communicator__objects_dot_header__pb2
from . import unity_output_pb2 as communicator__objects_dot_unity__output__pb2
from . import unity_input_pb2 as communicator__objects_dot_unity__input__pb2
from . import header_pb2 as communicator__objects_dot_header__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

4
mlagents/envs/communicator_objects/unity_output_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import unity_rl_output_pb2 as communicator__objects_dot_unity__rl__output__pb2
from communicator_objects import unity_rl_initialization_output_pb2 as communicator__objects_dot_unity__rl__initialization__output__pb2
from . import unity_rl_output_pb2 as communicator__objects_dot_unity__rl__output__pb2
from . import unity_rl_initialization_output_pb2 as communicator__objects_dot_unity__rl__initialization__output__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

4
mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import brain_parameters_proto_pb2 as communicator__objects_dot_brain__parameters__proto__pb2
from communicator_objects import environment_parameters_proto_pb2 as communicator__objects_dot_environment__parameters__proto__pb2
from . import brain_parameters_proto_pb2 as communicator__objects_dot_brain__parameters__proto__pb2
from . import environment_parameters_proto_pb2 as communicator__objects_dot_environment__parameters__proto__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

6
mlagents/envs/communicator_objects/unity_rl_input_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import agent_action_proto_pb2 as communicator__objects_dot_agent__action__proto__pb2
from communicator_objects import environment_parameters_proto_pb2 as communicator__objects_dot_environment__parameters__proto__pb2
from communicator_objects import command_proto_pb2 as communicator__objects_dot_command__proto__pb2
from . import agent_action_proto_pb2 as communicator__objects_dot_agent__action__proto__pb2
from . import environment_parameters_proto_pb2 as communicator__objects_dot_environment__parameters__proto__pb2
from . import command_proto_pb2 as communicator__objects_dot_command__proto__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

2
mlagents/envs/communicator_objects/unity_rl_output_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import agent_info_proto_pb2 as communicator__objects_dot_agent__info__proto__pb2
from . import agent_info_proto_pb2 as communicator__objects_dot_agent__info__proto__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

2
mlagents/envs/communicator_objects/unity_to_external_pb2.py


_sym_db = _symbol_database.Default()
from communicator_objects import unity_message_pb2 as communicator__objects_dot_unity__message__pb2
from . import unity_message_pb2 as communicator__objects_dot_unity__message__pb2
DESCRIPTOR = _descriptor.FileDescriptor(

2
mlagents/envs/communicator_objects/unity_to_external_pb2_grpc.py


# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
import grpc
from communicator_objects import unity_message_pb2 as communicator__objects_dot_unity__message__pb2
from . import unity_message_pb2 as communicator__objects_dot_unity__message__pb2
class UnityToExternalStub(object):

5
tests/mock_communicator.py


from unityagents.communicator import Communicator
from communicator_objects import UnityMessage, UnityOutput, UnityInput,\
from mlagents.envs.communicator import Communicator
from mlagents.envs.communicator_objects import UnityMessage, UnityOutput, UnityInput,\
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput,\
AgentInfoProto, UnityRLOutput

4
tests/trainers/test_curriculum.py


import json
from unittest.mock import patch, mock_open
from unitytrainers.exception import CurriculumError
from unitytrainers import Curriculum
from mlagents.trainers.exception import CurriculumError
from mlagents.trainers import Curriculum
dummy_curriculum_json_str = '''

36
tests/trainers/test_trainer_controller.py


import pytest
import tensorflow as tf
from unitytrainers.trainer_controller import TrainerController
from unitytrainers.buffer import Buffer
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unitytrainers.curriculum import Curriculum
from unitytrainers.exception import CurriculumError
from unityagents.exception import UnityEnvironmentException
from .mock_communicator import MockCommunicator
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.exception import CurriculumError
from mlagents.envs.exception import UnityEnvironmentException
from tests.mock_communicator import MockCommunicator
@pytest.fixture

''')
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
1, 1, 1, '', "tests/test_unitytrainers.py", False)
1, 1, 1, '', "tests/test_mlagents.trainers.py", False)
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
open_name = 'unitytrainers.trainer_controller' + '.open'
open_name = 'mlagents.trainers.trainer_controller' + '.open'
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as _:
mock_load.return_value = dummy_config

assert(config['default']['trainer'] == "ppo")
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
open_name = 'unitytrainers.trainer_controller' + '.open'
open_name = 'mlagents.trainers.trainer_controller' + '.open'
1, 1, '', "tests/test_unitytrainers.py",
1, 1, '', "tests/test_mlagents.trainers.py",
False)
# Test for PPO trainer

0
mlagents/__init__.py

110
mlagents/learn.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning
import logging
import os
import multiprocessing
import numpy as np
from docopt import docopt
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.exception import TrainerError
def main():
    """CLI entry point for ML-Agents training: parse options with docopt and
    spawn one training process per requested run."""
    # ASCII-art banner (mangled in this copy of the source; kept verbatim).
    print('''
,m' ,▓▓▓▀▓▓▄ ▓▓▓ ▓▓▌
' ▄▓▓▀ ▓▓▓ ▄▄ ▄▄ ,▄▄ ▄▄▄▄ ,▄▄ ▄▓▓▌▄ ▄▄▄ ,▄▄
^
^ `
'▀▓▓▓▄ ^▓▓▓ ▓▓▓ └▀▀▀▀ ▀▀ ^▀▀ `▀▀ `▀▀ '
,
`
¬`
''')
    logger = logging.getLogger("mlagents.learn")
    # docopt usage string: defines the accepted command line for `learn`.
    _USAGE = '''
Usage:
learn (<env>) [options]
learn [options]
learn --help
Options:
--curriculum=<file> Curriculum json file for environment [default: None].
--keep-checkpoints=<n> How many model checkpoints to keep [default: 5].
--lesson=<n> Start learning from this lesson [default: 0].
--load Whether to load the model or randomly initialize [default: False].
--run-id=<path> The sub-directory name for model and summary statistics [default: ppo].
--num-runs=<n> Number of concurrent training sessions [default: 1].
--save-freq=<n> Frequency at which to save model [default: 50000].
--seed=<n> Random seed used for training [default: -1].
--slow Whether to run the game at training speed [default: False].
--train Whether to train model, or only run inference [default: False].
--worker-id=<n> Number to add to communication port (5005). Used for multi-environment [default: 0].
--docker-target-name=<dt> Docker Volume to store curriculum, executable and model files [default: Empty].
--no-graphics Whether to run the Unity simulator in no-graphics mode [default: False].
'''
    options = docopt(_USAGE)
    logger.info(options)
    # Docker Parameters
    if options['--docker-target-name'] == 'Empty':
        docker_target_name = ''
    else:
        docker_target_name = options['--docker-target-name']
    # General parameters
    run_id = options['--run-id']
    num_runs = int(options['--num-runs'])
    seed = int(options['--seed'])
    load_model = options['--load']
    train_model = options['--train']
    save_freq = int(options['--save-freq'])
    env_path = options['<env>']  # None when training inside the Unity editor
    keep_checkpoints = int(options['--keep-checkpoints'])
    worker_id = int(options['--worker-id'])
    curriculum_file = str(options['--curriculum'])
    # docopt yields the literal string "None" for the unset default.
    if curriculum_file == "None":
        curriculum_file = None
    lesson = int(options['--lesson'])
    fast_simulation = not bool(options['--slow'])
    no_graphics = options['--no-graphics']
    # Constants
    # Assumption that this yaml is present in same dir as this file
    base_path = os.path.dirname(__file__)
    TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))

    def run_training(sub_id, use_seed):
        # One full training session; run in a child process per concurrent run.
        tc = TrainerController(env_path, run_id + "-" + str(sub_id), save_freq, curriculum_file, fast_simulation,
                               load_model, train_model, worker_id + sub_id, keep_checkpoints, lesson, use_seed,
                               docker_target_name, TRAINER_CONFIG_PATH, no_graphics)
        tc.start_learning()

    # Editor training uses a single fixed connection, so concurrency is impossible.
    if env_path is None and num_runs > 1:
        raise TrainerError("It is not possible to launch more than one concurrent training session "
                           "when training from the editor")
    jobs = []
    for i in range(num_runs):
        # seed == -1 means "pick a fresh random seed for each run".
        if seed == -1:
            use_seed = np.random.randint(0, 9999)
        else:
            use_seed = seed
        p = multiprocessing.Process(target=run_training, args=(i, use_seed))
        jobs.append(p)
        p.start()


if __name__ == '__main__':
    main()

203
setup.py


"""A setuptools based setup module.
See:
https://packaging.python.org/en/latest/distributing.html
https://github.com/pypa/sampleproject
"""
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
from os import path
# io.open is needed for projects that support Python 2.7
# It ensures open() defaults to text mode with universal newlines,
# and accepts an argument to specify the text encoding
# Python 3 only projects can skip this import
from io import open
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
# Arguments marked as "Required" below must be included for upload to PyPI.
# Fields marked as "Optional" may be commented out.
setup(
# This is the name of your project. The first time you publish this
# package, this name will be registered for you. It will determine how
# users can install this project, e.g.:
#
# $ pip install sampleproject
#
# And where it will live on PyPI: https://pypi.org/project/sampleproject/
#
# There are some restrictions on what makes a valid project name
# specification here:
# https://packaging.python.org/specifications/core-metadata/#name
name='mlagents', # Required
# Versions should comply with PEP 440:
# https://www.python.org/dev/peps/pep-0440/
#
# For a discussion on single-sourcing the version across setup.py and the
# project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='0.4.0', # Required
# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
# https://packaging.python.org/specifications/core-metadata/#summary
description='Unity Machine Learning Agents', # Required
# This is an optional longer description of your project that represents
# the body of text which users will see when they visit PyPI.
#
# Often, this is the same as your README, so you can just read it in from
# that file directly (as we have already done above)
#
# This field corresponds to the "Description" metadata field:
# https://packaging.python.org/specifications/core-metadata/#description-optional
long_description=long_description, # Optional
# Denotes that our long_description is in Markdown; valid values are
# text/plain, text/x-rst, and text/markdown
#
# Optional if long_description is written in reStructuredText (rst) but
# required for plain-text or Markdown; if unspecified, "applications should
# attempt to render [the long_description] as text/x-rst; charset=UTF-8 and
# fall back to text/plain if it is not valid rst" (see link below)
#
# This field corresponds to the "Description-Content-Type" metadata field:
# https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
long_description_content_type='text/markdown', # Optional (see note above)
# This should be a valid link to your project's main homepage.
#
# This field corresponds to the "Home-Page" metadata field:
# https://packaging.python.org/specifications/core-metadata/#home-page-optional
url='https://github.com/Unity-Technologies/ml-agents', # Optional
# This should be your name or the name of the organization which owns the
# project.
author='Unity Technologies', # Optional
# This should be a valid email address corresponding to the author listed
# above.
author_email='ML-Agents@unity3d.com', # Optional
# Classifiers help users find your project by categorizing it.
#
# For a list of valid classifiers, see https://pypi.org/classifiers/
classifiers=[ # Optional
# How mature is this project? Common values are
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
#'Development Status :: 3 - Alpha',
# Indicate who your project is intended for
'Intended Audience :: Developers',
'Topic :: Software Development :: Build Tools',
# Pick your license as you wish
'License :: OSI Approved :: Apache Software License',
# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
'Programming Language :: Python :: 3.6'
],
# This field adds keywords for your project which will appear on the
# project page. What does your project relate to?
#
# Note that this is a string of words separated by whitespace, not a list.
#keywords='sample setuptools development', # Optional
# You can just specify package directories manually here if your project is
# simple. Or you can use find_packages().
#
# Alternatively, if you just want to distribute a single Python file, use
# the `py_modules` argument instead as follows, which will expect a file
# called `my_module.py` to exist:
#
# py_modules=["my_module"],
#
packages=find_packages(exclude=['tests', 'tests.*', '*.tests', '*.tests.*']), # Required
# This field lists other packages that your project depends on to run.
# Any package you put here will be installed by pip when your project is
# installed, so they must be valid existing projects.
#
# For an analysis of "install_requires" vs pip's requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=[
'tensorflow>=1.7.1',
'Pillow>=4.2.1',
'matplotlib',
'numpy>=1.11.0',
'jupyter',
'pytest>=3.2.2',
'docopt',
'pyyaml',
'protobuf>=3.6.0',
'grpcio>=1.11.0'], # Optional
# List additional groups of dependencies here (e.g. development
# dependencies). Users will be able to install these using the "extras"
# syntax, for example:
#
# $ pip install sampleproject[dev]
#
# Similar to `install_requires` above, these must be valid existing
# projects.
#extras_require={ # Optional
# 'dev': ['check-manifest'],
# 'test': ['coverage'],
#},
# If there are data files included in your packages that need to be
# installed, specify them here.
#
# If using Python 2.6 or earlier, then these have to be included in
# MANIFEST.in as well.
#package_data={ # Optional
# 'sample': ['package_data.dat'],
#},
# Although 'package_data' is the preferred approach, in some case you may
# need to place data files outside of your packages. See:
# http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files
#
# In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
#data_files=[('my_data', ['data/data_file'])], # Optional
# To provide executable scripts, use entry points in preference to the
# "scripts" keyword. Entry points provide cross-platform support and allow
# `pip` to create the appropriate form of executable for the target
# platform.
#
# For example, the following would provide a command called `sample` which
# executes the function `main` from this package when invoked:
entry_points={ # Optional
'console_scripts': [
'learn=mlagents.learn:main',
],
},
# List additional URLs that are relevant to your project as a dict.
#
# This field corresponds to the "Project-URL" metadata fields:
# https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use
#
# Examples listed include a pattern for specifying where the package tracks
# issues, where the source is hosted, where to say thanks to the package
# maintainers, and where to support the project financially. The key is
# what's used to render the link text on PyPI.
#project_urls={ # Optional
# 'Bug Reports': 'https://github.com/pypa/sampleproject/issues',
# 'Funding': 'https://donate.pypi.org',
# 'Say Thanks!': 'http://saythanks.io/to/example',
# 'Source': 'https://github.com/pypa/sampleproject/',
#},
)

0
tests/__init__.py

0
tests/envs/__init__.py

95
tests/envs/test_envs.py


import unittest.mock as mock
import pytest
import struct
import numpy as np
from mlagents.envs import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
BrainInfo
from tests.mock_communicator import MockCommunicator
def test_handles_bad_filename():
    """Constructing an environment from a bogus path must fail loudly."""
    bad_path = ' '
    with pytest.raises(UnityEnvironmentException):
        UnityEnvironment(bad_path)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_initialization(mock_communicator, mock_launcher):
    """A fresh env exposes the mocked brain and rejects stepping before reset."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    environment = UnityEnvironment(' ')
    # Stepping before the first reset is an error.
    with pytest.raises(UnityActionException):
        environment.step([0])
    assert environment.brain_names[0] == 'RealFakeBrain'
    environment.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_reset(mock_communicator, mock_launcher):
    """reset() returns a per-brain dict of BrainInfo with consistent shapes."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()
    assert not env.global_done
    assert isinstance(brain_info, dict)
    info = brain_info['RealFakeBrain']
    assert isinstance(info, BrainInfo)
    assert isinstance(info.visual_observations, list)
    assert isinstance(info.vector_observations, np.ndarray)
    assert len(info.visual_observations) == brain.number_visual_observations
    # One row of vector observations per agent.
    assert info.vector_observations.shape[0] == len(info.agents)
    expected_width = (brain.vector_observation_space_size *
                      brain.num_stacked_vector_observations)
    assert info.vector_observations.shape[1] == expected_width
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_step(mock_communicator, mock_launcher):
    """step() validates action sizes and reports per-agent done flags.

    A correctly sized action vector is accepted; a wrongly sized one — or any
    step after the environment is globally done — raises UnityActionException.
    (Fix: removed a leftover debug ``print`` of the local_done list.)
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    # One continuous action value per action dimension per agent.
    action_size = brain.vector_action_space_size[0]
    brain_info = env.step(
        [0] * action_size * len(brain_info['RealFakeBrain'].agents))
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step(
        [-1] * action_size * len(brain_info['RealFakeBrain'].agents))
    with pytest.raises(UnityActionException):
        # The environment is globally done after the previous step, so any
        # further step must fail.
        env.step([0] * action_size * len(brain_info['RealFakeBrain'].agents))
    env.close()
    assert env.global_done
    assert isinstance(brain_info, dict)
    info = brain_info['RealFakeBrain']
    assert isinstance(info, BrainInfo)
    assert isinstance(info.visual_observations, list)
    assert isinstance(info.vector_observations, np.ndarray)
    assert len(info.visual_observations) == brain.number_visual_observations
    assert info.vector_observations.shape[0] == len(info.agents)
    assert info.vector_observations.shape[1] == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations
    # Per-agent done flags come from MockCommunicator: agent 0 is still
    # running while agent 2 has finished.
    assert not info.local_done[0]
    assert info.local_done[2]
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_close(mock_communicator, mock_launcher):
    """close() unloads the environment and shuts down its communicator."""
    communicator = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = communicator
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert communicator.has_been_closed
# Allow invoking this module directly (``python test_envs.py``) in addition
# to running it through the pytest collector.
if __name__ == '__main__':
    pytest.main()

0
tests/trainers/__init__.py

107
tests/trainers/test_bc.py


import unittest.mock as mock
import pytest
import numpy as np
import tensorflow as tf
from mlagents.trainers.bc.models import BehavioralCloningModel
from mlagents.envs import UnityEnvironment
from tests.mock_communicator import MockCommunicator
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control BC model on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.sample_action, model.policy]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control BC model on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.sample_action, model.action_probs]
            feed = {
                model.batch_size: 2,
                model.dropout_rate: 1.0,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control BC model with two visual inputs."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.sample_action, model.action_probs]
            feed = {
                model.batch_size: 2,
                model.dropout_rate: 1.0,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_visual_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control BC model with two visual inputs."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.sample_action, model.policy]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
# Allow invoking this module directly (``python test_bc.py``) in addition
# to running it through the pytest collector.
if __name__ == '__main__':
    pytest.main()

56
tests/trainers/test_buffer.py


import json
import unittest.mock as mock
import yaml
import pytest
import numpy as np
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.exception import CurriculumError
from mlagents.envs.exception import UnityEnvironmentException
from tests.mock_communicator import MockCommunicator
def assert_array(a, b):
    """Assert that two numpy arrays have identical shape and contents.

    Raises AssertionError when the shapes differ or any element differs.
    """
    assert a.shape == b.shape
    # Vectorized element-wise comparison replaces the previous
    # index-by-index Python loop over the flattened arrays.
    assert (a.flatten() == b.flatten()).all()
def test_buffer():
    """Exercise per-agent buffers, batching, resets, and the update buffer."""
    b = Buffer()
    # Fill four agent buffers with nine steps of synthetic data whose values
    # encode (agent, step, field) so batches are easy to verify.
    for fake_agent_id in range(4):
        for step in range(9):
            base = 100 * fake_agent_id + 10 * step
            b[fake_agent_id]['vector_observation'].append(
                [base + 1, base + 2, base + 3])
            b[fake_agent_id]['action'].append([base + 4, base + 5])
    # Flat batch: last two observations of agent 1.
    a = b[1]['vector_observation'].get_batch(
        batch_size=2, training_length=1, sequential=True)
    assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
    # Sequential batches do not overlap.
    a = b[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=True)
    assert_array(a, np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    # Non-sequential batches slide by one step and may overlap.
    a = b[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=False)
    assert_array(a, np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    b[4].reset_agent()
    assert len(b[4]) == 0
    b.append_update_buffer(3, batch_size=None, training_length=2)
    b.append_update_buffer(2, batch_size=None, training_length=2)
    assert len(b.update_buffer['action']) == 10
    assert np.array(b.update_buffer['action']).shape == (10, 2, 2)

109
tests/trainers/test_meta_curriculum.py


import pytest
from unittest.mock import patch, call, Mock
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import MetaCurriculumError
class MetaCurriculumTest(MetaCurriculum):
    """This class allows us to test MetaCurriculum objects without calling
    MetaCurriculum's __init__ function.
    """
    def __init__(self, brains_to_curriculums):
        # Deliberately skip MetaCurriculum.__init__ (which reads curriculum
        # files from disk) and set the backing field directly.
        # NOTE(review): assumes the parent exposes `brains_to_curriculums`
        # as a property backed by `_brains_to_curriculums` — confirm against
        # mlagents.trainers.meta_curriculum.
        self._brains_to_curriculums = brains_to_curriculums
@pytest.fixture
def default_reset_parameters():
    """Baseline reset parameters shared by the meta-curriculum tests."""
    return {'param1': 1, 'param2': 2, 'param3': 3}
@pytest.fixture
def more_reset_parameters():
    """Extra reset parameters, disjoint from the defaults, for merge tests."""
    return {'param4': 4, 'param5': 5, 'param6': 6}
@pytest.fixture
def progresses():
    """Per-brain training progress values used by lesson-increment tests."""
    return {'Brain1': 0.2, 'Brain2': 0.3}
@patch('mlagents.trainers.Curriculum.get_config', return_value={})
@patch('mlagents.trainers.Curriculum.__init__', return_value=None)
@patch('os.listdir', return_value=['Brain1.json', 'Brain2.json'])
def test_init_meta_curriculum_happy_path(listdir, mock_curriculum_init,
                                         mock_curriculum_get_config,
                                         default_reset_parameters):
    """A folder with two curriculum files yields one curriculum per brain."""
    meta_curriculum = MetaCurriculum('test/', default_reset_parameters)
    assert len(meta_curriculum.brains_to_curriculums) == 2
    for brain_name in ('Brain1', 'Brain2'):
        assert brain_name in meta_curriculum.brains_to_curriculums
    # Each curriculum must be constructed from its own file plus the shared
    # reset parameters.
    expected_calls = [call('test/Brain1.json', default_reset_parameters),
                      call('test/Brain2.json', default_reset_parameters)]
    mock_curriculum_init.assert_has_calls(expected_calls)
@patch('os.listdir', side_effect=NotADirectoryError())
def test_init_meta_curriculum_bad_curriculum_folder_raises_error(
        listdir, default_reset_parameters):
    """An unreadable curriculum folder surfaces as a MetaCurriculumError.

    Fix: ``default_reset_parameters`` was previously referenced as a bare
    module-level name, which passed the fixture *function* object instead of
    the fixture *value*; declaring it as a parameter lets pytest inject the
    actual dict.
    """
    with pytest.raises(MetaCurriculumError):
        MetaCurriculum('test/', default_reset_parameters)
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_set_lesson_nums(curriculum_a, curriculum_b):
    """Assigning lesson_nums fans the values out to each brain's curriculum."""
    brains = {'Brain1': curriculum_a, 'Brain2': curriculum_b}
    meta_curriculum = MetaCurriculumTest(brains)
    meta_curriculum.lesson_nums = {'Brain1': 1, 'Brain2': 3}
    assert curriculum_a.lesson_num == 1
    assert curriculum_b.lesson_num == 3
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_increment_lessons(curriculum_a, curriculum_b, progresses):
    """increment_lessons forwards each brain's progress to its curriculum."""
    brains = {'Brain1': curriculum_a, 'Brain2': curriculum_b}
    meta_curriculum = MetaCurriculumTest(brains)
    meta_curriculum.increment_lessons(progresses)
    curriculum_a.increment_lesson.assert_called_with(0.2)
    curriculum_b.increment_lesson.assert_called_with(0.3)
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_set_all_curriculums_to_lesson_num(curriculum_a, curriculum_b):
    """Setting a single lesson number applies it to every curriculum."""
    brains = {'Brain1': curriculum_a, 'Brain2': curriculum_b}
    meta_curriculum = MetaCurriculumTest(brains)
    meta_curriculum.set_all_curriculums_to_lesson_num(2)
    assert curriculum_a.lesson_num == 2
    assert curriculum_b.lesson_num == 2
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_get_config(curriculum_a, curriculum_b, default_reset_parameters,
                    more_reset_parameters):
    """get_config returns the union of every curriculum's configuration."""
    curriculum_a.get_config.return_value = default_reset_parameters
    curriculum_b.get_config.return_value = default_reset_parameters
    brains = {'Brain1': curriculum_a, 'Brain2': curriculum_b}
    meta_curriculum = MetaCurriculumTest(brains)
    # Identical configs merge to themselves.
    assert meta_curriculum.get_config() == default_reset_parameters
    # When one curriculum diverges, the merged dict contains both configs.
    curriculum_b.get_config.return_value = more_reset_parameters
    merged = dict(default_reset_parameters)
    merged.update(more_reset_parameters)
    assert meta_curriculum.get_config() == merged

285
tests/trainers/test_ppo.py


import unittest.mock as mock
import pytest
import numpy as np
import tensorflow as tf
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.ppo.trainer import discount_rewards
from mlagents.envs import UnityEnvironment
from tests.mock_communicator import MockCommunicator
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control PPO model on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.log_probs, model.value,
                       model.entropy, model.learning_rate]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control PPO model with two visual inputs."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.log_probs, model.value,
                       model.entropy, model.learning_rate]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_visual(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control PPO model with two visual inputs."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control PPO model on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector_rnn(mock_communicator, mock_launcher):
    """Smoke-test the recurrent discrete-control PPO model."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True, m_size=memory_size)
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate, model.memory_out]
            # One sequence of two steps, with zeroed initial memory.
            feed = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.prev_action: [[0], [0]],
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    """Smoke-test the recurrent continuous-control PPO model."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True, m_size=memory_size)
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate, model.memory_out]
            # One sequence of two steps, with zeroed initial memory.
            feed = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    """Smoke-test the curiosity-enabled discrete-control PPO model."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate,
                       model.intrinsic_reward]
            # Curiosity needs the next observation and the taken action.
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                [3, 4, 5, 3, 4, 5]]),
                model.action_holder: [[0], [0]],
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector_curio(mock_communicator, mock_launcher):
    """Smoke-test the curiosity-enabled continuous-control PPO model."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate,
                       model.intrinsic_reward]
            # Curiosity needs the next observation; for continuous control the
            # taken action is fed through model.output.
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                [3, 4, 5, 3, 4, 5]]),
                model.output: [[0.0, 0.0], [0.0, 0.0]],
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_visual_curio(mock_communicator, mock_launcher):
    """Smoke-test the curiosity-enabled discrete PPO model with visuals."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate,
                       model.intrinsic_reward]
            # Curiosity needs current and next observations for every input.
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                [3, 4, 5, 3, 4, 5]]),
                model.action_holder: [[0], [0]],
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
                model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
                model.next_visual_in[1]: np.ones([2, 40, 30, 3]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_visual_curio(mock_communicator, mock_launcher):
    """Smoke-test the curiosity-enabled continuous PPO model with visuals."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            sess.run(tf.global_variables_initializer())
            fetches = [model.output, model.all_log_probs, model.value,
                       model.entropy, model.learning_rate,
                       model.intrinsic_reward]
            # Curiosity needs current and next observations for every input;
            # for continuous control the taken action is fed via model.output.
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                [3, 4, 5, 3, 4, 5]]),
                model.output: [[0.0, 0.0], [0.0, 0.0]],
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
                model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
                model.next_visual_in[1]: np.ones([2, 40, 30, 3]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
def test_rl_functions():
    """discount_rewards yields the standard discounted-return sequence."""
    gamma = 0.9
    rewards = np.array([0.0, 0.0, 0.0, 1.0])
    discounted = discount_rewards(rewards, gamma, 0.0)
    # Returns are gamma^k propagated back from the terminal reward.
    np.testing.assert_array_almost_equal(
        discounted, np.array([0.729, 0.81, 0.9, 1.0]))
# Allow invoking this module directly (``python test_ppo.py``) in addition
# to running it through the pytest collector.
if __name__ == '__main__':
    pytest.main()

22
mlagents/setup.py


#!/usr/bin/env python
from setuptools import setup, Command, find_packages

# Read the pinned runtime dependencies from requirements.txt so they are
# declared in one place.  An explicit encoding avoids depending on the
# platform default when the file contains non-ASCII characters.
with open('requirements.txt', encoding='utf-8') as f:
    required = f.read().splitlines()

setup(name='unityagents',
      version='0.4.0',
      description='Unity Machine Learning Agents',
      license='Apache License 2.0',
      author='Unity Technologies',
      author_email='ML-Agents@unity3d.com',
      url='https://github.com/Unity-Technologies/ml-agents',
      packages=find_packages(),
      # PEP 8: no spaces around '=' in keyword arguments.
      install_requires=required,
      long_description=("Unity Machine Learning Agents allows researchers and developers "
                        "to transform games and simulations created using the Unity Editor into environments "
                        "where intelligent agents can be trained using reinforcement learning, evolutionary "
                        "strategies, or other machine learning methods through a simple to use Python API.")
      )

/mlagents/requirements.txt → /requirements.txt

/mlagents/mlagents/trainers → /mlagents/trainers

/mlagents/mlagents/envs → /mlagents/envs

/mlagents/tests/mock_communicator.py → /tests/mock_communicator.py

/mlagents/tests/trainers/test_curriculum.py → /tests/trainers/test_curriculum.py

/mlagents/tests/trainers/test_trainer_controller.py → /tests/trainers/test_trainer_controller.py

正在加载...
取消
保存