Deric Pang
6 年前
当前提交
634280a6
共有 58 个文件被更改,包括 1108 次插入和 108 次删除
-
2docs/Learning-Environment-Executable.md
-
2mlagents/trainers/buffer.py
-
2mlagents/trainers/curriculum.py
-
2mlagents/trainers/exception.py
-
6mlagents/trainers/meta_curriculum.py
-
2mlagents/trainers/models.py
-
6mlagents/trainers/trainer.py
-
16mlagents/trainers/trainer_controller.py
-
4mlagents/trainers/ppo/models.py
-
10mlagents/trainers/ppo/trainer.py
-
2mlagents/trainers/bc/models.py
-
10mlagents/trainers/bc/trainer.py
-
3mlagents/envs/__init__.py
-
55mlagents/envs/brain.py
-
4mlagents/envs/communicator.py
-
4mlagents/envs/environment.py
-
2mlagents/envs/exception.py
-
6mlagents/envs/rpc_communicator.py
-
4mlagents/envs/socket_communicator.py
-
2mlagents/envs/notebooks/getting-started.ipynb
-
6mlagents/envs/communicator_objects/brain_parameters_proto_pb2.py
-
2mlagents/envs/communicator_objects/brain_type_proto_pb2.py
-
2mlagents/envs/communicator_objects/space_type_proto_pb2.py
-
4mlagents/envs/communicator_objects/unity_input_pb2.py
-
6mlagents/envs/communicator_objects/unity_message_pb2.py
-
4mlagents/envs/communicator_objects/unity_output_pb2.py
-
4mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.py
-
6mlagents/envs/communicator_objects/unity_rl_input_pb2.py
-
2mlagents/envs/communicator_objects/unity_rl_output_pb2.py
-
2mlagents/envs/communicator_objects/unity_to_external_pb2.py
-
2mlagents/envs/communicator_objects/unity_to_external_pb2_grpc.py
-
5tests/mock_communicator.py
-
4tests/trainers/test_curriculum.py
-
36tests/trainers/test_trainer_controller.py
-
0mlagents/__init__.py
-
110mlagents/learn.py
-
203setup.py
-
0tests/__init__.py
-
0tests/envs/__init__.py
-
95tests/envs/test_envs.py
-
0tests/trainers/__init__.py
-
107tests/trainers/test_bc.py
-
56tests/trainers/test_buffer.py
-
109tests/trainers/test_meta_curriculum.py
-
285tests/trainers/test_ppo.py
-
22mlagents/setup.py
-
0/requirements.txt
-
0/mlagents/trainers
-
0/mlagents/envs
-
0/tests/mock_communicator.py
-
0/tests/trainers/test_curriculum.py
-
0/tests/trainers/test_trainer_controller.py
|
|||
from .environment import * |
|||
from .brain import * |
|||
from .exception import * |
|
|||
from typing import Dict |
|||
|
|||
|
|||
class BrainInfo:
    """Snapshot of one simulation step for every agent attached to a brain."""

    def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
                 reward=None, agents=None, local_done=None,
                 vector_action=None, text_action=None, max_reached=None):
        """Store the per-step experience exactly as received from the environment.

        The three observation arguments are required; every other field
        defaults to None when the environment did not report it.
        """
        # Observations, one entry per agent.
        self.visual_observations = visual_observation
        self.vector_observations = vector_observation
        self.text_observations = text_observations
        # Episode bookkeeping.
        self.agents = agents
        self.rewards = reward
        self.local_done = local_done
        self.max_reached = max_reached
        # Recurrent state plus the actions taken on the previous step.
        self.memories = memory
        self.previous_vector_actions = vector_action
        self.previous_text_actions = text_action
|||
|
|||
|
|||
# Mapping from brain name to that brain's current BrainInfo snapshot.
AllBrainInfo = Dict[str, BrainInfo]
|||
|
|||
|
|||
class BrainParameters:
    """Static configuration of a single Unity brain."""

    def __init__(self, brain_name, brain_param):
        """Unpack the brain-parameter dictionary sent by Unity.

        :param brain_name: Name of brain.
        :param brain_param: Dictionary of brain parameters.
        """
        self.brain_name = brain_name
        # Vector observations.
        self.vector_observation_space_size = brain_param["vectorObservationSize"]
        self.num_stacked_vector_observations = brain_param["numStackedVectorObservations"]
        # Visual observations: one resolution entry per camera.
        resolutions = brain_param["cameraResolutions"]
        self.camera_resolutions = resolutions
        self.number_visual_observations = len(resolutions)
        # Actions.
        self.vector_action_space_size = brain_param["vectorActionSize"]
        self.vector_action_descriptions = brain_param["vectorActionDescriptions"]
        # Unity encodes the space type as an index: 0 = discrete, 1 = continuous.
        self.vector_action_space_type = ("discrete", "continuous")[brain_param["vectorActionSpaceType"]]

    def __str__(self):
        return '''Unity brain name: {}
        Number of Visual Observations (per agent): {}
        Vector Observation space size (per agent): {}
        Number of stacked Vector Observation: {}
        Vector Action space type: {}
        Vector Action space size (per agent): {}
        Vector Action descriptions: {}'''.format(self.brain_name,
                                                 str(self.number_visual_observations),
                                                 str(self.vector_observation_space_size),
                                                 str(self.num_stacked_vector_observations),
                                                 self.vector_action_space_type,
                                                 str(self.vector_action_space_size),
                                                 ', '.join(self.vector_action_descriptions))
|
|||
# # Unity ML-Agents Toolkit |
|||
# ## ML-Agent Learning |
|||
|
|||
import logging |
|||
|
|||
import os |
|||
import multiprocessing |
|||
import numpy as np |
|||
from docopt import docopt |
|||
|
|||
|
|||
from mlagents.trainers.trainer_controller import TrainerController |
|||
from mlagents.trainers.exception import TrainerError |
|||
|
|||
def main():
    """Command-line entry point: parse docopt options and launch one or more
    concurrent training sessions (one process each) against a Unity environment.
    """
    print('''

                        ▄▄▄▓▓▓▓
                   ╓▓▓▓▓▓▓█▓▓▓▓▓
              ,▄▄▄m▀▀▀'  ,▓▓▓▀▓▓▄                           ▓▓▓  ▓▓▌
            ▄▓▓▓▀'      ▄▓▓▀  ▓▓▓      ▄▄     ▄▄ ,▄▄ ▄▄▄▄   ,▄▄ ▄▓▓▌▄ ▄▄▄    ,▄▄
          ▄▓▓▓▀        ▄▓▓▀   ▐▓▓▌     ▓▓▌   ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌  ╒▓▓▌
        ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓      ▓▀      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌   ▐▓▓▄ ▓▓▌
        ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄     ▓▓      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌    ▐▓▓▐▓▓
          ^█▓▓▓        ▀▓▓▄   ▐▓▓▌     ▓▓▓▓▄▓▓▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▓▄    ▓▓▓▓`
            '▀▓▓▓▄      ^▓▓▓  ▓▓▓      └▀▀▀▀ ▀▀ ^▀▀    `▀▀ `▀▀   '▀▀    ▐▓▓▌
               ▀▀▀▀▓▄▄▄   ▓▓▓▓▓▓,                                      ▓▓▓▓▀
                   `▀█▓▓▓▓▓▓▓▓▓▌
                        ¬`▀▀▀█▓

        ''')

    logger = logging.getLogger("mlagents.learn")
    # docopt derives the CLI parser from this usage text, so the option lines
    # below (including the [default: ...] markers) are behavior, not just docs.
    _USAGE = '''
    Usage:
      learn (<env>) [options]
      learn [options]
      learn --help

    Options:
      --curriculum=<file>        Curriculum json file for environment [default: None].
      --keep-checkpoints=<n>     How many model checkpoints to keep [default: 5].
      --lesson=<n>               Start learning from this lesson [default: 0].
      --load                     Whether to load the model or randomly initialize [default: False].
      --run-id=<path>            The sub-directory name for model and summary statistics [default: ppo].
      --num-runs=<n>             Number of concurrent training sessions [default: 1].
      --save-freq=<n>            Frequency at which to save model [default: 50000].
      --seed=<n>                 Random seed used for training [default: -1].
      --slow                     Whether to run the game at training speed [default: False].
      --train                    Whether to train model, or only run inference [default: False].
      --worker-id=<n>            Number to add to communication port (5005). Used for multi-environment [default: 0].
      --docker-target-name=<dt>  Docker Volume to store curriculum, executable and model files [default: Empty].
      --no-graphics              Whether to run the Unity simulator in no-graphics mode [default: False].
    '''

    options = docopt(_USAGE)
    logger.info(options)
    # Docker Parameters: the sentinel string 'Empty' means "no docker volume".
    if options['--docker-target-name'] == 'Empty':
        docker_target_name = ''
    else:
        docker_target_name = options['--docker-target-name']

    # General parameters (docopt returns strings; convert numerics explicitly).
    run_id = options['--run-id']
    num_runs = int(options['--num-runs'])
    seed = int(options['--seed'])
    load_model = options['--load']
    train_model = options['--train']
    save_freq = int(options['--save-freq'])
    env_path = options['<env>']
    keep_checkpoints = int(options['--keep-checkpoints'])
    worker_id = int(options['--worker-id'])
    # docopt's default is the literal string "None"; normalize it to Python None.
    curriculum_file = str(options['--curriculum'])
    if curriculum_file == "None":
        curriculum_file = None
    lesson = int(options['--lesson'])
    # --slow inverts into fast_simulation: training speed unless --slow given.
    fast_simulation = not bool(options['--slow'])
    no_graphics = options['--no-graphics']

    # Constants
    # Assumption that this yaml is present in same dir as this file
    base_path = os.path.dirname(__file__)
    TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))

    def run_training(sub_id, use_seed):
        # Per-process worker: builds a TrainerController with a unique run id
        # ("<run_id>-<sub_id>") and worker port offset, then trains.
        # NOTE(review): a nested function as a multiprocessing target is only
        # picklable under the 'fork' start method — confirm Windows ('spawn')
        # support if that platform is targeted.
        tc = TrainerController(env_path, run_id + "-" + str(sub_id), save_freq, curriculum_file, fast_simulation,
                               load_model, train_model, worker_id + sub_id, keep_checkpoints, lesson, use_seed,
                               docker_target_name, TRAINER_CONFIG_PATH, no_graphics)
        tc.start_learning()

    # Editor training has a single communication port, so only one session fits.
    if env_path is None and num_runs > 1:
        raise TrainerError("It is not possible to launch more than one concurrent training session "
                           "when training from the editor")

    jobs = []
    for i in range(num_runs):
        if seed == -1:
            # np.random.randint's upper bound is exclusive: seeds in [0, 9998].
            use_seed = np.random.randint(0, 9999)
        else:
            use_seed = seed
        p = multiprocessing.Process(target=run_training, args=(i, use_seed))
        jobs.append(p)
        p.start()
|||
|
|||
|
|||
# Allow running this module directly as well as via the `learn` console script.
if __name__ == '__main__':
    main()
|
|||
"""A setuptools based setup module. |
|||
|
|||
See: |
|||
https://packaging.python.org/en/latest/distributing.html |
|||
https://github.com/pypa/sampleproject |
|||
""" |
|||
|
|||
# Always prefer setuptools over distutils |
|||
from setuptools import setup, find_packages |
|||
from os import path |
|||
# io.open is needed for projects that support Python 2.7 |
|||
# It ensures open() defaults to text mode with universal newlines, |
|||
# and accepts an argument to specify the text encoding |
|||
# Python 3 only projects can skip this import |
|||
from io import open |
|||
|
|||
here = path.abspath(path.dirname(__file__))

# Use the README as the PyPI long description.
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

# Package metadata; fields follow the core-metadata spec:
# https://packaging.python.org/specifications/core-metadata/
setup(
    # PyPI project name; what users `pip install`.
    name='mlagents',  # Required
    # PEP 440 version string.
    version='0.4.0',  # Required
    description='Unity Machine Learning Agents',  # Required
    long_description=long_description,  # Optional
    # The README read above is Markdown.
    long_description_content_type='text/markdown',  # Optional (see note above)
    url='https://github.com/Unity-Technologies/ml-agents',  # Optional
    author='Unity Technologies',  # Optional
    author_email='ML-Agents@unity3d.com',  # Optional
    # Trove classifiers; full list at https://pypi.org/classifiers/
    classifiers=[  # Optional
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3.6'
    ],
    # Ship every package except the test packages.
    packages=find_packages(exclude=['tests', 'tests.*', '*.tests', '*.tests.*']),  # Required
    # Runtime dependencies installed by pip alongside this package.
    # NOTE(review): pytest looks like a test-only dependency — confirm it is
    # intentionally a runtime requirement.
    install_requires=[
        'tensorflow>=1.7.1',
        'Pillow>=4.2.1',
        'matplotlib',
        'numpy>=1.11.0',
        'jupyter',
        'pytest>=3.2.2',
        'docopt',
        'pyyaml',
        'protobuf>=3.6.0',
        'grpcio>=1.11.0'],  # Optional
    # Installs a cross-platform `learn` command that runs mlagents.learn:main.
    entry_points={  # Optional
        'console_scripts': [
            'learn=mlagents.learn:main',
        ],
    },
)
|
|||
import unittest.mock as mock |
|||
import pytest |
|||
import struct |
|||
|
|||
import numpy as np |
|||
|
|||
from mlagents.envs import UnityEnvironment, UnityEnvironmentException, UnityActionException, \ |
|||
BrainInfo |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
def test_handles_bad_filename():
    """An unresolvable executable path must raise UnityEnvironmentException."""
    with pytest.raises(UnityEnvironmentException):
        UnityEnvironment(' ')
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_initialization(mock_communicator, mock_launcher):
    """Construction via a mocked communicator exposes the fake brain, and
    stepping before the first reset raises UnityActionException."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    # Stepping an un-reset environment is an error.
    with pytest.raises(UnityActionException):
        env.step([0])
    assert env.brain_names[0] == 'RealFakeBrain'
    env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_reset(mock_communicator, mock_launcher):
    """reset() returns a brain-name -> BrainInfo dict whose observation
    shapes agree with the brain's declared parameters."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()
    assert not env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
    # One observation row per agent ...
    assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
        len(brain_info['RealFakeBrain'].agents)
    # ... with stacked observations concatenated along the feature axis.
    assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_step(mock_communicator, mock_launcher):
    """step() accepts correctly sized actions, rejects wrongly sized ones and
    stepping after the episode is globally done, and returns BrainInfo whose
    shapes agree with the brain parameters.

    Fix: removed a leftover debug print of local_done that polluted test output.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    # A flat action vector of size (action size * number of agents) is valid.
    brain_info = env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    # A wrongly sized action vector is rejected.
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step([-1] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    # The -1 actions end the episode; stepping a done environment is an error.
    with pytest.raises(UnityActionException):
        env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    env.close()
    assert env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
    # One observation row per agent ...
    assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
        len(brain_info['RealFakeBrain'].agents)
    # ... with stacked observations concatenated along the feature axis.
    assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations

    assert not brain_info['RealFakeBrain'].local_done[0]
    assert brain_info['RealFakeBrain'].local_done[2]
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_close(mock_communicator, mock_launcher):
    """close() unloads the environment and closes its communicator."""
    comm = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
|||
|
|||
|
|||
# Allow running this test module directly without the pytest CLI.
if __name__ == '__main__':
    pytest.main()
|
|||
import unittest.mock as mock |
|||
import pytest |
|||
|
|||
import numpy as np |
|||
import tensorflow as tf |
|||
|
|||
from mlagents.trainers.bc.models import BehavioralCloningModel |
|||
from mlagents.envs import UnityEnvironment |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control BC model: build the graph from the
    mocked brain and run one forward pass on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            # Batch of 2 agents, vector obs of size 3 stacked twice (6 values).
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control BC model: build the graph from the
    mocked brain and run one forward pass on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.action_probs]
            # dropout_rate of 1.0 keeps every unit (inference mode).
            feed_dict = {model.batch_size: 2,
                         model.dropout_rate: 1.0,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control BC model with two camera inputs in
    addition to vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.action_probs]
            # Two visual inputs, each a batch of 2 RGB frames of 40x30.
            feed_dict = {model.batch_size: 2,
                         model.dropout_rate: 1.0,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.visual_in[0]: np.ones([2, 40, 30, 3]),
                         model.visual_in[1]: np.ones([2, 40, 30, 3])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_visual_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control BC model with two camera inputs in
    addition to vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            # Two visual inputs, each a batch of 2 RGB frames of 40x30.
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.visual_in[0]: np.ones([2, 40, 30, 3]),
                         model.visual_in[1]: np.ones([2, 40, 30, 3])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
# Allow running this test module directly without the pytest CLI.
if __name__ == '__main__':
    pytest.main()
|
|||
import json |
|||
import unittest.mock as mock |
|||
|
|||
import yaml |
|||
import pytest |
|||
import numpy as np |
|||
|
|||
from mlagents.trainers.trainer_controller import TrainerController |
|||
from mlagents.trainers.buffer import Buffer |
|||
from mlagents.trainers.ppo.trainer import PPOTrainer |
|||
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer |
|||
from mlagents.trainers.curriculum import Curriculum |
|||
from mlagents.trainers.exception import CurriculumError |
|||
from mlagents.envs.exception import UnityEnvironmentException |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
def assert_array(a, b):
    """Assert that two numpy arrays are identical in shape and contents.

    :param a: First array.
    :param b: Second array.
    :raises AssertionError: If the shapes differ or any element differs.
    """
    assert a.shape == b.shape
    # Pairwise comparison over flattened elements replaces the original
    # index-by-range loop (same semantics, idiomatic iteration).
    for x, y in zip(a.flatten(), b.flatten()):
        assert x == y
|||
|
|||
|
|||
def test_buffer():
    """Exercise Buffer: per-agent appends, batched retrieval (sequential and
    overlapping), agent reset, and accumulation into the update buffer."""
    b = Buffer()
    # Fill 4 fake agents with 9 steps each; each value encodes agent/step/field
    # as 100*agent + 10*step + field so expected batches are easy to read off.
    for fake_agent_id in range(4):
        for step in range(9):
            b[fake_agent_id]['vector_observation'].append(
                [100 * fake_agent_id + 10 * step + 1,
                 100 * fake_agent_id + 10 * step + 2,
                 100 * fake_agent_id + 10 * step + 3]
            )
            b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4,
                                               100 * fake_agent_id + 10 * step + 5])
    # NOTE(review): expected values indicate sequential=True returns the most
    # recent steps in order — confirm against Buffer.get_batch.
    a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=1, sequential=True)
    assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
    a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True)
    assert_array(a, np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    # sequential=False yields overlapping windows (note the shared rows below).
    a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=False)
    assert_array(a, np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    # Resetting an agent empties its local buffer.
    b[4].reset_agent()
    assert len(b[4]) == 0
    b.append_update_buffer(3,
                           batch_size=None, training_length=2)
    b.append_update_buffer(2,
                           batch_size=None, training_length=2)
    assert len(b.update_buffer['action']) == 10
    assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
|
|||
import pytest |
|||
from unittest.mock import patch, call, Mock |
|||
|
|||
from mlagents.trainers.meta_curriculum import MetaCurriculum |
|||
from mlagents.trainers.exception import MetaCurriculumError |
|||
|
|||
|
|||
class MetaCurriculumTest(MetaCurriculum):
    """This class allows us to test MetaCurriculum objects without calling
    MetaCurriculum's __init__ function.
    """
    def __init__(self, brains_to_curriculums):
        # Bypass the parent's curriculum-folder scanning; tests inject mock
        # curriculums directly.
        self._brains_to_curriculums = brains_to_curriculums
|||
|
|||
|
|||
@pytest.fixture
def default_reset_parameters():
    """Baseline reset-parameter mapping shared by the meta-curriculum tests."""
    return dict(param1=1, param2=2, param3=3)
|||
|
|||
|
|||
@pytest.fixture
def more_reset_parameters():
    """Additional reset parameters used to test config merging."""
    return dict(param4=4, param5=5, param6=6)
|||
|
|||
|
|||
@pytest.fixture
def progresses():
    """Per-brain training progress values for lesson-increment tests."""
    return dict(Brain1=0.2, Brain2=0.3)
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum.get_config', return_value={})
@patch('mlagents.trainers.Curriculum.__init__', return_value=None)
@patch('os.listdir', return_value=['Brain1.json', 'Brain2.json'])
def test_init_meta_curriculum_happy_path(listdir, mock_curriculum_init,
                                         mock_curriculum_get_config,
                                         default_reset_parameters):
    """MetaCurriculum builds one Curriculum per json file in the folder,
    keyed by brain name (filename without extension)."""
    meta_curriculum = MetaCurriculum('test/', default_reset_parameters)

    assert len(meta_curriculum.brains_to_curriculums) == 2

    assert 'Brain1' in meta_curriculum.brains_to_curriculums
    assert 'Brain2' in meta_curriculum.brains_to_curriculums

    # Each Curriculum is constructed with its json path and the shared defaults.
    calls = [call('test/Brain1.json', default_reset_parameters),
             call('test/Brain2.json', default_reset_parameters)]

    mock_curriculum_init.assert_has_calls(calls)
|||
|
|||
|
|||
@patch('os.listdir', side_effect=NotADirectoryError())
def test_init_meta_curriculum_bad_curriculum_folder_raises_error(
        listdir, default_reset_parameters):
    """A curriculum folder that cannot be listed surfaces as MetaCurriculumError.

    Fix: `default_reset_parameters` was referenced without being declared as a
    parameter, so the module-level fixture *function* object — not the fixture's
    dict value — was passed to MetaCurriculum. Declaring it lets pytest inject
    the fixture value.
    """
    with pytest.raises(MetaCurriculumError):
        MetaCurriculum('test/', default_reset_parameters)
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_set_lesson_nums(curriculum_a, curriculum_b):
    """Assigning the lesson_nums dict forwards each lesson number to the
    matching brain's curriculum."""
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    meta_curriculum.lesson_nums = {'Brain1' : 1, 'Brain2' : 3}

    assert curriculum_a.lesson_num == 1
    assert curriculum_b.lesson_num == 3
|||
|
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_increment_lessons(curriculum_a, curriculum_b, progresses):
    """increment_lessons() forwards each brain's progress value to its
    curriculum's increment_lesson()."""
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    meta_curriculum.increment_lessons(progresses)

    curriculum_a.increment_lesson.assert_called_with(0.2)
    curriculum_b.increment_lesson.assert_called_with(0.3)
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_set_all_curriculums_to_lesson_num(curriculum_a, curriculum_b):
    """set_all_curriculums_to_lesson_num() applies one lesson number to every
    brain's curriculum."""
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    meta_curriculum.set_all_curriculums_to_lesson_num(2)

    assert curriculum_a.lesson_num == 2
    assert curriculum_b.lesson_num == 2
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_get_config(curriculum_a, curriculum_b, default_reset_parameters,
                    more_reset_parameters):
    """get_config() merges every curriculum's config into one dict; when the
    configs differ, later entries extend the earlier ones."""
    curriculum_a.get_config.return_value = default_reset_parameters
    curriculum_b.get_config.return_value = default_reset_parameters
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    # Identical configs merge to themselves.
    assert meta_curriculum.get_config() == default_reset_parameters

    curriculum_b.get_config.return_value = more_reset_parameters

    # Differing configs merge to the union of both parameter sets.
    new_reset_parameters = dict(default_reset_parameters)
    new_reset_parameters.update(more_reset_parameters)

    assert meta_curriculum.get_config() == new_reset_parameters
|
|||
import unittest.mock as mock |
|||
import pytest |
|||
|
|||
import numpy as np |
|||
import tensorflow as tf |
|||
|
|||
from mlagents.trainers.ppo.models import PPOModel |
|||
from mlagents.trainers.ppo.trainer import discount_rewards |
|||
from mlagents.envs import UnityEnvironment |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector(mock_communicator, mock_launcher):
    """Builds a continuous-action, vector-observation PPO model against a
    mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    """Builds a continuous-action PPO model with two visual observations
    against a mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_visual(mock_communicator, mock_launcher):
    """Builds a discrete-action PPO model with two visual observations
    against a mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    """Builds a discrete-action, vector-observation PPO model against a
    mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector_rnn(mock_communicator, mock_launcher):
    """Builds a recurrent, discrete-action PPO model and runs one forward
    pass over a length-2 sequence with an all-zero initial memory."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=0)
        env = UnityEnvironment(' ')

        memory_size = 128
        model = PPOModel(env.brains["RealFakeBrain"],
                         use_recurrent=True, m_size=memory_size)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.memory_out]
        # One sequence of length 2, with the previous action fed for the
        # discrete recurrent model.
        sess.run(fetches, feed_dict={model.batch_size: 1,
                                     model.sequence_length: 2,
                                     model.prev_action: [[0], [0]],
                                     model.memory_in: np.zeros((1, memory_size)),
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    """Builds a recurrent, continuous-action PPO model and runs one forward
    pass over a length-2 sequence with an all-zero initial memory."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')

        memory_size = 128
        model = PPOModel(env.brains["RealFakeBrain"],
                         use_recurrent=True, m_size=memory_size)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.memory_out]
        sess.run(fetches, feed_dict={model.batch_size: 1,
                                     model.sequence_length: 2,
                                     model.memory_in: np.zeros((1, memory_size)),
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    """Builds a discrete-action PPO model with the curiosity module enabled
    and runs one forward pass, fetching the intrinsic reward."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        # Curiosity needs the next observation and the taken action as well.
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.action_holder: [[0], [0]]})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector_curio(mock_communicator, mock_launcher):
    """Builds a continuous-action PPO model with the curiosity module
    enabled and runs one forward pass, fetching the intrinsic reward."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        # For continuous control the sampled action output itself is
        # overridden in the feed instead of an action_holder placeholder.
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.output: [[0.0, 0.0], [0.0, 0.0]]})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_visual_curio(mock_communicator, mock_launcher):
    """Builds a discrete-action PPO model with curiosity and two visual
    observations, then runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        # Curiosity also consumes the next visual observations.
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.action_holder: [[0], [0]],
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs,
                                     model.next_visual_in[0]: camera_obs,
                                     model.next_visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_visual_curio(mock_communicator, mock_launcher):
    """Builds a continuous-action PPO model with curiosity and two visual
    observations, then runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.output: [[0.0, 0.0], [0.0, 0.0]],
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs,
                                     model.next_visual_in[0]: camera_obs,
                                     model.next_visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
def test_rl_functions():
    """discount_rewards should return gamma-discounted returns for a
    terminal reward of 1.0 (0.9**3, 0.9**2, 0.9, 1.0)."""
    gamma = 0.9
    rewards = np.array([0.0, 0.0, 0.0, 1.0])
    expected = np.array([0.729, 0.81, 0.9, 1.0])
    returns = discount_rewards(rewards, gamma, 0.0)
    np.testing.assert_array_almost_equal(returns, expected)
|||
|
|||
|
|||
# Allow running this test module directly (outside of a pytest invocation).
if __name__ == '__main__':
    pytest.main()
|
|||
#!/usr/bin/env python |
|||
|
|||
from setuptools import setup, Command, find_packages |
|||
|
|||
|
|||
# Read runtime dependencies from requirements.txt so the install metadata
# stays in sync with the pinned requirements file.
with open('requirements.txt') as f:
    required = f.read().splitlines()

setup(
    name='unityagents',
    version='0.4.0',
    description='Unity Machine Learning Agents',
    license='Apache License 2.0',
    author='Unity Technologies',
    author_email='ML-Agents@unity3d.com',
    url='https://github.com/Unity-Technologies/ml-agents',
    packages=find_packages(),
    install_requires=required,
    long_description=("Unity Machine Learning Agents allows researchers and developers "
                      "to transform games and simulations created using the Unity Editor into environments "
                      "where intelligent agents can be trained using reinforcement learning, evolutionary "
                      "strategies, or other machine learning methods through a simple to use Python API."),
)
撰写
预览
正在加载...
取消
保存
Reference in new issue