vincentpierre
5 年前
当前提交
6ddfe74f
共有 50 个文件被更改,包括 1203 次插入 和 654 次删除
-
1.github/ISSUE_TEMPLATE/bug_report.md
-
23.pre-commit-config.yaml
-
11.yamato/com.unity.ml-agents-test.yml
-
22.yamato/gym-interface-test.yml
-
14.yamato/protobuf-generation-test.yml
-
21.yamato/python-ll-api-test.yml
-
20.yamato/standalone-build-test.yml
-
25.yamato/training-int-tests.yml
-
23Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
-
35README.md
-
3com.unity.ml-agents/CHANGELOG.md
-
20com.unity.ml-agents/Editor/BrainParametersDrawer.cs
-
18com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
-
13com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
-
5docs/Learning-Environment-Create-New.md
-
3docs/Learning-Environment-Design-Agents.md
-
2docs/Learning-Environment-Executable.md
-
4docs/Migrating.md
-
22docs/Python-API.md
-
338docs/Training-ML-Agents.md
-
8docs/Using-Tensorboard.md
-
21gym-unity/gym_unity/envs/__init__.py
-
5gym-unity/gym_unity/tests/test_gym.py
-
50ml-agents-envs/mlagents_envs/base_env.py
-
293ml-agents-envs/mlagents_envs/environment.py
-
54ml-agents-envs/mlagents_envs/tests/test_envs.py
-
118ml-agents-envs/mlagents_envs/tests/test_side_channel.py
-
14ml-agents/mlagents/trainers/learn.py
-
25ml-agents/mlagents/trainers/models.py
-
1ml-agents/mlagents/trainers/policy/tf_policy.py
-
34ml-agents/mlagents/trainers/ppo/optimizer.py
-
4ml-agents/mlagents/trainers/ppo/trainer.py
-
12ml-agents/mlagents/trainers/sac/optimizer.py
-
8ml-agents/mlagents/trainers/simple_env_manager.py
-
10ml-agents/mlagents/trainers/subprocess_env_manager.py
-
12ml-agents/mlagents/trainers/tests/simple_test_envs.py
-
5ml-agents/mlagents/trainers/tests/test_learn.py
-
4ml-agents/mlagents/trainers/tests/test_nn_policy.py
-
4ml-agents/mlagents/trainers/tests/test_simple_rl.py
-
6ml-agents/tests/yamato/check_coverage_percent.py
-
4ml-agents/tests/yamato/scripts/run_llapi.py
-
21ml-agents/tests/yamato/training_int_tests.py
-
28ml-agents/tests/yamato/yamato_utils.py
-
6utils/validate_versions.py
-
37com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
-
95docs/Versioning.md
-
108ml-agents-envs/mlagents_envs/env_utils.py
-
81ml-agents-envs/mlagents_envs/side_channel/side_channel_manager.py
-
64ml-agents-envs/mlagents_envs/tests/test_env_utils.py
-
102ml-agents-envs/mlagents_envs/tests/test_steps.py
|
|||
using NUnit.Framework; |
|||
using UnityEngine; |
|||
using Unity.MLAgents.Policies; |
|||
using Unity.MLAgents.Demonstrations; |
|||
using Unity.MLAgents.Sensors; |
|||
|
|||
namespace Unity.MLAgents.Tests
{
    /// <summary>
    /// Smoke tests that convert default-constructed instances to their proto
    /// counterparts. Each test only performs the conversion calls; there are
    /// no explicit assertions on the produced messages.
    /// </summary>
    [TestFixture]
    public class GrpcExtensionsTests
    {
        /// <summary>
        /// Converts a default <see cref="BrainParameters"/> instance to proto.
        /// </summary>
        [Test]
        public void TestDefaultBrainParametersToProto()
        {
            var defaultBrainParameters = new BrainParameters();

            defaultBrainParameters.ToProto("foo", false);
        }

        /// <summary>
        /// Converts a default <see cref="AgentInfo"/> instance to both of its
        /// proto forms.
        /// </summary>
        [Test]
        public void TestDefaultAgentInfoToProto()
        {
            var defaultAgentInfo = new AgentInfo();

            defaultAgentInfo.ToInfoActionPairProto();
            defaultAgentInfo.ToAgentInfoProto();
        }

        /// <summary>
        /// Converts a default <see cref="DemonstrationMetaData"/> instance to proto.
        /// </summary>
        [Test]
        public void TestDefaultDemonstrationMetaDataToProto()
        {
            var defaultMetaData = new DemonstrationMetaData();

            defaultMetaData.ToProto();
        }
    }
}
|
|||
# ML-Agents Versioning |
|||
|
|||
## Context |
|||
As the ML-Agents project evolves into a more mature product, we want to communicate the process |
|||
we use to version our packages and the data that flows into, through, and out of them clearly. |
|||
Our project now has four packages (1 Unity, 3 Python) along with artifacts that are produced as |
|||
well as consumed. This document covers the versioning for these packages and artifacts. |
|||
|
|||
## GitHub Releases |
|||
Up until now, all packages were in lockstep in terms of versioning. As a result, the GitHub releases |
|||
were tagged with the version of all those packages (e.g. v0.15.0, v0.15.1) and labeled accordingly. |
|||
With the decoupling of package versions, we now need to revisit our GitHub release tagging. |
|||
The proposal is that we move towards an integer release numbering for our repo and each such |
|||
release will call out specific version upgrades of each package. For instance, with |
|||
[the April 30th release](https://github.com/Unity-Technologies/ml-agents/releases/tag/release_1), |
|||
we will have: |
|||
- GitHub Release 1 (branch name: *release_1_branch*) |
|||
- com.unity.ml-agents release 1.0.0 |
|||
- ml-agents release 0.16.0 |
|||
- ml-agents-envs release 0.16.0 |
|||
- gym-unity release 0.16.0 |
|||
|
|||
Our release cadence will not be affected by these versioning changes. We will keep having |
|||
monthly releases to fix bugs and release new features. |
|||
|
|||
## Packages |
|||
All of the software packages and their generated artifacts will be versioned. Any automation |
|||
tools will not be versioned. |
|||
|
|||
### Unity package |
|||
Package name: com.unity.ml-agents |
|||
- Versioned following [Semantic Versioning Guidelines](https://www.semver.org) |
|||
- This package consumes an artifact of the training process: the `.nn` file. These files |
|||
are integer versioned and currently at version 2. The com.unity.ml-agents package |
|||
will need to support the version of `.nn` files which existed at its 1.0.0 release. |
|||
For example, consider that com.unity.ml-agents is at version 1.0.0 and the NN files |
|||
are at version 2. If the NN files change to version 3, the next release of |
|||
com.unity.ml-agents at version 1.1.0 guarantees it will be able to read both of these |
|||
formats. If the NN files were to change to version 4 and com.unity.ml-agents to |
|||
version 2.0.0, support for NN versions 2 and 3 could be dropped for com.unity.ml-agents |
|||
version 2.0.0. |
|||
- This package produces one artifact, the `.demo` files. These files will have integer |
|||
versioning. This means their version will increment by 1 at each change. The |
|||
com.unity.ml-agents package must be backward compatible with version changes |
|||
that occur between minor versions. |
|||
- To summarize, the artifacts produced and consumed by com.unity.ml-agents are guaranteed |
|||
to be supported for 1.x.x versions of com.unity.ml-agents. We intend to provide stability |
|||
for our users by moving to a 1.0.0 release of com.unity.ml-agents. |
|||
|
|||
|
|||
### Python Packages |
|||
Package names: ml-agents / ml-agents-envs / gym-unity |
|||
- The python packages remain in "Beta." This means that breaking changes to the public |
|||
API of the python packages can be made without requiring a major version bump. |
|||
Historically, the python and C# packages were in version lockstep. This is no longer |
|||
the case. The python packages will remain in lockstep with each other for now, while the |
|||
C# package will follow its own versioning as is appropriate. However, the python package |
|||
versions may diverge in the future. |
|||
- While the python packages will remain in Beta for now, we acknowledge that the most |
|||
heavily used portion of our python interface is the `mlagents-learn` CLI and strive |
|||
to make this part of our API backward compatible. We are actively working on this and |
|||
expect to have a stable CLI in the next few weeks. |
|||
|
|||
## Communicator |
|||
|
|||
Packages which communicate: com.unity.ml-agents / ml-agents-envs |
|||
|
|||
Another entity of the ML-Agents Toolkit that requires versioning is the communication layer |
|||
between C# and Python, which will also follow semantic versioning. This guarantees a level of |
|||
backward compatibility between different versions of C# and Python packages which communicate. |
|||
Any Communicator version 1.x.x of the Unity package should be compatible with any 1.x.x |
|||
Communicator Version in Python. |
|||
|
|||
An RLCapabilities struct keeps track of which features exist. This struct is passed from C# to |
|||
Python, and another from Python to C#. With this feature level granularity, we can notify users |
|||
more specifically about feature limitations based on what's available in both C# and Python. |
|||
These notifications will be logged to the python terminal, or to the Unity Editor Console. |
|||
|
|||
|
|||
## Side Channels |
|||
|
|||
The communicator is what manages data transfer between Unity and Python for the core |
|||
training loop. Side Channels are another means of data transfer between Unity and Python. |
|||
Side Channels are not versioned, but have been designed to support backward compatibility |
|||
for what they are. As of today, we provide 4 side channels: |
|||
- FloatProperties: shared float data between Unity - Python (bidirectional) |
|||
- RawBytes: raw data that can be sent Unity - Python (bidirectional) |
|||
- EngineConfig: a set of numeric fields in a pre-defined order sent from Python to Unity |
|||
- Stats: (name, value, agg) messages sent from Unity to Python |
|||
|
|||
Aside from the specific implementations of side channels we provide (and use ourselves), |
|||
the Side Channel interface is made available for users to create their own custom side |
|||
channels. As such, we guarantee that the built-in SideChannel interface between Unity and |
|||
Python is backward compatible in packages that share the same major version. |
|||
|
|
|||
import glob |
|||
import os |
|||
import subprocess |
|||
from sys import platform |
|||
from typing import Optional, List |
|||
from mlagents_envs.logging_util import get_logger |
|||
from mlagents_envs.exception import UnityEnvironmentException |
|||
|
|||
|
|||
def get_platform():
    """
    Reports the platform identifier of the running interpreter.
    :return: the value imported from sys.platform (e.g. linux, darwin or win32)
    """
    return platform
|||
|
|||
|
|||
def validate_environment_path(env_path: str) -> Optional[str]:
    """
    Resolves a user supplied environment path to the executable to launch.

    The known executable extensions (.app, .exe, .x86_64, .x86) are removed
    from env_path, then a platform specific list of glob patterns is tried in
    order and the first match is returned.
    :param env_path: The path to the executable
    :return: The first matching launch path, or None when nothing matches
    """
    base_path = env_path.strip()
    for extension in (".app", ".exe", ".x86_64", ".x86"):
        base_path = base_path.replace(extension, "")
    true_filename = os.path.basename(os.path.normpath(base_path))
    get_logger(__name__).debug("The true file name is {}".format(true_filename))

    # Give up early when neither the bare path nor any "<path>.<ext>" exists.
    if not (glob.glob(base_path) or glob.glob(base_path + ".*")):
        return None

    cwd = os.getcwd()
    current_platform = get_platform()

    # For every platform the path relative to the working directory is tried
    # before the path exactly as given.
    if current_platform in ("linux", "linux2"):
        patterns = [
            os.path.join(cwd, base_path) + ".x86_64",
            os.path.join(cwd, base_path) + ".x86",
            base_path + ".x86_64",
            base_path + ".x86",
        ]
    elif current_platform == "darwin":
        bundle = base_path + ".app"
        # Look for a binary named like the bundle first, then for any file
        # inside Contents/MacOS.
        patterns = [
            os.path.join(cwd, bundle, "Contents", "MacOS", true_filename),
            os.path.join(bundle, "Contents", "MacOS", true_filename),
            os.path.join(cwd, bundle, "Contents", "MacOS", "*"),
            os.path.join(bundle, "Contents", "MacOS", "*"),
        ]
    elif current_platform == "win32":
        patterns = [os.path.join(cwd, base_path + ".exe"), base_path + ".exe"]
    else:
        patterns = []

    for pattern in patterns:
        candidates = glob.glob(pattern)
        if len(candidates) > 0:
            return candidates[0]
    return None
|||
|
|||
|
|||
def launch_executable(file_name: str, args: List[str]) -> subprocess.Popen:
    """
    Starts a Unity executable as a child process.
    :param file_name: the name of the executable
    :param args: List of string that will be appended to the launch path as
    command line arguments.
    :return: the subprocess.Popen handle of the launched process
    :raises UnityEnvironmentException: when file_name does not resolve to an
    executable, or when launching it fails with a PermissionError
    """
    launch_string = validate_environment_path(file_name)
    if launch_string is None:
        raise UnityEnvironmentException(
            f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
        )

    get_logger(__name__).debug("This is the launch string {}".format(launch_string))
    command = [launch_string] + args
    try:
        # start_new_session=True keeps signals aimed at the parent python
        # process (e.g. SIGINT from a keyboard interrupt) from reaching the
        # new process on POSIX platforms, giving the environment a chance to
        # shut down. This may be undesirable in some cases; if so, a
        # command-line toggle can be added. On Windows the CTRL_C signal is
        # still sent.
        return subprocess.Popen(command, start_new_session=True)
    except PermissionError as perm:
        # Most likely the file is missing read or execute permissions.
        raise UnityEnvironmentException(
            f"Error when trying to launch environment - make sure "
            f"permissions are set correctly. For example "
            f'"chmod -R 755 {launch_string}"'
        ) from perm
|
|||
import uuid |
|||
import struct |
|||
from typing import Dict, Optional, List |
|||
from mlagents_envs.side_channel import SideChannel, IncomingMessage |
|||
from mlagents_envs.exception import UnityEnvironmentException |
|||
from mlagents_envs.logging_util import get_logger |
|||
|
|||
|
|||
class SideChannelManager:
    """
    Dispatches packed side channel data to the registered side channels and
    packs their queued outgoing messages, using the channel id as the key.
    """

    def __init__(self, side_channels: Optional[List[SideChannel]] = None) -> None:
        """
        :param side_channels: The side channels to register. None (the
        default) registers no side channel.
        """
        self._side_channels_dict = self._get_side_channels_dict(side_channels)

    def process_side_channel_message(self, data: bytes) -> None:
        """
        Separates the data received from Unity into individual messages for each
        registered side channel and calls on_message_received on them.
        :param data: The packed message sent by Unity
        :raises UnityEnvironmentException: when a message cannot be decoded or
        is shorter than its declared length
        """
        offset = 0
        while offset < len(data):
            try:
                # Each message is laid out as: 16 bytes of little-endian UUID,
                # a little-endian 32 bit length, then the payload itself.
                channel_id = uuid.UUID(bytes_le=bytes(data[offset : offset + 16]))
                offset += 16
                message_len, = struct.unpack_from("<i", data, offset)
                offset = offset + 4
                message_data = data[offset : offset + message_len]
                offset = offset + message_len
            except (struct.error, ValueError, IndexError):
                raise UnityEnvironmentException(
                    "There was a problem reading a message in a SideChannel. "
                    "Please make sure the version of MLAgents in Unity is "
                    "compatible with the Python version."
                )
            # Slicing never raises on truncated data, so the payload length
            # has to be compared against the declared length explicitly.
            if len(message_data) != message_len:
                raise UnityEnvironmentException(
                    "The message received by the side channel {0} was "
                    "unexpectedly short. Make sure your Unity Environment "
                    "sending side channel data properly.".format(channel_id)
                )
            if channel_id in self._side_channels_dict:
                incoming_message = IncomingMessage(message_data)
                self._side_channels_dict[channel_id].on_message_received(
                    incoming_message
                )
            else:
                get_logger(__name__).warning(
                    f"Unknown side channel data received. Channel type: {channel_id}."
                )

    def generate_side_channel_messages(self) -> bytearray:
        """
        Gathers the messages that the registered side channels will send to Unity
        and combines them into a single message ready to be sent.
        :return: The concatenation of every queued message, each one prefixed
        with its channel id and length
        """
        result = bytearray()
        for channel_id, channel in self._side_channels_dict.items():
            for message in channel.message_queue:
                result += channel_id.bytes_le
                result += struct.pack("<i", len(message))
                result += message
            # The queue is emptied once its messages have been packed.
            channel.message_queue = []
        return result

    @staticmethod
    def _get_side_channels_dict(
        side_channels: Optional[List[SideChannel]]
    ) -> Dict[uuid.UUID, SideChannel]:
        """
        Converts a list of side channels into a dictionary of channel_id to SideChannel
        :param side_channels: The list of side channels.
        :return: The dictionary, empty when side_channels is None
        :raises UnityEnvironmentException: when two side channels share a channel id
        """
        side_channels_dict: Dict[uuid.UUID, SideChannel] = {}
        if side_channels is not None:
            for _sc in side_channels:
                if _sc.channel_id in side_channels_dict:
                    raise UnityEnvironmentException(
                        f"There cannot be two side channels with "
                        f"the same channel id {_sc.channel_id}."
                    )
                side_channels_dict[_sc.channel_id] = _sc
        return side_channels_dict
|
|||
from unittest import mock |
|||
import pytest |
|||
from mlagents_envs.env_utils import validate_environment_path, launch_executable |
|||
from mlagents_envs.exception import UnityEnvironmentException |
|||
from mlagents_envs.logging_util import ( |
|||
set_log_level, |
|||
get_logger, |
|||
INFO, |
|||
ERROR, |
|||
FATAL, |
|||
CRITICAL, |
|||
DEBUG, |
|||
) |
|||
|
|||
|
|||
def mock_glob_method(path):
    """
    Stand-in for glob.glob used by the tests.
    Given a path input, returns a list of candidates: the platform name
    matching the executable extension found in the path, ["Any"] for a
    wildcard path, and an empty list otherwise.
    :param path: The glob pattern being looked up
    """
    if ".x86" in path:
        return ["linux"]
    if ".app" in path:
        return ["darwin"]
    if ".exe" in path:
        return ["win32"]
    if "*" in path:
        # Return a list here as well, matching the other branches.
        return ["Any"]
    return []
|||
|
|||
|
|||
@mock.patch("sys.platform")
@mock.patch("glob.glob")
def test_validate_path_empty(glob_mock, platform_mock):
    """
    With glob.glob patched to return None, a blank path resolves to None.
    """
    glob_mock.return_value = None
    assert validate_environment_path(" ") is None
|||
|
|||
|
|||
@mock.patch("mlagents_envs.env_utils.get_platform")
@mock.patch("glob.glob")
def test_validate_path(glob_mock, platform_mock):
    """
    For each supported platform, the resolved path is the candidate that
    mock_glob_method hands out for that platform's executable extension.
    """
    glob_mock.side_effect = mock_glob_method
    for expected_platform in ("linux", "darwin", "win32"):
        platform_mock.return_value = expected_platform
        resolved_path = validate_environment_path(" ")
        assert resolved_path == expected_platform
|||
|
|||
|
|||
@mock.patch("glob.glob")
@mock.patch("subprocess.Popen")
def test_launch_executable(mock_popen, glob_mock):
    """
    Walks launch_executable through three stages: before glob is configured,
    with a fake launch path, and with Popen raising a PermissionError.
    """
    env_name = " "

    # Stage 1: glob has no configured result yet; launching is expected to fail.
    with pytest.raises(UnityEnvironmentException):
        launch_executable(env_name, [])

    # Stage 2: glob reports a path and Popen is mocked; no exception expected.
    glob_mock.return_value = ["FakeLaunchPath"]
    launch_executable(env_name, [])

    # Stage 3: a PermissionError from Popen must surface as a
    # UnityEnvironmentException.
    mock_popen.side_effect = PermissionError("Fake permission error")
    with pytest.raises(UnityEnvironmentException):
        launch_executable(env_name, [])
|||
|
|||
|
|||
def test_set_logging_level():
    """
    After set_log_level(level), the logger obtained through get_logger
    reports that same level.
    """
    levels_to_check = [INFO, ERROR, FATAL, CRITICAL, DEBUG]
    for expected_level in levels_to_check:
        set_log_level(expected_level)
        test_logger = get_logger("test")
        assert test_logger.level == expected_level
|
|||
import pytest |
|||
import numpy as np |
|||
|
|||
from mlagents_envs.base_env import ( |
|||
DecisionSteps, |
|||
TerminalSteps, |
|||
ActionType, |
|||
BehaviorSpec, |
|||
) |
|||
|
|||
|
|||
def test_decision_steps():
    """
    Checks id-to-index lookup, per-agent action masks and iteration on a
    DecisionSteps built for three agents with ids 10, 11 and 12.
    """
    # The builtin bool is used as dtype: the np.bool alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    ds = DecisionSteps(
        obs=[np.array(range(12), dtype=np.float32).reshape(3, 4)],
        reward=np.array(range(3), dtype=np.float32),
        agent_id=np.array(range(10, 13), dtype=np.int32),
        action_mask=[np.zeros((3, 4), dtype=bool)],
    )

    assert ds.agent_id_to_index[10] == 0
    assert ds.agent_id_to_index[11] == 1
    assert ds.agent_id_to_index[12] == 2

    # An id that was never registered has no index.
    with pytest.raises(KeyError):
        assert ds.agent_id_to_index[-1] == -1

    # Indexing by agent id yields that agent's row of every mask.
    mask_agent = ds[10].action_mask
    assert isinstance(mask_agent, list)
    assert len(mask_agent) == 1
    assert np.array_equal(mask_agent[0], np.zeros((4), dtype=bool))

    for agent_id in ds:
        assert ds.agent_id_to_index[agent_id] in range(3)
|||
|
|||
|
|||
def test_empty_decision_steps():
    """
    DecisionSteps.empty yields one observation array per observation shape
    in the spec, each with a leading dimension of zero.
    """
    behavior_spec = BehaviorSpec(
        observation_shapes=[(3, 2), (5,)],
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    empty_steps = DecisionSteps.empty(behavior_spec)
    assert len(empty_steps.obs) == 2
    first_obs, second_obs = empty_steps.obs[0], empty_steps.obs[1]
    assert first_obs.shape == (0, 3, 2)
    assert second_obs.shape == (0, 5)
|||
|
|||
|
|||
def test_terminal_steps():
    """
    Checks id-to-index lookup, the per-agent interrupted flag and iteration
    on a TerminalSteps built for three agents with ids 10, 11 and 12.
    """
    # The builtin bool is used as dtype: the np.bool alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    ts = TerminalSteps(
        obs=[np.array(range(12), dtype=np.float32).reshape(3, 4)],
        reward=np.array(range(3), dtype=np.float32),
        agent_id=np.array(range(10, 13), dtype=np.int32),
        interrupted=np.array([1, 0, 1], dtype=bool),
    )

    assert ts.agent_id_to_index[10] == 0
    assert ts.agent_id_to_index[11] == 1
    assert ts.agent_id_to_index[12] == 2

    # The interrupted flags follow the [1, 0, 1] array passed above.
    assert ts[10].interrupted
    assert not ts[11].interrupted
    assert ts[12].interrupted

    # An id that was never registered has no index.
    with pytest.raises(KeyError):
        assert ts.agent_id_to_index[-1] == -1

    for agent_id in ts:
        assert ts.agent_id_to_index[agent_id] in range(3)
|||
|
|||
|
|||
def test_empty_terminal_steps():
    """
    TerminalSteps.empty yields one observation array per observation shape
    in the spec, each with a leading dimension of zero.
    """
    behavior_spec = BehaviorSpec(
        observation_shapes=[(3, 2), (5,)],
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    empty_steps = TerminalSteps.empty(behavior_spec)
    assert len(empty_steps.obs) == 2
    first_obs, second_obs = empty_steps.obs[0], empty_steps.obs[1]
    assert first_obs.shape == (0, 3, 2)
    assert second_obs.shape == (0, 5)
|||
|
|||
|
|||
def test_specs():
    """
    Checks the action related properties of BehaviorSpec for one continuous
    and one discrete action configuration.
    """
    observation_shapes = [(3, 2), (5,)]

    # Continuous actions: no branches, size equals action_shape, float32.
    continuous_spec = BehaviorSpec(
        observation_shapes=observation_shapes,
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    assert continuous_spec.discrete_action_branches is None
    assert continuous_spec.action_size == 3
    assert continuous_spec.create_empty_action(5).shape == (5, 3)
    assert continuous_spec.create_empty_action(5).dtype == np.float32

    # Discrete actions: a single branch, so size 1, int32.
    discrete_spec = BehaviorSpec(
        observation_shapes=observation_shapes,
        action_type=ActionType.DISCRETE,
        action_shape=(3,),
    )
    assert discrete_spec.discrete_action_branches == (3,)
    assert discrete_spec.action_size == 1
    assert discrete_spec.create_empty_action(5).shape == (5, 1)
    assert discrete_spec.create_empty_action(5).dtype == np.int32
撰写
预览
正在加载...
取消
保存
Reference in new issue