
Merge branch 'master' into goal-conditioning-new

Arthur Juliani, 4 years ago
Commit: ff70c5c4

25 files changed, 276 insertions(+), 68 deletions(-)
 1. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (3 changes)
 2. Project/ProjectSettings/UnityConnectSettings.asset (2 changes)
 3. com.unity.ml-agents/CHANGELOG.md (10 changes)
 4. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (7 changes)
 5. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (6 changes)
 6. com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs (7 changes)
 7. ml-agents-envs/mlagents_envs/communicator.py (17 changes)
 8. ml-agents-envs/mlagents_envs/env_utils.py (8 changes)
 9. ml-agents-envs/mlagents_envs/environment.py (49 changes)
10. ml-agents-envs/mlagents_envs/mock_communicator.py (12 changes)
11. ml-agents-envs/mlagents_envs/rpc_communicator.py (47 changes)
12. ml-agents-envs/mlagents_envs/tests/test_rpc_communicator.py (54 changes)
13. ml-agents/mlagents/trainers/subprocess_env_manager.py (9 changes)
14. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (27 changes)
15. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (8 changes)
16. ml-agents/mlagents/trainers/tests/torch/test_action_model.py (4 changes)
17. ml-agents/mlagents/trainers/tests/torch/test_distributions.py (10 changes)
18. ml-agents/mlagents/trainers/tests/torch/test_encoders.py (4 changes)
19. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (15 changes)
20. ml-agents/mlagents/trainers/tests/torch/test_networks.py (8 changes)
21. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (3 changes)
22. ml-agents/mlagents/trainers/torch/distributions.py (2 changes)
23. ml-agents/mlagents/trainers/torch/encoders.py (4 changes)
24. .yamato/pytest-gpu.yml (24 changes)
25. pytest.ini (4 changes)

Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (3 changes)

  using Unity.MLAgents;
  using Unity.MLAgents.Sensors;
  using Unity.MLAgents.Actuators;
+ using UnityEngine.Rendering;
  using UnityEngine.Serialization;

  public class GridAgent : Agent

  void WaitTimeInference()
  {
-     if (renderCamera != null)
+     if (renderCamera != null && SystemInfo.graphicsDeviceType != GraphicsDeviceType.Null)
      {
          renderCamera.Render();
      }

Project/ProjectSettings/UnityConnectSettings.asset (2 changes)

  UnityConnectSettings:
    m_ObjectHideFlags: 0
    serializedVersion: 1
-   m_Enabled: 1
+   m_Enabled: 0
    m_TestMode: 0
    m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
    m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

com.unity.ml-agents/CHANGELOG.md (10 changes)

  - Updated the Basic example and the Match3 Example to use Actuators.
  - Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)
+ - CameraSensor now logs an error if the GraphicsDevice is null. (#4880)
+ - Fixed a bug that could cause a crash if a new behavior appeared during
+   multi-environment training. (#4872)
+ - Fixed the computation of entropy for continuous actions. (#4869)
+ - Fixed a bug that would cause `UnityEnvironment` to wait the full timeout
+   period and report a misleading error message if the executable crashed
+   without closing the connection. It now periodically checks the process status
+   while waiting for a connection, and raises a better error message if it crashes. (#4880)
+ - A `-logfile` option passed via `--env-args` to `mlagents-learn` is no longer
+   overwritten. (#4880)

  ## [1.7.2-preview] - 2020-12-22

com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (7 changes)

  }
  else
  {
-     Debug.Assert(sourceActionBuffer.Length == destination.Length,
-         $"sourceActionBuffer:{sourceActionBuffer.Length} is a different" +
-         $" size than destination: {destination.Length}.");
+     Debug.AssertFormat(sourceActionBuffer.Length == destination.Length,
+         "sourceActionBuffer: {0} is a different size than destination: {1}.",
+         sourceActionBuffer.Length,
+         destination.Length);
      Array.Copy(sourceActionBuffer.Array,
          sourceActionBuffer.Offset,
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (6 changes)

  using UnityEngine;
+ using UnityEngine.Rendering;

  namespace Unity.MLAgents.Sensors
  {

  /// <returns>The Texture2D to render to.</returns>
  public static Texture2D ObservationToTexture(Camera obsCamera, int width, int height)
  {
+     if (SystemInfo.graphicsDeviceType == GraphicsDeviceType.Null)
+     {
+         Debug.LogError("GraphicsDeviceType is Null. This will likely crash when trying to render.");
+     }
      var texture2D = new Texture2D(width, height, TextureFormat.RGB24, false);
      var oldRec = obsCamera.rect;
      obsCamera.rect = new Rect(0f, 0f, 1f, 1f);

com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs (7 changes)

  {
      // Check for compatibility with the other Agents' Sensors
      // TODO make sure this only checks once per agent
-     Debug.Assert(m_SensorShapes.Count == sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {sensors.Count}");
+     Debug.AssertFormat(
+         m_SensorShapes.Count == sensors.Count,
+         "Number of Sensors must match. {0} != {1}",
+         m_SensorShapes.Count,
+         sensors.Count
+     );
      for (var i = 0; i < Mathf.Min(m_SensorShapes.Count, sensors.Count); i++)
      {
          var cachedShape = m_SensorShapes[i];

ml-agents-envs/mlagents_envs/communicator.py (17 changes)

- from typing import Optional
+ from typing import Callable, Optional

+ # Function to call while waiting for a connection timeout.
+ # This should raise an exception if it needs to break from waiting for the timeout.
+ PollCallback = Callable[[], None]

  class Communicator:

      :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
      """

-     def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
+     def initialize(
+         self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
+     ) -> UnityOutputProto:
+         :param poll_callback: Optional callback to be used while polling the connection.

-     def exchange(self, inputs: UnityInputProto) -> Optional[UnityOutputProto]:
+     def exchange(
+         self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
+     ) -> Optional[UnityOutputProto]:
+         :param poll_callback: Optional callback to be used while polling the connection.
          :return: The UnityOutputs generated by the Environment
          """

ml-agents-envs/mlagents_envs/env_utils.py (8 changes)

  from mlagents_envs.exception import UnityEnvironmentException

+ logger = get_logger(__name__)

  def get_platform():
      """
      returns the platform of the operating system: linux, darwin or win32

          .replace(".x86", "")
      )
      true_filename = os.path.basename(os.path.normpath(env_path))
-     get_logger(__name__).debug(f"The true file name is {true_filename}")
+     logger.debug(f"The true file name is {true_filename}")
      if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
          return None

          f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
      )
  else:
-     get_logger(__name__).debug(f"This is the launch string {launch_string}")
+     logger.debug(f"The launch string is {launch_string}")
+     logger.debug(f"Running with args {args}")
      # Launch Unity environment
      subprocess_args = [launch_string] + args
      try:

ml-agents-envs/mlagents_envs/environment.py (49 changes)

  # If true, this means the environment was successfully loaded
  self._loaded = False
  # The process that is started. If None, no process was started
- self._proc1 = None
+ self._process: Optional[subprocess.Popen] = None
  self._timeout_wait: int = timeout_wait
  self._communicator = self._get_communicator(worker_id, base_port, timeout_wait)
  self._worker_id = worker_id

  )
  if file_name is not None:
      try:
-         self._proc1 = env_utils.launch_executable(
+         self._process = env_utils.launch_executable(
              file_name, self._executable_args()
          )
      except UnityEnvironmentException:

  if self._no_graphics:
      args += ["-nographics", "-batchmode"]
  args += [UnityEnvironment._PORT_COMMAND_LINE_ARG, str(self._port)]
- if self._log_folder:
+ # If the logfile arg isn't already set in the env args,
+ # try to set it to an output directory
+ logfile_set = "-logfile" in (arg.lower() for arg in self._additional_args)
+ if self._log_folder and not logfile_set:
      log_file_path = os.path.join(
          self._log_folder, f"Player-{self._worker_id}.log"
      )

  def reset(self) -> None:
      if self._loaded:
-         outputs = self._communicator.exchange(self._generate_reset_input())
+         outputs = self._communicator.exchange(
+             self._generate_reset_input(), self._poll_process
+         )
          if outputs is None:
              raise UnityCommunicatorStoppedException("Communicator has exited.")
          self._update_behavior_specs(outputs)

      ].action_spec.empty_action(n_agents)
  step_input = self._generate_step_input(self._env_actions)
  with hierarchical_timer("communicator.exchange"):
-     outputs = self._communicator.exchange(step_input)
+     outputs = self._communicator.exchange(step_input, self._poll_process)
  if outputs is None:
      raise UnityCommunicatorStoppedException("Communicator has exited.")
  self._update_behavior_specs(outputs)

  self._assert_behavior_exists(behavior_name)
  return self._env_state[behavior_name]

+ def _poll_process(self) -> None:
+     """
+     Check the status of the subprocess. If it has exited, raise a UnityEnvironmentException.
+     :return: None
+     """
+     if not self._process:
+         return
+     poll_res = self._process.poll()
+     if poll_res is not None:
+         exc_msg = self._returncode_to_env_message(self._process.returncode)
+         raise UnityEnvironmentException(exc_msg)

  def close(self):
      """
      Sends a shutdown signal to the unity environment, and closes the socket connection.

      timeout = self._timeout_wait
      self._loaded = False
      self._communicator.close()
-     if self._proc1 is not None:
+     if self._process is not None:
-         self._proc1.wait(timeout=timeout)
-         signal_name = self._returncode_to_signal_name(self._proc1.returncode)
-         signal_name = f" ({signal_name})" if signal_name else ""
-         return_info = f"Environment shut down with return code {self._proc1.returncode}{signal_name}."
-         logger.info(return_info)
+         self._process.wait(timeout=timeout)
+         logger.info(self._returncode_to_env_message(self._process.returncode))
-         self._proc1.kill()
+         self._process.kill()
-         self._proc1 = None
+         self._process = None

  @timed
  def _generate_step_input(

  ) -> UnityOutputProto:
      inputs = UnityInputProto()
      inputs.rl_initialization_input.CopyFrom(init_parameters)
-     return self._communicator.initialize(inputs)
+     return self._communicator.initialize(inputs, self._poll_process)

  @staticmethod
  def _wrap_unity_input(rl_input: UnityRLInputProto) -> UnityInputProto:

  except Exception:
      # Should generally be a ValueError, but catch everything just in case.
      return None

+ @staticmethod
+ def _returncode_to_env_message(returncode: int) -> str:
+     signal_name = UnityEnvironment._returncode_to_signal_name(returncode)
+     signal_name = f" ({signal_name})" if signal_name else ""
+     return f"Environment shut down with return code {returncode}{signal_name}."

ml-agents-envs/mlagents_envs/mock_communicator.py (12 changes)

+ from typing import Optional

- from .communicator import Communicator
+ from .communicator import Communicator, PollCallback
  from .environment import UnityEnvironment
  from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
  from mlagents_envs.communicator_objects.brain_parameters_pb2 import (

      self.brain_name = brain_name
      self.vec_obs_size = vec_obs_size

-     def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
+     def initialize(
+         self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
+     ) -> UnityOutputProto:
          if self.is_discrete:
              action_spec = ActionSpecProto(
                  num_discrete_actions=2, discrete_branch_sizes=[3, 2]

          )
          return dict_agent_info

-     def exchange(self, inputs: UnityInputProto) -> UnityOutputProto:
+     def exchange(
+         self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
+     ) -> UnityOutputProto:
          result = UnityRLOutputProto(agentInfos=self._get_agent_infos())
          return UnityOutputProto(rl_output=result)

ml-agents-envs/mlagents_envs/rpc_communicator.py (47 changes)

  import grpc
+ from typing import Optional
  from multiprocessing import Pipe
+ import time

- from .communicator import Communicator
+ from .communicator import Communicator, PollCallback
  from mlagents_envs.communicator_objects.unity_to_external_pb2_grpc import (
      UnityToExternalProtoServicer,
      add_UnityToExternalProtoServicer_to_server,

  finally:
      s.close()

-     def poll_for_timeout(self):
+     def poll_for_timeout(self, poll_callback: Optional[PollCallback] = None) -> None:
+         Additionally, a callback can be passed to periodically check the state of the environment.
+         This is used to detect the case when the environment dies without cleaning up the connection,
+         so that we can stop sooner and raise a more appropriate error.
-         if not self.unity_to_external.parent_conn.poll(self.timeout_wait):
-             raise UnityTimeOutException(
-                 "The Unity environment took too long to respond. Make sure that :\n"
-                 "\t The environment does not need user interaction to launch\n"
-                 '\t The Agents\' Behavior Parameters > Behavior Type is set to "Default"\n'
-                 "\t The environment and the Python interface have compatible versions."
-             )
+         deadline = time.monotonic() + self.timeout_wait
+         callback_timeout_wait = self.timeout_wait // 10
+         while time.monotonic() < deadline:
+             if self.unity_to_external.parent_conn.poll(callback_timeout_wait):
+                 # Got an acknowledgment from the connection
+                 return
+             if poll_callback:
+                 # Fire the callback - if it detects something wrong, it should raise an exception.
+                 poll_callback()
+         # Got this far without reading any data from the connection, so it must be dead.
+         raise UnityTimeOutException(
+             "The Unity environment took too long to respond. Make sure that :\n"
+             "\t The environment does not need user interaction to launch\n"
+             '\t The Agents\' Behavior Parameters > Behavior Type is set to "Default"\n'
+             "\t The environment and the Python interface have compatible versions."
+         )

-     def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
-         self.poll_for_timeout()
+     def initialize(
+         self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
+     ) -> UnityOutputProto:
+         self.poll_for_timeout(poll_callback)
          aca_param = self.unity_to_external.parent_conn.recv().unity_output
          message = UnityMessageProto()
          message.header.status = 200

          return aca_param

-     def exchange(self, inputs: UnityInputProto) -> Optional[UnityOutputProto]:
-         self.poll_for_timeout()
+     def exchange(
+         self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
+     ) -> Optional[UnityOutputProto]:
+         self.poll_for_timeout(poll_callback)
          output = self.unity_to_external.parent_conn.recv()
          if output.header.status != 200:
              return None
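
The heart of the change is splitting one blocking `poll(self.timeout_wait)` into roughly ten short polls so the callback gets a chance to run between them. The same pattern in isolation; the function and parameter names here are illustrative, not part of the ML-Agents API:

import time
from typing import Callable, Optional

def wait_with_polling(
    poll: Callable[[float], bool],  # e.g. Connection.poll; True once data is ready
    timeout_wait: float,
    poll_callback: Optional[Callable[[], None]] = None,
) -> bool:
    deadline = time.monotonic() + timeout_wait
    interval = timeout_wait / 10  # run the callback roughly ten times per timeout
    while time.monotonic() < deadline:
        if poll(interval):
            return True  # the other side responded in time
        if poll_callback:
            poll_callback()  # may raise to abort the wait early
    return False  # timed out without a response

Note the use of `time.monotonic()` rather than `time.time()`: the deadline is then immune to wall-clock adjustments while waiting.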

ml-agents-envs/mlagents_envs/tests/test_rpc_communicator.py (54 changes)

  import pytest
+ from unittest import mock
+ import grpc

+ import mlagents_envs.rpc_communicator
- from mlagents_envs.exception import UnityWorkerInUseException
+ from mlagents_envs.exception import (
+     UnityWorkerInUseException,
+     UnityTimeOutException,
+     UnityEnvironmentException,
+ )
+ from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto

  def test_rpc_communicator_checks_port_on_create():

      second_comm = RpcCommunicator(worker_id=1)
      first_comm.close()
      second_comm.close()

+ @mock.patch.object(grpc, "server")
+ @mock.patch.object(
+     mlagents_envs.rpc_communicator, "UnityToExternalServicerImplementation"
+ )
+ def test_rpc_communicator_initialize_OK(mock_impl, mock_grpc_server):
+     comm = RpcCommunicator(timeout_wait=0.25)
+     comm.unity_to_external.parent_conn.poll.return_value = True
+     input = UnityInputProto()
+     comm.initialize(input)
+     comm.unity_to_external.parent_conn.poll.assert_called()

+ @mock.patch.object(grpc, "server")
+ @mock.patch.object(
+     mlagents_envs.rpc_communicator, "UnityToExternalServicerImplementation"
+ )
+ def test_rpc_communicator_initialize_timeout(mock_impl, mock_grpc_server):
+     comm = RpcCommunicator(timeout_wait=0.25)
+     comm.unity_to_external.parent_conn.poll.return_value = None
+     input = UnityInputProto()
+     # Expect a timeout
+     with pytest.raises(UnityTimeOutException):
+         comm.initialize(input)
+     comm.unity_to_external.parent_conn.poll.assert_called()

+ @mock.patch.object(grpc, "server")
+ @mock.patch.object(
+     mlagents_envs.rpc_communicator, "UnityToExternalServicerImplementation"
+ )
+ def test_rpc_communicator_initialize_callback(mock_impl, mock_grpc_server):
+     def callback():
+         raise UnityEnvironmentException
+
+     comm = RpcCommunicator(timeout_wait=0.25)
+     comm.unity_to_external.parent_conn.poll.return_value = None
+     input = UnityInputProto()
+     # Expect the poll callback's exception to propagate
+     with pytest.raises(UnityEnvironmentException):
+         comm.initialize(input, poll_callback=callback)
+     comm.unity_to_external.parent_conn.poll.assert_called()

ml-agents/mlagents/trainers/subprocess_env_manager.py (9 changes)

      )
      _send_response(EnvironmentCommand.ENV_EXITED, ex)
  except Exception as ex:
-     logger.error(
+     logger.exception(
          f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
      )
      step_queue.put(

  @property
  def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
-     self.env_workers[0].send(EnvironmentCommand.BEHAVIOR_SPECS)
-     return self.env_workers[0].recv().payload
+     result: Dict[BehaviorName, BehaviorSpec] = {}
+     for worker in self.env_workers:
+         worker.send(EnvironmentCommand.BEHAVIOR_SPECS)
+         result.update(worker.recv().payload)
+     return result

  def close(self) -> None:
      logger.debug("SubprocessEnvManager closing.")

ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (27 changes)

+ @mock.patch(
+     "mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
+ )
+ def test_training_behaviors_collects_results_from_all_envs(
+     self, mock_create_worker
+ ):
+     def create_worker_mock(worker_id, step_queue, env_factor, engine_c):
+         return MockEnvWorker(
+             worker_id,
+             EnvironmentResponse(
+                 EnvironmentCommand.RESET, worker_id, {f"key{worker_id}": worker_id}
+             ),
+         )
+
+     mock_create_worker.side_effect = create_worker_mock
+     manager = SubprocessEnvManager(
+         mock_env_factory, EngineConfig.default_config(), 4
+     )
+     res = manager.training_behaviors
+     for env in manager.env_workers:
+         env.send.assert_called_with(EnvironmentCommand.BEHAVIOR_SPECS)
+         env.recv.assert_called()
+     for worker_id in range(4):
+         assert f"key{worker_id}" in res
+         assert res[f"key{worker_id}"] == worker_id

  @mock.patch(
      "mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
  )
  def test_step_takes_steps_for_all_non_waiting_envs(self, mock_create_worker):
      mock_create_worker.side_effect = create_worker_mock
      manager = SubprocessEnvManager(

ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (8 changes)

  import os
  import numpy as np
- from mlagents.torch_utils import torch
+ from mlagents.torch_utils import torch, default_device
  from mlagents.trainers.policy.torch_policy import TorchPolicy
  from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
  from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver

  """
  Make sure two policies have the same output for the same input.
  """
+ policy1.actor_critic = policy1.actor_critic.to(default_device())
+ policy2.actor_critic = policy2.actor_critic.to(default_device())
  decision_step, _ = mb.create_steps_from_behavior_spec(
      policy1.behavior_spec, num_agents=1
  )

      tensor_obs, masks=masks, memories=memories
  )
  np.testing.assert_array_equal(
-     log_probs1.all_discrete_tensor, log_probs2.all_discrete_tensor
+     ModelUtils.to_numpy(log_probs1.all_discrete_tensor),
+     ModelUtils.to_numpy(log_probs2.all_discrete_tensor),
  )

ml-agents/mlagents/trainers/tests/torch/test_action_model.py (4 changes)

  for _disc in log_probs.all_discrete_list:
      assert _disc.shape == (1, 2)
- for clp in log_probs.continuous_tensor[0]:
+ for clp in log_probs.continuous_tensor[0].tolist():

- for ent, val in zip(entropies[0], [1.4189, 0.6191, 0.6191]):
+ for ent, val in zip(entropies[0].tolist(), [1.4189, 0.6191, 0.6191]):
      assert ent == pytest.approx(val, abs=0.01)

ml-agents/mlagents/trainers/tests/torch/test_distributions.py (10 changes)

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
- for prob in log_prob.flatten():
+ for prob in log_prob.flatten().tolist():
      assert prob == pytest.approx(-2, abs=0.1)

  dist_insts = gauss_dist(sample_embedding, masks=masks)
  for dist_inst in dist_insts:
      log_prob = dist_inst.all_log_prob()
-     assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
+     assert log_prob.flatten()[-1].tolist() == pytest.approx(0, abs=0.001)

  def test_gaussian_dist_instance():

  )
  action = dist_instance.sample()
  assert action.shape == (1, act_size)
- for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten():
+ for log_prob in (
+     dist_instance.log_prob(torch.zeros((1, act_size))).flatten().tolist()
+ ):

- for ent in dist_instance.entropy().flatten():
+ for ent in dist_instance.entropy().flatten().tolist():
      # entropy of standard normal at 0, based on 1/2 + ln(sqrt(2pi)sigma)
      assert ent == pytest.approx(1.42, abs=0.01)
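
The `.tolist()` edits recurring across these test files convert tensor elements to plain Python floats before comparing with `pytest.approx`; presumably this is groundwork for the new GPU CI job in this commit, since iterating a CUDA tensor yields 0-d tensors that live on the device rather than host-side numbers. A minimal illustration:

import pytest
import torch

t = torch.full((1, 3), 0.5)
# Iterating a tensor yields 0-d tensors; .tolist() yields host-side floats,
# which compare cleanly with pytest.approx regardless of the tensor's device.
for val in t.flatten().tolist():
    assert isinstance(val, float)
    assert val == pytest.approx(0.5, abs=0.001)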

ml-agents/mlagents/trainers/tests/torch/test_encoders.py (4 changes)

  norm.update(vec_input3)
  # Test normalization
- for val in norm(vec_input1)[0]:
+ for val in norm(vec_input1)[0].tolist():
      assert val == pytest.approx(0.707, abs=0.001)
  # Test copy normalization

  assert compare_models(norm, norm2)
- for val in norm2(vec_input1)[0]:
+ for val in norm2(vec_input1)[0].tolist():
      assert val == pytest.approx(0.707, abs=0.001)

ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (15 changes)

  SAC_TORCH_CONFIG = sac_dummy_config()

+ @pytest.mark.check_environment_trains

-     PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=1024
+     PPO_TORCH_CONFIG.hyperparameters,
+     batch_size=64,
+     buffer_size=1024,
+     learning_rate=1e-3,
  )
  config = attr.evolve(
      PPO_TORCH_CONFIG,

  )
- check_environment_trains(
-     env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1212
- )
+ check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

+ @pytest.mark.check_environment_trains
  @pytest.mark.parametrize("num_visual", [1, 2])
  def test_hybrid_visual_ppo(num_visual):
      env = SimpleEnvironment(

  check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1336)

+ @pytest.mark.check_environment_trains
  def test_hybrid_recurrent_ppo():
      env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
      new_network_settings = attr.evolve(

  check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

+ @pytest.mark.check_environment_trains
  @pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
  def test_hybrid_sac(action_size):
      env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)

  )

+ @pytest.mark.check_environment_trains
  @pytest.mark.parametrize("num_visual", [1, 2])
  def test_hybrid_visual_sac(num_visual):
      env = SimpleEnvironment(

  check_environment_trains(env, {BRAIN_NAME: config})

+ @pytest.mark.check_environment_trains
  def test_hybrid_recurrent_sac():
      env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
      new_networksettings = attr.evolve(

ml-agents/mlagents/trainers/tests/torch/test_networks.py (8 changes)

  loss.backward()
  optimizer.step()
  # In the last step, values should be close to 1
- for _enc in encoded.flatten():
+ for _enc in encoded.flatten().tolist():
      assert _enc == pytest.approx(1.0, abs=0.1)

  loss.backward()
  optimizer.step()
  # In the last step, values should be close to 1
- for _enc in encoded.flatten():
+ for _enc in encoded.flatten().tolist():
      assert _enc == pytest.approx(1.0, abs=0.1)

  loss.backward()
  optimizer.step()
  # In the last step, values should be close to 1
- for _enc in encoded.flatten():
+ for _enc in encoded.flatten().tolist():
      assert _enc == pytest.approx(1.0, abs=0.1)

  optimizer.step()
  # In the last step, values should be close to 1
  for value in values.values():
-     for _out in value:
+     for _out in value.tolist():
          assert _out[0] == pytest.approx(1.0, abs=0.1)

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (3 changes)

  PPO_TORCH_CONFIG = ppo_dummy_config()
  SAC_TORCH_CONFIG = sac_dummy_config()

+ # tests in this file won't be tested on GPU machine
+ pytestmark = pytest.mark.check_environment_trains

  @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
  def test_simple_ppo(action_sizes):

ml-agents/mlagents/trainers/torch/distributions.py (2 changes)

  def entropy(self):
      return torch.mean(
-         0.5 * torch.log(2 * math.pi * math.e * self.std + EPSILON),
+         0.5 * torch.log(2 * math.pi * math.e * self.std ** 2 + EPSILON),
          dim=1,
          keepdim=True,
      )  # Use equivalent behavior to TF
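
This small fix matters because the differential entropy of a Gaussian is ½·ln(2πeσ²), not ½·ln(2πeσ); the old expression was only correct at σ = 1. A quick standalone check against `torch.distributions.Normal` (a sketch, not code from this commit):

import math
import torch

std = torch.tensor([0.5, 1.0, 2.0])
# H(N(mu, sigma^2)) = 0.5 * ln(2 * pi * e * sigma^2)
fixed = 0.5 * torch.log(2 * math.pi * math.e * std ** 2)
reference = torch.distributions.Normal(0.0, std).entropy()
assert torch.allclose(fixed, reference)
# At sigma = 1 both give ~1.4189, matching the expected values in the tests above.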

ml-agents/mlagents/trainers/torch/encoders.py (4 changes)

  if not exporting_to_onnx.is_exporting():
      visual_obs = visual_obs.permute([0, 3, 1, 2])
  hidden = self.conv_layers(visual_obs)
- hidden = torch.reshape(hidden, (-1, self.final_flat))
+ hidden = hidden.reshape(-1, self.final_flat)
  return self.dense(hidden)

  if not exporting_to_onnx.is_exporting():
      visual_obs = visual_obs.permute([0, 3, 1, 2])
  hidden = self.conv_layers(visual_obs)
- hidden = torch.reshape(hidden, (-1, self.final_flat))
+ hidden = hidden.reshape(-1, self.final_flat)
  return self.dense(hidden)

.yamato/pytest-gpu.yml (24 changes)

+ pytest_gpu:
+   name: Pytest GPU
+   agent:
+     type: Unity::VM::GPU
+     image: package-ci/ubuntu:stable
+     flavor: b1.large
+   commands:
+     - |
+       sudo apt-get update && sudo apt-get install -y python3-venv
+       python3 -m venv venv && source venv/bin/activate
+       python3 -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+       python3 -u -m ml-agents.tests.yamato.setup_venv
+       python3 -m pip install --progress-bar=off -r test_requirements.txt --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+       python3 -m pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+       python3 -m pytest -m "not check_environment_trains" --junitxml=junit/test-results.xml -p no:warnings
+   triggers:
+     cancel_old_ci: true
+     recurring:
+       - branch: master
+         frequency: daily
+   artifacts:
+     logs:
+       paths:
+         - "artifacts/standalone_build.txt"

pytest.ini (4 changes)

  [pytest]
+ addopts = --strict-markers
+ markers =
+     check_environment_trains: Slow training tests, do not run on yamato
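
`--strict-markers` makes pytest reject any marker not declared in `pytest.ini`, so a typo in `check_environment_trains` fails at collection instead of silently selecting nothing. A minimal sketch of how the marker is applied and filtered (the test name here is illustrative):

import pytest

# Marks every test in the module, as test_simple_rl.py does above:
pytestmark = pytest.mark.check_environment_trains

@pytest.mark.check_environment_trains
def test_slow_training_case():
    assert True

# The GPU job above then deselects these with:
#   python3 -m pytest -m "not check_environment_trains"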