浏览代码

Merge pull request #2421 from Unity-Technologies/hotfix-v0.9.1

Hotfix v0.9.1 - develop
/develop-gpu-test
GitHub 6 年前
当前提交
4472838e
共有 14 个文件被更改,包括 217 次插入116 次删除
  1. 83
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  2. 19
      UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs
  3. 4
      gym-unity/setup.py
  4. 86
      ml-agents-envs/mlagents/envs/sampler_class.py
  5. 2
      ml-agents-envs/setup.py
  6. 16
      ml-agents/mlagents/trainers/bc/policy.py
  7. 2
      ml-agents/mlagents/trainers/bc/trainer.py
  8. 12
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
  9. 20
      ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
  10. 6
      ml-agents/mlagents/trainers/learn.py
  11. 20
      ml-agents/mlagents/trainers/tests/mock_brain.py
  12. 30
      ml-agents/mlagents/trainers/tests/test_bc.py
  13. 29
      ml-agents/mlagents/trainers/tests/test_bcmodule.py
  14. 4
      ml-agents/setup.py

83
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


namespace MLAgents
{
/// <summary>
/// Struct that contains all the information for an Agent, including its
/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status, that is sent to the Brain.
/// </summary>
public struct AgentInfo

agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}
/// <summary>
/// Remove the visual observations from memory. Call at each timestep
/// to avoid memory leaks.
/// </summary>
public void ClearVisualObs()
{
foreach (Texture2D obs in visualObservations)
{
return agentInfoProto;
/// Struct that contains the action information sent from the Brain to the
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
public struct AgentAction

}
/// <summary>
/// Struct that contains all the Agent-specific parameters provided in the
/// Struct that contains all the Agent-specific parameters provided in the
/// Editor. This excludes the Brain linked to the Agent since it can be
/// modified programmatically.
/// </summary>

/// observations.
/// </summary>
public List<Camera> agentCameras = new List<Camera>();
/// <summary>
/// The list of the RenderTextures the agent uses for visual
/// observations.

/// <summary>
/// The maximum number of steps the agent takes before being done.
/// The maximum number of steps the agent takes before being done.
/// </summary>
/// <remarks>
/// If set to 0, the agent can only be set to done programmatically (or

public bool resetOnDone = true;
/// <summary>
/// Whether to enable On Demand Decisions or make a decision at
/// Whether to enable On Demand Decisions or make a decision at
/// every step.
/// </summary>
public bool onDemandDecision;

/// <summary>
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the cameras attached
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the cameras attached
/// to the agent in addition to the vector observations implemented by the
/// user in <see cref="CollectObservations"/>. On the other hand, actions
/// are determined by decisions produced by a linked Brain. Currently, this

/// however, an agent need not send its observation at every step since very
/// little may have changed between successive steps. Currently, how often an
/// agent updates its brain with a fresh observation is determined by the
/// Academy.
///
/// At any step, an agent may be considered <see cref="done"/>.
/// Academy.
///
/// At any step, an agent may be considered <see cref="done"/>.
///
///
///
///
/// episodes. The academy controls the global episode count and each agent
/// episodes. The academy controls the global episode count and each agent
/// controls its own local episode count and can reset and start a new local
/// episode independently (based on its own experience). Thus an academy
/// (global) episode can be viewed as the upper-bound on an agent's episode

/// value takes precedence (since the agent max step will never be reached).
///
///
///
///
/// Implementation-wise, it is required that this class is extended and the
/// virtual methods overridden. For sample implementations of agent behavior,
/// see the Examples/ directory within this Unity project.

{
/// <summary>
/// The Brain attached to this agent. A brain can be attached either
/// directly from the Editor through AgentEditor or
/// directly from the Editor through AgentEditor or
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
/// to not have a brain, as long as no decision is requested.
/// </summary>

actionMasker = new ActionMasker(param);
// If we haven't initialized vectorActions, initialize to 0. This should only
// happen during the creation of the Agent. In subsequent episodes, vectorAction
// should stay the previous action before the Done(), so that it is properly recorded.
// should stay the previous action before the Done(), so that it is properly recorded.
if (action.vectorActions == null)
{
if (param.vectorActionSpaceType == SpaceType.continuous)

brain.brainParameters.vectorObservationSize,
info.vectorObservation.Count));
}
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation,
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation,
info.stackedVectorObservation.Count - info.vectorObservation.Count);
info.visualObservations.Clear();

param.cameraResolutions[i].height);
info.visualObservations.Add(obsTexture);
}
//Then add all renderTextures
var camCount = agentParameters.agentCameras.Count;
for (int i = 0; i < agentParameters.agentRenderTextures.Count; i++)

/// <summary>
/// Collects the (vector, visual, text) observations of the agent.
/// The agent observation describes the current environment from the
/// The agent observation describes the current environment from the
/// observation could include distances to friends or enemies, or the
/// observation could include distances to friends or enemies, or the
/// current level of ammunition at its disposal.
/// Recall that an Agent may attach vector, visual or textual observations.
/// Vector observations are added by calling the provided helper methods:

/// needs to match the vectorObservationSize attribute of the linked Brain.
/// Visual observations are implicitly added from the cameras attached to
/// the Agent.
/// Lastly, textual observations are added using
/// Lastly, textual observations are added using
/// <see cref="SetTextObs(string)"/>.
/// </remarks>
public virtual void CollectObservations()

}
/// <summary>
/// Specifies the agent behavior when done and
/// Specifies the agent behavior when done and
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
/// used to remove the agent from the scene.
/// </summary>

{
action.memories = memories;
}
public List<float> GetMemoriesAction()
{
return action.memories;

/// <summary>
/// Sets the status of the agent.
/// </summary>
/// <param name="academyMaxStep">If set to <c>true</c>
/// <param name="academyMaxStep">If set to <c>true</c>
/// <param name="academyDone">If set to <c>true</c>
/// <param name="academyDone">If set to <c>true</c>
/// The agent must set done.</param>
/// <param name="academyStepCounter">Number of current steps in episode</param>
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)

maxStepReached = true;
}
// If the Academy needs to reset, the agent should reset
// If the Academy needs to reset, the agent should reset
// even if it was reset recently.
if (academyDone)
{

/// Signals the agent that it must reset if its done flag is set to true.
void ResetIfDone()
{
// If an agent is done, then it will also
// If an agent is done, then it will also
// request for a decision and an action
if (IsDone())
{

obsCamera.Render();
texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0);
obsCamera.targetTexture = prevCameraRT;
obsCamera.rect = oldRec;
RenderTexture.active = prevActiveRT;

/// <summary>
/// Converts a RenderTexture and corresponding resolution to a 2D texture.
/// </summary>

{
texture2D.Resize(width, height);
}
if(width != obsTexture.width || height != obsTexture.height)
{
throw new UnityAgentsException(string.Format(

{
info.customObservation = customObservation;
}
}
}
}

19
UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs


namespace MLAgents
{
/// <summary>
/// The batcher is an RL specific class that makes sure that the information each object in
/// Unity (Academy and Brains) wants to send to External is appropriately batched together
/// The batcher is an RL specific class that makes sure that the information each object in
/// Unity (Academy and Brains) wants to send to External is appropriately batched together
///
///
///
/// At each step, the batcher will keep track of the brains that queried the batcher for that
///
/// At each step, the batcher will keep track of the brains that queried the batcher for that
/// step. The batcher can only send the batched data when all the Brains have queried the
/// Batcher.
/// </summary>

}
/// <summary>
/// Sends the academy parameters through the Communicator.
/// Sends the academy parameters through the Communicator.
/// Is used by the academy to send the AcademyParameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>

/// Registers the done flag of the academy to the next output to be sent
/// to the communicator.
/// </summary>
/// <param name="done">If set to <c>true</c>
/// <param name="done">If set to <c>true</c>
/// The academy done state will be sent to External at the next Exchange.</param>
public void RegisterAcademyDoneFlag(bool done)
{

/// <summary>
/// Sends the brain info. If at least one brain has an agent in need of
/// a decision or if the academy is done, the data is sent via
/// a decision or if the academy is done, the data is sent via
/// Communicator. Else, a new step is realized. The data can only be
/// sent once all the brains that subscribed to the batcher have tried
/// to send information.

{
CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto();
m_currentUnityRLOutput.AgentInfos[brainKey].Value.Add(agentInfoProto);
// Avoid visual obs memory leak. This should be called AFTER we are done with the visual obs.
// e.g. after recording them to demo and using them for inference.
agentInfo[agent].ClearVisualObs();
}
m_hasData[brainKey] = true;

4
gym-unity/setup.py


setup(
name="gym_unity",
version="0.4.3",
version="0.4.4",
description="Unity Machine Learning Agents Gym Interface",
license="Apache License 2.0",
author="Unity Technologies",

install_requires=["gym", "mlagents_envs==0.9.0"],
install_requires=["gym", "mlagents_envs==0.9.1"],
)

86
ml-agents-envs/mlagents/envs/sampler_class.py


"""
def __init__(
self, min_value: Union[int, float], max_value: Union[int, float], **kwargs
self,
min_value: Union[int, float],
max_value: Union[int, float],
seed: Optional[int] = None,
**kwargs
"""
:param min_value: minimum value of the range to be sampled uniformly from
:param max_value: maximum value of the range to be sampled uniformly from
:param seed: Random seed used for making draws from the uniform sampler
"""
# Draw from random state to allow for consistent reset parameter draw for a seed
self.random_state = np.random.RandomState(seed)
return np.random.uniform(self.min_value, self.max_value)
"""
Draws and returns a sample from the specified interval
"""
return self.random_state.uniform(self.min_value, self.max_value)
class MultiRangeUniformSampler(Sampler):

it proceeds to pick a value uniformly in that range.
"""
def __init__(self, intervals: List[List[Union[int, float]]], **kwargs) -> None:
def __init__(
self,
intervals: List[List[Union[int, float]]],
seed: Optional[int] = None,
**kwargs
) -> None:
"""
:param intervals: List of intervals to draw uniform samples from
:param seed: Random seed used for making uniform draws from the specified intervals
"""
self.intervals = intervals
# Measure the length of the intervals
interval_lengths = [abs(x[1] - x[0]) for x in self.intervals]

# Draw from random state to allow for consistent reset parameter draw for a seed
self.random_state = np.random.RandomState(seed)
"""
Selects an interval to pick and then draws a uniform sample from the picked interval
"""
np.random.choice(len(self.intervals), p=self.interval_weights)
self.random_state.choice(len(self.intervals), p=self.interval_weights)
return np.random.uniform(cur_min, cur_max)
return self.random_state.uniform(cur_min, cur_max)
class GaussianSampler(Sampler):

"""
def __init__(
self, mean: Union[float, int], st_dev: Union[float, int], **kwargs
self,
mean: Union[float, int],
st_dev: Union[float, int],
seed: Optional[int] = None,
**kwargs
"""
:param mean: Specifies the mean of the gaussian distribution to draw from
:param st_dev: Specifies the standard deviation of the gaussian distribution to draw from
:param seed: Random seed used for making gaussian draws from the sample
"""
# Draw from random state to allow for consistent reset parameter draw for a seed
self.random_state = np.random.RandomState(seed)
return np.random.normal(self.mean, self.st_dev)
"""
Returns a draw from the specified Gaussian distribution
"""
return self.random_state.normal(self.mean, self.st_dev)
class SamplerFactory:

@staticmethod
def register_sampler(name: str, sampler_cls: Type[Sampler]) -> None:
"""
Registers the sampler in the Sampler Factory to be used later
:param name: String name to set as key for the sampler_cls in the factory
:param sampler_cls: Sampler object to associate to the name in the factory
"""
def init_sampler_class(name: str, params: Dict[str, Any]):
def init_sampler_class(
name: str, params: Dict[str, Any], seed: Optional[int] = None
) -> Sampler:
"""
Initializes the sampler class associated with the name with the params
:param name: Name of the sampler in the factory to initialize
:param params: Parameters associated to the sampler attached to the name
:param seed: Random seed to be used to set deterministic random draws for the sampler
"""
if name not in SamplerFactory.NAME_TO_CLASS:
raise SamplerException(
name + " sampler is not registered in the SamplerFactory."

sampler_cls = SamplerFactory.NAME_TO_CLASS[name]
params["seed"] = seed
try:
return sampler_cls(**params)
except TypeError:

class SamplerManager:
def __init__(self, reset_param_dict: Dict[str, Any]) -> None:
def __init__(
self, reset_param_dict: Dict[str, Any], seed: Optional[int] = None
) -> None:
"""
:param reset_param_dict: Arguments needed for initializing the samplers
:param seed: Random seed to be used for drawing samples from the samplers
"""
self.reset_param_dict = reset_param_dict if reset_param_dict else {}
assert isinstance(self.reset_param_dict, dict)
self.samplers: Dict[str, Sampler] = {}

)
sampler_name = cur_param_dict.pop("sampler-type")
param_sampler = SamplerFactory.init_sampler_class(
sampler_name, cur_param_dict
sampler_name, cur_param_dict, seed
)
self.samplers[param_name] = param_sampler

return not bool(self.samplers)
def sample_all(self) -> Dict[str, float]:
"""
Loop over all samplers and draw a sample from each one for generating
next set of reset parameter values.
"""
res = {}
for param_name, param_sampler in list(self.samplers.items()):
res[param_name] = param_sampler.sample_parameter()

2
ml-agents-envs/setup.py


setup(
name="mlagents_envs",
version="0.9.0",
version="0.9.1",
description="Unity Machine Learning Agents Interface",
url="https://github.com/Unity-Technologies/ml-agents",
author="Unity Technologies",

16
ml-agents/mlagents/trainers/bc/policy.py


self.model.sequence_length: self.sequence_length,
}
if self.use_continuous_act:
feed_dict[self.model.true_action] = mini_batch["actions"].reshape(
[-1, self.brain.vector_action_space_size[0]]
)
feed_dict[self.model.true_action] = mini_batch["actions"]
feed_dict[self.model.true_action] = mini_batch["actions"].reshape(
[-1, len(self.brain.vector_action_space_size)]
)
feed_dict[self.model.true_action] = mini_batch["actions"]
apparent_obs_size = (
self.brain.vector_observation_space_size
* self.brain.num_stacked_vector_observations
)
feed_dict[self.model.vector_in] = mini_batch["vector_obs"].reshape(
[-1, apparent_obs_size]
)
feed_dict[self.model.vector_in] = mini_batch["vector_obs"]
for i, _ in enumerate(self.model.visual_in):
visual_obs = mini_batch["visual_obs%d" % i]
feed_dict[self.model.visual_in[i]] = visual_obs

2
ml-agents/mlagents/trainers/bc/trainer.py


"""
Updates the policy.
"""
self.demonstration_buffer.update_buffer.shuffle()
self.demonstration_buffer.update_buffer.shuffle(self.policy.sequence_length)
batch_losses = []
num_batches = min(
len(self.demonstration_buffer.update_buffer["actions"]) // self.n_sequences,

12
ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py


# Create input ops for next (t+1) visual observations.
next_visual_input = LearningModel.create_visual_input(
self.policy_model.brain.camera_resolutions[i],
name="next_visual_observation_" + str(i),
name="curiosity_next_visual_observation_" + str(i),
)
self.next_visual_in.append(next_visual_input)

self.encoding_size,
LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"curiosity_stream_{}_visual_obs_encoder".format(i),
False,
)

LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"curiosity_stream_{}_visual_obs_encoder".format(i),
True,
)
visual_encoders.append(encoded_visual)

self.next_vector_in = tf.placeholder(
shape=[None, self.policy_model.vec_obs_size],
dtype=tf.float32,
name="next_vector_observation",
name="curiosity_next_vector_observation",
)
encoded_vector_obs = self.policy_model.create_vector_observation_encoder(

2,
"vector_obs_encoder",
"curiosity_vector_obs_encoder",
False,
)
encoded_next_vector_obs = self.policy_model.create_vector_observation_encoder(

2,
"vector_obs_encoder",
"curiosity_vector_obs_encoder",
True,
)
encoded_state_list.append(encoded_vector_obs)

20
ml-agents/mlagents/trainers/components/reward_signals/gail/model.py


# Create input ops for next (t+1) visual observations.
visual_input = self.policy_model.create_visual_input(
self.policy_model.brain.camera_resolutions[i],
name="visual_observation_" + str(i),
name="gail_visual_observation_" + str(i),
)
self.expert_visual_in.append(visual_input)

LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"gail_stream_{}_visual_obs_encoder".format(i),
False,
)

LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"gail_stream_{}_visual_obs_encoder".format(i),
True,
)
visual_policy_encoders.append(encoded_policy_visual)

concat_input,
self.h_size,
activation=LearningModel.swish,
name="d_hidden_1",
name="gail_d_hidden_1",
reuse=reuse,
)

activation=LearningModel.swish,
name="d_hidden_2",
name="gail_d_hidden_2",
reuse=reuse,
)

hidden_2,
self.z_size,
reuse=reuse,
name="z_mean",
name="gail_z_mean",
kernel_initializer=LearningModel.scaled_init(0.01),
)

estimate_input,
1,
activation=tf.nn.sigmoid,
name="d_estimate",
name="gail_d_estimate",
reuse=reuse,
)
return estimate, z_mean, concat_input

"""
if self.use_vail:
self.z_sigma = tf.get_variable(
"sigma_vail",
"gail_sigma_vail",
self.z_size,
dtype=tf.float32,
initializer=tf.ones_initializer(),

self.use_noise = tf.placeholder(
shape=[1], dtype=tf.float32, name="NoiseLevel"
shape=[1], dtype=tf.float32, name="gail_NoiseLevel"
)
self.expert_estimate, self.z_mean_expert, _ = self.create_encoder(
self.encoded_expert, self.expert_action, self.done_expert, reuse=False

reuse=True,
)
self.discriminator_score = tf.reshape(
self.policy_estimate, [-1], name="GAIL_reward"
self.policy_estimate, [-1], name="gail_reward"
)
self.intrinsic_reward = -tf.log(1.0 - self.discriminator_score + EPSILON)

6
ml-agents/mlagents/trainers/learn.py


env = SubprocessEnvManager(env_factory, num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
sampler_manager, resampling_interval = create_sampler_manager(
sampler_file_path, env.reset_parameters
sampler_file_path, env.reset_parameters, run_seed
)
trainers = initialize_trainers(

tc.start_learning(env)
def create_sampler_manager(sampler_file_path, env_reset_params):
def create_sampler_manager(sampler_file_path, env_reset_params, run_seed=None):
sampler_config = None
resample_interval = None
if sampler_file_path is not None:

"Resampling interval was not specified in the sampler file."
" Please specify it with the 'resampling-interval' key in the sampler config file."
)
sampler_manager = SamplerManager(sampler_config)
sampler_manager = SamplerManager(sampler_config, run_seed)
return sampler_manager, resample_interval

20
ml-agents/mlagents/trainers/tests/mock_brain.py


buffer.append_update_buffer(0, batch_size=None, training_length=sequence_length)
return buffer
def create_mock_3dball_brain():
    """
    Build mock brain parameters for tests that need a 3DBall-like brain.

    The mock is created through ``create_mock_brainparams`` with a
    "continuous" action space type, a vector action space size of ``[2]``
    and a vector observation space size of 8, then labelled "Ball3DBrain".

    :return: The configured mock brain parameters object.
    """
    ball_brain = create_mock_brainparams(
        vector_observation_space_size=8,
        vector_action_space_size=[2],
        vector_action_space_type="continuous",
    )
    # The brain name is assigned after creation rather than passed as a keyword.
    ball_brain.brain_name = "Ball3DBrain"
    return ball_brain
def create_mock_banana_brain():
    """
    Build mock brain parameters for tests that need a Banana-like brain.

    The mock is created through ``create_mock_brainparams`` with one visual
    observation, a "discrete" action space type, a vector action space size
    of ``[3, 3, 3, 2]`` and a vector observation space size of 0.

    :return: The configured mock brain parameters object.
    """
    return create_mock_brainparams(
        vector_observation_space_size=0,
        vector_action_space_size=[3, 3, 3, 2],
        vector_action_space_type="discrete",
        number_visual_observations=1,
    )

30
ml-agents/mlagents/trainers/tests/test_bc.py


import unittest.mock as mock
import pytest
import os
import numpy as np
import tensorflow as tf

import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.bc.offline_trainer import BCTrainer
from mlagents.envs import UnityEnvironment
from mlagents.envs.mock_communicator import MockCommunicator

use_recurrent: false
sequence_length: 32
memory_size: 32
batches_per_epoch: 1
batch_size: 32
summary_freq: 2000
max_steps: 4000
@mock.patch("mlagents.envs.UnityEnvironment")
def test_bc_trainer(mock_env, dummy_config):
    """
    Smoke test for BCTrainer: run one policy update on a simulated rollout
    and check that a cloning loss is recorded and the step counter advances.

    :param mock_env: Patched ``mlagents.envs.UnityEnvironment`` class.
    :param dummy_config: Trainer configuration fixture; it is modified in
        place with the summary, model and demo paths below.
    """
    # Wire the patched environment to a mock 3DBall brain with 12 agents.
    brain_params = mb.create_mock_3dball_brain()
    brain_info = mb.create_mock_braininfo(num_agents=12, num_vector_observations=8)
    mb.setup_mock_unityenvironment(mock_env, brain_params, brain_info)
    mocked_env = mock_env()

    # The demo file is expected to sit next to this test module.
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    config = dummy_config
    config["summary_path"] = "tmp"
    config["model_path"] = "tmp"
    config["demo_path"] = tests_dir + "/test.demo"

    bc_trainer = BCTrainer(
        brain_params, config, training=True, load=False, seed=0, run_id=0
    )

    # Replace the demonstration buffer with a simulated rollout, then update once.
    bc_trainer.demonstration_buffer = mb.simulate_rollout(
        mocked_env, bc_trainer.policy, 100
    )
    bc_trainer.update_policy()
    assert len(bc_trainer.stats["Losses/Cloning Loss"]) > 0

    bc_trainer.increment_step(1)
    assert bc_trainer.step == 1
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")

29
ml-agents/mlagents/trainers/tests/test_bcmodule.py


)
def create_mock_3dball_brain():
    """
    Build mock brain parameters for tests that need a 3DBall-like brain.

    Delegates to ``mb.create_mock_brainparams`` with a "continuous" action
    space type, a vector action space size of ``[2]`` and a vector
    observation space size of 8.

    :return: The mock brain parameters object.
    """
    return mb.create_mock_brainparams(
        vector_observation_space_size=8,
        vector_action_space_size=[2],
        vector_action_space_type="continuous",
    )
def create_mock_banana_brain():
    """
    Build mock brain parameters for tests that need a Banana-like brain.

    Delegates to ``mb.create_mock_brainparams`` with one visual observation,
    a "discrete" action space type, a vector action space size of
    ``[3, 3, 3, 2]`` and a vector observation space size of 0.

    :return: The mock brain parameters object.
    """
    return mb.create_mock_brainparams(
        vector_observation_space_size=0,
        vector_action_space_size=[3, 3, 3, 2],
        vector_action_space_type="discrete",
        number_visual_observations=1,
    )
def create_ppo_policy_with_bc_mock(
mock_env, mock_brain, dummy_config, use_rnn, demo_file
):

@mock.patch("mlagents.envs.UnityEnvironment")
def test_bcmodule_defaults(mock_env, dummy_config):
# See if default values match
mock_brain = create_mock_3dball_brain()
mock_brain = mb.create_mock_3dball_brain()
env, policy = create_ppo_policy_with_bc_mock(
mock_env, mock_brain, dummy_config, False, "test.demo"
)

# Test with continuous control env and vector actions
@mock.patch("mlagents.envs.UnityEnvironment")
def test_bcmodule_update(mock_env, dummy_config):
mock_brain = create_mock_3dball_brain()
mock_brain = mb.create_mock_3dball_brain()
env, policy = create_ppo_policy_with_bc_mock(
mock_env, mock_brain, dummy_config, False, "test.demo"
)

# Test with RNN
@mock.patch("mlagents.envs.UnityEnvironment")
def test_bcmodule_rnn_update(mock_env, dummy_config):
mock_brain = create_mock_3dball_brain()
mock_brain = mb.create_mock_3dball_brain()
env, policy = create_ppo_policy_with_bc_mock(
mock_env, mock_brain, dummy_config, True, "test.demo"
)

# Test with discrete control and visual observations
@mock.patch("mlagents.envs.UnityEnvironment")
def test_bcmodule_dc_visual_update(mock_env, dummy_config):
mock_brain = create_mock_banana_brain()
mock_brain = mb.create_mock_banana_brain()
env, policy = create_ppo_policy_with_bc_mock(
mock_env, mock_brain, dummy_config, False, "testdcvis.demo"
)

# Test with discrete control, visual observations and RNN
@mock.patch("mlagents.envs.UnityEnvironment")
def test_bcmodule_rnn_dc_update(mock_env, dummy_config):
mock_brain = create_mock_banana_brain()
mock_brain = mb.create_mock_banana_brain()
env, policy = create_ppo_policy_with_bc_mock(
mock_env, mock_brain, dummy_config, True, "testdcvis.demo"
)

4
ml-agents/setup.py


setup(
name="mlagents",
version="0.9.0",
version="0.9.1",
description="Unity Machine Learning Agents",
long_description=long_description,
long_description_content_type="text/markdown",

),
zip_safe=False,
install_requires=[
"mlagents_envs==0.9.0",
"mlagents_envs==0.9.1",
"tensorflow>=1.7,<1.8",
"Pillow>=4.2.1",
"matplotlib",

正在加载...
取消
保存