
Merge pull request #2704 from Unity-Technologies/hotfix-0.10.1

Merge Hotfix 0.10.1
/0.10.1
GitHub · 5 years ago
Current commit: f22c41db
21 changed files with 151 additions and 86 deletions
  1. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Brains/PushBlockLearning.asset (7 changes)
  2. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (2 changes)
  3. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity (4 changes)
  4. config/sac_trainer_config.yaml (8 changes)
  5. gym-unity/setup.py (2 changes)
  6. markdown-link-check.config.json (4 changes)
  7. ml-agents-envs/setup.py (2 changes)
  8. ml-agents/mlagents/trainers/bc/models.py (2 changes)
  9. ml-agents/mlagents/trainers/bc/trainer.py (9 changes)
  10. ml-agents/mlagents/trainers/ppo/policy.py (20 changes)
  11. ml-agents/mlagents/trainers/ppo/trainer.py (11 changes)
  12. ml-agents/mlagents/trainers/sac/models.py (7 changes)
  13. ml-agents/mlagents/trainers/sac/policy.py (5 changes)
  14. ml-agents/mlagents/trainers/sac/trainer.py (3 changes)
  15. ml-agents/mlagents/trainers/tests/mock_brain.py (10 changes)
  16. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (29 changes)
  17. ml-agents/mlagents/trainers/tests/test_bc.py (14 changes)
  18. ml-agents/mlagents/trainers/tests/test_ppo.py (73 changes)
  19. ml-agents/mlagents/trainers/trainer_metrics.py (19 changes)
  20. ml-agents/mlagents/trainers/trainer_util.py (2 changes)
  21. ml-agents/setup.py (4 changes)

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Brains/PushBlockLearning.asset (7 changes)


m_Name: PushBlockLearning
m_EditorClassIdentifier:
brainParameters:
-   vectorObservationSize: 0
+   vectorObservationSize: 70
-   cameraResolutions:
-   - width: 84
-     height: 84
-     blackAndWhite: 0
+   cameraResolutions: []
vectorActionDescriptions:
-
vectorActionSpaceType: 0

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (2 changes)


m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
m_Name:
m_EditorClassIdentifier:
- brain: {fileID: 11400000, guid: 59a04e208fb8a423586adf25bf1fecd0, type: 2}
+ brain: {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
agentParameters:
agentCameras:
- {fileID: 20712684238256298}

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity (4 changes)


broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
m_BrainsToControl:
- {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
m_MaxSteps: 0
m_BrainsToControl: []
m_TrainingConfiguration:
width: 80
height: 80

config/sac_trainer_config.yaml (8 changes)


BouncerLearning:
normalize: true
beta: 0.0
max_steps: 5.0e5
num_layers: 2
hidden_units: 64

max_steps: 5.0e4
init_entcoef: 0.05
beta: 1.0e-2
hidden_units: 256
summary_freq: 2000
time_horizon: 64

StrikerLearning:
max_steps: 5.0e5
learning_rate: 1e-3
beta: 1.0e-2
hidden_units: 256
summary_freq: 2000
time_horizon: 128

GoalieLearning:
max_steps: 5.0e5
learning_rate: 1e-3
beta: 1.0e-2
hidden_units: 256
summary_freq: 2000
time_horizon: 128

hidden_units: 256
buffer_init_steps: 1000
num_layers: 1
beta: 1.0e-2
max_steps: 5.0e5
buffer_size: 500000
init_entcoef: 0.01

num_layers: 2
hidden_units: 128
memory_size: 256
beta: 0.0
init_entcoef: 0.1
max_steps: 5.0e5
summary_freq: 1000

num_layers: 1
hidden_units: 128
memory_size: 256
beta: 1.0e-2
gamma: 0.99
batch_size: 64
max_steps: 5.0e5

num_layers: 1
hidden_units: 128
memory_size: 256
beta: 1.0e-2
gamma: 0.99
buffer_size: 1024
batch_size: 64

gym-unity/setup.py (2 changes)


author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
- install_requires=["gym", "mlagents_envs==0.10.0"],
+ install_requires=["gym", "mlagents_envs==0.10.1"],
)

markdown-link-check.config.json (4 changes)


{
"pattern": "^https://developer.nvidia.com/compute/machine-learning/cudnn/secure",
"comment": "Requires login"
},
{
"pattern": "^https?://bair.berkeley.edu",
"comment": "Temporary berkeley outage"
}
]
}

ml-agents-envs/setup.py (2 changes)


setup(
name="mlagents_envs",
version="0.10.0",
version="0.10.1",
description="Unity Machine Learning Agents Interface",
url="https://github.com/Unity-Technologies/ml-agents",
author="Unity Technologies",

ml-agents/mlagents/trainers/bc/models.py (2 changes)


for size in self.act_size:
policy_branches.append(
tf.layers.dense(
-     hidden,
+     hidden_reg,
size,
activation=None,
use_bias=False,

ml-agents/mlagents/trainers/bc/trainer.py (9 changes)


len(self.demonstration_buffer.update_buffer["actions"]) // self.n_sequences,
self.batches_per_epoch,
)
- for i in range(num_batches):
+ batch_size = self.n_sequences * self.policy.sequence_length
+ for i in range(0, num_batches * batch_size, batch_size):
-     start = i * self.n_sequences
-     end = (i + 1) * self.n_sequences
-     mini_batch = update_buffer.make_mini_batch(start, end)
+     mini_batch = update_buffer.make_mini_batch(i, i + batch_size)
run_out = self.policy.update(mini_batch, self.n_sequences)
loss = run_out["policy_loss"]
batch_losses.append(loss)
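For readers following the fix above: a minimal, self-contained sketch of the stride-based batching, using a plain list in place of the trainer's update buffer (the function and argument names below are illustrative, not part of the repo):

def iter_mini_batches(experiences, n_sequences, sequence_length, batches_per_epoch):
    # Each mini-batch should hold n_sequences whole sequences, so the stride is
    # n_sequences * sequence_length buffer entries rather than n_sequences steps.
    batch_size = n_sequences * sequence_length
    num_batches = min(len(experiences) // batch_size, batches_per_epoch)
    for i in range(0, num_batches * batch_size, batch_size):
        yield experiences[i : i + batch_size]

# e.g. 256 steps, 4 sequences of length 16 per batch -> batches of 64 entries
batches = list(iter_mini_batches(list(range(256)), 4, 16, batches_per_epoch=10))
print(len(batches), len(batches[0]))  # 4 64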

ml-agents/mlagents/trainers/ppo/policy.py (20 changes)


import logging
import numpy as np
- from typing import Any, Dict
+ from typing import Any, Dict, Optional
- from mlagents.envs.brain import BrainInfo
+ from mlagents.envs.brain import BrainInfo, BrainParameters
from mlagents.trainers.models import EncoderType, LearningRateSchedule
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.tf_policy import TFPolicy

class PPOPolicy(TFPolicy):
- def __init__(self, seed, brain, trainer_params, is_training, load):
+ def __init__(
+     self,
+     seed: int,
+     brain: BrainParameters,
+     trainer_params: Dict[str, Any],
+     is_training: bool,
+     load: bool,
+ ):
"""
Policy for Proximal Policy Optimization Networks.
:param seed: Random seed.

super().__init__(seed, brain, trainer_params)
reward_signal_configs = trainer_params["reward_signals"]
- self.inference_dict = {}
- self.update_dict = {}
+ self.inference_dict: Dict[str, tf.Tensor] = {}
+ self.update_dict: Dict[str, tf.Tensor] = {}
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",

self.create_reward_signals(reward_signal_configs)
with self.graph.as_default():
+ self.bc_module: Optional[BCModule] = None
# Create pretrainer if needed
if "pretraining" in trainer_params:
BCModule.check_config(trainer_params["pretraining"])

default_num_epoch=trainer_params["num_epoch"],
**trainer_params["pretraining"],
)
- else:
-     self.bc_module = None
if load:
self._load_graph()

ml-agents/mlagents/trainers/ppo/trainer.py (11 changes)


mean_return=float(np.mean(self.cumulative_returns_since_policy_update)),
)
self.cumulative_returns_since_policy_update = []
- batch_size = self.trainer_parameters["batch_size"]
+ # Make sure batch_size is a multiple of sequence length. During training, we
+ # will need to reshape the data into a batch_size x sequence_length tensor.
+ batch_size = (
+     self.trainer_parameters["batch_size"]
+     - self.trainer_parameters["batch_size"] % self.policy.sequence_length
+ )
+ # Make sure there is at least one sequence
+ batch_size = max(batch_size, self.policy.sequence_length)
n_sequences = max(
int(self.trainer_parameters["batch_size"] / self.policy.sequence_length), 1
)
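A quick sketch of that rounding rule with concrete numbers (the helper below is illustrative only, not part of the trainer):

def sequence_aligned_batch_size(requested, sequence_length):
    # Truncate to a multiple of sequence_length so the batch can be reshaped
    # into a (batch_size // sequence_length, sequence_length) tensor.
    aligned = requested - requested % sequence_length
    # Keep at least one full sequence.
    return max(aligned, sequence_length)

print(sequence_aligned_batch_size(100, 64))   # 64
print(sequence_aligned_batch_size(2048, 64))  # 2048 (already a multiple)
print(sequence_aligned_batch_size(10, 64))    # 64 (clamped up to one sequence)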

ml-agents/mlagents/trainers/sac/models.py (7 changes)


)
# We assume m_size is divisible by 4
# Create the non-Policy inputs
+ # Use a default placeholder here so nothing has to be provided during
+ # Barracuda inference. Note that the default value is just the tiled input
+ # for the policy, which is thrown away.
- self.other_memory_in = tf.placeholder(
+ self.other_memory_in = tf.placeholder_with_default(
+     input=tf.tile(self.inference_memory_in, [1, 3]),
dtype=tf.float32,
name="other_recurrent_in",
)
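To illustrate the pattern in isolation, here is a standalone TF 1.x sketch with made-up tensor names and shapes (not the trainer's actual graph code):

import numpy as np
import tensorflow as tf  # TF 1.x API, as used by ml-agents at this version

policy_mem = tf.placeholder(tf.float32, shape=[None, 4], name="recurrent_in")
# placeholder_with_default falls back to the tiled policy memory whenever the
# caller (e.g. Barracuda inference) does not feed "other_recurrent_in".
other_mem = tf.placeholder_with_default(
    tf.tile(policy_mem, [1, 3]), shape=[None, 12], name="other_recurrent_in"
)

with tf.Session() as sess:
    out = sess.run(other_mem, feed_dict={policy_mem: np.zeros((1, 4), np.float32)})
    print(out.shape)  # (1, 12): default value used because other_mem was not fed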

ml-agents/mlagents/trainers/sac/policy.py (5 changes)


import logging
- from typing import Dict, Any
+ from typing import Dict, Any, Optional
import numpy as np
import tensorflow as tf

with self.graph.as_default():
# Create pretrainer if needed
+ self.bc_module: Optional[BCModule] = None
if "pretraining" in trainer_params:
BCModule.check_config(trainer_params["pretraining"])
self.bc_module = BCModule(

"Pretraining: Samples Per Update is not a valid setting for SAC."
)
self.bc_module.samples_per_update = 1
- else:
-     self.bc_module = None
if load:
self._load_graph()

ml-agents/mlagents/trainers/sac/trainer.py (3 changes)


from mlagents.envs.brain import AllBrainInfo
from mlagents.envs.action_info import ActionInfoOutputs
from mlagents.envs.timers import timed
- from mlagents.trainers.buffer import Buffer
from mlagents.trainers.sac.policy import SACPolicy
from mlagents.trainers.rl_trainer import RLTrainer, AllRewardsOutput

with open(filename, "wb") as file_object:
self.training_buffer.update_buffer.save_to_file(file_object)
- def load_replay_buffer(self) -> Buffer:
+ def load_replay_buffer(self) -> None:
"""
Loads the last saved replay buffer from a file.
"""

ml-agents/mlagents/trainers/tests/mock_brain.py (10 changes)


return mock_brain
+ def create_mock_pushblock_brain():
+     mock_brain = create_mock_brainparams(
+         vector_action_space_type="discrete",
+         vector_action_space_size=[7],
+         vector_observation_space_size=70,
+     )
+     mock_brain.brain_name = "PushblockLearning"
+     return mock_brain
def create_mock_banana_brain():
mock_brain = create_mock_brainparams(
number_visual_observations=1,

ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (29 changes)


import os
+ import yaml
+ import pytest
+ from mlagents.trainers.tests.test_bc import create_bc_trainer
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
+ @pytest.fixture
+ def bc_dummy_config():
+     return yaml.safe_load(
+         """
+         hidden_units: 32
+         learning_rate: 3.0e-4
+         num_layers: 1
+         use_recurrent: false
+         sequence_length: 32
+         memory_size: 64
+         batches_per_epoch: 1
+         batch_size: 64
+         summary_freq: 2000
+         max_steps: 4000
+         """
+     )
+ @pytest.mark.parametrize("use_lstm", [False, True], ids=["nolstm", "lstm"])
+ @pytest.mark.parametrize("use_discrete", [True, False], ids=["disc", "cont"])
+ def test_bc_export(bc_dummy_config, use_lstm, use_discrete):
+     bc_dummy_config["use_recurrent"] = use_lstm
+     trainer, env = create_bc_trainer(bc_dummy_config, use_discrete)
+     trainer.export_model()
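Stacked parametrize decorators run the cross product of their arguments, so the export test above is collected four times (discrete/continuous x lstm/no-lstm). A self-contained sketch of the same mechanism, with a made-up test body:

import pytest

@pytest.mark.parametrize("use_lstm", [False, True], ids=["nolstm", "lstm"])
@pytest.mark.parametrize("use_discrete", [True, False], ids=["disc", "cont"])
def test_matrix(use_lstm, use_discrete):
    # pytest collects four cases covering every (use_discrete, use_lstm) combination
    assert isinstance(use_lstm, bool) and isinstance(use_discrete, bool)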

ml-agents/mlagents/trainers/tests/test_bc.py (14 changes)


)
- def create_bc_trainer(dummy_config):
+ def create_bc_trainer(dummy_config, is_discrete=False):
-     mock_brain = mb.create_mock_3dball_brain()
-     mock_braininfo = mb.create_mock_braininfo(num_agents=12, num_vector_observations=8)
+     if is_discrete:
+         mock_brain = mb.create_mock_pushblock_brain()
+         mock_braininfo = mb.create_mock_braininfo(
+             num_agents=12, num_vector_observations=70
+         )
+     else:
+         mock_brain = mb.create_mock_3dball_brain()
+         mock_braininfo = mb.create_mock_braininfo(
+             num_agents=12, num_vector_observations=8
+         )
mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
env = mock_env()

ml-agents/mlagents/trainers/tests/test_ppo.py (73 changes)


from mlagents.envs.brain import BrainParameters
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.mock_communicator import MockCommunicator
from mlagents.trainers.tests import mock_brain as mb
@pytest.fixture

memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
reward_signals:
extrinsic:
strength: 1.0

VECTOR_ACTION_SPACE = [2]
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 32
NUM_AGENTS = 12
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")

np.testing.assert_array_almost_equal(returns, np.array([0.729, 0.81, 0.9, 1.0]))
- def test_trainer_increment_step():
-     trainer_params = {
-         "trainer": "ppo",
-         "batch_size": 2048,
-         "beta": 0.005,
-         "buffer_size": 20480,
-         "epsilon": 0.2,
-         "gamma": 0.995,
-         "hidden_units": 512,
-         "lambd": 0.95,
-         "learning_rate": 0.0003,
-         "max_steps": "2e6",
-         "memory_size": 256,
-         "normalize": True,
-         "num_epoch": 3,
-         "num_layers": 3,
-         "time_horizon": 1000,
-         "sequence_length": 64,
-         "summary_freq": 3000,
-         "use_recurrent": False,
-         "use_curiosity": False,
-         "curiosity_strength": 0.01,
-         "curiosity_enc_size": 128,
-         "summary_path": "./summaries/test_trainer_summary",
-         "model_path": "./models/test_trainer_models/TestModel",
-         "keep_checkpoints": 5,
-         "reward_signals": {"extrinsic": {"strength": 1.0, "gamma": 0.99}},
-     }
+ def test_trainer_increment_step(dummy_config):
+     trainer_params = dummy_config
brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
trainer = PPOTrainer(brain_params, 0, trainer_params, True, False, 0, "0", False)

trainer.increment_step(5)
policy_mock.increment_step.assert_called_with(5)
assert trainer.step == 10
@mock.patch("mlagents.envs.environment.UnityEnvironment")
@pytest.mark.parametrize("use_discrete", [True, False])
def test_trainer_update_policy(mock_env, dummy_config, use_discrete):
env, mock_brain, _ = mb.setup_mock_env_and_brains(
mock_env,
use_discrete,
False,
num_agents=NUM_AGENTS,
vector_action_space=VECTOR_ACTION_SPACE,
vector_obs_space=VECTOR_OBS_SPACE,
discrete_action_space=DISCRETE_ACTION_SPACE,
)
trainer_params = dummy_config
trainer_params["use_recurrent"] = True
trainer = PPOTrainer(mock_brain, 0, trainer_params, True, False, 0, "0", False)
# Test update with sequence length smaller than batch size
buffer = mb.simulate_rollout(env, trainer.policy, BUFFER_INIT_SAMPLES)
# Mock out reward signal eval
buffer.update_buffer["extrinsic_rewards"] = buffer.update_buffer["rewards"]
buffer.update_buffer["extrinsic_returns"] = buffer.update_buffer["rewards"]
buffer.update_buffer["extrinsic_value_estimates"] = buffer.update_buffer["rewards"]
trainer.training_buffer = buffer
trainer.update_policy()
# Make batch length a larger multiple of sequence length
trainer.trainer_parameters["batch_size"] = 128
trainer.update_policy()
# Make batch length a larger non-multiple of sequence length
trainer.trainer_parameters["batch_size"] = 100
trainer.update_policy()
def test_add_rewards_output(dummy_config):

ml-agents/mlagents/trainers/trainer_metrics.py (19 changes)


self.delta_policy_update = 0
delta_train_start = time() - self.time_training_start
LOGGER.debug(
" Policy Update Training Metrics for {}: "
"\n\t\tTime to update Policy: {:0.3f} s \n"
"\t\tTime elapsed since training: {:0.3f} s \n"
"\t\tTime for experience collection: {:0.3f} s \n"
"\t\tBuffer Length: {} \n"
"\t\tReturns : {:0.3f}\n".format(
self.brain_name,
self.delta_policy_update,
delta_train_start,
self.delta_last_experience_collection,
self.last_buffer_length,
self.last_mean_return,
)
f" Policy Update Training Metrics for {self.brain_name}: "
f"\n\t\tTime to update Policy: {self.delta_policy_update:0.3f} s \n"
f"\t\tTime elapsed since training: {delta_train_start:0.3f} s \n"
f"\t\tTime for experience collection: {(self.delta_last_experience_collection or 0):0.3f} s \n"
f"\t\tBuffer Length: {self.last_buffer_length or 0} \n"
f"\t\tReturns : {(self.last_mean_return or 0):0.3f}\n"
)
self._add_row(delta_train_start)
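The `or 0` guards in the new f-strings matter because formatting None with a float spec raises a TypeError; a tiny illustration with placeholder variable names:

from typing import Optional

delta: Optional[float] = None  # e.g. no experience collected yet
buffer_len: Optional[int] = None

# f"{delta:0.3f}" would raise TypeError while delta is None; falling back to 0
# keeps the debug log line well-formed.
print(f"Time for experience collection: {(delta or 0):0.3f} s")
print(f"Buffer Length: {buffer_len or 0}")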

ml-agents/mlagents/trainers/trainer_util.py (2 changes)


:param multi_gpu: Whether to use multi-GPU training
:return:
"""
- trainers = {}
+ trainers: Dict[str, Trainer] = {}
trainer_parameters_dict = {}
for brain_name in external_brains:
trainer_parameters = trainer_config["default"].copy()

ml-agents/setup.py (4 changes)


setup(
name="mlagents",
version="0.10.0",
version="0.10.1",
description="Unity Machine Learning Agents",
long_description=long_description,
long_description_content_type="text/markdown",

"h5py>=2.9.0",
"jupyter",
"matplotlib",
"mlagents_envs==0.10.0",
"mlagents_envs==0.10.1",
"numpy>=1.13.3,<2.0",
"Pillow>=4.2.1",
"protobuf>=3.6",
