
Merge pull request #2514 from Unity-Technologies/hotfix-0.9.3

Hotfix 0.9.3
GitHub, 5 years ago
Current commit dc3ab81a
9 files changed, 72 insertions(+), 24 deletions(-)
1. UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/Scenes/BananaIL.unity (6 changes)
2. UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/Scripts/BananaAgent.cs (17 changes)
3. gym-unity/setup.py (4 changes)
4. ml-agents-envs/setup.py (2 changes)
5. ml-agents/mlagents/trainers/rl_trainer.py (1 change)
6. ml-agents/mlagents/trainers/tests/mock_brain.py (11 changes)
7. ml-agents/mlagents/trainers/tests/test_bc.py (50 changes)
8. ml-agents/mlagents/trainers/trainer.py (1 change)
9. ml-agents/setup.py (4 changes)

UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/Scenes/BananaIL.unity (6 changes)


   timeScale: 1
   targetFrameRate: 60
   resetParameters:
-    resetParameters: []
+    resetParameters:
+    - key: laser_length
+      value: 1
+    - key: agent_scale
+      value: 1
   agents: []
   listArea: []
   totalScore: 0
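
The scene now seeds two reset parameters, laser_length and agent_scale, each defaulting to 1. In the scene file they are serialized as a list of key/value entries; at runtime ML-Agents exposes them to the agent as a string-to-float mapping. A minimal sketch of that conversion, in Python for illustration (variable names are hypothetical):

# Turn the serialized key/value entries from the .unity scene into the
# flat mapping the agent code reads from. Illustrative only.
serialized = [
    {"key": "laser_length", "value": 1},
    {"key": "agent_scale", "value": 1},
]
reset_parameters = {entry["key"]: float(entry["value"]) for entry in serialized}
assert reset_parameters == {"laser_length": 1.0, "agent_scale": 1.0}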

UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/Scripts/BananaAgent.cs (17 changes)


   public bool contribute;
   private RayPerception3D rayPer;
   public bool useVectorObs;

   public override void InitializeAgent()
   {
   ...
       var rightAxis = (int)act[1];
       var rotateAxis = (int)act[2];
       var shootAxis = (int)act[3];
       switch (forwardAxis)
       {
           case 1:
           ...
               dirToGo = -transform.forward;
               break;
       }
       switch (rightAxis)
       {
           case 1:
           ...
               break;
           case 2:
               rotateDir = transform.up;
               break;
           ...
       }
       switch (shootAxis)
       {
   ...
   public void SetLaserLengths()
   {
-      laser_length = myAcademy.resetParameters["laser_length"];
+      laser_length = myAcademy.resetParameters.TryGetValue("laser_length", out laser_length) ? laser_length : 1.0f;
   ...
-      var agent_scale = myAcademy.resetParameters["agent_scale"];
-      gameObject.transform.localScale = new Vector3(agent_scale, agent_scale, agent_scale);
+      float agentScale;
+      agentScale = myAcademy.resetParameters.TryGetValue("agent_scale", out agentScale) ? agentScale : 1.0f;
+      gameObject.transform.localScale = new Vector3(agentScale, agentScale, agentScale);
   ...
   public void SetResetParameters()
   {
       SetLaserLengths();
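
The substance of the C# change above is replacing direct indexing, which throws KeyNotFoundException when a reset parameter is absent, with TryGetValue plus a fallback of 1.0. The same defensive pattern in Python terms, as a hedged sketch (the helper name is hypothetical):

def get_reset_parameter(reset_parameters, key, default=1.0):
    # dict.get mirrors C#'s TryGetValue-with-fallback: a scene that does not
    # define the key yields the default instead of raising KeyError.
    return reset_parameters.get(key, default)

assert get_reset_parameter({}, "agent_scale") == 1.0
assert get_reset_parameter({"laser_length": 2.0}, "laser_length") == 2.0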

gym-unity/setup.py (4 changes)


   setup(
       name="gym_unity",
-      version="0.4.5",
+      version="0.4.6",
       description="Unity Machine Learning Agents Gym Interface",
       license="Apache License 2.0",
       author="Unity Technologies",
       ...
-      install_requires=["gym", "mlagents_envs==0.9.2"],
+      install_requires=["gym", "mlagents_envs==0.9.3"],
   )

ml-agents-envs/setup.py (2 changes)


   setup(
       name="mlagents_envs",
-      version="0.9.2",
+      version="0.9.3",
       description="Unity Machine Learning Agents Interface",
       url="https://github.com/Unity-Technologies/ml-agents",
       author="Unity Technologies",

ml-agents/mlagents/trainers/rl_trainer.py (1 change)


   def __init__(self, *args, **kwargs):
       super(RLTrainer, self).__init__(*args, **kwargs)
+      self.step = 0
       # Make sure we have at least one reward_signal
       if not self.trainer_parameters["reward_signals"]:
           raise UnityTrainerException(
           ...
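
The constructor now fails fast when a trainer configuration defines no reward signals, rather than training silently with nothing to optimize. A minimal sketch of that guard, assuming trainer_parameters is a plain dict (the exception name comes from the hunk; the message is illustrative):

class UnityTrainerException(Exception):
    """Raised when a trainer is misconfigured."""

def check_reward_signals(trainer_parameters):
    # An empty or missing reward_signals section means the policy would
    # receive no learning signal at all, so refuse to start.
    if not trainer_parameters.get("reward_signals"):
        raise UnityTrainerException(
            "No reward signals were defined. Add at least one, e.g. 'extrinsic'."
        )

check_reward_signals({"reward_signals": {"extrinsic": {"strength": 1.0}}})  # passes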

ml-agents/mlagents/trainers/tests/mock_brain.py (11 changes)


   :Mock mock_brain: A mock Brain object that specifies the params of this environment.
   :Mock mock_braininfo: A mock BrainInfo object that will be returned at each step and reset.
   """
+  brain_name = mock_brain.brain_name
-  mock_env.return_value.brains = {"MockBrain": mock_brain}
-  mock_env.return_value.external_brain_names = ["MockBrain"]
-  mock_env.return_value.brain_names = ["MockBrain"]
-  mock_env.return_value.reset.return_value = {"MockBrain": mock_braininfo}
-  mock_env.return_value.step.return_value = {"MockBrain": mock_braininfo}
+  mock_env.return_value.brains = {brain_name: mock_brain}
+  mock_env.return_value.external_brain_names = [brain_name]
+  mock_env.return_value.brain_names = [brain_name]
+  mock_env.return_value.reset.return_value = {brain_name: mock_braininfo}
+  mock_env.return_value.step.return_value = {brain_name: mock_braininfo}

   def simulate_rollout(env, policy, buffer_init_samples):
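
Keying every mocked return value off mock_brain.brain_name instead of the hard-coded "MockBrain" lets the helper serve any brain, which is what allows test_bc.py below to look results up under "Ball3DBrain". A self-contained sketch of the pattern with unittest.mock (the helper name is hypothetical):

from unittest import mock

def setup_mock_env(mock_env, brain_name, mock_brain, mock_braininfo):
    # All lookups the trainer performs are keyed by the brain's real name,
    # so tests no longer depend on the literal string "MockBrain".
    mock_env.return_value.brains = {brain_name: mock_brain}
    mock_env.return_value.external_brain_names = [brain_name]
    mock_env.return_value.brain_names = [brain_name]
    mock_env.return_value.reset.return_value = {brain_name: mock_braininfo}
    mock_env.return_value.step.return_value = {brain_name: mock_braininfo}

env_cls = mock.Mock()
setup_mock_env(env_cls, "Ball3DBrain", mock.Mock(), mock.Mock())
assert env_cls.return_value.brain_names == ["Ball3DBrain"]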

ml-agents/mlagents/trainers/tests/test_bc.py (50 changes)


   def dummy_config():
       return yaml.safe_load(
           """
-          hidden_units: 128
-          num_layers: 2
+          hidden_units: 32
+          num_layers: 1
           use_recurrent: false
           sequence_length: 32
           memory_size: 32
           ...
       )

-  @mock.patch("mlagents.envs.UnityEnvironment")
-  def test_bc_trainer(mock_env, dummy_config):
+  def create_bc_trainer(dummy_config):
+      mock_env = mock.Mock()
       mock_brain = mb.create_mock_3dball_brain()
       mock_braininfo = mb.create_mock_braininfo(num_agents=12, num_vector_observations=8)
       mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
       ...
           mock_brain, trainer_parameters, training=True, load=False, seed=0, run_id=0
       )
+      trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy, 100)
+      return trainer, env

+  def test_bc_trainer_step(dummy_config):
+      trainer, env = create_bc_trainer(dummy_config)
+      # Test get_step
+      assert trainer.get_step == 0
+      # Test update policy
+      ...
+      # Test increment step
+      ...

+  def test_bc_trainer_add_proc_experiences(dummy_config):
+      trainer, env = create_bc_trainer(dummy_config)
+      # Test add_experiences
+      returned_braininfo = env.step()
+      trainer.add_experiences(
+          returned_braininfo, returned_braininfo, {}
+      )  # Take action outputs is not used
+      for agent_id in returned_braininfo["Ball3DBrain"].agents:
+          assert trainer.evaluation_buffer[agent_id].last_brain_info is not None
+          assert trainer.episode_steps[agent_id] > 0
+          assert trainer.cumulative_rewards[agent_id] > 0
+      # Test process_experiences by setting done
+      returned_braininfo["Ball3DBrain"].local_done = 12 * [True]
+      trainer.process_experiences(returned_braininfo, returned_braininfo)
+      for agent_id in returned_braininfo["Ball3DBrain"].agents:
+          assert trainer.episode_steps[agent_id] == 0
+          assert trainer.cumulative_rewards[agent_id] == 0

+  def test_bc_trainer_end_episode(dummy_config):
+      trainer, env = create_bc_trainer(dummy_config)
+      returned_braininfo = env.step()
+      trainer.add_experiences(
+          returned_braininfo, returned_braininfo, {}
+      )  # Take action outputs is not used
+      trainer.process_experiences(returned_braininfo, returned_braininfo)
+      # Should set everything to 0
+      trainer.end_episode()
+      for agent_id in returned_braininfo["Ball3DBrain"].agents:
+          assert trainer.episode_steps[agent_id] == 0
+          assert trainer.cumulative_rewards[agent_id] == 0

   @mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
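
The monolithic test_bc_trainer is split into a create_bc_trainer factory plus three focused tests, each asserting one slice of the per-agent bookkeeping: counters grow on add_experiences and reset to zero on episode end. A toy sketch of that bookkeeping contract (the class is illustrative, not the real BCTrainer):

from collections import defaultdict

class EpisodeBookkeeping:
    def __init__(self):
        self.episode_steps = defaultdict(int)
        self.cumulative_rewards = defaultdict(float)

    def add_experiences(self, agent_ids, rewards):
        for agent_id, reward in zip(agent_ids, rewards):
            self.episode_steps[agent_id] += 1
            self.cumulative_rewards[agent_id] += reward

    def end_episode(self):
        # Mirrors what the tests assert after trainer.end_episode():
        # every per-agent counter goes back to zero.
        for agent_id in self.episode_steps:
            self.episode_steps[agent_id] = 0
            self.cumulative_rewards[agent_id] = 0.0

book = EpisodeBookkeeping()
book.add_experiences([0, 1], [0.1, 0.2])
assert book.episode_steps[0] == 1
book.end_episode()
assert book.cumulative_rewards[1] == 0.0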

ml-agents/mlagents/trainers/trainer.py (1 change)


   self.summary_writer = tf.summary.FileWriter(self.summary_path)
   self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
   self.policy: Policy = None
+  self.step: int = 0

   def check_param_keys(self):
       for k in self.param_keys:

ml-agents/setup.py (4 changes)


   setup(
       name="mlagents",
-      version="0.9.2",
+      version="0.9.3",
       description="Unity Machine Learning Agents",
       long_description=long_description,
       long_description_content_type="text/markdown",
       ...
       ),
       zip_safe=False,
       install_requires=[
-          "mlagents_envs==0.9.2",
+          "mlagents_envs==0.9.3",
           "tensorflow>=1.7,<1.8",
           "Pillow>=4.2.1",
           "matplotlib",
           ...
