浏览代码

Bugfix for LSTM+BC (#2679)

* Fix LSTM+BC in discrete case
* Add test for Barracuda export
* Fix LSTM training for BC
/develop-gpu-test
GitHub 5 年前
当前提交
e6240c7a
共有 5 个文件被更改,包括 56 次插入8 次删除
  1. 2
      ml-agents/mlagents/trainers/bc/models.py
  2. 9
      ml-agents/mlagents/trainers/bc/trainer.py
  3. 10
      ml-agents/mlagents/trainers/tests/mock_brain.py
  4. 29
      ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
  5. 14
      ml-agents/mlagents/trainers/tests/test_bc.py

2
ml-agents/mlagents/trainers/bc/models.py


for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden,
hidden_reg,
size,
activation=None,
use_bias=False,

9
ml-agents/mlagents/trainers/bc/trainer.py


len(self.demonstration_buffer.update_buffer["actions"]) // self.n_sequences,
self.batches_per_epoch,
)
for i in range(num_batches):
batch_size = self.n_sequences * self.policy.sequence_length
for i in range(0, num_batches * batch_size, batch_size):
start = i * self.n_sequences
end = (i + 1) * self.n_sequences
mini_batch = update_buffer.make_mini_batch(start, end)
mini_batch = update_buffer.make_mini_batch(i, i + batch_size)
run_out = self.policy.update(mini_batch, self.n_sequences)
loss = run_out["policy_loss"]
batch_losses.append(loss)

10
ml-agents/mlagents/trainers/tests/mock_brain.py


return mock_brain
def create_mock_pushblock_brain():
    """Return mock brain parameters named "PushblockLearning".

    The parameters come from ``create_mock_brainparams``, configured with a
    "discrete" action space type, an action space size of ``[7]`` and a
    vector observation space size of 70.
    """
    pushblock_kwargs = {
        "vector_action_space_type": "discrete",
        "vector_action_space_size": [7],
        "vector_observation_space_size": 70,
    }
    pushblock_brain = create_mock_brainparams(**pushblock_kwargs)
    pushblock_brain.brain_name = "PushblockLearning"
    return pushblock_brain
def create_mock_banana_brain():
mock_brain = create_mock_brainparams(
number_visual_observations=1,

29
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import os
import yaml
import pytest
from mlagents.trainers.tests.test_bc import create_bc_trainer
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
@pytest.fixture
def bc_dummy_config():
    """Provide the dummy trainer configuration used by the BC export test.

    The settings are written as an inline YAML document and parsed with
    ``yaml.safe_load``. ``use_recurrent`` is ``false`` here; the test that
    consumes this fixture overrides it per parametrization.
    """
    # The YAML text is kept flush-left so the parsed string stays unchanged.
    config_yaml = """
hidden_units: 32
learning_rate: 3.0e-4
num_layers: 1
use_recurrent: false
sequence_length: 32
memory_size: 64
batches_per_epoch: 1
batch_size: 64
summary_freq: 2000
max_steps: 4000
"""
    return yaml.safe_load(config_yaml)
@pytest.mark.parametrize("use_lstm", [False, True], ids=["nolstm", "lstm"])
@pytest.mark.parametrize("use_discrete", [True, False], ids=["disc", "cont"])
def test_bc_export(bc_dummy_config, use_lstm, use_discrete):
    """Build a BC trainer and export its model for every parameter combination.

    Covers recurrent on/off crossed with the ``use_discrete`` flag passed to
    ``create_bc_trainer``. The test has no explicit assertions; it passes as
    long as ``export_model`` completes without raising.
    """
    # Toggle the recurrent setting on the fixture config before building.
    bc_dummy_config.update(use_recurrent=use_lstm)
    bc_trainer, _env = create_bc_trainer(bc_dummy_config, use_discrete)
    bc_trainer.export_model()

14
ml-agents/mlagents/trainers/tests/test_bc.py


)
def create_bc_trainer(dummy_config):
def create_bc_trainer(dummy_config, is_discrete=False):
mock_brain = mb.create_mock_3dball_brain()
mock_braininfo = mb.create_mock_braininfo(num_agents=12, num_vector_observations=8)
if is_discrete:
mock_brain = mb.create_mock_pushblock_brain()
mock_braininfo = mb.create_mock_braininfo(
num_agents=12, num_vector_observations=70
)
else:
mock_brain = mb.create_mock_3dball_brain()
mock_braininfo = mb.create_mock_braininfo(
num_agents=12, num_vector_observations=8
)
mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
env = mock_env()

正在加载...
取消
保存