
Catch dimension mismatches between demos and policy (#3821)

/develop/dockerfile
GitHub, 4 years ago
Current commit: adeb6536
6 files changed, with 165 insertions and 20 deletions
  1. ml-agents/mlagents/trainers/components/bc/module.py (4 changes)
  2. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (11 changes)
  3. ml-agents/mlagents/trainers/demo_loader.py (59 changes)
  4. ml-agents/mlagents/trainers/tests/test_demo_loader.py (64 changes)
  5. ml-agents/mlagents/trainers/tests/test_reward_signals.py (45 changes)
  6. ml-agents/mlagents/trainers/tests/test_simple_rl.py (2 changes)

ml-agents/mlagents/trainers/components/bc/module.py (4 changes)


        self.policy = policy
        self.current_lr = policy_learning_rate * strength
        self.model = BCModel(policy, self.current_lr, steps)
-       _, self.demonstration_buffer = demo_to_buffer(demo_path, policy.sequence_length)
+       _, self.demonstration_buffer = demo_to_buffer(
+           demo_path, policy.sequence_length, policy.brain
+       )
        self.batch_size = batch_size if batch_size else default_batch_size
        self.num_epoch = num_epoch if num_epoch else default_num_epoch
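For reference, a minimal sketch of the new call pattern, assuming a policy object with a brain attribute as in this codebase (the demo path below is a hypothetical placeholder):

    from mlagents.trainers.demo_loader import demo_to_buffer

    # policy.brain carries the BrainParameters the policy was built with;
    # passing it lets the loader validate the demo's dimensions up front,
    # instead of failing with an opaque shape error during the first update.
    brain_params, demo_buffer = demo_to_buffer(
        "expert.demo", policy.sequence_length, policy.brain
    )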

ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (11 changes)


        self.model = GAILModel(
            policy, 128, learning_rate, encoding_size, use_actions, use_vail
        )
-       _, self.demonstration_buffer = demo_to_buffer(demo_path, policy.sequence_length)
+       _, self.demonstration_buffer = demo_to_buffer(
+           demo_path, policy.sequence_length, policy.brain
+       )
        self.has_updated = False
        self.update_dict: Dict[str, tf.Tensor] = {
            "gail_loss": self.model.loss,

        self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
    ) -> Dict[tf.Tensor, Any]:
        """
-       Prepare inputs for update. .
-       :param mini_batch_demo: A mini batch of expert trajectories
-       :param mini_batch_policy: A mini batch of trajectories sampled from the current policy
+       Prepare inputs for update.
+       :param policy: The policy learning from GAIL signal
+       :param mini_batch: A mini batch from trajectories sampled from the current policy
        :param num_sequences: Number of samples in batch
        :return: Feed_dict for update process.
        """
        # Get batch from demo buffer. Even if demo buffer is smaller, we sample with replacement
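The comment above relies on sampling with replacement, so the demo buffer may be smaller than the policy batch. A minimal NumPy sketch of the idea (the sizes are hypothetical, not the library's internals):

    import numpy as np

    demo_size = 64     # hypothetical number of recorded demo transitions
    batch_size = 256   # discriminator mini-batch, larger than the demo buffer

    # Drawing indices with replacement: duplicates are allowed, so even a
    # small demo buffer can fill a full-sized mini-batch.
    indices = np.random.randint(0, demo_size, size=batch_size)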

ml-agents/mlagents/trainers/demo_loader.py (59 changes)


@timed
def demo_to_buffer(
-   file_path: str, sequence_length: int
+   file_path: str, sequence_length: int, expected_brain_params: BrainParameters = None
) -> Tuple[BrainParameters, AgentBuffer]:
    """
    Loads a demonstration file and uses it to fill a training buffer.

    behavior_spec, info_action_pair, _ = load_demonstration(file_path)
    demo_buffer = make_demo_buffer(info_action_pair, behavior_spec, sequence_length)
    brain_params = behavior_spec_to_brain_parameters("DemoBrain", behavior_spec)
    if expected_brain_params:
        # check that the action dimensions in the demonstration match the policy's
        if (
            brain_params.vector_action_space_size
            != expected_brain_params.vector_action_space_size
        ):
            raise RuntimeError(
                "The action dimensions {} in the demonstration do not match the policy's {}.".format(
                    brain_params.vector_action_space_size,
                    expected_brain_params.vector_action_space_size,
                )
            )
        # check that the action type in the demonstration matches the policy's
        if (
            brain_params.vector_action_space_type
            != expected_brain_params.vector_action_space_type
        ):
            raise RuntimeError(
                "The action type {} in the demonstration does not match the policy's {}.".format(
                    brain_params.vector_action_space_type,
                    expected_brain_params.vector_action_space_type,
                )
            )
        # check that the number of vector observations in the demonstration matches the policy's
        if (
            brain_params.vector_observation_space_size
            != expected_brain_params.vector_observation_space_size
        ):
            raise RuntimeError(
                "The vector observation dimensions {} in the demonstration do not match the policy's {}.".format(
                    brain_params.vector_observation_space_size,
                    expected_brain_params.vector_observation_space_size,
                )
            )
        # check that the number and resolutions of visual observations in the demonstration match the policy's
        if (
            brain_params.number_visual_observations
            != expected_brain_params.number_visual_observations
        ):
            raise RuntimeError(
                "The number of visual observations {} in the demonstration does not match the policy's {}.".format(
                    brain_params.number_visual_observations,
                    expected_brain_params.number_visual_observations,
                )
            )
        for i, (resolution, expected_resolution) in enumerate(
            zip(
                brain_params.camera_resolutions,
                expected_brain_params.camera_resolutions,
            )
        ):
            if resolution != expected_resolution:
                raise RuntimeError(
                    "The resolution of visual observation {} in the demonstration does not match the policy's.".format(
                        i
                    )
                )
    return brain_params, demo_buffer
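A hedged usage sketch of the new guard, mirroring the tests added below: a BrainParameters spec that disagrees with the recorded demo now fails loudly at load time (the observation size of 9 versus the demo's 8 follows the test fixtures; the demo path is illustrative):

    from mlagents.trainers.brain import BrainParameters
    from mlagents.trainers.demo_loader import demo_to_buffer

    mismatched = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=9,  # the recorded demo has 8
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=1,
    )
    try:
        demo_to_buffer("test.demo", 1, mismatched)
    except RuntimeError as err:
        print(err)  # the vector observation dimensions do not match the policy's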

ml-agents/mlagents/trainers/tests/test_demo_loader.py (64 changes)


    DemonstrationMetaProto,
)
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.demo_loader import (
    load_demonstration,
    demo_to_buffer,

BRAIN_PARAMS = BrainParameters(
    brain_name="test_brain",
    vector_observation_space_size=8,
    camera_resolutions=[],
    vector_action_space_size=[2],
    vector_action_descriptions=[],
    vector_action_space_type=1,
)


def test_load_demo():
    path_prefix = os.path.dirname(os.path.abspath(__file__))

    assert np.sum(behavior_spec.observation_shapes[0]) == 8
    assert len(pair_infos) == total_expected
-   _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1)
+   _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BRAIN_PARAMS)
    assert len(demo_buffer["actions"]) == total_expected - 1

    assert np.sum(behavior_spec.observation_shapes[0]) == 8
    assert len(pair_infos) == total_expected
-   _, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1)
+   _, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BRAIN_PARAMS)
def test_demo_mismatch():
    path_prefix = os.path.dirname(os.path.abspath(__file__))
    # observation mismatch
    with pytest.raises(RuntimeError):
        brain_params_obs = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=9,
            camera_resolutions=[],
            vector_action_space_size=[2],
            vector_action_descriptions=[],
            vector_action_space_type=1,
        )
        _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_obs)
    # action mismatch
    with pytest.raises(RuntimeError):
        brain_params_act = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=8,
            camera_resolutions=[],
            vector_action_space_size=[3],
            vector_action_descriptions=[],
            vector_action_space_type=1,
        )
        _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_act)
    # action type mismatch
    with pytest.raises(RuntimeError):
        brain_params_type = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=8,
            camera_resolutions=[],
            vector_action_space_size=[2],
            vector_action_descriptions=[],
            vector_action_space_type=0,
        )
        _, demo_buffer = demo_to_buffer(
            path_prefix + "/test.demo", 1, brain_params_type
        )
    # vis obs mismatch
    with pytest.raises(RuntimeError):
        brain_params_vis = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=8,
            camera_resolutions=[[30, 40]],
            vector_action_space_size=[2],
            vector_action_descriptions=[],
            vector_action_space_type=1,
        )
        _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_vis)


def test_edge_cases():

ml-agents/mlagents/trainers/tests/test_reward_signals.py (45 changes)


from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer

CONTINUOUS_PATH = os.path.dirname(os.path.abspath(__file__)) + "/test.demo"
DISCRETE_PATH = os.path.dirname(os.path.abspath(__file__)) + "/testdcvis.demo"


def ppo_dummy_config():
    return yaml.safe_load(

        use_recurrent: false
        memory_size: 8
        reward_signals:
            extrinsic:
                strength: 1.0
                gamma: 0.99
"""
)

        tau: 0.005
        use_recurrent: false
        vis_encode_type: simple
        behavioral_cloning:
            demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
            strength: 1.0
            steps: 10000000
        reward_signals:
            extrinsic:
                strength: 1.0

"gamma": 0.9,
"encoding_size": 128,
"use_vail": True,
"demo_path": os.path.dirname(os.path.abspath(__file__)) + "/test.demo",
"demo_path": CONTINUOUS_PATH,
}
}

        vector_obs_space=VECTOR_OBS_SPACE,
        discrete_action_space=DISCRETE_ACTION_SPACE,
    )
    trainer_parameters = trainer_config
    model_path = "testpath"
    trainer_parameters["model_path"] = model_path

"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
def test_gail_cc(trainer_config, gail_dummy_config):
trainer_config.update(
{
"behavioral_cloning": {
"demo_path": CONTINUOUS_PATH,
"strength": 1.0,
"steps": 10000000,
}
}
)
optimizer = create_optimizer_mock(
trainer_config, gail_dummy_config, False, False, False
)

"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
def test_gail_dc_visual(trainer_config, gail_dummy_config):
gail_dummy_config["gail"]["demo_path"] = (
os.path.dirname(os.path.abspath(__file__)) + "/testdcvis.demo"
gail_dummy_config["gail"]["demo_path"] = DISCRETE_PATH
trainer_config.update(
{
"behavioral_cloning": {
"demo_path": DISCRETE_PATH,
"strength": 1.0,
"steps": 10000000,
}
}
)
optimizer = create_optimizer_mock(
trainer_config, gail_dummy_config, False, True, True

"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
def test_gail_rnn(trainer_config, gail_dummy_config):
trainer_config.update(
{
"behavioral_cloning": {
"demo_path": CONTINUOUS_PATH,
"strength": 1.0,
"steps": 10000000,
}
}
)
policy = create_optimizer_mock(
trainer_config, gail_dummy_config, True, False, False
)

ml-agents/mlagents/trainers/tests/test_simple_rl.py (2 changes)


    agent_info_protos = env.demonstration_protos[BRAIN_NAME]
    meta_data_proto = DemonstrationMetaProto()
    brain_param_proto = BrainParametersProto(
-       vector_action_size=[1],
+       vector_action_size=[2] if use_discrete else [1],
        vector_action_descriptions=[""],
        vector_action_space_type=discrete if use_discrete else continuous,
        brain_name=BRAIN_NAME,
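Why the conditional: demo_to_buffer now validates the demo's action spec against the policy, so the proto recorded by this test must advertise the matching layout. A hedged reading of the rule (interpreting [2] as one discrete branch with two choices is my assumption, based on the test environment):

    # Discrete run: one action branch with two choices, so size [2];
    # continuous run: a single continuous action, so size [1].
    vector_action_size = [2] if use_discrete else [1]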
