
cleanup, don't store mask

/MLA-1734-demo-provider
Chris Elion, 4 years ago
Current commit
c3bc8991
6 files changed, with 97 insertions and 50 deletions
  1. ml-agents/mlagents/trainers/buffer.py (4 changes)
  2. ml-agents/mlagents/trainers/demonstrations/demonstration_provider.py (27 changes)
  3. ml-agents/mlagents/trainers/demonstrations/local_demonstration_provider.py (14 changes)
  4. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (50 changes)
  5. ml-agents/mlagents/trainers/torch/components/bc/module.py (25 changes)
  6. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (27 changes)

ml-agents/mlagents/trainers/buffer.py (4 changes)


        if key_list is None:
            key_list = list(self.keys())
        if not self.check_length(key_list):
+             lengths = {k: len(self._fields[k]) for k in key_list}
+             lengths_str = "\n\t".join(str((k, v)) for k, v in lengths.items())
            raise BufferException(
-                 f"The length of the fields {key_list} were not of same length"
+                 f"The length of the fields were not of same length: {lengths_str}"
            )
        for field_key in key_list:
            target_buffer[field_key].extend(
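For reference, a hypothetical illustration of what the improved exception message looks like; the field names and lengths below are invented, and only the formatting logic comes from the change above:

lengths = {"obs_0": 128, "actions": 127}  # made-up mismatched field lengths
lengths_str = "\n\t".join(str((k, v)) for k, v in lengths.items())
print(f"The length of the fields were not of same length: {lengths_str}")
# The length of the fields were not of same length: ('obs_0', 128)
#     ('actions', 127)

Each offending field and its length is now listed, instead of only the key names.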

27
ml-agents/mlagents/trainers/demonstrations/demonstration_provider.py


    done: bool
    action: ActionTuple
    prev_action: np.ndarray
    action_mask: np.ndarray
    interrupted: bool

            agent_buffer_trajectory[BufferKey.MASKS].append(1.0)
            agent_buffer_trajectory[BufferKey.DONE].append(exp.done)
            # Adds the log prob and action of continuous/discrete separately
            agent_buffer_trajectory[BufferKey.CONTINUOUS_ACTION].append(
                exp.action.continuous
            )

            # Store action masks if necessary. Note that 1 means active, while
            # in AgentExperience False means active.
            if exp.action_mask is not None:
                mask = 1 - np.concatenate(exp.action_mask)
                agent_buffer_trajectory[BufferKey.ACTION_MASK].append(
                    mask, padding_value=1
                )
            else:
                # This should never be needed unless the environment somehow doesn't supply the
                # action mask in a discrete space.
                action_shape = exp.action.discrete.shape
                agent_buffer_trajectory[BufferKey.ACTION_MASK].append(
                    np.ones(action_shape, dtype=np.float32), padding_value=1
                )
            agent_buffer_trajectory[BufferKey.PREV_ACTION].append(exp.prev_action)
            agent_buffer_trajectory[BufferKey.ENVIRONMENT_REWARDS].append(exp.reward)

    @abc.abstractmethod
    def pop_trajectories(self) -> List[DemonstrationTrajectory]:
        pass

    def to_agentbuffer(self, training_length: int) -> AgentBuffer:
        buffer_out = AgentBuffer()
        trajectories = self.pop_trajectories()
        for trajectory in trajectories:
            temp_buffer = trajectory.to_agentbuffer()
            temp_buffer.resequence_and_append(
                buffer_out, batch_size=None, training_length=training_length
            )
        return buffer_out
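To make the mask conversion above concrete, here is a minimal sketch of the inversion; the branch sizes and mask values are invented, and only the 1 - np.concatenate(...) step comes from the code above:

import numpy as np

# In the experience, False means the action is active; one boolean array per discrete branch.
action_mask = [np.array([False, True]), np.array([False, False, True])]

# In the buffer, 1 means active, so the concatenated mask is inverted.
mask = 1 - np.concatenate(action_mask)
print(mask)  # [1 0 1 1 0]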

ml-agents/mlagents/trainers/demonstrations/local_demonstration_provider.py (14 changes)


        action_tuple = LocalDemonstrationProvider._get_action_tuple(
            pair, behavior_spec.action_spec
        )
        action_mask = None
        if pair.agent_info.action_mask:
            # TODO 2D?
            action_mask = np.ndarray(
                [bool(m) for m in pair.agent_info.action_mask], dtype=np.bool
            )
        exp = DemonstrationExperience(
            obs=obs,

            prev_action=previous_action,
            action_mask=action_mask,

-         previous_action = np.ndarray(
+         previous_action = np.array(
            pair.action_info.vector_actions_deprecated, dtype=np.float32
        )
        if pair.agent_info.done or pair_index == len(info_action_pairs) - 1:

        # TODO 2D?
        continuous_np = (
-             np.ndarray(continuous_actions, dtype=np.float32)
+             np.array(continuous_actions, dtype=np.float32)
-         np.ndarray(discrete_actions, dtype=np.float32) if discrete_actions else None
+         np.array(discrete_actions, dtype=np.float32) if discrete_actions else None
        return ActionTuple(continuous_np, discrete_np)
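The np.ndarray to np.array replacements above are genuine bug fixes: np.ndarray() treats its first positional argument as the shape of a new, uninitialized array rather than as data to convert. A small sketch of the difference, with made-up input values:

import numpy as np

values = [True, False, True]

wrong = np.ndarray(values)                  # interprets [1, 0, 1] as a shape: an empty (1, 0, 1) array
right = np.array(values, dtype=np.float32)  # converts the data itself

print(wrong.shape)  # (1, 0, 1)
print(right)        # [1. 0. 1.]

With float inputs such as the deprecated vector actions above, np.ndarray would raise a TypeError outright rather than silently producing an empty array.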

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (50 changes)


import os
import pytest
from unittest.mock import patch
import pytest
from unittest.mock import patch
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
import os
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.demonstrations.demonstration_provider import (
DemonstrationProvider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))


class MockDemonstrationProvider(DemonstrationProvider):
    def __init__(self, behavior_spec, buffer):
        self._behavior_spec = behavior_spec
        self._buffer = buffer

    def get_behavior_spec(self) -> BehaviorSpec:
        return self._behavior_spec

    def pop_trajectories(self):
        raise NotImplementedError()

    def to_agentbuffer(self, training_length: int) -> AgentBuffer:
        return self._buffer


@pytest.mark.parametrize(
    "behavior_spec",
    [BehaviorSpec(create_observation_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],

    ],
)
@pytest.mark.parametrize("use_actions", [False, True])
- @patch(
-     "mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
- )
+ @patch.object(GAILRewardProvider, "_get_demonstration_provider")
-     demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
+     mock_demo_provider: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int

-     demo_to_buffer.return_value = None, buffer_expert
+     mock_demo_provider.return_value = MockDemonstrationProvider(
+         behavior_spec, buffer_expert
+     )
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.005, use_vail=False, use_actions=use_actions
    )

    ],
)
@pytest.mark.parametrize("use_actions", [False, True])
- @patch(
-     "mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
- )
+ @patch.object(GAILRewardProvider, "_get_demonstration_provider")
-     demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
+     mock_demo_provider: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int

-     demo_to_buffer.return_value = None, buffer_expert
+     mock_demo_provider.return_value = MockDemonstrationProvider(
+         behavior_spec, buffer_expert
+     )
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.005, use_vail=True, use_actions=use_actions
    )
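As a quick, self-contained sketch of why the stub is sufficient here (assuming create_agent_buffer(behavior_spec, number) from the test utils imported above), the reward provider only ever asks the provider for a finished AgentBuffer:

buffer_expert = create_agent_buffer(behavior_spec, 10)
provider = MockDemonstrationProvider(behavior_spec, buffer_expert)

# GAILRewardProvider only calls to_agentbuffer(), so returning a pre-built
# buffer is all the patched _get_demonstration_provider needs to supply.
assert provider.to_agentbuffer(training_length=1) is buffer_expert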

ml-agents/mlagents/trainers/torch/components/bc/module.py (25 changes)


from mlagents.torch_utils import torch
from mlagents.trainers.policy.torch_policy import TorchPolicy
- from mlagents.trainers.demo_loader import demo_to_buffer
+ from mlagents.trainers.demonstrations.demonstration_provider import (
+     DemonstrationProvider,
+ )
+ from mlagents.trainers.demonstrations.local_demonstration_provider import (
+     LocalDemonstrationProvider,
+ )
from mlagents.trainers.settings import BehavioralCloningSettings, ScheduleType
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs

        )
        params = self.policy.actor_critic.parameters()
        self.optimizer = torch.optim.Adam(params, lr=self.current_lr)
-         _, self.demonstration_buffer = demo_to_buffer(
-             settings.demo_path, policy.sequence_length, policy.behavior_spec
-         )
+         demo_provider = self._get_demonstration_provider(settings)
+         # TODO check policy.behavior_spec == demo_provider_spec
+         self.demonstration_buffer = demo_provider.to_agentbuffer(
+             training_length=policy.sequence_length
+         )
        self.batch_size = (
            settings.batch_size if settings.batch_size else default_batch_size
        )

        self.has_updated = False
        self.use_recurrent = self.policy.use_recurrent
        self.samples_per_update = settings.samples_per_update

    def _get_demonstration_provider(
        self, settings: BehavioralCloningSettings
    ) -> DemonstrationProvider:
        """
        Get the DemonstrationProvider as determined by the BehavioralCloningSettings.
        This is currently always a LocalDemonstrationProvider but could change in the future,
        based on the settings.
        """
        return LocalDemonstrationProvider(settings.demo_path)

    def update(self) -> Dict[str, np.ndarray]:
        """

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (27 changes)


)
from mlagents.trainers.settings import GAILSettings
from mlagents_envs.base_env import BehaviorSpec
+ from mlagents.trainers.demonstrations.demonstration_provider import (
+     DemonstrationProvider,
+ )
+ from mlagents.trainers.demonstrations.local_demonstration_provider import (
+     LocalDemonstrationProvider,
+ )
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_flattener import ActionFlattener

- from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.trajectory import ObsUtil

        self._ignore_done = True
        self._discriminator_network = DiscriminatorNetwork(specs, settings)
        self._discriminator_network.to(default_device())
-         _, self._demo_buffer = demo_to_buffer(
-             settings.demo_path, 1, specs
-         )  # This is supposed to be the sequence length but we do not have access here
+         demo_provider = self._get_demonstration_provider(settings)
+         # TODO check spec == demo_provider_spec
+         self._demo_buffer = demo_provider.to_agentbuffer(training_length=1)

    def _get_demonstration_provider(
        self, settings: GAILSettings
    ) -> DemonstrationProvider:
        """
        Get the DemonstrationProvider as determined by the GAILSettings.
        This is currently always a LocalDemonstrationProvider but could change in the future,
        based on the settings.
        """
        return LocalDemonstrationProvider(settings.demo_path)

    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        with torch.no_grad():

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        expert_batch = self._demo_buffer.sample_mini_batch(
-             mini_batch.num_experiences, 1
+             mini_batch.num_experiences, sequence_length=1
        )
        loss, stats_dict = self._discriminator_network.compute_loss(
            mini_batch, expert_batch
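Pulling the new GAIL wiring together, a minimal sketch under the assumptions above; the demo path is a placeholder, and it assumes AgentBuffer.sample_mini_batch takes a batch size plus the sequence_length keyword used in the diff:

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.demonstrations.local_demonstration_provider import (
    LocalDemonstrationProvider,
)

# Built once, as in the constructor above ("expert.demo" is a placeholder path).
demo_buffer = LocalDemonstrationProvider("expert.demo").to_agentbuffer(training_length=1)


def sample_expert_batch(policy_batch: AgentBuffer) -> AgentBuffer:
    # Each update draws an expert batch the same size as the policy batch.
    return demo_buffer.sample_mini_batch(policy_batch.num_experiences, sequence_length=1)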
