
Basic and visual GAIL and BC integration tests (#3626)

/bug-failed-api-check
GitHub · 5 years ago
Current commit: 2912c883
6 files changed, with 257 insertions and 10 deletions
1. ml-agents-envs/mlagents_envs/base_env.py (10)
2. ml-agents-envs/mlagents_envs/rpc_utils.py (4)
3. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (65)
4. ml-agents/mlagents/trainers/demo_loader.py (23)
5. ml-agents/mlagents/trainers/tests/simple_test_envs.py (51)
6. ml-agents/mlagents/trainers/tests/test_simple_rl.py (114)

ml-agents-envs/mlagents_envs/base_env.py (10 changes)


     @property
     def agent_id_to_index(self) -> Dict[AgentId, int]:
         """
-        Returns the index of the agent_id in this BatchedStepResult, and
-        -1 if agent_id is not in this BatchedStepResult.
-        :param agent_id: The id of the agent
-        :returns: The index of the agent_id, and -1 if not found.
+        :returns: A Dict that maps agent_id to the index of those agents in
+        this BatchedStepResult.
         """
         if self._agent_id_to_index is None:
             self._agent_id_to_index = {}

         """
         if not self.contains_agent(agent_id):
             raise IndexError(
-                "agent_id {} is not present in the BatchedStepResult".format(agent_id)
+                "get_agent_step_result failed. agent_id {} is not present in the BatchedStepResult".format(
+                    agent_id
+                )
             )
         agent_index = self._agent_id_to_index[agent_id]  # type: ignore
         agent_obs = []
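For orientation (not part of the diff), a minimal usage sketch of the property, assuming step_result is a BatchedStepResult with at least one agent:

    # Hypothetical usage: map an agent id to its row in the batched arrays.
    index = step_result.agent_id_to_index[step_result.agent_id[0]]
    reward = step_result.reward[index]
    done = step_result.done[index]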

ml-agents-envs/mlagents_envs/rpc_utils.py (4 changes)


 from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
 from mlagents_envs.communicator_objects.observation_pb2 import (
     ObservationProto,
-    NONE as COMPRESSION_NONE,
+    NONE as COMPRESSION_TYPE_NONE,
 )
 from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
 import numpy as np

f"Observation did not have the expected shape - got {obs.shape} but expected {expected_shape}"
)
gray_scale = obs.shape[2] == 1
if obs.compression_type == COMPRESSION_NONE:
if obs.compression_type == COMPRESSION_TYPE_NONE:
img = np.array(obs.float_data.data, dtype=np.float32)
img = np.reshape(img, obs.shape)
return img
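The rename only touches the alias for the NONE compression type; the uncompressed path still ships the flattened floats in float_data and reshapes them. A round-trip sketch of that path (illustrative, using the same proto fields the code above reads):

    import numpy as np
    from mlagents_envs.communicator_objects.observation_pb2 import (
        ObservationProto,
        NONE as COMPRESSION_TYPE_NONE,
    )

    # Round-trip a (20, 20, 3) float observation through an uncompressed proto.
    arr = np.random.rand(20, 20, 3).astype(np.float32)
    proto = ObservationProto(
        float_data=ObservationProto.FloatData(data=arr.flatten()),
        shape=list(arr.shape),
        compression_type=COMPRESSION_TYPE_NONE,
    )
    img = np.reshape(np.array(proto.float_data.data, dtype=np.float32), proto.shape)
    assert np.allclose(img, arr)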

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (65 changes)


     PNG,
 )
 from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
-from mlagents_envs.base_env import AgentGroupSpec, ActionType
+from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
+    AgentInfoActionPairProto,
+)
+from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
+from mlagents_envs.base_env import AgentGroupSpec, ActionType, BatchedStepResult
 from mlagents_envs.exception import UnityObservationException
 from mlagents_envs.rpc_utils import (
     agent_group_spec_from_proto,

     obs_proto.compression_type = NONE
     obs_proto.shape.extend(in_array.shape)
     return obs_proto
+
+
+def proto_from_batched_step_result(
+    batched_step_result: BatchedStepResult
+) -> List[AgentInfoProto]:
+    agent_info_protos: List[AgentInfoProto] = []
+    for agent_id in batched_step_result.agent_id:
+        agent_id_index = batched_step_result.agent_id_to_index[agent_id]
+        reward = batched_step_result.reward[agent_id_index]
+        done = batched_step_result.done[agent_id_index]
+        max_step_reached = batched_step_result.max_step[agent_id_index]
+        agent_mask = None
+        if batched_step_result.action_mask is not None:
+            agent_mask = []  # type: ignore
+            for _branch in batched_step_result.action_mask:
+                agent_mask = np.concatenate(
+                    (agent_mask, _branch[agent_id_index, :]), axis=0
+                )
+        observations: List[ObservationProto] = []
+        for all_observations_of_type in batched_step_result.obs:
+            observation = all_observations_of_type[agent_id_index]
+            if len(observation.shape) == 3:
+                observations.append(generate_uncompressed_proto_obs(observation))
+            else:
+                observations.append(
+                    ObservationProto(
+                        float_data=ObservationProto.FloatData(data=observation),
+                        shape=[len(observation)],
+                        compression_type=NONE,
+                    )
+                )
+        agent_info_proto = AgentInfoProto(
+            reward=reward,
+            done=done,
+            id=agent_id,
+            max_step_reached=max_step_reached,
+            action_mask=agent_mask,
+            observations=observations,
+        )
+        agent_info_protos.append(agent_info_proto)
+    return agent_info_protos
+
+
+# The arguments here are the BatchedStepResult and actions for a single agent name
+def proto_from_batched_step_result_and_action(
+    batched_step_result: BatchedStepResult, actions: np.ndarray
+) -> List[AgentInfoActionPairProto]:
+    agent_info_protos = proto_from_batched_step_result(batched_step_result)
+    agent_action_protos = [
+        AgentActionProto(vector_actions=action) for action in actions
+    ]
+    agent_info_action_pair_protos = [
+        AgentInfoActionPairProto(agent_info=agent_info_proto, action_info=action_proto)
+        for agent_info_proto, action_proto in zip(
+            agent_info_protos, agent_action_protos
+        )
+    ]
+    return agent_info_action_pair_protos
+
+
 def test_process_pixels():
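As a usage sketch (hypothetical values; in the tests the BatchedStepResult comes from the simple test environments), the new helper pairs each agent's AgentInfoProto with its row of actions:

    # Hypothetical: two agents, one continuous action each.
    actions = np.array([[0.5], [-0.5]], dtype=np.float32)
    pairs = proto_from_batched_step_result_and_action(step_result, actions)
    assert len(pairs) == len(step_result.agent_id)
    assert list(pairs[0].action_info.vector_actions) == [0.5]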

ml-agents/mlagents/trainers/demo_loader.py (23 changes)


 )
 from mlagents_envs.timers import timed, hierarchical_timer
 from google.protobuf.internal.decoder import _DecodeVarint32  # type: ignore
+from google.protobuf.internal.encoder import _EncodeVarint  # type: ignore

 @timed

 )
+
+INITIAL_POS = 33
+
 @timed
 def load_demonstration(
     file_path: str

     """
     # First 32 bytes of file dedicated to meta-data.
-    INITIAL_POS = 33
     file_paths = get_demo_files(file_path)
     group_spec = None
     brain_param_proto = None

             f"No BrainParameters found in demonstration file at {file_path}."
         )
     return group_spec, info_action_pairs, total_expected
+
+
+def write_delimited(f, message):
+    msg_string = message.SerializeToString()
+    msg_size = len(msg_string)
+    _EncodeVarint(f.write, msg_size)
+    f.write(msg_string)
+
+
+def write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos):
+    with open(demo_path, "wb") as f:
+        # write metadata
+        write_delimited(f, meta_data_proto)
+        f.seek(INITIAL_POS)
+        write_delimited(f, brain_param_proto)
+        for agent in agent_info_protos:
+            write_delimited(f, agent)
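write_delimited mirrors the varint length-prefix framing that load_demonstration already decodes with _DecodeVarint32, and write_demo seeks to INITIAL_POS so the BrainParametersProto always starts right after the fixed-size metadata block. A reading counterpart (a sketch, not part of this commit) could look like:

    from google.protobuf.internal.decoder import _DecodeVarint32  # type: ignore

    def read_delimited(data, pos, message):
        # Hypothetical inverse of write_delimited: decode the varint size
        # prefix, then parse that many bytes into the supplied proto message.
        msg_size, pos = _DecodeVarint32(data, pos)
        message.ParseFromString(data[pos : pos + msg_size])
        return message, pos + msg_size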

ml-agents/mlagents/trainers/tests/simple_test_envs.py (51 changes)


     BatchedStepResult,
     ActionType,
 )
+from mlagents_envs.tests.test_rpc_utils import proto_from_batched_step_result_and_action
+from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
+    AgentInfoActionPairProto,
+)

 OBS_SIZE = 1
 VIS_OBS_SIZE = (20, 20, 3)

 class Memory1DEnvironment(Simple1DEnvironment):
     def __init__(self, brain_names, use_discrete, step_size=0.2):
-        super().__init__(brain_names, use_discrete, step_size=0.2)
+        super().__init__(brain_names, use_discrete, step_size=step_size)
         # Number of steps to reveal the goal for. Lower is harder. Should be
         # less than 1/step_size to force agent to use memory
         self.num_show_steps = 2
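As a quick sanity check on that comment: with the default step_size=0.2 an agent needs on the order of 1/0.2 = 5 steps to reach the goal, so revealing the goal for only num_show_steps = 2 steps forces the policy to carry it in memory for the remaining steps.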

             m_agent_id,
             action_mask,
         )
+
+
+class Record1DEnvironment(Simple1DEnvironment):
+    def __init__(
+        self,
+        brain_names,
+        use_discrete,
+        step_size=0.2,
+        num_visual=0,
+        num_vector=1,
+        n_demos=30,
+    ):
+        super().__init__(
+            brain_names,
+            use_discrete,
+            step_size=step_size,
+            num_visual=num_visual,
+            num_vector=num_vector,
+        )
+        self.demonstration_protos: Dict[str, List[AgentInfoActionPairProto]] = {}
+        self.n_demos = n_demos
+        for name in self.names:
+            self.demonstration_protos[name] = []
+
+    def step(self) -> None:
+        super().step()
+        for name in self.names:
+            self.demonstration_protos[
+                name
+            ] += proto_from_batched_step_result_and_action(
+                self.step_result[name], self.action[name]
+            )
+            self.demonstration_protos[name] = self.demonstration_protos[name][
+                -self.n_demos :
+            ]
+
+    def solve(self) -> None:
+        self.reset()
+        for _ in range(self.n_demos):
+            for name in self.names:
+                if self.discrete:
+                    self.action[name] = [[1]] if self.goal[name] > 0 else [[0]]
+                else:
+                    self.action[name] = [[float(self.goal[name])]]
+                self.step()
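A minimal recording session with the new class (a hypothetical sketch; the brain name is arbitrary):

    # Record up to 30 optimal (observation, action) pairs for a single brain.
    env = Record1DEnvironment(["1D"], use_discrete=False, n_demos=30)
    env.solve()  # executes the optimal policy, recording protos on each step()
    pairs = env.demonstration_protos["1D"]
    assert len(pairs) <= 30  # step() keeps only the most recent n_demos entries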

ml-agents/mlagents/trainers/tests/test_simple_rl.py (114 changes)


 from mlagents.trainers.tests.simple_test_envs import (
     Simple1DEnvironment,
     Memory1DEnvironment,
+    Record1DEnvironment,
 )
+from mlagents.trainers.demo_loader import write_demo
+from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
+    DemonstrationMetaProto,
+)
+from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
+from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous

 BRAIN_NAME = "1D"

     assert any(reward > success_threshold for reward in processed_rewards) and any(
         reward < success_threshold for reward in processed_rewards
     )
+
+
+@pytest.fixture(scope="session")
+def simple_record(tmpdir_factory):
+    def record_demo(use_discrete, num_visual=0, num_vector=1):
+        env = Record1DEnvironment(
+            [BRAIN_NAME],
+            use_discrete=use_discrete,
+            num_visual=num_visual,
+            num_vector=num_vector,
+            n_demos=100,
+        )
+        # If we want to use true demos, we can solve the env in the usual way
+        # Otherwise, we can just call solve to execute the optimal policy
+        env.solve()
+        agent_info_protos = env.demonstration_protos[BRAIN_NAME]
+        meta_data_proto = DemonstrationMetaProto()
+        brain_param_proto = BrainParametersProto(
+            vector_action_size=[1],
+            vector_action_descriptions=[""],
+            vector_action_space_type=discrete if use_discrete else continuous,
+            brain_name=BRAIN_NAME,
+            is_training=True,
+        )
+        action_type = "Discrete" if use_discrete else "Continuous"
+        demo_path_name = "1DTest" + action_type + ".demo"
+        demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
+        write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
+        return demo_path
+
+    return record_demo
+
+
+@pytest.mark.parametrize("use_discrete", [True, False])
+@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
+def test_gail(simple_record, use_discrete, trainer_config):
+    demo_path = simple_record(use_discrete)
+    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
+    override_vals = {
+        "max_steps": 500,
+        "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},
+        "reward_signals": {
+            "gail": {
+                "strength": 1.0,
+                "gamma": 0.99,
+                "encoding_size": 32,
+                "demo_path": demo_path,
+            }
+        },
+    }
+    config = generate_config(trainer_config, override_vals)
+    _check_environment_trains(env, config, success_threshold=0.9)
+
+
+@pytest.mark.parametrize("use_discrete", [True, False])
+def test_gail_visual_ppo(simple_record, use_discrete):
+    demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
+    env = Simple1DEnvironment(
+        [BRAIN_NAME],
+        num_visual=1,
+        num_vector=0,
+        use_discrete=use_discrete,
+        step_size=0.2,
+    )
+    override_vals = {
+        "max_steps": 1000,
+        "learning_rate": 3.0e-4,
+        "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},
+        "reward_signals": {
+            "gail": {
+                "strength": 1.0,
+                "gamma": 0.99,
+                "encoding_size": 32,
+                "demo_path": demo_path,
+            }
+        },
+    }
+    config = generate_config(PPO_CONFIG, override_vals)
+    _check_environment_trains(env, config, success_threshold=0.9)
+
+
+@pytest.mark.parametrize("use_discrete", [True, False])
+def test_gail_visual_sac(simple_record, use_discrete):
+    demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
+    env = Simple1DEnvironment(
+        [BRAIN_NAME],
+        num_visual=1,
+        num_vector=0,
+        use_discrete=use_discrete,
+        step_size=0.2,
+    )
+    override_vals = {
+        "max_steps": 500,
+        "batch_size": 16,
+        "learning_rate": 3.0e-4,
+        "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},
+        "reward_signals": {
+            "gail": {
+                "strength": 1.0,
+                "gamma": 0.99,
+                "encoding_size": 32,
+                "demo_path": demo_path,
+            }
+        },
+    }
+    config = generate_config(SAC_CONFIG, override_vals)
+    _check_environment_trains(env, config, success_threshold=0.9)
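The same override mechanism would also support exercising behavioral cloning on its own; a hedged sketch mirroring the tests above (test_bc_only is hypothetical, not part of this commit):

    @pytest.mark.parametrize("use_discrete", [True, False])
    def test_bc_only(simple_record, use_discrete):
        # Hypothetical: behavioral cloning without a GAIL reward signal.
        demo_path = simple_record(use_discrete)
        env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
        override_vals = {
            "max_steps": 500,
            "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},
        }
        config = generate_config(PPO_CONFIG, override_vals)
        _check_environment_trains(env, config, success_threshold=0.9)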