
record demos 1d env

/test-recurrent-gail
Andrew Cohen, 5 years ago
Commit e7836fb5
7 changed files with 394 additions and 22 deletions
  1. ml-agents-envs/mlagents_envs/base_env.py (13 changes)
  2. ml-agents-envs/mlagents_envs/rpc_utils.py (82 changes)
  3. ml-agents/mlagents/trainers/demo_loader.py (23 changes)
  4. ml-agents/mlagents/trainers/tests/simple_test_envs.py (29 changes)
  5. ml-agents/mlagents/trainers/tests/test_simple_rl.py (27 changes)
  6. demos/1DTestContinuous.demo (121 changes)
  7. demos/1DTestDiscrete.demo (121 changes)

ml-agents-envs/mlagents_envs/base_env.py (13 changes)


    def contains_agent(self, agent_id: AgentId) -> bool:
        return agent_id in self.agent_id_to_index

    def get_index(self, agent_id: AgentId) -> int:
        if not self.contains_agent(agent_id):
            raise IndexError(
                "get_index failed. agent_id {} is not present in the BatchedStepResult".format(
                    agent_id
                )
            )
        return self._agent_id_to_index[agent_id]  # type: ignore

    def get_agent_step_result(self, agent_id: AgentId) -> StepResult:
        """
        Returns the step result for a specific agent.
        """
        if not self.contains_agent(agent_id):
            raise IndexError(
                "get_agent_step_result failed. agent_id {} is not present in the BatchedStepResult".format(
                    agent_id
                )
            )
        agent_index = self._agent_id_to_index[agent_id]  # type: ignore
        agent_obs = []
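
For orientation, a minimal usage sketch (not part of the commit) of how these accessors fit together; batched_step_result and agent_id stand in for values obtained from a running environment:

# Sketch only: batched_step_result is a BatchedStepResult from the environment,
# agent_id an id that may or may not be present in the current batch.
if batched_step_result.contains_agent(agent_id):
    index = batched_step_result.get_index(agent_id)  # position within the batch arrays
    result = batched_step_result.get_agent_step_result(agent_id)  # per-agent StepResult
else:
    # Both get_index and get_agent_step_result raise IndexError for unknown ids.
    pass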

ml-agents-envs/mlagents_envs/rpc_utils.py (82 changes)


from mlagents_envs.exception import UnityObservationException
from mlagents_envs.timers import hierarchical_timer, timed
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
    AgentInfoActionPairProto,
)
from mlagents_envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    NONE as COMPRESSION_TYPE_NONE,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
import numpy as np

f"Observation did not have the expected shape - got {obs.shape} but expected {expected_shape}"
)
gray_scale = obs.shape[2] == 1
if obs.compression_type == COMPRESSION_NONE:
if obs.compression_type == COMPRESSION_TYPE_NONE:
img = np.array(obs.float_data.data, dtype=np.float32)
img = np.reshape(img, obs.shape)
return img

    return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask)

@timed
) -> AgentInfoProto:
    reward = batched_step_result.reward
    done = batched_step_result.done
    max_step_reached = batched_step_result.max_step
    agent_id = batched_step_result.agent_id
    action_mask = batched_step_result.action_mask
    observations = batched_step_result.obs
    return AgentInfoProto(
        reward=reward,
        done=done,
        id=agent_id,
        max_step_reached=max_step_reached,
        action_mask=action_mask,
        observations=observations,
    )
) -> List[AgentInfoProto]:
    agent_info_protos: List[AgentInfoProto] = []
    for agent_id in batched_step_result.agent_id:
        agent_id_index = batched_step_result.get_index(agent_id)
        reward = batched_step_result.reward[agent_id_index]
        done = batched_step_result.done[agent_id_index]
        max_step_reached = batched_step_result.max_step[agent_id_index]
        agent_mask = None
        if batched_step_result.action_mask is not None:
            mask = batched_step_result.action_mask[0]
            agent_mask = mask[agent_id_index]
        observations: List[ObservationProto] = []
        for all_observations_of_type in batched_step_result.obs:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                observations.append(
                    ObservationProto(
                        compressed_data=observation,
                        shape=observation.shape,
                        compression_type=COMPRESSION_TYPE_NONE,
                    )
                )
            else:
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(data=observation),
                        shape=[len(observation)],
                        compression_type=COMPRESSION_TYPE_NONE,
                    )
                )
        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=max_step_reached,
            action_mask=agent_mask,
            observations=observations,
        )
        agent_info_protos.append(agent_info_proto)
    return agent_info_protos

# The arguments here are the BatchedStepResult and actions for a single agent name
def proto_from_batched_step_result_and_action(
    batched_step_result: BatchedStepResult, actions: np.ndarray
) -> List[AgentInfoActionPairProto]:
    agent_info_protos = proto_from_batched_step_result(batched_step_result)
    agent_action_protos = [
        AgentActionProto(vector_actions=action) for action in actions
    ]
    agent_info_action_pair_protos = [
        AgentInfoActionPairProto(agent_info=agent_info_proto, action_info=action_proto)
        for agent_info_proto, action_proto in zip(
            agent_info_protos, agent_action_protos
        )
    ]
    return agent_info_action_pair_protos
def _generate_split_indices(dims):
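
For illustration, a minimal sketch (not part of the commit) exercising the new converters; the positional BatchedStepResult arguments follow the constructor call shown above, and all values are made up:

import numpy as np
from mlagents_envs.base_env import BatchedStepResult

# One agent with a single size-1 vector observation and the action it took.
step_result = BatchedStepResult(
    [np.array([[0.5]], dtype=np.float32)],  # obs: list of (n_agents, obs_size) arrays
    np.array([0.1], dtype=np.float32),      # reward
    np.array([False]),                      # done
    np.array([False]),                      # max_step
    np.array([0]),                          # agent_id
    None,                                   # action_mask
)
pairs = proto_from_batched_step_result_and_action(
    step_result, np.array([[0.7]], dtype=np.float32)
)
assert len(pairs) == 1  # one AgentInfoActionPairProto per agent in the batch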

ml-agents/mlagents/trainers/demo_loader.py (23 changes)


)
from mlagents_envs.timers import timed, hierarchical_timer
from google.protobuf.internal.decoder import _DecodeVarint32  # type: ignore
from google.protobuf.internal.encoder import _EncodeVarint  # type: ignore

@timed

)

INITIAL_POS = 33

@timed
def load_demonstration(
    file_path: str

    """
    # First 32 bytes of file dedicated to meta-data.
    INITIAL_POS = 33
    file_paths = get_demo_files(file_path)
    group_spec = None
    brain_param_proto = None

            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return group_spec, info_action_pairs, total_expected
def write_delimited(f, message):
    msg_string = message.SerializeToString()
    msg_size = len(msg_string)
    _EncodeVarint(f.write, msg_size)
    f.write(msg_string)

def write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos):
    with open(demo_path, "wb") as f:
        # write metadata
        write_delimited(f, meta_data_proto)
        f.seek(INITIAL_POS)
        write_delimited(f, brain_param_proto)
        for agent in agent_info_protos:
            write_delimited(f, agent)
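
write_delimited uses varint length-prefixed framing, the same layout that load_demonstration parses with _DecodeVarint32. A hypothetical read-side counterpart (not part of this change) might look like:

from google.protobuf.internal.decoder import _DecodeVarint32  # type: ignore

def read_delimited(data: bytes, pos: int, message):
    # Read the varint length prefix, then parse that many bytes into `message`.
    msg_size, pos = _DecodeVarint32(data, pos)
    message.ParseFromString(data[pos : pos + msg_size])
    return message, pos + msg_size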

ml-agents/mlagents/trainers/tests/simple_test_envs.py (29 changes)


    BatchedStepResult,
    ActionType,
)
from mlagents_envs.rpc_utils import proto_from_batched_step_result
from mlagents_envs.rpc_utils import proto_from_batched_step_result_and_action
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import AgentInfoActionPairProto

OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

            m_agent_id,
            action_mask,
        )

class Record1DEnvironment(Simple1DEnvironment):
    def __init__(
        self, brain_names, use_discrete, step_size=0.2, num_vector=1, n_demos=30
    ):
        super().__init__(
            brain_names, use_discrete, step_size=step_size, num_vector=num_vector
        )
        self.demonstration_protos: Dict[str, List[AgentInfoActionPairProto]] = {}
        self.n_demos = n_demos
        for name in self.names:
            self.demonstration_protos[name] = []

    def step(self) -> None:
        super().step()
        # proto_from_batched_step_result(self.step_result[name])
        for name in self.names:
            self.demonstration_protos[
                name
            ] += proto_from_batched_step_result_and_action(
                self.step_result[name], self.action[name]
            )
            self.demonstration_protos[name] = self.demonstration_protos[name][
                -self.n_demos :
            ]

ml-agents/mlagents/trainers/tests/test_simple_rl.py (27 changes)


from mlagents.trainers.tests.simple_test_envs import (
    Simple1DEnvironment,
    Memory1DEnvironment,
    Record1DEnvironment,
)
from mlagents.trainers.demo_loader import write_demo
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous

BRAIN_NAME = "1D"

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_record(use_discrete):
    env = Record1DEnvironment([BRAIN_NAME], use_discrete=use_discrete, n_demos=30)
    config = generate_config(PPO_CONFIG)
    _check_environment_trains(env, config)
    agent_info_protos = env.demonstration_protos[BRAIN_NAME]
    meta_data_proto = DemonstrationMetaProto()
    brain_param_proto = BrainParametersProto(
        vector_action_size=[1],
        vector_action_descriptions=[""],
        vector_action_space_type=discrete if use_discrete else continuous,
        brain_name=BRAIN_NAME,
        is_training=True,
    )
    action_type = "Discrete" if use_discrete else "Continuous"
    demo_path = "demos/1DTest" + action_type + ".demo"
    write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
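
For reference (not part of the test), a demo written this way would later be consumed through demo_loader's load_demonstration, whose return signature is shown in the demo_loader.py hunk above; a rough sanity check could look like:

from mlagents.trainers.demo_loader import load_demonstration

# Assumes demo_path is the path written by test_simple_record above.
group_spec, info_action_pairs, total_expected = load_demonstration(demo_path)
# Record1DEnvironment keeps at most n_demos (=30) pairs per behavior name.
assert len(info_action_pairs) <= 30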
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = generate_config(PPO_CONFIG)

@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual, use_discrete):

demos/1DTestContinuous.demo (121 changes)


(binary demonstration file; contents not shown)

demos/1DTestDiscrete.demo (121 changes)


(binary demonstration file; contents not shown)
