from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.exception import UnityObservationException
from mlagents_envs.timers import hierarchical_timer, timed
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
    AgentInfoActionPairProto,
)
from mlagents_envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    NONE as COMPRESSION_TYPE_NONE,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
import numpy as np
from typing import Iterable, List, Optional


def observation_to_np_array(
    obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
) -> np.ndarray:
    """Convert an ObservationProto into a numpy array, optionally checking its shape."""
    if expected_shape is not None and list(obs.shape) != list(expected_shape):
        raise UnityObservationException(
            f"Observation did not have the expected shape - got {obs.shape} but expected {expected_shape}"
        )
    gray_scale = obs.shape[2] == 1
    if obs.compression_type == COMPRESSION_TYPE_NONE:
        # Uncompressed observations arrive as a flat list of floats; restore
        # the shape recorded in the proto.
        img = np.array(obs.float_data.data, dtype=np.float32)
        img = np.reshape(img, obs.shape)
        return img
    # Compressed observations carry encoded image bytes; process_pixels (the
    # image-decoding helper defined elsewhere in this module) turns them into
    # a (possibly grayscale) float array.
    return process_pixels(obs.compressed_data, gray_scale)
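
# Hedged usage sketch (not from the original file; values are illustrative):
# round-trip a small uncompressed 2x2x1 observation through the function above.
#
#   proto = ObservationProto(
#       shape=[2, 2, 1],
#       compression_type=COMPRESSION_TYPE_NONE,
#       float_data=ObservationProto.FloatData(data=[0.0, 0.25, 0.5, 1.0]),
#   )
#   arr = observation_to_np_array(proto, expected_shape=(2, 2, 1))
#   assert arr.shape == (2, 2, 1) and arr.dtype == np.float32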


return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask)

@timed
def proto_from_batched_step_result(
    batched_step_result: BatchedStepResult,
) -> List[AgentInfoProto]:
    agent_info_protos: List[AgentInfoProto] = []
    for agent_id in batched_step_result.agent_id:
        agent_id_index = batched_step_result.get_index(agent_id)
        reward = batched_step_result.reward[agent_id_index]
        done = batched_step_result.done[agent_id_index]
        max_step_reached = batched_step_result.max_step[agent_id_index]
        agent_mask = None
        if batched_step_result.action_mask is not None:
            # action_mask holds one boolean array per action branch, each indexed
            # by agent; only the first branch is serialized here.
            mask = batched_step_result.action_mask[0]
            agent_mask = mask[agent_id_index]
        observations: List[ObservationProto] = []
        for all_observations_of_type in batched_step_result.obs:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                # Visual (H, W, C) observation: serialize the raw pixels as
                # uncompressed float data (a bytes field like compressed_data
                # cannot hold a numpy float array).
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(
                            data=observation.flatten()
                        ),
                        shape=observation.shape,
                        compression_type=COMPRESSION_TYPE_NONE,
                    )
                )
            else:
                # Vector observation: a flat list of floats.
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(data=observation),
                        shape=[len(observation)],
                        compression_type=COMPRESSION_TYPE_NONE,
                    )
                )

        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=max_step_reached,
            action_mask=agent_mask,
            observations=observations,
        )
        agent_info_protos.append(agent_info_proto)
    return agent_info_protos
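
# Hedged usage sketch (not from the original file; keyword names mirror the
# BatchedStepResult attributes read above): serializing a two-agent step result
# that has a single 3-float vector observation per agent.
#
#   step_result = BatchedStepResult(
#       obs=[np.zeros((2, 3), dtype=np.float32)],  # one observation type, two agents
#       reward=np.array([1.0, -1.0], dtype=np.float32),
#       done=np.array([False, True]),
#       max_step=np.array([False, False]),
#       agent_id=np.array([7, 9], dtype=np.int32),
#       action_mask=None,
#   )
#   protos = proto_from_batched_step_result(step_result)  # two AgentInfoProto entries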


# The arguments here are the BatchedStepResult and actions for a single agent name
def proto_from_batched_step_result_and_action(
    batched_step_result: BatchedStepResult, actions: np.ndarray
) -> List[AgentInfoActionPairProto]:
    agent_info_protos = proto_from_batched_step_result(batched_step_result)
    # One AgentActionProto per agent, taken row by row from the actions array.
    agent_action_protos = [
        AgentActionProto(vector_actions=action) for action in actions
    ]
    # Pair each agent's info proto with its action proto; both lists follow the
    # agent ordering of batched_step_result.agent_id.
    agent_info_action_pair_protos = [
        AgentInfoActionPairProto(agent_info=agent_info_proto, action_info=action_proto)
        for agent_info_proto, action_proto in zip(
            agent_info_protos, agent_action_protos
        )
    ]
    return agent_info_action_pair_protos
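
# Hedged usage sketch (not from the original file): pairing a step result like
# the one sketched above with a (num_agents, action_size) continuous-action array.
#
#   actions = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
#   pairs = proto_from_batched_step_result_and_action(step_result, actions)
#   assert len(pairs) == len(step_result.agent_id)  # one info/action pair per agent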


def _generate_split_indices(dims): |
|
|
|