write to proto

5 年前 · 0cc2956d
--- a/ml-agents-envs/mlagents_envs/rpc_utils.py
+++ b/ml-agents-envs/mlagents_envs/rpc_utils.py
            action_mask = np.split(action_mask, indices, axis=1)
    return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask)

+@timed
+def proto_from_batched_step_result(batched_step_result: BatchedStepResult) -> AgentInfoProto:
+    reward = batched_step_result.reward
+    done = batched_step_result.done
+    max_step_reached = batched_step_result.max_step
+    agent_id = batched_step_result.agent_id
+    action_mask = batched_step_result.action_mask
+    observations = batched_step_result.obs
+    return AgentInfoProto(reward=reward, done=done, id=agent_id, max_step_reached=max_step_reached, action_mask=action_mask, observations=observations)

 def _generate_split_indices(dims):
    if len(dims) <= 1:
--- a/ml-agents/mlagents/trainers/tests/simple_test_envs.py
+++ b/ml-agents/mlagents/trainers/tests/simple_test_envs.py
    BatchedStepResult,
    ActionType,
 )
+from mlagents_envs.rpc_utils import proto_from_batched_step_result

 OBS_SIZE = 1
 STEP_SIZE = 0.1
--- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py
+++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py
    _check_environment_trains(env, PPO_CONFIG)


+<<<<<<< Updated upstream
@pytest.mark.parametrize("use_discrete", [True, False])
 def test_recurrent_ppo(use_discrete):
    env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)