import io
from typing import Any, List, Tuple

import numpy as np
import pytest

from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import (
    NONE,  # the uncompressed CompressionTypeProto value
    ObservationProto,
)


def proto_from_steps(
    decision_steps: DecisionSteps, terminal_steps: TerminalSteps
) -> List[AgentInfoProto]:
    """Convert DecisionSteps and TerminalSteps into a list of AgentInfoProto."""
    agent_info_protos: List[AgentInfoProto] = []
    # Handle the DecisionSteps first: agents that requested a decision this step.
    for agent_id in decision_steps.agent_id:
        agent_id_index = decision_steps.agent_id_to_index[agent_id]
        reward = decision_steps.reward[agent_id_index]
        done = False
        max_step_reached = False
        agent_mask: Any = None
        if decision_steps.action_mask is not None:
            # Flatten the per-branch masks into a single boolean list for this agent.
            agent_mask = []
            for _branch in decision_steps.action_mask:
                agent_mask = np.concatenate(
                    (agent_mask, _branch[agent_id_index, :]), axis=0
                )
            # np.bool was removed in NumPy 1.24; the builtin bool is equivalent here.
            agent_mask = agent_mask.astype(bool).tolist()
        observations: List[ObservationProto] = []
        for all_observations_of_type in decision_steps.obs:
            observation = all_observations_of_type[agent_id_index]
            # Assumes 1-D (vector) observations; visual observations would
            # additionally need image compression handling.
            observations.append(
                ObservationProto(
                    float_data=ObservationProto.FloatData(data=observation),
                    shape=[len(observation)],
                    compression_type=NONE,
                )
            )
        agent_info_protos.append(
            AgentInfoProto(
                reward=reward,
                done=done,
                id=agent_id,
                max_step_reached=bool(max_step_reached),
                action_mask=agent_mask,
                observations=observations,
            )
        )
    # Handle the TerminalSteps second: agents whose episode ended this step.
    for agent_id in terminal_steps.agent_id:
        agent_id_index = terminal_steps.agent_id_to_index[agent_id]
        reward = terminal_steps.reward[agent_id_index]
        done = True
        max_step_reached = terminal_steps.interrupted[agent_id_index]
        final_observations: List[ObservationProto] = []
        for all_observations_of_type in terminal_steps.obs:
            observation = all_observations_of_type[agent_id_index]
            final_observations.append(
                ObservationProto(
                    float_data=ObservationProto.FloatData(data=observation),
                    shape=[len(observation)],
                    compression_type=NONE,
                )
            )
        agent_info_protos.append(
            AgentInfoProto(
                reward=reward,
                done=done,
                id=agent_id,
                max_step_reached=bool(max_step_reached),
                action_mask=None,
                observations=final_observations,
            )
        )
    return agent_info_protos
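
# A minimal, self-contained sketch of the mask flattening performed in
# proto_from_steps, using made-up data: two action branches of sizes 3 and 2
# for a single agent. All names and values here are illustrative only.
def _example_mask_flattening() -> None:
    example_action_mask = [
        np.array([[True, False, True]]),  # branch 0, shape (n_agents=1, 3)
        np.array([[False, True]]),  # branch 1, shape (n_agents=1, 2)
    ]
    flat: Any = []
    for _branch in example_action_mask:
        flat = np.concatenate((flat, _branch[0, :]), axis=0)
    # The result is one flat boolean list per agent, branches concatenated in order.
    assert flat.astype(bool).tolist() == [True, False, True, False, True]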
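
# A hedged sketch of the AgentInfoProto construction used above, with made-up
# reward/id/observation values; it exercises only the proto fields already
# imported in this file and is not part of the conversion logic itself.
def _example_agent_info_proto() -> AgentInfoProto:
    obs = ObservationProto(
        float_data=ObservationProto.FloatData(data=[0.1, 0.2, 0.3]),
        shape=[3],
        compression_type=NONE,
    )
    return AgentInfoProto(
        reward=1.0,
        done=False,
        id=7,
        max_step_reached=False,
        action_mask=[True, False],
        observations=[obs],
    )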