|
|
|
|
|
|
# NOTE(review): fragment — the opening of this dict literal (presumably
# ``fake_action_outputs = {``) is missing from this view; confirm against the
# original test file. Entries below fake the policy's raw outputs for TWO
# agents (each row of the (2, 1) arrays is one agent's single continuous value).
"action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
"entropy": np.array([1.0], dtype=np.float32),
"learning_rate": 1.0,
# presumably the pre-clip/pre-processing actions — TODO confirm semantics
"pre_action": [0.1, 0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
}
|
|
|
# Build fake DecisionSteps/TerminalSteps for the processor under test.
# NOTE(review): the call's arguments appear to have been lost from this span
# (only blank lines remain inside the parentheses) — ``mb.create_mock_steps``
# is presumably given num_agents / observation specs / an action spec here;
# restore from the original test file.
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
)
|
|
|
# Fake policy response for the two mocked agents: one continuous action of
# size 1 per agent, with matching env action, values, and raw outputs.
# NOTE(review): the blank span between ``agent_ids`` and ``action_spec`` may
# have held additional keyword arguments that were lost — confirm upstream.
fake_action_info = ActionInfo(
action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
value=[0.1, 0.1],
outputs=fake_action_outputs,
# agent ids must match the mocked steps so experiences are attributed
agent_ids=mock_decision_steps.agent_id,
action_spec=ActionSpec.create_continuous(2),
)
|
|
|
# Feed the steps with an EMPTY ActionInfo: the processor must ignore them.
# NOTE: the original span contained two conflicting argument lines (merge
# residue) — the pre-refactor ``ActionInfo([], [], {}, [])`` and the newer
# ``ActionInfo.empty()``; the factory form is the supported API and is kept.
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
)
|
|
|
# Assert that the AgentProcessor is still empty: an empty ActionInfo carries
# no agent ids, so add_experiences should not have buffered anything for
# agent 0.
assert len(processor.experience_buffers[0]) == 0
|
|
|
|
|
|
# NOTE(review): fragment — the opening of this dict literal (presumably
# ``fake_action_outputs = {``) is missing from this view; confirm against the
# original test file. Single-agent variant of the fake policy outputs: the
# (1, 1) arrays hold one agent's single continuous value.
"action": ActionTuple(continuous=np.array([[0.1]])),
"entropy": np.array([1.0], dtype=np.float32),
"learning_rate": 1.0,
# presumably the pre-clip/pre-processing action — TODO confirm semantics
"pre_action": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
}
|
|
|
|
|
|
|
|
|
|
) |
|
|
|
fake_action_info = ActionInfo( |
|
|
|
action=ActionTuple(continuous=np.array([[0.1]])), |
|
|
|
env_action=ActionTuple(continuous=np.array([[0.1]])), |
|
|
|
value=[0.1], |
|
|
|
outputs=fake_action_outputs, |
|
|
|
agent_ids=mock_decision_step.agent_id, |
|
|
|
|
|
|
# NOTE(review): fragment — the opening of this dict literal (presumably
# ``fake_action_outputs = {``) is missing from this view; confirm against the
# original test file. Another single-agent fake-policy-outputs fixture with
# the same (1, 1)-shaped arrays as the one earlier in the file.
"action": ActionTuple(continuous=np.array([[0.1]])),
"entropy": np.array([1.0], dtype=np.float32),
"learning_rate": 1.0,
# presumably the pre-clip/pre-processing action — TODO confirm semantics
"pre_action": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
}
|
|
|
|
|
|
|
|
|
|
) |
|
|
|
fake_action_info = ActionInfo( |
|
|
|
action=ActionTuple(continuous=np.array([[0.1]])), |
|
|
|
env_action=ActionTuple(continuous=np.array([[0.1]])), |
|
|
|
value=[0.1], |
|
|
|
outputs=fake_action_outputs, |
|
|
|
agent_ids=mock_decision_step.agent_id, |
|
|
|