
[bug-fix] Fix padding for List entries in buffer (#5046)

* Fix padding for List entries in buffer

* Revert to converting to np.array

* Fix dtype in PPO trainer
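As context for the first bullet, here is a brief, hypothetical illustration of how the old padding path can go wrong once a buffer field stores variable-length List[np.ndarray] entries (the values are made up; only the np.array call mirrors the line that the diff below replaces):

import numpy as np

# Hypothetical last entry of a field that stores one array per group member;
# group sizes can differ from step to step, so the nested lists may be ragged.
last_entry = [np.zeros(3, dtype=np.float32), np.zeros(2, dtype=np.float32)]

try:
    # Pre-fix padding path: scale a float32 copy of the last entry.
    padding = np.array(last_entry, dtype=np.float32) * 0.0
except ValueError:
    # Ragged lists cannot be packed into a rectangular float32 array, so the
    # conversion raises; the fix below pads such fields with empty lists instead.
    padding = []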
/develop/gail-srl-hack
GitHub · 4 years ago
Current commit: 47db8ce1
3 changed files with 50 additions and 7 deletions
  1. ml-agents/mlagents/trainers/buffer.py (15 changed lines)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (10 changed lines)
  3. ml-agents/mlagents/trainers/tests/test_buffer.py (32 changed lines)

ml-agents/mlagents/trainers/buffer.py (15 changed lines)


         else:
             return return_data
 
-    def append(self, element: np.ndarray, padding_value: float = 0.0) -> None:
+    @property
+    def contains_lists(self) -> bool:
+        """
+        Checks whether this AgentBufferField contains List[np.ndarray].
+        """
+        return len(self) > 0 and isinstance(self[0], list)
+
+    def append(self, element: BufferEntry, padding_value: float = 0.0) -> None:
         """
         Adds an element to this list. Also lets you change the padding
         type, so that it can be set on append (e.g. action_masks should

                     " too large given the current number of data points."
                 )
             if batch_size * training_length > len(self):
-                padding = np.array(self[-1], dtype=np.float32) * self.padding_value
+                if self.contains_lists:
+                    padding = []
+                else:
+                    # We want to duplicate the last value in the array, multiplied by the padding_value.
+                    padding = np.array(self[-1], dtype=np.float32) * self.padding_value
                 return [padding] * (training_length - leftover) + self[:]
             else:
ml-agents/mlagents/trainers/ppo/trainer.py (10 changed lines)


             self.policy.update_normalization(agent_buffer_trajectory)
         # Get all value estimates
-        value_estimates, value_next, value_memories = self.optimizer.get_trajectory_value_estimates(
+        (
+            value_estimates,
+            value_next,
+            value_memories,
+        ) = self.optimizer.get_trajectory_value_estimates(
             agent_buffer_trajectory,
             trajectory.next_obs,
             trajectory.done_reached and not trajectory.interrupted,

             int(self.hyperparameters.batch_size / self.policy.sequence_length), 1
         )
-        advantages = np.array(self.update_buffer[BufferKey.ADVANTAGES].get_batch())
+        advantages = np.array(
+            self.update_buffer[BufferKey.ADVANTAGES].get_batch(), dtype=np.float32
+        )
         self.update_buffer[BufferKey.ADVANTAGES].set(
             (advantages - advantages.mean()) / (advantages.std() + 1e-10)
         )
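The second change only adds an explicit dtype. A short illustration of why that matters when converting the batch returned by get_batch (the literal advantage values here are invented):

import numpy as np

# Without an explicit dtype, converting a Python list of floats yields float64.
advantages = np.array([0.5, -1.25, 2.0])
print(advantages.dtype)  # float64

# With dtype=np.float32, the normalized advantages stay float32, matching the
# precision the trainer's tensors typically use.
advantages = np.array([0.5, -1.25, 2.0], dtype=np.float32)
normalized = (advantages - advantages.mean()) / (advantages.std() + 1e-10)
print(normalized.dtype)  # float32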

ml-agents/mlagents/trainers/tests/test_buffer.py (32 changed lines)


             ]
         ),
     )
-    # Test group entries return Lists of Lists
+    # Test padding
+    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(
+        batch_size=None, training_length=4, sequential=True
+    )
+    assert_array(
+        np.array(a),
+        np.array(
+            [
+                [0, 0, 0],
+                [0, 0, 0],
+                [0, 0, 0],
+                [201, 202, 203],
+                [211, 212, 213],
+                [221, 222, 223],
+                [231, 232, 233],
+                [241, 242, 243],
+                [251, 252, 253],
+                [261, 262, 263],
+                [271, 272, 273],
+                [281, 282, 283],
+            ]
+        ),
+    )
+
+    # Test group entries return Lists of Lists. Make sure to pad properly!
-        batch_size=2, training_length=1, sequential=True
+        batch_size=None, training_length=4, sequential=True
-    for _group_entry in a:
+    for _group_entry in a[:3]:
+        assert len(_group_entry) == 0
+    for _group_entry in a[3:]:
         assert len(_group_entry) == 3
 
     agent_1_buffer.reset_agent()
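For reference, the 12-row expected batch and the three empty group entries follow from the sequential padding arithmetic; a small worked check (num_steps reflects the nine observation rows 201 through 283 asserted above):

num_steps = 9
training_length = 4
leftover = num_steps % training_length     # 1
num_padding = training_length - leftover   # 3 padded entries are prepended
assert num_padding == 3
assert num_padding + num_steps == 12       # matches the 12 rows asserted above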
