
[perf] Optimizations for performance (#5192)

* Lazy init the buffer when sampling

* Update references rather than copy data

* Don't create unneeded numpy arrays

* Remove self[key] from loop
GitHub · 3 years ago
Commit b9cab453
3 files changed, 23 insertions(+), 18 deletions(-)
1. ml-agents/mlagents/trainers/buffer.py (8 changes)
2. ml-agents/mlagents/trainers/torch/encoders.py (27 changes)
3. ml-agents/mlagents/trainers/trajectory.py (6 changes)

ml-agents/mlagents/trainers/buffer.py (8 changes)


         s = np.arange(len(self[key_list[0]]) // sequence_length)
         np.random.shuffle(s)
         for key in key_list:
+            buffer_field = self[key]
             tmp: List[np.ndarray] = []
             for i in s:
-                tmp += self[key][i * sequence_length : (i + 1) * sequence_length]
-            self[key][:] = tmp
+                tmp += buffer_field[i * sequence_length : (i + 1) * sequence_length]
+            buffer_field.set(tmp)

     def make_mini_batch(self, start: int, end: int) -> "AgentBuffer":
         """

                 * sequence_length
             )  # Sample random sequence starts
             for key in self:
-                mb_list = [self[key][i : i + sequence_length] for i in start_idxes]
+                buffer_field = self[key]
+                mb_list = (buffer_field[i : i + sequence_length] for i in start_idxes)
                 # See comparison of ways to make a list from a list of lists here:
                 # https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists
                 mini_batch[key].set(list(itertools.chain.from_iterable(mb_list)))
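Switching the list comprehension to a generator expression also avoids materializing the intermediate list of slices; `itertools.chain.from_iterable` then flattens them in a single pass, which the linked Stack Overflow thread compares against the other common flattening idioms. A toy illustration:

import itertools

sequences = [[1, 2], [3, 4], [5, 6]]
# Slices are produced lazily; chain.from_iterable walks them without
# ever building a list-of-lists in memory.
lazy = (seq for seq in sequences)
flat = list(itertools.chain.from_iterable(lazy))
assert flat == [1, 2, 3, 4, 5, 6]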

ml-agents/mlagents/trainers/torch/encoders.py (27 changes)


         return normalized_state

     def update(self, vector_input: torch.Tensor) -> None:
-        steps_increment = vector_input.size()[0]
-        total_new_steps = self.normalization_steps + steps_increment
-        input_to_old_mean = vector_input - self.running_mean
-        new_mean = self.running_mean + (input_to_old_mean / total_new_steps).sum(0)
-        input_to_new_mean = vector_input - new_mean
-        new_variance = self.running_variance + (
-            input_to_new_mean * input_to_old_mean
-        ).sum(0)
-        # Update in-place
-        self.running_mean.data.copy_(new_mean.data)
-        self.running_variance.data.copy_(new_variance.data)
-        self.normalization_steps.data.copy_(total_new_steps.data)
+        with torch.no_grad():
+            steps_increment = vector_input.size()[0]
+            total_new_steps = self.normalization_steps + steps_increment
+            input_to_old_mean = vector_input - self.running_mean
+            new_mean: torch.Tensor = self.running_mean + (
+                input_to_old_mean / total_new_steps
+            ).sum(0)
+            input_to_new_mean = vector_input - new_mean
+            new_variance = self.running_variance + (
+                input_to_new_mean * input_to_old_mean
+            ).sum(0)
+            # Update references. This is much faster than in-place data update.
+            self.running_mean: torch.Tensor = new_mean
+            self.running_variance: torch.Tensor = new_variance
+            self.normalization_steps: torch.Tensor = total_new_steps

     def copy_from(self, other_normalizer: "Normalizer") -> None:
         self.normalization_steps.data.copy_(other_normalizer.normalization_steps.data)
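Two things happen in this hunk. First, torch.no_grad() keeps autograd from recording the statistics arithmetic. Second, rebinding running_mean / running_variance / normalization_steps to the freshly computed tensors skips the elementwise copy_ kernel that previously ran on every update; since these attributes are registered buffers, assigning a Tensor over them goes through nn.Module.__setattr__ and state_dict still picks up the new values, while copy_from keeps using .data.copy_ so loading into an existing normalizer works unchanged. The recurrence itself is the batched Welford-style update: the mean moves by the summed deviation from the old mean divided by the new count, and the variance accumulator adds (x - new_mean) * (x - old_mean) summed over the batch. Below is a standalone numpy check of that recurrence; the seed values mirror the ml-agents initialization (mean 0, variance 1, steps 1), which behaves like one pseudo-observation of zero:

import numpy as np

rng = np.random.default_rng(0)
mean, var_acc, steps = np.zeros(3), np.ones(3), 1  # ml-agents-style seeds

data = rng.normal(size=(50, 3))
for batch in np.split(data, 5):  # five updates of ten rows each
    total = steps + len(batch)
    delta_old = batch - mean                       # x - old mean
    new_mean = mean + (delta_old / total).sum(0)   # weighted move toward batch
    delta_new = batch - new_mean                   # x - new mean
    var_acc = var_acc + (delta_new * delta_old).sum(0)  # M2 accumulator
    mean, steps = new_mean, total

# The running mean matches the closed form (0 * 1 + data.sum(0)) / (1 + 50),
# i.e. the batch data averaged together with the single zero pseudo-step.
assert np.allclose(mean, data.sum(0) / 51)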

ml-agents/mlagents/trainers/trajectory.py (6 changes)


                 exp.action.discrete
             )
-            cont_next_actions = np.zeros_like(exp.action.continuous)
-            disc_next_actions = np.zeros_like(exp.action.discrete)
             if step < len(self.steps) - 1:
                 next_action = self.steps[step + 1].action
                 cont_next_actions = next_action.continuous
                 disc_next_actions = next_action.discrete
+            else:
+                cont_next_actions = np.zeros_like(exp.action.continuous)
+                disc_next_actions = np.zeros_like(exp.action.discrete)
             agent_buffer_trajectory[BufferKey.NEXT_CONT_ACTION].append(
                 cont_next_actions
             )
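Previously the two np.zeros_like arrays were allocated on every step of the trajectory and then discarded for all but the final step; with the else branch they are created exactly once, at the terminal step. A toy version of the pattern, with illustrative names rather than the ml-agents API:

import numpy as np

actions = [np.array([0.1, 0.2]), np.array([0.3, 0.4]), np.array([0.5, 0.6])]

next_actions = []
for step, action in enumerate(actions):
    if step < len(actions) - 1:
        # Reuse the following step's existing array: no allocation.
        next_actions.append(actions[step + 1])
    else:
        # Terminal step has no successor; allocate zeros once, here only.
        next_actions.append(np.zeros_like(action))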
