|
|
|
|
|
|
self.update_buffer.shuffle(sequence_length=self.policy.sequence_length) |
|
|
|
buffer = self.update_buffer |
|
|
|
max_num_batch = buffer_length // batch_size |
|
|
|
for l in range(0, max_num_batch * batch_size, batch_size): |
|
|
|
for i in range(0, max_num_batch * batch_size, batch_size): |
|
|
|
buffer.make_mini_batch(l, l + batch_size), n_sequences |
|
|
|
buffer.make_mini_batch(i, i + batch_size), n_sequences |
|
|
|
) |
|
|
|
for stat_name, value in update_stats.items(): |
|
|
|
batch_update_stats[stat_name].append(value) |
|
|
|