浏览代码

Rename append_update_buffer to append_to_update_buffer

/develop-newnormalization
Ervin Teng 5 年前
当前提交
fd0647a6
共有 7 个文件被更改,包括 16 次插入和 16 次删除
  1. 4
      ml-agents/mlagents/trainers/buffer.py
  2. 4
      ml-agents/mlagents/trainers/demo_loader.py
  3. 2
      ml-agents/mlagents/trainers/ppo/trainer.py
  4. 2
      ml-agents/mlagents/trainers/sac/trainer.py
  5. 2
      ml-agents/mlagents/trainers/tests/mock_brain.py
  6. 16
      ml-agents/mlagents/trainers/tests/test_buffer.py
  7. 2
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py

4
ml-agents/mlagents/trainers/buffer.py


for k in agent_ids:
self[k].reset_agent()
def append_update_buffer(
def append_to_update_buffer(
self,
update_buffer: AgentBuffer,
agent_id: str,

:param training_length: The length of the samples that must be appended. If None: only takes one element.
"""
for agent_id in self.keys():
self.append_update_buffer(
self.append_to_update_buffer(
update_buffer, agent_id, key_list, batch_size, training_length
)

4
ml-agents/mlagents/trainers/demo_loader.py


)
demo_process_buffer[0]["prev_action"].append(previous_action)
if next_brain_info.local_done[0]:
demo_process_buffer.append_update_buffer(
demo_process_buffer.append_to_update_buffer(
demo_process_buffer.append_update_buffer(
demo_process_buffer.append_to_update_buffer(
demo_buffer, 0, batch_size=None, training_length=sequence_length
)
return demo_buffer

2
ml-agents/mlagents/trainers/ppo/trainer.py


global_returns
)
self.processing_buffer.append_update_buffer(
self.processing_buffer.append_to_update_buffer(
self.update_buffer,
agent_id,
batch_size=None,

2
ml-agents/mlagents/trainers/sac/trainer.py


] = bootstrapping_info.vector_observations[idx]
self.processing_buffer[agent_id]["done"][-1] = False
self.processing_buffer.append_update_buffer(
self.processing_buffer.append_to_update_buffer(
self.update_buffer,
agent_id,
batch_size=None,

2
ml-agents/mlagents/trainers/tests/mock_brain.py


)
buffer[0]["memory"].append(np.ones(memory_size))
buffer.append_update_buffer(
buffer.append_to_update_buffer(
update_buffer, 0, batch_size=None, training_length=sequence_length
)
return update_buffer

16
ml-agents/mlagents/trainers/tests/test_buffer.py


b[4].reset_agent()
assert len(b[4]) == 0
update_buffer = AgentBuffer()
b.append_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
assert len(update_buffer["action"]) == 20
assert np.array(update_buffer["action"]).shape == (20, 2)

def test_buffer_sample():
b = construct_fake_processing_buffer()
update_buffer = AgentBuffer()
b.append_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
# Test non-LSTM
mb = update_buffer.sample_mini_batch(batch_size=4, sequence_length=1)
assert mb.keys() == update_buffer.keys()

def test_buffer_truncate():
b = construct_fake_processing_buffer()
update_buffer = AgentBuffer()
b.append_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
b.append_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 3, batch_size=None, training_length=2)
b.append_to_update_buffer(update_buffer, 2, batch_size=None, training_length=2)
# Test LSTM, truncate should be some multiple of sequence_length
update_buffer.truncate(4, sequence_length=3)
assert len(update_buffer["action"]) == 3

2
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


trainer = create_rl_trainer()
trainer.processing_buffer = construct_fake_processing_buffer()
trainer.update_buffer = AgentBuffer()
trainer.processing_buffer.append_update_buffer(
trainer.processing_buffer.append_to_update_buffer(
trainer.update_buffer, 2, batch_size=None, training_length=2
)
trainer.clear_update_buffer()
正在加载...
取消
保存