浏览代码

Fix demo loader and remaining tests

/develop-newnormalization
Ervin Teng 5 年前
当前提交
a80b47d1
共有 3 个文件被更改,包括 19 次插入 和 17 次删除
  1. 30
      ml-agents/mlagents/trainers/demo_loader.py
  2. 4
      ml-agents/mlagents/trainers/tests/test_demo_loader.py
  3. 2
      ml-agents/mlagents/trainers/tests/test_reward_signals.py

30
ml-agents/mlagents/trainers/demo_loader.py


sequence_length: int,
) -> AgentBuffer:
# Create and populate buffer using experiences
demo_buffer = AgentProcessorBuffer()
update_buffer = AgentBuffer()
demo_process_buffer = AgentProcessorBuffer()
demo_buffer = AgentBuffer()
for idx, experience in enumerate(pair_infos):
if idx > len(pair_infos) - 2:
break

previous_action = np.array(pair_infos[idx].action_info.vector_actions) * 0
if idx > 0:
previous_action = np.array(pair_infos[idx - 1].action_info.vector_actions)
demo_buffer[0].last_brain_info = current_brain_info
demo_buffer[0]["done"].append(next_brain_info.local_done[0])
demo_buffer[0]["rewards"].append(next_brain_info.rewards[0])
demo_process_buffer[0].last_brain_info = current_brain_info
demo_process_buffer[0]["done"].append(next_brain_info.local_done[0])
demo_process_buffer[0]["rewards"].append(next_brain_info.rewards[0])
demo_buffer[0]["visual_obs%d" % i].append(
demo_process_buffer[0]["visual_obs%d" % i].append(
demo_buffer[0]["vector_obs"].append(
demo_process_buffer[0]["vector_obs"].append(
demo_buffer[0]["actions"].append(current_pair_info.action_info.vector_actions)
demo_buffer[0]["prev_action"].append(previous_action)
demo_process_buffer[0]["actions"].append(
current_pair_info.action_info.vector_actions
)
demo_process_buffer[0]["prev_action"].append(previous_action)
demo_buffer.append_update_buffer(
update_buffer, 0, batch_size=None, training_length=sequence_length
demo_process_buffer.append_update_buffer(
demo_buffer, 0, batch_size=None, training_length=sequence_length
demo_buffer.reset_local_buffers()
demo_buffer.append_update_buffer(
update_buffer, 0, batch_size=None, training_length=sequence_length
demo_process_buffer.reset_local_buffers()
demo_process_buffer.append_update_buffer(
demo_buffer, 0, batch_size=None, training_length=sequence_length
)
return demo_buffer

4
ml-agents/mlagents/trainers/tests/test_demo_loader.py


assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1)
assert len(demo_buffer.update_buffer["actions"]) == total_expected - 1
assert len(demo_buffer["actions"]) == total_expected - 1
def test_load_demo_dir():

assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1)
assert len(demo_buffer.update_buffer["actions"]) == total_expected - 1
assert len(demo_buffer["actions"]) == total_expected - 1

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


def reward_signal_update(env, policy, reward_signal_name):
buffer = mb.simulate_rollout(env, policy, BUFFER_INIT_SAMPLES)
feed_dict = policy.reward_signals[reward_signal_name].prepare_update(
policy.model, buffer.update_buffer.make_mini_batch(0, 10), 2
policy.model, buffer.make_mini_batch(0, 10), 2
)
out = policy._execute_model(
feed_dict, policy.reward_signals[reward_signal_name].update_dict

正在加载...
取消
保存