
Fix BC and tests

/develop-newnormalization
Ervin Teng, 5 years ago
Current commit
38ff674e
2 files changed, 40 insertions, 50 deletions
  1. ml-agents/mlagents/trainers/bc/trainer.py (35 changes)
  2. ml-agents/mlagents/trainers/tests/test_bc.py (55 changes)

ml-agents/mlagents/trainers/bc/trainer.py (35 changes)


 import logging
 import numpy as np
+from collections import defaultdict
 from mlagents.trainers.bc.policy import BCPolicy
 from mlagents.trainers.buffer import AgentBuffer

         super(BCTrainer, self).__init__(brain, trainer_parameters, training, run_id)
         self.policy = BCPolicy(seed, brain, trainer_parameters, load)
         self.n_sequences = 1
-        self.cumulative_rewards = {}
+        self.cumulative_rewards = defaultdict(lambda: 0)
         self.episode_steps = {}
         self.stats = {
             "Losses/Cloning Loss": [],

         agent_buffer_trajectory = trajectory_to_agentbuffer(trajectory)
         # Evaluate all reward functions
-        self.collected_rewards["environment"][agent_id] += np.sum(
+        self.cumulative_rewards[agent_id] += np.sum(
         # Increment episode steps
         if agent_id not in self.episode_steps:
             self.episode_steps[agent_id] = 0
         else:
             self.episode_steps[agent_id] += len(trajectory.steps)
-            for name, rewards in self.collected_rewards.items():
-                if name == "environment":
-                    self.cumulative_returns_since_policy_update.append(
-                        rewards.get(agent_id, 0)
-                    )
-                    self.stats["Environment/Cumulative Reward"].append(
-                        rewards.get(agent_id, 0)
-                    )
-                    self.reward_buffer.appendleft(rewards.get(agent_id, 0))
-                    rewards[agent_id] = 0
-                else:
-                    self.stats[self.policy.reward_signals[name].stat_name].append(
-                        rewards.get(agent_id, 0)
-                    )
-                    rewards[agent_id] = 0
+            self.cumulative_returns_since_policy_update.append(
+                self.cumulative_rewards.get(agent_id, 0)
+            )
+            self.stats["Environment/Cumulative Reward"].append(
+                self.cumulative_rewards.get(agent_id, 0)
+            )
+            self.cumulative_rewards[agent_id] = 0

     def end_episode(self):
         """

         self.evaluation_buffer.reset_local_buffers()
         for agent_id in self.cumulative_rewards:
             self.cumulative_rewards[agent_id] = 0
         for agent_id in self.episode_steps:
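Note on the dict-to-defaultdict change above: with defaultdict(lambda: 0), the per-agent reward accumulation no longer needs a membership check before the first += for a new agent_id, while episode_steps stays a plain dict and keeps its guard. A minimal stand-alone sketch of that pattern, not part of this commit (record_trajectory and the sample values are hypothetical):

from collections import defaultdict

cumulative_rewards = defaultdict(lambda: 0)  # missing agent_ids start at 0
episode_steps = {}                           # plain dict still needs a guard

def record_trajectory(agent_id, reward_sum, n_steps):
    # defaultdict: no membership check needed before the first +=
    cumulative_rewards[agent_id] += reward_sum
    # plain dict: the first sighting of an agent_id is initialized explicitly
    if agent_id not in episode_steps:
        episode_steps[agent_id] = 0
    else:
        episode_steps[agent_id] += n_steps

record_trajectory("agent_0", 1.5, 15)
record_trajectory("agent_0", 2.0, 15)
print(cumulative_rewards["agent_0"], episode_steps["agent_0"])  # 3.5 15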

ml-agents/mlagents/trainers/tests/test_bc.py (55 changes)


 from mlagents.envs.environment import UnityEnvironment
 from mlagents.envs.mock_communicator import MockCommunicator
 from mlagents.trainers.tests.mock_brain import make_brain_parameters
+from mlagents.trainers.tests.test_trajectory import make_fake_trajectory

 @pytest.fixture

     assert trainer.step == 1

-def test_bc_trainer_add_proc_experiences(dummy_config):
-    trainer, env = create_bc_trainer(dummy_config)
-    # Test add_experiences
-    returned_braininfo = env.step()
-    brain_name = "Ball3DBrain"
-    trainer.add_experiences(
-        returned_braininfo[brain_name], returned_braininfo[brain_name], {}
-    )  # Take action outputs is not used
-    for agent_id in returned_braininfo[brain_name].agents:
-        assert trainer.evaluation_buffer[agent_id].last_brain_info is not None
-        assert trainer.episode_steps[agent_id] > 0
-        assert trainer.cumulative_rewards[agent_id] > 0
-    # Test process_experiences by setting done
-    returned_braininfo[brain_name].local_done = 12 * [True]
-    trainer.process_experiences(
-        returned_braininfo[brain_name], returned_braininfo[brain_name]
-    )
-    for agent_id in returned_braininfo[brain_name].agents:
-        assert trainer.episode_steps[agent_id] == 0
-        assert trainer.cumulative_rewards[agent_id] == 0
+def test_bc_trainer_process_trajectory(dummy_config):
+    trainer, _ = create_bc_trainer(dummy_config)
+    # Test process_trajectory
+    agent_id = "test_agent"
+    trajectory = make_fake_trajectory(length=15)
+    trainer.process_trajectory(trajectory)
+    assert len(trainer.stats["Environment/Cumulative Reward"]) > 0
+    # Assert that the done reset the steps
+    assert trainer.episode_steps[agent_id] == 0
+    assert trainer.cumulative_rewards[agent_id] == 0
+    # Create a trajectory without a done
+    trajectory = make_fake_trajectory(length=15, max_step_complete=True)
+    trainer.process_trajectory(trajectory)
+    assert trainer.episode_steps[agent_id] == 15
+    assert trainer.cumulative_rewards[agent_id] > 0

-    trainer, env = create_bc_trainer(dummy_config)
-    returned_braininfo = env.step()
-    brain_name = "Ball3DBrain"
-    trainer.add_experiences(
-        returned_braininfo[brain_name], returned_braininfo[brain_name], {}
-    )  # Take action outputs is not used
-    trainer.process_experiences(
-        returned_braininfo[brain_name], returned_braininfo[brain_name]
-    )
+    trainer, _ = create_bc_trainer(dummy_config)
+    trajectory = make_fake_trajectory(length=15)
+    trainer.process_trajectory(trajectory)
-    for agent_id in returned_braininfo[brain_name].agents:
-        assert trainer.episode_steps[agent_id] == 0
-        assert trainer.cumulative_rewards[agent_id] == 0
+    agent_id = "test_agent"
+    assert trainer.episode_steps[agent_id] == 0
+    assert trainer.cumulative_rewards[agent_id] == 0

 @mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
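For context on the trajectory-based tests above: make_fake_trajectory builds a synthetic trajectory that the trainer consumes through process_trajectory instead of stepping a mocked UnityEnvironment. The sketch below is only a simplified stand-in; Step, Trajectory, and fake_trajectory here are hypothetical types, not the actual mlagents.trainers.tests.test_trajectory implementation, and it only assumes what the diff shows (a length argument and a max_step_complete flag that yields a trajectory "without a done").

from typing import List, NamedTuple

class Step(NamedTuple):
    reward: float
    done: bool
    max_step: bool

class Trajectory(NamedTuple):
    steps: List[Step]

def fake_trajectory(length: int, max_step_complete: bool = False) -> Trajectory:
    # All but the last step are ordinary, non-terminal steps with a small reward.
    steps = [Step(reward=0.1, done=False, max_step=False) for _ in range(length - 1)]
    # The last step either ends the episode (done) or is cut off at max_step.
    steps.append(
        Step(reward=0.1, done=not max_step_complete, max_step=max_step_complete)
    )
    return Trajectory(steps=steps)

traj = fake_trajectory(length=15)
assert len(traj.steps) == 15 and traj.steps[-1].done        # ends with a true done
traj = fake_trajectory(length=15, max_step_complete=True)
assert traj.steps[-1].max_step and not traj.steps[-1].done  # interrupted, no done

The distinction the new test exercises is exactly this: a trajectory ending with a true done resets episode_steps and cumulative_rewards, while one interrupted at max_step leaves the counters accumulating.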
