
Add PushBlockCollab config and fix some stuff

Branch: /develop/action-slice
Ervin Teng, 4 years ago
Current commit: 4b159789
3 changed files with 30 additions and 3 deletions
  1. ml-agents/mlagents/trainers/coma/trainer.py (6 changes)
  2. ml-agents/mlagents/trainers/settings.py (1 change)
  3. config/ppo/PushBlockCollab.yaml (26 changes)

ml-agents/mlagents/trainers/coma/trainer.py (6 changes)

         )
         self.seed = seed
         self.policy: Policy = None  # type: ignore
-        self.collected_rewards["environment_team"] = defaultdict(lambda: 0)
+        self.collected_group_rewards = defaultdict(lambda: 0)

     def _process_trajectory(self, trajectory: Trajectory) -> None:
         """

         self.collected_rewards["environment"][agent_id] += np.sum(
             agent_buffer_trajectory[BufferKey.ENVIRONMENT_REWARDS]
         )
-        self.collected_rewards["environment_team"][agent_id] += np.sum(
+        self.collected_group_rewards[agent_id] += np.sum(
             agent_buffer_trajectory[BufferKey.GROUP_REWARD]
         )
         for name, reward_signal in self.optimizer.reward_signals.items():

         if "environment_team" in self.collected_rewards:
             self.stats_reporter.add_stat(
                 "Environment/Team Cumulative Reward",
-                self.collected_rewards["environment_team"].get(agent_id, 0),
+                self.collected_group_rewards.get(agent_id, 0),
                 aggregation=StatsAggregationMethod.HISTOGRAM,
             )
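For context, the trainer change moves team-reward bookkeeping out of the generic collected_rewards dictionary and into a dedicated collected_group_rewards accumulator keyed by agent id. Below is a minimal standalone sketch of that accumulation pattern, assuming nothing from ml-agents: the function names and the trajectory/reporting shapes are hypothetical stand-ins, not the trainer's actual API.

from collections import defaultdict
import numpy as np

# Per-agent accumulator for group (team) rewards, kept separate from the
# per-reward-signal dictionary -- mirrors self.collected_group_rewards above.
collected_group_rewards = defaultdict(lambda: 0.0)

def accumulate_group_reward(agent_id: str, group_rewards: np.ndarray) -> None:
    # Sum the group reward over every step of one trajectory for this agent.
    collected_group_rewards[agent_id] += np.sum(group_rewards)

def team_cumulative_reward(agent_id: str) -> float:
    # At episode end, read the accumulated value back out -- analogous to the
    # "Environment/Team Cumulative Reward" stat in the hunk above.  Clearing
    # the entry between episodes is omitted from this sketch.
    return collected_group_rewards.get(agent_id, 0.0)

# Toy usage: two trajectories for one agent, then an episode-end readout.
accumulate_group_reward("agent_0", np.array([0.0, 0.0, 1.0]))
accumulate_group_reward("agent_0", np.array([0.5, 0.0]))
print(team_cumulative_reward("agent_0"))  # 1.5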

ml-agents/mlagents/trainers/settings.py (1 change)

     def to_settings(self) -> type:
         _mapping = {
             RewardSignalType.EXTRINSIC: RewardSignalSettings,
+            RewardSignalType.GROUP_EXTRINSIC: RewardSignalSettings,
             RewardSignalType.GAIL: GAILSettings,
             RewardSignalType.CURIOSITY: CuriositySettings,
             RewardSignalType.RND: RNDSettings,
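The single added line registers the new GROUP_EXTRINSIC signal type against the existing plain RewardSignalSettings class, so a group reward block is parsed with the same gamma/strength fields as extrinsic. A condensed, self-contained sketch of that enum-to-settings mapping pattern follows; it uses dataclasses and only two signal types, whereas the real settings.py is attrs-based and also covers GAIL, curiosity, and RND.

from dataclasses import dataclass
from enum import Enum

@dataclass
class RewardSignalSettings:
    # Simplified stand-in: extrinsic and group-extrinsic signals share the
    # same plain settings class, as in the mapping above.
    gamma: float = 0.99
    strength: float = 1.0

class RewardSignalType(Enum):
    # String values chosen here to match the YAML keys; sketch only.
    EXTRINSIC = "extrinsic"
    GROUP_EXTRINSIC = "group"

    def to_settings(self) -> type:
        # Map each signal type to the class used to parse its YAML block.
        _mapping = {
            RewardSignalType.EXTRINSIC: RewardSignalSettings,
            RewardSignalType.GROUP_EXTRINSIC: RewardSignalSettings,
        }
        return _mapping[self]

# A "group" entry under reward_signals is therefore parsed with plain
# RewardSignalSettings, i.e. only gamma and strength are expected.
print(RewardSignalType.GROUP_EXTRINSIC.to_settings().__name__)  # RewardSignalSettings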

config/ppo/PushBlockCollab.yaml (new file, 26 additions)

behaviors:
  PushBlock:
    trainer_type: coma
    hyperparameters:
      batch_size: 1024
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      group:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000 #2000000
    time_horizon: 64
    summary_freq: 60000
    threaded: true
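The config is standard ml-agents trainer YAML: a single PushBlock behavior driven by the coma trainer, with only the new group reward signal configured. In normal use it would be passed to mlagents-learn (for example, mlagents-learn config/ppo/PushBlockCollab.yaml --run-id=PushBlockCollab). As a quick illustration only, the snippet below loads the file with PyYAML and spot-checks the fields shown above; it assumes PyYAML is installed and that it runs from the repository root.

import yaml  # PyYAML (pip install pyyaml); not part of the commit itself

# Spot-check the new config: behavior name, trainer type, and the group
# reward signal wiring.  The path assumes the ml-agents repository root.
with open("config/ppo/PushBlockCollab.yaml") as f:
    cfg = yaml.safe_load(f)

pushblock = cfg["behaviors"]["PushBlock"]
print(pushblock["trainer_type"])             # coma
print(pushblock["reward_signals"]["group"])  # {'gamma': 0.99, 'strength': 1.0}
print(pushblock["max_steps"])                # 20000000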