The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents.

import numpy as np
from typing import Dict
from mlagents.trainers.buffer import AgentBuffer, BufferKey
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import RewardSignalSettings


class ExtrinsicRewardProvider(BaseRewardProvider):
    """
    Evaluates extrinsic reward. For a single agent, this equals the individual reward
    given to the agent. For the POCA algorithm, we want not only the individual reward
    but also the team reward and the individual rewards of the other agents.
    """

    def __init__(self, specs: BehaviorSpec, settings: RewardSignalSettings) -> None:
        super().__init__(specs, settings)
        # When enabled (e.g. by the POCA trainer), teammates' individual rewards
        # are added to this agent's extrinsic reward.
        self.add_groupmate_rewards = False

    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        indiv_rewards = np.array(
            mini_batch[BufferKey.ENVIRONMENT_REWARDS], dtype=np.float32
        )
        total_rewards = indiv_rewards
        if BufferKey.GROUPMATE_REWARDS in mini_batch and self.add_groupmate_rewards:
            groupmate_rewards_list = mini_batch[BufferKey.GROUPMATE_REWARDS]
            groupmate_rewards_sum = np.array(
                [sum(_rew) for _rew in groupmate_rewards_list], dtype=np.float32
            )
            total_rewards += groupmate_rewards_sum
        if BufferKey.GROUP_REWARD in mini_batch:
            group_rewards = np.array(
                mini_batch[BufferKey.GROUP_REWARD], dtype=np.float32
            )
            # Add all the group rewards to the individual rewards
            total_rewards += group_rewards
        return total_rewards

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        # The extrinsic reward has no trainable parameters, so there is nothing
        # to update and no statistics to report.
        return {}
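
To make the aggregation in evaluate concrete, here is a minimal standalone sketch using plain numpy only. The arrays are hypothetical stand-ins for the ENVIRONMENT_REWARDS, GROUPMATE_REWARDS, and GROUP_REWARD entries of a mini-batch, and add_groupmate_rewards is assumed to be enabled, as it would be for a POCA-style team.

import numpy as np

# Hypothetical mini-batch contents for one agent over three steps in a team of three.
indiv_rewards = np.array([1.0, 0.0, 0.5], dtype=np.float32)    # ENVIRONMENT_REWARDS
groupmate_rewards_list = [[0.2, 0.3], [0.0, 0.0], [0.1, 0.4]]  # GROUPMATE_REWARDS: one reward per teammate per step
group_rewards = np.array([0.5, 0.5, 0.0], dtype=np.float32)    # GROUP_REWARD shared by the whole team

total_rewards = indiv_rewards.copy()

# With add_groupmate_rewards enabled, each step adds the sum of the teammates' rewards.
total_rewards += np.array([sum(rew) for rew in groupmate_rewards_list], dtype=np.float32)

# The shared group reward is added whenever it is present in the batch.
total_rewards += group_rewards

print(total_rewards)  # [2.  0.5 1. ]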