
add_policy and create_policy separated

/develop/tanhsquash
Andrew Cohen, 5 years ago
Current commit: 8578b0b7
6 files changed, 43 insertions(+), 28 deletions(-)
  1. UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs (9 changes)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (15 changes)
  3. ml-agents/mlagents/trainers/rl_trainer.py (14 changes)
  4. ml-agents/mlagents/trainers/sac/trainer.py (13 changes)
  5. ml-agents/mlagents/trainers/trainer.py (14 changes)
  6. ml-agents/mlagents/trainers/trainer_controller.py (6 changes)
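
Taken together, the Python changes split the old all-in-one add_policy into a factory step (create_policy, overridden per algorithm) and a registration step (add_policy, shared), with policies kept in a dict keyed by brain name instead of a single self.policy attribute. A minimal sketch of the resulting pattern, using stand-in classes and an example brain name rather than the real mlagents types:

from typing import Dict

class Policy:
    """Stand-in for mlagents' TFPolicy."""

class BrainParameters:
    """Stand-in for mlagents' BrainParameters."""
    def __init__(self, brain_name: str) -> None:
        self.brain_name = brain_name

class Trainer:
    def __init__(self) -> None:
        self.policies: Dict[str, Policy] = {}  # replaces the single self.policy

    def create_policy(self, brain_parameters: BrainParameters) -> Policy:
        # Factory hook: each algorithm (PPO, SAC) builds its own policy type.
        raise NotImplementedError

    def add_policy(self, brain_parameters: BrainParameters) -> None:
        # Shared registration: build via the factory, then key by brain name.
        self.policies[brain_parameters.brain_name] = self.create_policy(brain_parameters)

    def get_policy(self, brain_name: str) -> Policy:
        return self.policies[brain_name]

class PPOTrainer(Trainer):
    def create_policy(self, brain_parameters: BrainParameters) -> Policy:
        return Policy()  # the real trainer constructs PPOPolicy or MultiGpuPPOPolicy

trainer = PPOTrainer()
trainer.add_policy(BrainParameters("3DBall"))
assert trainer.get_policy("3DBall") is not None

The dict is what later enables one trainer to serve several policies (e.g. team variants of a behavior); for now the diff below still mirrors the created policy into self.policy for the existing single-policy code paths.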

UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs (9 changes)


-    //string m_BehaviorIdentifier;
-    private string concatBehaviorIdentifiers()
-    {
-        return m_BehaviorName + "?team=" + m_TeamID;
-    }
-
     public BrainParameters brainParameters
     {
         get { return m_BrainParameters; }
     }

     public string behaviorName
     {
-        get { return concatBehaviorIdentifiers(); }
+        get { return m_BehaviorName + "?team=" + m_TeamID; }
     }

         case BehaviorType.Default:
             if (FindObjectOfType<Academy>().IsCommunicatorOn)
             {
-                return new RemotePolicy(m_BrainParameters, concatBehaviorIdentifiers());
+                return new RemotePolicy(m_BrainParameters, behaviorName);
             }
             if (m_Model != null)
             {
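
Even with the helper folded into the getter, the identifier convention is unchanged: behavior name and team ID joined by "?", built by behaviorName and handed to RemotePolicy. A round-trip sketch of that convention in Python, with a made-up behavior name (the split half is exactly what trainer_controller.py does at the end of this diff):

# Unity side (C# above): m_BehaviorName + "?team=" + m_TeamID
name_behavior_id = "3DBall" + "?team=" + str(0)
assert name_behavior_id == "3DBall?team=0"

# Python side: recover the brain name that keys the trainer dict
brain_name, _ = name_behavior_id.split("?")
assert brain_name == "3DBall"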

ml-agents/mlagents/trainers/ppo/trainer.py (15 changes)


 import numpy as np
 from mlagents.envs.brain import BrainParameters, BrainInfo
+from mlagents.trainers.tf_policy import TFPolicy
 from mlagents.trainers.ppo.policy import PPOPolicy
 from mlagents.trainers.ppo.multi_gpu_policy import MultiGpuPPOPolicy, get_devices
 from mlagents.trainers.rl_trainer import RLTrainer, AllRewardsOutput

         self.policy = None

     def process_experiences(
-        self, current_info: BrainInfo, next_info: BrainInfo
+        self, name_behavior_id: str, current_info: BrainInfo, next_info: BrainInfo
     ) -> None:
         """
+        :param name_behavior_id: string policy identifier.
         :param current_info: current BrainInfo.
         :param next_info: next BrainInfo.
         """

         self.clear_update_buffer()
         self.trainer_metrics.end_policy_update()

-    def add_policy(self, brain_parameters: BrainParameters) -> None:
+    def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
-            self.policy = MultiGpuPPOPolicy(
+            policy = MultiGpuPPOPolicy(
                 self.seed,
                 brain_parameters,
                 self.trainer_parameters,
         else:
-            self.policy = PPOPolicy(
+            policy = PPOPolicy(
                 self.seed,
                 brain_parameters,
                 self.trainer_parameters,

-        for _reward_signal in self.policy.reward_signals.keys():
+        for _reward_signal in policy.reward_signals.keys():
-    def get_policy(self, brain_name: str) -> PPOPolicy:
-        return self.policy
+        return policy

     def discount_rewards(r, gamma=0.99, value_next=0.0):
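
In the hunk above, create_policy keeps the old two-way construction (MultiGpuPPOPolicy vs PPOPolicy) but returns the result instead of assigning self.policy, leaving storage to add_policy; the trainer's own get_policy is dropped in favor of the base-class dict lookup. The selection condition is elided in this view; a generic sketch of such a device-conditional factory, with num_devices() as a hypothetical stand-in for the imported get_devices():

class PPOPolicy:
    """Stand-in single-device policy."""

class MultiGpuPPOPolicy:
    """Stand-in multi-device policy."""

def num_devices() -> int:
    # Hypothetical stand-in; the real code imports get_devices()
    # from mlagents.trainers.ppo.multi_gpu_policy.
    return 1

def create_policy(multi_gpu: bool):
    # Pick the implementation, then hand it back to the caller (add_policy)
    # rather than stashing it on self.
    if multi_gpu and num_devices() > 1:
        return MultiGpuPPOPolicy()
    return PPOPolicy()

print(type(create_policy(multi_gpu=True)).__name__)  # PPOPolicy when only one device is visible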

ml-agents/mlagents/trainers/rl_trainer.py (14 changes)


 from typing import Dict, List, Any, NamedTuple
 import numpy as np
-from mlagents.envs.brain import BrainInfo
+from mlagents.envs.brain import BrainParameters, BrainInfo
+from mlagents.trainers.tf_policy import TFPolicy
 from mlagents.trainers.components.reward_signals import RewardSignalResult

 LOGGER = logging.getLogger("mlagents.trainers")

         self.processing_buffer = ProcessingBuffer()
         self.update_buffer = AgentBuffer()
         self.episode_steps = {}
+        self.policy: TFPolicy = None

     def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
         """

     def add_experiences(
         self,
+        name_behavior_id: str,
         curr_info: BrainInfo,
         next_info: BrainInfo,
         take_action_outputs: ActionInfoOutputs,
     ) -> None:
         """
+        :param name_behavior_id: string policy identifier.
         :param curr_info: current BrainInfo.
         :param next_info: next BrainInfo.
         :param take_action_outputs: The outputs of the Policy's get_action method.
         """

         raise UnityTrainerException(
             "The add_rewards_outputs method was not implemented."
         )

+    def add_policy(self, brain_parameters: BrainParameters) -> None:
+        """
+        Adds policy to trainers list of policies
+        """
+        policy = self.create_policy(brain_parameters)
+        self.policy = policy
+        self.policies[brain_parameters.brain_name] = policy

ml-agents/mlagents/trainers/sac/trainer.py (13 changes)


 from mlagents.envs.brain import BrainParameters, BrainInfo
 from mlagents.envs.action_info import ActionInfoOutputs
 from mlagents.envs.timers import timed
+from mlagents.trainers.tf_policy import TFPolicy
 from mlagents.trainers.sac.policy import SACPolicy
 from mlagents.trainers.rl_trainer import RLTrainer, AllRewardsOutput

         )

     def process_experiences(
-        self, current_info: BrainInfo, next_info: BrainInfo
+        self, name_behavior_id: str, current_info: BrainInfo, next_info: BrainInfo
     ) -> None:
         """
+        :param name_behavior_id: string policy identifier.
         :param current_info: current BrainInfo.
         :param next_info: next BrainInfo.
         """

         self.update_reward_signals()
         self.trainer_metrics.end_policy_update()

-    def add_policy(self, brain_parameters: BrainParameters) -> None:
+    def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
-        self.policy = SACPolicy(
+        policy = SACPolicy(
             self.seed,
             brain_parameters,
             self.trainer_parameters,

-        for _reward_signal in self.policy.reward_signals.keys():
+        for _reward_signal in policy.reward_signals.keys():
-    def get_policy(self, brain_name: str) -> SACPolicy:
-        return self.policy
+        return policy

     def update_sac_policy(self) -> None:
         """

ml-agents/mlagents/trainers/trainer.py (14 changes)


         self.summary_writer = tf.summary.FileWriter(self.summary_path)
         self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
-        self.policy: TFPolicy = None
+        self.policies: Dict[str, TFPolicy] = {}
         self.step: int = 0

     def check_param_keys(self):

     def add_experiences(
         self,
+        name_behavior_id: str,
         curr_info: BrainInfo,
         next_info: BrainInfo,
         take_action_outputs: ActionInfoOutputs,
     ) -> None:
         """
+        :param name_behavior_id: string policy identifier.
         :param curr_info: current BrainInfo.
         :param next_info: next BrainInfo.
         :param take_action_outputs: The outputs of the Policy's get_action method.
         """

     def process_experiences(
-        self, current_info: BrainInfo, next_info: BrainInfo
+        self, name_behavior_id: str, current_info: BrainInfo, next_info: BrainInfo
     ) -> None:
         """
+        :param name_behavior_id: string policy identifier.
         :param current_info: current BrainInfo.
         :param next_info: next BrainInfo.
         """

         """
         raise UnityTrainerException("The update_model method was not implemented.")

-    def add_policy(self, brain_parameters: BrainParameters) -> None:
+    def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
         """
-        Adds policy to trainers list of policies
+        Creates policy
         """
         raise UnityTrainerException("The update_model method was not implemented.")

+    def add_policy(self, brain_parameters: BrainParameters) -> None:
+        """
+        Adds policy to trainers list of policies
+        """
+
+    def get_policy(self, brain_name: str) -> TFPolicy:
+        """
+        Gets policy from trainers list of policies
+        """
+        return self.policies[brain_name]

ml-agents/mlagents/trainers/trainer_controller.py (6 changes)


         brain_name, _ = name_behavior_id.split("?")
-        # This could be done with a try/except which may improve performance?
-        if brain_name in self.trainers:
+        try:
             trainer = self.trainers[brain_name]
-        else:
+        except KeyError:
             trainer = self.trainer_factory.generate(brain_name)
             self.trainers[brain_name] = trainer
             self.logger.info(trainer)

         for behavior_identifier in self.brain_name_to_identifier[brain_name]:
             if step_info.has_actions_for_brain(behavior_identifier):
                 trainer.add_experiences(
+                    behavior_identifier,
                     step_info.previous_all_brain_info[behavior_identifier],
                     step_info.current_all_brain_info[behavior_identifier],
                     step_info.brain_name_to_action_info[
                         behavior_identifier
                     ].outputs,
                 )
                 trainer.process_experiences(
+                    behavior_identifier,
                     step_info.previous_all_brain_info[behavior_identifier],
                     step_info.current_all_brain_info[behavior_identifier],
                 )
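
The controller change ties the two ends together: each incoming name_behavior_id is split back to its brain name, the trainer for that brain is fetched (or created on first sight), and the full identifier is forwarded so the trainer can tell team variants apart. A minimal sketch of the lookup, with a stubbed-out stand-in for trainer_factory.generate:

trainers: dict = {}

def generate(brain_name: str) -> dict:
    # Stand-in for self.trainer_factory.generate(brain_name).
    return {"brain": brain_name}

def get_or_create_trainer(name_behavior_id: str) -> dict:
    brain_name, _ = name_behavior_id.split("?")
    try:
        # EAFP: a single dict lookup on the common hit path, which is the
        # speedup the removed "could be done with a try/except" comment
        # was asking about.
        return trainers[brain_name]
    except KeyError:
        trainer = generate(brain_name)
        trainers[brain_name] = trainer
        return trainer

# Two team identifiers of the same behavior share one trainer.
assert get_or_create_trainer("3DBall?team=0") is get_or_create_trainer("3DBall?team=1")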
