
Replace brain_name with behavior_name (#4419)

brain_name -> behavior_name
some prob -> log_prob in comments
rename files optimizer -> optimizer_tf for tensorflow
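
For downstream code, the file rename means the TensorFlow optimizer imports move to a new module path while the class names stay the same. A minimal sketch (assuming an ml-agents checkout that includes this commit):

# Before this commit:
#   from mlagents.trainers.ppo.optimizer import PPOOptimizer
#   from mlagents.trainers.sac.optimizer import SACOptimizer
# After the optimizer -> optimizer_tf rename:
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.sac.optimizer_tf import SACOptimizer
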
/MLA-1734-demo-provider
GitHub, 4 years ago
Current commit: bfda9576
15 changed files, with 42 additions and 33 deletions
  1. ml-agents/mlagents/trainers/model_saver/model_saver.py (10 changes)
  2. ml-agents/mlagents/trainers/model_saver/tf_model_saver.py (10 changes)
  3. ml-agents/mlagents/trainers/model_saver/torch_model_saver.py (8 changes)
  4. ml-agents/mlagents/trainers/policy/torch_policy.py (4 changes)
  5. ml-agents/mlagents/trainers/ppo/trainer.py (13 changes)
  6. ml-agents/mlagents/trainers/ppo/optimizer_tf.py (2 changes)
  7. ml-agents/mlagents/trainers/sac/trainer.py (13 changes)
  8. ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)
  9. ml-agents/mlagents/trainers/tests/test_reward_signals.py (4 changes)
  10. ml-agents/mlagents/trainers/tests/test_sac.py (2 changes)
  11. ml-agents/mlagents/trainers/tests/test_saver.py (2 changes)
  12. ml-agents/mlagents/trainers/tf/models.py (4 changes)
  13. ml-agents/mlagents/trainers/torch/model_serialization.py (1 change)
  14. /ml-agents/mlagents/trainers/ppo/optimizer_tf.py (renamed, 0 changes)
  15. /ml-agents/mlagents/trainers/sac/optimizer_tf.py (renamed, 0 changes)

ml-agents/mlagents/trainers/model_saver/model_saver.py (10 changes)


 pass
 @abc.abstractmethod
-def save_checkpoint(self, brain_name: str, step: int) -> str:
+def save_checkpoint(self, behavior_name: str, step: int) -> str:
-:param brain_name: Brain name of brain to be trained
+:param behavior_name: Behavior name of behavior to be trained
-def export(self, output_filepath: str, brain_name: str) -> None:
+def export(self, output_filepath: str, behavior_name: str) -> None:
-Saves the serialized model, given a path and brain name.
+Saves the serialized model, given a path and behavior name.
-:param brain_name: Brain name of brain to be trained.
+:param behavior_name: Behavior name of behavior to be trained.
 """
 pass
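
To see the renamed abstract interface in one place, here is a small, self-contained sketch that mirrors the two signatures above; the class name is a hypothetical stand-in, not the actual ml-agents base class:

import abc

class ExampleModelSaver(abc.ABC):
    # Hypothetical stand-in mirroring the abstract saver interface above.
    @abc.abstractmethod
    def save_checkpoint(self, behavior_name: str, step: int) -> str:
        """Checkpoint the model for behavior_name at step and return the checkpoint path."""

    @abc.abstractmethod
    def export(self, output_filepath: str, behavior_name: str) -> None:
        """Save the serialized model, given a path and behavior name."""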

ml-agents/mlagents/trainers/model_saver/tf_model_saver.py (10 changes)


 with self.policy.graph.as_default():
 self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
-def save_checkpoint(self, brain_name: str, step: int) -> str:
-checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
+def save_checkpoint(self, behavior_name: str, step: int) -> str:
+checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
 # Save the TF checkpoint and graph definition
 if self.graph:
 with self.graph.as_default():

 self.graph, self.model_path, "raw_graph_def.pb", as_text=False
 )
 # also save the policy so we have optimized model files for each checkpoint
-self.export(checkpoint_path, brain_name)
+self.export(checkpoint_path, behavior_name)
-def export(self, output_filepath: str, brain_name: str) -> None:
+def export(self, output_filepath: str, behavior_name: str) -> None:
-self.model_path, output_filepath, brain_name, self.graph, self.sess
+self.model_path, output_filepath, behavior_name, self.graph, self.sess
 )
 def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None:
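
Checkpoint file names are now derived from the behavior name. A small sketch of the path construction used in save_checkpoint above; the directory, name, and step are made-up values:

import os

model_path = "results/ppo_run/3DBall"   # hypothetical results directory
behavior_name = "3DBall"                # hypothetical behavior name
step = 50000
checkpoint_path = os.path.join(model_path, f"{behavior_name}-{step}")
print(checkpoint_path)                  # results/ppo_run/3DBall/3DBall-50000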

ml-agents/mlagents/trainers/model_saver/torch_model_saver.py (8 changes)


 self.policy = module
 self.exporter = ModelSerializer(self.policy)
-def save_checkpoint(self, brain_name: str, step: int) -> str:
+def save_checkpoint(self, behavior_name: str, step: int) -> str:
-checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
+checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
-self.export(checkpoint_path, brain_name)
+self.export(checkpoint_path, behavior_name)
-def export(self, output_filepath: str, brain_name: str) -> None:
+def export(self, output_filepath: str, behavior_name: str) -> None:
 if self.exporter is not None:
 self.exporter.export_policy_model(output_filepath)
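
The Torch saver follows the same pattern: save_checkpoint builds a behavior-named path and then calls export on it. A toy, self-contained illustration of that call order (not the real class; it records export requests instead of writing model files):

import os
from typing import List, Tuple

class ToyModelSaver:
    # Illustration only: mirrors the save_checkpoint -> export call order above.
    def __init__(self, model_path: str) -> None:
        self.model_path = model_path
        self.exported: List[Tuple[str, str]] = []

    def save_checkpoint(self, behavior_name: str, step: int) -> str:
        checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
        self.export(checkpoint_path, behavior_name)
        return checkpoint_path

    def export(self, output_filepath: str, behavior_name: str) -> None:
        self.exported.append((output_filepath, behavior_name))

saver = ToyModelSaver("results/run")
print(saver.save_checkpoint("3DBall", 1000))  # results/run/3DBall-1000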

ml-agents/mlagents/trainers/policy/torch_policy.py (4 changes)


 also use a CNN to encode visual input prior to the MLP. Supports discrete and
 continuous action spaces, as well as recurrent networks.
 :param seed: Random seed.
-:param brain: Assigned BrainParameters object.
+:param behavior_spec: Assigned BehaviorSpec object.
 :param trainer_settings: Defined training parameters.
 :param load: Whether a pre-trained model will be loaded or a new one created.
 :param tanh_squash: Whether to use a tanh function on the continuous output,

 """
 Decides actions given observations information, and takes them in environment.
 :param worker_id:
-:param decision_requests: A dictionary of brain names and BrainInfo from environment.
+:param decision_requests: A dictionary of behavior names and DecisionSteps from environment.
 :return: an ActionInfo containing action, memories, values and an object
 to be passed to add experiences
 """

ml-agents/mlagents/trainers/ppo/trainer.py (13 changes)


 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.policy import Policy
 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents.trainers.ppo.optimizer import PPOOptimizer
+from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType

 def __init__(
 self,
-brain_name: str,
+behavior_name: str,
 reward_buff_cap: int,
 trainer_settings: TrainerSettings,
 training: bool,

 ):
 """
 Responsible for collecting experiences and training PPO model.
-:param brain_name: The name of the brain associated with trainer config
+:param behavior_name: The name of the behavior associated with trainer config
 :param reward_buff_cap: Max reward history to track in the reward buffer
 :param trainer_settings: The parameters for the trainer.
 :param training: Whether the trainer is set for training.

 """
 super().__init__(
-brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
+behavior_name,
+trainer_settings,
+training,
+load,
+artifact_path,
+reward_buff_cap,
 )
 self.hyperparameters: PPOSettings = cast(
 PPOSettings, self.trainer_settings.hyperparameters
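
A side note on the cast() call at the end of this hunk: typing.cast only narrows the type for static checkers and returns its argument unchanged at runtime. A tiny sketch with a made-up value:

from typing import cast

value: object = 3.0e-4               # hypothetical hyperparameter value
learning_rate = cast(float, value)   # no runtime conversion, purely a type hint
print(learning_rate)                 # 0.0003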

ml-agents/mlagents/trainers/ppo/optimizer_tf.py (2 changes)


name="old_probabilities",
)
# Break old log probs into separate branches
# Break old log log_probs into separate branches
old_log_prob_branches = ModelUtils.break_into_branches(
self.all_old_log_probs, self.policy.act_size
)
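
A hedged sketch of what breaking the concatenated log probs into branches does conceptually; this is a NumPy stand-in, not the TensorFlow implementation, and the branch sizes are made up:

import numpy as np

act_size = [3, 2]                    # hypothetical discrete branches with 3 and 2 actions
all_old_log_probs = np.arange(5.0)   # concatenated per-action values
branches = np.split(all_old_log_probs, np.cumsum(act_size)[:-1])
print([b.tolist() for b in branches])  # [[0.0, 1.0, 2.0], [3.0, 4.0]]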

ml-agents/mlagents/trainers/sac/trainer.py (13 changes)


 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.policy import Policy
-from mlagents.trainers.sac.optimizer import SACOptimizer
+from mlagents.trainers.sac.optimizer_tf import SACOptimizer
 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.trajectory import Trajectory, SplitObservations
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers

 def __init__(
 self,
-brain_name: str,
+behavior_name: str,
 reward_buff_cap: int,
 trainer_settings: TrainerSettings,
 training: bool,

 ):
 """
 Responsible for collecting experiences and training SAC model.
-:param brain_name: The name of the brain associated with trainer config
+:param behavior_name: The name of the behavior associated with trainer config
 :param reward_buff_cap: Max reward history to track in the reward buffer
 :param trainer_settings: The parameters for the trainer.
 :param training: Whether the trainer is set for training.

 """
 super().__init__(
-brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
+behavior_name,
+trainer_settings,
+training,
+load,
+artifact_path,
+reward_buff_cap,
 )
 self.seed = seed
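
Both trainer constructors are now keyed on behavior_name. As a point of reference (an assumption about naming conventions, not something shown in this diff), the ids reported by the environment can carry a team suffix such as "3DBall?team=0", and the part before the "?" is the behavior name the trainer is configured under; the BehaviorIdentifiers import above handles that split. A trivial sketch:

full_behavior_id = "3DBall?team=0"               # hypothetical id from the environment
behavior_name = full_behavior_id.split("?")[0]
print(behavior_name)                             # 3DBall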

ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)


 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
-from mlagents.trainers.ppo.optimizer import PPOOptimizer
+from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.agent_processor import AgentManagerQueue
 from mlagents.trainers.tests import mock_brain as mb

ml-agents/mlagents/trainers/tests/test_reward_signals.py (4 changes)


 import os
 import mlagents.trainers.tests.mock_brain as mb
 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents.trainers.sac.optimizer import SACOptimizer
-from mlagents.trainers.ppo.optimizer import PPOOptimizer
+from mlagents.trainers.sac.optimizer_tf import SACOptimizer
+from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
 from mlagents.trainers.settings import (
 GAILSettings,

ml-agents/mlagents/trainers/tests/test_sac.py (2 changes)


 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.sac.trainer import SACTrainer
-from mlagents.trainers.sac.optimizer import SACOptimizer
+from mlagents.trainers.sac.optimizer_tf import SACOptimizer
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.agent_processor import AgentManagerQueue
 from mlagents.trainers.tests import mock_brain as mb

ml-agents/mlagents/trainers/tests/test_saver.py (2 changes)


 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.tests import mock_brain as mb
 from mlagents.trainers.tests.test_nn_policy import create_policy_mock
-from mlagents.trainers.ppo.optimizer import PPOOptimizer
+from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 def test_register(tmp_path):

ml-agents/mlagents/trainers/tf/models.py (4 changes)


 :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
 :param action_size: A list containing the number of possible actions for each branch
 :return: The action output dimension [batch_size, num_branches], the concatenated
-normalized probs (after softmax)
-and the concatenated normalized log probs
+normalized log_probs (after softmax)
+and the concatenated normalized log log_probs
 """
 branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
 raw_probs = [
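
A hedged NumPy sketch of the per-branch masking this docstring describes (illustration only, not the TensorFlow graph code): masked actions get zero probability, the remainder is renormalized, and the log is taken with a small epsilon for numerical stability.

import numpy as np

logits = np.array([1.0, 2.0, 0.5])   # one discrete branch, values made up
mask = np.array([1.0, 0.0, 1.0])     # the second action is disallowed
probs = np.exp(logits) / np.exp(logits).sum()
masked = probs * mask
normalized_probs = masked / masked.sum()
normalized_log_probs = np.log(normalized_probs + 1e-10)
print(normalized_probs.round(3))     # [0.622 0.    0.378]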

ml-agents/mlagents/trainers/torch/model_serialization.py (1 change)


 Exports a Torch model for a Policy to .onnx format for Unity embedding.
 :param output_filepath: file path to output the model (without file suffix)
-:param brain_name: Brain name of brain to be trained
 """
 if not os.path.exists(output_filepath):
 os.makedirs(output_filepath)
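
A small aside on the directory check at the end of this hunk: the same effect can be had in one call with exist_ok=True, which also avoids the race between the existence check and the creation. A sketch with a made-up path:

import os

output_filepath = "results/run/exported_model"   # hypothetical export prefix
os.makedirs(output_filepath, exist_ok=True)      # equivalent to the check-then-create above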

/ml-agents/mlagents/trainers/ppo/optimizer.py → /ml-agents/mlagents/trainers/ppo/optimizer_tf.py

/ml-agents/mlagents/trainers/sac/optimizer.py → /ml-agents/mlagents/trainers/sac/optimizer_tf.py
