
Revert unneeded changes back to master

Branch: /develop/add-fire/clean2
Ervin Teng, 4 years ago
Current commit: 987ea2d0
9 files changed, 17 insertions and 18 deletions
  1. ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer.py (2 changes)
  3. ml-agents/mlagents/trainers/saver/saver.py (10 changes)
  4. ml-agents/mlagents/trainers/saver/tf_saver.py (11 changes)
  5. ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)
  6. ml-agents/mlagents/trainers/tests/test_reward_signals.py (2 changes)
  7. ml-agents/mlagents/trainers/tests/test_saver.py (2 changes)
  8. ml-agents/mlagents/trainers/tf/models.py (4 changes)
  9. /ml-agents/mlagents/trainers/ppo/optimizer.py (0 changes, renamed from optimizer_tf.py)

ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)


 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.policy import Policy
 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
+from mlagents.trainers.ppo.optimizer import PPOOptimizer
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType

ml-agents/mlagents/trainers/ppo/optimizer.py (2 changes)


 name="old_probabilities",
 )
-# Break old log log_probs into separate branches
+# Break old log probs into separate branches
 old_log_prob_branches = ModelUtils.break_into_branches(
 self.all_old_log_probs, self.policy.act_size
 )
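For context on the hunk above: ModelUtils.break_into_branches slices a tensor whose columns concatenate every discrete action branch into one tensor per branch, which is what lets the optimizer handle each branch's old log probs separately. A rough NumPy sketch of that slicing (the real helper operates on TF tensors; the function name here is only for illustration):

import numpy as np

def break_into_branches_sketch(concatenated, act_size):
    # Slice the concatenated per-action values into one array per discrete branch.
    idx = [0] + list(np.cumsum(act_size))
    return [concatenated[:, idx[i] : idx[i + 1]] for i in range(len(act_size))]

# e.g. a batch of 2 with branch sizes 3 and 2:
old_log_probs = np.arange(10).reshape(2, 5)
branches = break_into_branches_sketch(old_log_probs, [3, 2])
# branches[0].shape == (2, 3), branches[1].shape == (2, 2)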

ml-agents/mlagents/trainers/saver/saver.py (10 changes)


 pass
 @abc.abstractmethod
-def save_checkpoint(self, behavior_name: str, step: int) -> str:
+def save_checkpoint(self, brain_name: str, step: int) -> str:
-:param behavior_name: Behavior name of behavior to be trained
+:param brain_name: Brain name of brain to be trained
-def export(self, output_filepath: str, behavior_name: str) -> None:
+def export(self, output_filepath: str, brain_name: str) -> None:
-Saves the serialized model, given a path and behavior name.
+Saves the serialized model, given a path and brain name.
-:param behavior_name: Behavior name of behavior to be trained.
+:param brain_name: Brain name of brain to be trained.
 """
 pass
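Taken together, these changes only rename the parameter back to brain_name; the abstract interface itself is unchanged. For reference, a minimal sketch of that interface using the signatures visible in the diff (the class name BaseSaver is an assumption, since it does not appear in the hunk):

import abc

class BaseSaver(abc.ABC):  # class name assumed for illustration
    @abc.abstractmethod
    def save_checkpoint(self, brain_name: str, step: int) -> str:
        """Write a checkpoint for the given brain at the given step and return its path."""
        ...

    @abc.abstractmethod
    def export(self, output_filepath: str, brain_name: str) -> None:
        """Saves the serialized model, given a path and brain name."""
        ...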

ml-agents/mlagents/trainers/saver/tf_saver.py (11 changes)


 with self.policy.graph.as_default():
 self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
-def save_checkpoint(self, behavior_name: str, step: int) -> str:
-checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
+def save_checkpoint(self, brain_name: str, step: int) -> str:
+checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
 # Save the TF checkpoint and graph definition
 if self.graph:
 with self.graph.as_default():

 self.graph, self.model_path, "raw_graph_def.pb", as_text=False
 )
 # also save the policy so we have optimized model files for each checkpoint
-self.export(checkpoint_path, behavior_name)
+self.export(checkpoint_path, brain_name)
-def export(self, output_filepath: str, behavior_name: str) -> None:
+def export(self, output_filepath: str, brain_name: str) -> None:
-self.model_path, output_filepath, behavior_name, self.graph, self.sess
+self.model_path, output_filepath, brain_name, self.graph, self.sess
 )
 def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None:

 self._load_graph(policy, self.model_path, reset_global_steps=reset_steps)
 else:
 policy.initialize()
 TFPolicy.broadcast_global_variables(0)
 def _load_graph(
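The save path in this hunk composes three pieces: a per-brain checkpoint name, a tf.train.Saver save of the session weights, and a tf.train.write_graph dump of the graph definition. A rough sketch of how those calls fit together, assuming TF1-style APIs (tensorflow.compat.v1); the helper name and the ".ckpt" suffix are assumptions, not shown in the diff:

import os
import tensorflow.compat.v1 as tf

def save_checkpoint_sketch(tf_saver, sess, graph, model_path, brain_name, step):
    # Checkpoints are named "<brain_name>-<step>" inside the model directory.
    checkpoint_path = os.path.join(model_path, f"{brain_name}-{step}")
    # Save the session weights (".ckpt" suffix assumed here) ...
    tf_saver.save(sess, f"{checkpoint_path}.ckpt")
    # ... and the raw graph definition alongside them, for later freezing/export.
    tf.train.write_graph(graph, model_path, "raw_graph_def.pb", as_text=False)
    return checkpoint_path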

ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)


 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
-from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
+from mlagents.trainers.ppo.optimizer import PPOOptimizer
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.agent_processor import AgentManagerQueue
 from mlagents.trainers.tests import mock_brain as mb

ml-agents/mlagents/trainers/tests/test_reward_signals.py (2 changes)


 import mlagents.trainers.tests.mock_brain as mb
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.sac.optimizer import SACOptimizer
-from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
+from mlagents.trainers.ppo.optimizer import PPOOptimizer
 from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
 from mlagents.trainers.settings import (
 GAILSettings,

ml-agents/mlagents/trainers/tests/test_saver.py (2 changes)


 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.tests import mock_brain as mb
 from mlagents.trainers.tests.test_nn_policy import create_policy_mock
-from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
+from mlagents.trainers.ppo.optimizer import PPOOptimizer
 def test_register(tmp_path):

ml-agents/mlagents/trainers/tf/models.py (4 changes)


 :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
 :param action_size: A list containing the number of possible actions for each branch
 :return: The action output dimension [batch_size, num_branches], the concatenated
-normalized log_probs (after softmax)
-and the concatenated normalized log log_probs
+normalized probs (after softmax)
+and the concatenated normalized log probs
 """
 branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
 raw_probs = [
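The docstring edit above is cosmetic, but the surrounding code is the masked-softmax logic it describes: the action mask is broken into branches, each branch's probabilities are multiplied by its mask, and the result is renormalized. A rough NumPy sketch of just that masking step (the real code builds TF ops and also samples the output actions; the function name here is only for illustration):

import numpy as np

def masked_branch_probs(logits, action_masks, action_size):
    # Split the concatenated logits and masks into one block per discrete branch.
    splits = np.cumsum(action_size)[:-1]
    probs, log_probs = [], []
    for branch_logits, branch_mask in zip(
        np.split(logits, splits, axis=1), np.split(action_masks, splits, axis=1)
    ):
        # Softmax within the branch, zero out masked actions, then renormalize.
        exp = np.exp(branch_logits - branch_logits.max(axis=1, keepdims=True))
        raw = (exp / exp.sum(axis=1, keepdims=True)) * branch_mask
        norm = raw / raw.sum(axis=1, keepdims=True)
        probs.append(norm)
        log_probs.append(np.log(norm + 1e-10))
    # Concatenate back to [batch_size, total_number_of_actions], mirroring the TF version.
    return np.concatenate(probs, axis=1), np.concatenate(log_probs, axis=1)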

/ml-agents/mlagents/trainers/ppo/optimizer_tf.py → /ml-agents/mlagents/trainers/ppo/optimizer.py
