
Merge pull request #4334 from Unity-Technologies/global-variables

Adding rank to ml-agents
GitHub, 4 years ago
Current commit: 76deba36
4 changed files with 41 additions and 1 deletion
  1. ml-agents/mlagents/trainers/policy/tf_policy.py (15 changes)
  2. ml-agents/mlagents/trainers/stats.py (6 changes)
  3. ml-agents/mlagents/trainers/trainer_controller.py (8 changes)
  4. ml-agents/mlagents/tf_utils/globals.py (13 changes)

ml-agents/mlagents/trainers/policy/tf_policy.py (15 changes)


from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Callable
import numpy as np
from distutils.version import LooseVersion

GaussianDistribution,
MultiCategoricalDistribution,
)
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

Contains a learning model, and the necessary
functions to save/load models and create the input placeholders.
"""
# Callback function used at the start of training to synchronize weights.
# By default, this does nothing.
# If this needs to be used, it should be done from outside ml-agents.
broadcast_global_variables: Callable[[int], None] = lambda root_rank: None
def __init__(
self,

self.grads = None
self.update_batch: Optional[tf.Operation] = None
self.trainable_variables: List[tf.Variable] = []
self.rank = get_rank()
if create_tf_graph:
self.create_tf_graph()

self._load_graph(self.model_path, reset_global_steps=reset_steps)
else:
self._initialize_graph()
# broadcast initial weights from worker-0
TFPolicy.broadcast_global_variables(0)
def get_weights(self):
with self.graph.as_default():

:param output_filepath: path (without suffix) for the model file(s)
:param settings: SerializationSettings for how to save the model.
"""
# save model if there is only one worker or
# only on worker-0 if there are multiple workers
if self.rank is not None and self.rank != 0:
return
export_policy_model(output_filepath, settings, self.graph, self.sess)
def update_normalization(self, vector_obs: np.ndarray) -> None:
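The broadcast hook above is deliberately a no-op class attribute: ml-agents only calls it, it never implements it. A distributed-training wrapper that launches several trainer processes would be expected to replace it before any policy is created. A minimal sketch of such an override, with the actual weight synchronization left as a placeholder (the real transport, e.g. an MPI or Horovod broadcast, lives outside ml-agents and is an assumption here):

from mlagents.trainers.policy.tf_policy import TFPolicy

def _sync_initial_weights(root_rank: int) -> None:
    # Placeholder: broadcast all TF global variables from `root_rank` to the
    # other workers (for example via a Horovod/MPI broadcast op). What matters
    # is the signature: TFPolicy calls TFPolicy.broadcast_global_variables(0)
    # right after _initialize_graph(), so every worker starts from worker-0's weights.
    pass

# Replace the default no-op before any TFPolicy is constructed.
TFPolicy.broadcast_global_variables = _sync_initial_weights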

ml-agents/mlagents/trainers/stats.py (6 changes)


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from mlagents.tf_utils import tf, generate_session_config
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

# If self-play, we want to print ELO as well as reward
self.self_play = False
self.self_play_team = -1
self.rank = get_rank()
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int

log_info.append(f"Time Elapsed: {elapsed_time:0.3f} s")
if "Environment/Cumulative Reward" in values:
stats_summary = values["Environment/Cumulative Reward"]
if self.rank is not None:
log_info.append(f"Rank: {self.rank}")
if self.self_play and "Self-play/ELO" in values:
elo_stats = values["Self-play/ELO"]
log_info.append(f"ELO: {elo_stats.mean:0.3f}")
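The only user-visible effect of this change is an extra "Rank" fragment in the periodic console summary when a rank has been set. A rough illustration with made-up values, assuming the fragments end up joined into a single log line:

log_info = ["Behavior. Step: 50000", "Time Elapsed: 120.250 s", "Mean Reward: 1.000"]
rank = 1  # hypothetical value of get_rank() on worker 1 of a multi-worker job
if rank is not None:
    log_info.append(f"Rank: {rank}")
print(". ".join(log_info))
# Behavior. Step: 50000. Time Elapsed: 120.250 s. Mean Reward: 1.000. Rank: 1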

ml-agents/mlagents/trainers/trainer_controller.py (8 changes)


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
from mlagents.tf_utils.globals import get_rank
class TrainerController:

self.kill_trainers = False
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
self.rank = get_rank()
@timed
def _save_models(self):

if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
self.logger.info("Saved Model")

"""
Saves models for all trainers.
"""
if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()

ml-agents/mlagents/tf_utils/globals.py (13 changes)


from typing import Optional
_rank: Optional[int] = None
def get_rank() -> Optional[int]:
"""
Returns the rank (in the MPI sense) of the current node.
For local training, this will always be None.
If this needs to be used, it should be done from outside ml-agents.
:return:
"""
return _rank
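globals.py only exposes the getter; nothing inside ml-agents ever assigns _rank, so get_rank() stays None for ordinary local training and every worker-0 guard above is effectively skipped. A distributed launcher would set the value from outside before trainers are built. A minimal sketch, assuming direct assignment to the module attribute is the intended mechanism (no setter is provided):

import mlagents.tf_utils.globals as mla_globals
from mlagents.tf_utils.globals import get_rank

print(get_rank())      # None -> local training; models are always saved/exported

# Hypothetical assignment made by an external multi-worker launcher,
# e.g. from MPI.COMM_WORLD.Get_rank() if mpi4py were used.
mla_globals._rank = 0
print(get_rank())      # 0 -> this process is worker-0, the only one that writes models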