
Multiprocessing but Stats are quite broken

/develop/multiprocess
Ervin Teng, 4 years ago
Current commit
3b15cc32
28 changed files with 6,357 additions and 47 deletions
  1. Project/ProjectSettings/EditorBuildSettings.asset (5)
  2. Project/ProjectSettings/GraphicsSettings.asset (2)
  3. Project/ProjectSettings/UnityConnectSettings.asset (2)
  4. ml-agents/mlagents/trainers/agent_processor.py (30)
  5. ml-agents/mlagents/trainers/learn.py (6)
  6. ml-agents/mlagents/trainers/ppo/trainer.py (2)
  7. ml-agents/mlagents/trainers/settings.py (2)
  8. ml-agents/mlagents/trainers/stats.py (9)
  9. ml-agents/mlagents/trainers/trainer/rl_trainer.py (2)
  10. ml-agents/mlagents/trainers/trainer_controller.py (74)
  11. Project/Assets/ML-Agents/Examples/CubeWars.meta (8)
  12. Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 1.onnx (1001)
  13. Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 1.onnx.meta (13)
  14. Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 2.onnx (1001)
  15. Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 2.onnx.meta (14)
  16. Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump-3999970.nn (1001)
  17. Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump-3999970.nn.meta (11)
  18. Project/Recordings/1v1.mp4 (1001)
  19. testdir.nn (4)
  20. testdir/checkpoint (2)
  21. testdir/frozen_graph_def.pb (180)
  22. testdir/last_replay_buffer.hdf5 (7)
  23. testdir/raw_graph_def.pb (1001)
  24. testdir/test-0.ckpt.data-00000-of-00001 (12)
  25. testdir/test-0.ckpt.index (9)
  26. testdir/test-0.ckpt.meta (1001)
  27. testdir/test-0.nn (4)

5
Project/ProjectSettings/EditorBuildSettings.asset


EditorBuildSettings:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Scenes: []
m_Scenes:
- enabled: 1
path: Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity
guid: 25c0c9e81e55c4e129e1a5c0ac254100
m_configObjects: {}

2
Project/ProjectSettings/GraphicsSettings.asset


- {fileID: 10770, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 10783, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 16000, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 16001, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 16001, guid: 0000000000000000f000000000000000, type: 0}
m_PreloadedShaders: []
m_SpritesDefaultMaterial: {fileID: 10754, guid: 0000000000000000f000000000000000,
type: 0}

2
Project/ProjectSettings/UnityConnectSettings.asset


UnityConnectSettings:
m_ObjectHideFlags: 0
serializedVersion: 1
m_Enabled: 1
m_Enabled: 0
m_TestMode: 0
m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

30
ml-agents/mlagents/trainers/agent_processor.py


from typing import List, Dict, TypeVar, Generic, Tuple, Any, Union
from collections import defaultdict, Counter
import queue
from torch import multiprocessing
from mlagents_envs.base_env import (
DecisionSteps,

def __init__(
self,
policy: Policy,
behavior_id: str,
stats_reporter: StatsReporter,
max_trajectory_length: int = sys.maxsize,

self.last_take_action_outputs: Dict[str, ActionInfoOutputs] = {}
# Note: In the future this policy reference will be the policy of the env_manager and not the trainer.
# We can in that case just grab the action from the policy rather than having it passed in.
self.policy = policy
self.policy = None
self.episode_steps: Counter = Counter()
self.episode_rewards: Dict[str, float] = defaultdict(float)
self.stats_reporter = stats_reporter

# If the ID doesn't have a last step result, the agent just reset,
# don't store the action.
if _gid in self.last_step_result:
if "action" in take_action_outputs:
if "action" in take_action_outputs and self.policy is not None:
self.policy.save_previous_action(
[_gid], take_action_outputs["action"]
)

# This state is the consequence of a past action
if stored_decision_step is not None and stored_take_action_outputs is not None:
obs = stored_decision_step.obs
if self.policy.use_recurrent:
if self.policy is not None and self.policy.use_recurrent:
memory = self.policy.retrieve_memories([global_id])[0, :]
else:
memory = None

action = stored_take_action_outputs["action"][idx]
if self.policy.use_continuous_act:
if self.policy is not None and self.policy.use_continuous_act:
prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
if self.policy is not None:
prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
experience = AgentExperience(
obs=obs,
reward=step.reward,

self._safe_delete(self.last_step_result, global_id)
self._safe_delete(self.episode_steps, global_id)
self._safe_delete(self.episode_rewards, global_id)
self.policy.remove_previous_action([global_id])
self.policy.remove_memories([global_id])
if self.policy is not None:
self.policy.remove_previous_action([global_id])
self.policy.remove_memories([global_id])
def _safe_delete(self, my_dictionary: Dict[Any, Any], key: Any) -> None:
"""

pass
def __init__(self, behavior_id: str, maxlen: int = 0):
def __init__(self, behavior_id: str, maxlen: int = 100):
self._queue: queue.Queue = queue.Queue(maxsize=maxlen)
self._queue: multiprocessing.Queue = multiprocessing.Queue(maxsize=maxlen)
self._behavior_id = behavior_id
@property

Returns the approximate size of the queue. Note that values may differ
depending on the underlying queue implementation.
"""
return self._queue.qsize()
try:
return self._queue.qsize()
except NotImplementedError:
return self._maxlen
def empty(self) -> bool:
return self._queue.empty()

def __init__(
self,
policy: Policy,
super().__init__(policy, behavior_id, stats_reporter, max_trajectory_length)
super().__init__(behavior_id, stats_reporter, max_trajectory_length)
trajectory_queue_len = 20 if threaded else 0
self.trajectory_queue: AgentManagerQueue[Trajectory] = AgentManagerQueue(
self.behavior_id, maxlen=trajectory_queue_len
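
The queue swap above is the heart of this file's change: AgentManagerQueue is now backed by torch.multiprocessing.Queue so trajectories and policies can cross a process boundary, and qsize() gains a fallback because multiprocessing queues do not implement it on every platform (macOS in particular). A minimal sketch of the same wrapper, using the illustrative name SharedAgentQueue rather than the real class:

import queue
from torch import multiprocessing

class SharedAgentQueue:
    """Cross-process FIFO with a best-effort qsize(), mirroring AgentManagerQueue."""

    class Empty(Exception):
        """Raised when get_nowait() finds nothing to return."""

    def __init__(self, behavior_id: str, maxlen: int = 100):
        self._maxlen = maxlen
        self._behavior_id = behavior_id
        self._queue: multiprocessing.Queue = multiprocessing.Queue(maxsize=maxlen)

    def qsize(self) -> int:
        # multiprocessing.Queue.qsize() raises NotImplementedError on macOS,
        # so report the configured maximum as a conservative upper bound.
        try:
            return self._queue.qsize()
        except NotImplementedError:
            return self._maxlen

    def empty(self) -> bool:
        return self._queue.empty()

    def put(self, item) -> None:
        self._queue.put(item)

    def get_nowait(self):
        try:
            return self._queue.get_nowait()
        except queue.Empty:
            raise self.Empty(f"Queue for {self._behavior_id} is empty")

Anything put on such a queue has to be picklable, which is what drives several of the smaller changes below (defaultdict(int), creating the policy inside the child process rather than passing a live object across).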

6
ml-agents/mlagents/trainers/learn.py


# # Unity ML-Agents Toolkit
from mlagents import torch_utils
from torch.multiprocessing import Manager
import yaml
import os

)
gauge_write = GaugeWriter()
console_writer = ConsoleWriter()
# Share writers across all threads
manager = Manager()
StatsReporter.writers = manager.list()
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(gauge_write)
StatsReporter.add_writer(console_writer)
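
StatsReporter.writers is a class-level list, so a separate trainer process would normally see its own private copy; replacing it with a multiprocessing.Manager().list() turns it into a proxy to one shared list living in the manager's server process. A small, self-contained illustration of that pattern (the writer strings are placeholders, not the real writer objects):

from multiprocessing import Manager, Process

def worker(shared_writers) -> None:
    # shared_writers is a proxy; appends here land in the manager's server
    # process and are visible to the parent as well.
    shared_writers.append("gauge-writer-added-by-child")

if __name__ == "__main__":
    manager = Manager()
    writers = manager.list()            # shared, picklable list proxy
    writers.append("console-writer")    # registered in the main process
    child = Process(target=worker, args=(writers,))
    child.start()
    child.join()
    print(list(writers))                # both entries are visible here

Because the proxy has to be handed to the child explicitly under the spawn start method, the trainer_controller.py changes below also pass StatsReporter.writers as an argument to the trainer process.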

2
ml-agents/mlagents/trainers/ppo/trainer.py


)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
for queue in self.policy_queues:
queue.put(self.policy)
self.optimizer = self.create_ppo_optimizer()
for _reward_signal in self.optimizer.reward_signals.keys():
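
With the trainer moving into its own process, add_policy no longer relies on the controller wiring the policy into the environment side; it pushes the newly created policy onto every registered policy queue itself. A toy version of that publish step (PolicyPublisher is an illustrative stand-in, not the real PPOTrainer):

import queue

class PolicyPublisher:
    """Illustrative: mirrors how add_policy fans a new policy out to its queues."""

    def __init__(self):
        self.policy = None
        self.policy_queues = []

    def publish_policy_queue(self, policy_queue) -> None:
        self.policy_queues.append(policy_queue)

    def add_policy(self, behavior_id: str, policy) -> None:
        self.policy = policy
        for q in self.policy_queues:
            q.put(policy)  # env-side consumers pick this up asynchronously

publisher = PolicyPublisher()
env_side_queue: queue.Queue = queue.Queue()
publisher.publish_policy_queue(env_side_queue)
publisher.add_policy("Tennis?team=0", {"weights": "initial"})
print(env_side_queue.get_nowait())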

2
ml-agents/mlagents/trainers/settings.py


class DefaultTrainerDict(collections.defaultdict):
def __init__(self, *args):
super().__init__(TrainerSettings, *args)
super().__init__(None, *args)
def __missing__(self, key: Any) -> "TrainerSettings":
if TrainerSettings.default_override is not None:

9
ml-agents/mlagents/trainers/stats.py


import abc
import os
import time
from threading import RLock
from torch.multiprocessing import RLock
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge

log_info.append("No episode was completed since last summary")
log_info.append(is_training)
logger.info(". ".join(log_info) + ".")
print(". ".join(log_info) + ".")
def add_property(
self, category: str, property_type: StatsPropertyType, value: Any

num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
def change_writers(self, stats_writers: List) -> None:
"""
Changes out the static writers to something else. Used for multiprocessing.
"""
StatsReporter.writers = stats_writers
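
change_writers exists for the spawned side of that hand-off: a freshly started trainer process begins with an empty StatsReporter.writers, so the first thing trainer_update_func does is swap in the writer list passed from the parent. A rough sketch of the mechanism, using a placeholder PrintWriter instead of the real Tensorboard/Gauge/Console writers:

from typing import Any, List

class PrintWriter:
    """Illustrative stand-in for ConsoleWriter/TensorboardWriter."""

    def write_stats(self, category: str, key: str, value: float) -> None:
        print(f"{category}/{key}: {value}")

class MiniStatsReporter:
    writers: List[Any] = []  # class-level, shared within a single process

    def __init__(self, category: str):
        self.category = category

    def change_writers(self, stats_writers: List[Any]) -> None:
        # Replace the static writer list wholesale; the child process calls
        # this with the writer list handed over from the parent.
        MiniStatsReporter.writers = stats_writers

    def add_stat(self, key: str, value: float) -> None:
        for writer in MiniStatsReporter.writers:
            writer.write_stats(self.category, key, value)

reporter = MiniStatsReporter("Environment")
reporter.change_writers([PrintWriter()])
reporter.add_stat("Cumulative Reward", 1.0)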

2
ml-agents/mlagents/trainers/trainer/rl_trainer.py


# of what reward signals are actually present.
self.cumulative_returns_since_policy_update: List[float] = []
self.collected_rewards: Dict[str, Dict[str, int]] = {
"environment": defaultdict(lambda: 0)
"environment": defaultdict(int)
}
self.update_buffer: AgentBuffer = AgentBuffer()
self._stats_reporter.add_property(
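
Replacing defaultdict(lambda: 0) with defaultdict(int) looks cosmetic but matters once objects have to cross a process boundary: a lambda cannot be pickled, while the builtin int can, and both produce 0 for a missing key. A quick illustration (plain pickle, which is what multiprocessing uses under the hood):

import pickle
from collections import defaultdict

picklable = defaultdict(int)          # int() == 0, safe to send to a child process
picklable["environment"] += 1
restored = pickle.loads(pickle.dumps(picklable))
print(restored["environment"], restored["missing-key"])  # 1 0

try:
    pickle.dumps(defaultdict(lambda: 0))  # fails: lambdas cannot be pickled
except (pickle.PicklingError, AttributeError) as err:
    print(f"lambda factory rejected: {err}")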

74
ml-agents/mlagents/trainers/trainer_controller.py


"""Launches trainers for each External Brains in a Unity Environment."""
import os
import threading
from mlagents.trainers.stats import StatsReporter
from torch import multiprocessing
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep

self.ghost_controller = self.trainer_factory.ghost_controller
self.registered_behavior_ids: Set[str] = set()
self.trainer_threads: List[threading.Thread] = []
self.trainer_threads: List[multiprocessing.Process] = []
self.kill_trainers = False
np.random.seed(training_seed)
if tf_utils.is_available():

parsed_behavior_id = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
brain_name = parsed_behavior_id.brain_name
trainerthread = None
if brain_name in self.trainers:
if brain_name in self.trainer_threads:
trainer = self.trainer_factory.generate(brain_name)
self.trainers[brain_name] = trainer
if trainer.threaded:
# trainer = self.trainer_factory.generate(brain_name)
# self.trainers[brain_name] = trainer
if True:
trainerthread = threading.Thread(
target=self.trainer_update_func, args=(trainer,), daemon=True
trainer = self.trainer_factory.generate(name_behavior_id)
agent_manager = AgentManager(
name_behavior_id,
StatsReporter(parsed_behavior_id.brain_name),
16,
threaded=True,
self.trainer_threads.append(trainerthread)
policy = trainer.create_policy(
parsed_behavior_id, env_manager.training_behaviors[name_behavior_id]
)
trainer.add_policy(parsed_behavior_id, policy)
agent_manager = AgentManager(
policy,
name_behavior_id,
trainer.stats_reporter,
trainer.parameters.time_horizon,
threaded=trainer.threaded,
)
env_manager.set_agent_manager(name_behavior_id, agent_manager)
env_manager.set_policy(name_behavior_id, policy)
self.brain_name_to_identifier[brain_name].add(name_behavior_id)
trainerthread = multiprocessing.Process(
target=self.trainer_update_func,
args=(
trainer,
name_behavior_id,
env_manager.training_behaviors[name_behavior_id],
agent_manager.policy_queue,
agent_manager.trajectory_queue,
StatsReporter.writers,
),
daemon=True,
)
trainer.publish_policy_queue(agent_manager.policy_queue)
trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
env_manager.set_agent_manager(name_behavior_id, agent_manager)
self.brain_name_to_identifier[brain_name].add(name_behavior_id)
# env_manager.set_policy(name_behavior_id, policy)
self.trainer_threads.append(trainerthread)
# Only start new trainers
if trainerthread is not None:

)
merge_gauges(thread_timer_stack.gauges)
def trainer_update_func(self, trainer: Trainer) -> None:
def trainer_update_func(
self,
trainer,
name_behavior_id,
behavior_spec,
policy_queue,
trajectory_queue,
stats_writers,
) -> None:
# trainer = self.trainer_factory.generate(name_behavior_id)
parsed_behavior_id = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
trainer.stats_reporter.change_writers(stats_writers)
policy = trainer.create_policy(parsed_behavior_id, behavior_spec)
trainer.publish_policy_queue(policy_queue)
trainer.subscribe_trajectory_queue(trajectory_queue)
trainer.add_policy(parsed_behavior_id, policy)
while not self.kill_trainers:
with hierarchical_timer("trainer_advance"):
trainer.advance()
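
The controller-side change is the bulk of the commit: instead of running trainer.advance() on a thread, it packs everything the trainer needs (behavior id, behavior spec, both queues, and the shared stats writers) into a torch.multiprocessing.Process, and the policy is built inside the child rather than shared as a live object. The sketch below shows that spawn pattern under those assumptions; TrainerStub and the queue payloads are invented for illustration and are not the real Trainer API:

from queue import Empty
from torch import multiprocessing

class TrainerStub:
    """Illustrative stand-in for a trainer; not the real ML-Agents Trainer API."""

    def __init__(self, brain_name: str):
        self.brain_name = brain_name

    def create_policy(self, behavior_spec):
        return {"brain": self.brain_name, "spec": behavior_spec}

    def advance(self, trajectory_queue, policy_queue) -> bool:
        # Consume one trajectory if one arrives, then republish the "updated" policy.
        try:
            trajectory = trajectory_queue.get(timeout=1.0)
        except Empty:
            return False
        policy_queue.put({"brain": self.brain_name, "trained_on": trajectory})
        return True

def trainer_update_func(trainer, behavior_spec, policy_queue, trajectory_queue) -> None:
    # Everything stateful is built inside the child process; only picklable
    # arguments cross the process boundary.
    policy = trainer.create_policy(behavior_spec)
    policy_queue.put(policy)  # initial policy for the env-side agent manager
    while trainer.advance(trajectory_queue, policy_queue):
        pass

if __name__ == "__main__":
    policy_q: multiprocessing.Queue = multiprocessing.Queue()
    trajectory_q: multiprocessing.Queue = multiprocessing.Queue()
    trajectory_q.put(["step-0", "step-1"])  # pretend the environment produced a trajectory
    proc = multiprocessing.Process(
        target=trainer_update_func,
        args=(TrainerStub("Tennis"), {"obs_size": 8}, policy_q, trajectory_q),
        daemon=True,
    )
    proc.start()
    print(policy_q.get())  # initial policy published by the child
    print(policy_q.get())  # policy after consuming the queued trajectory
    proc.join(timeout=5)

The real diff additionally hands StatsReporter.writers to the child and calls change_writers there; that step is omitted from this sketch for brevity.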

8
Project/Assets/ML-Agents/Examples/CubeWars.meta


fileFormatVersion: 2
guid: 624ff327b14154c41869c6ecdcdcc167
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 1.onnx
File diff too large to display

13
Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 1.onnx.meta


fileFormatVersion: 2
guid: dda86ccf589e74071b005ab94b6764bb
ScriptedImporter:
fileIDToRecycleName:
2186277476908879412: ImportLogs
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
optimizeModel: 1
forceArbitraryBatchSize: 1
treatErrorsAsWarnings: 0

1001
Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 2.onnx
File diff too large to display

14
Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs 2.onnx.meta


fileFormatVersion: 2
guid: 5858c5051d73d4168bdb96071e09185e
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
optimizeModel: 1
forceArbitraryBatchSize: 1
treatErrorsAsWarnings: 0

1001
Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump-3999970.nn
File diff too large to display

11
Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump-3999970.nn.meta


fileFormatVersion: 2
guid: 327490f9c2aca401a85b04307d24367a
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
Project/Recordings/1v1.mp4
File diff too large to display

4
testdir.nn


(Binary model file; raw contents omitted.)

2
testdir/checkpoint


model_checkpoint_path: "test-0.ckpt"
all_model_checkpoint_paths: "test-0.ckpt"

180
testdir/frozen_graph_def.pb


(Serialized TensorFlow GraphDef; raw bytes omitted. The readable node names trace the exported policy graph: vector_observation -> hidden_0 -> mu / log_std -> clip_by_value -> Exp -> random_normal -> Tanh -> action.)

7
testdir/last_replay_buffer.hdf5
File diff too large to display

1001
testdir/raw_graph_def.pb
File diff too large to display

12
testdir/test-0.ckpt.data-00000-of-00001


(Binary TensorFlow checkpoint data; contents omitted.)

9
testdir/test-0.ckpt.index


(Binary TensorFlow checkpoint index; contents omitted.)

1001
testdir/test-0.ckpt.meta
File diff too large to display

4
testdir/test-0.nn


(Binary model file; raw contents omitted.)