浏览代码

Revert "action enc"

This reverts commit 9acb5f846a77a47d14694cb1271e2a370f395b6a.
/develop/bisim-review
Andrew Cohen 4 年前
当前提交
12f3786c
共有 4 个文件被更改,包括 25 次插入和 24 次删除
  1. 4
      config/ppo_transfer/3DBall.yaml
  2. 4
      config/ppo_transfer/3DBallHard.yaml
  3. 6
      config/ppo_transfer/3DBallHardTransfer.yaml
  4. 35
      ml-agents/mlagents/trainers/policy/transfer_policy.py

4
config/ppo_transfer/3DBall.yaml


encoder_layers: 1
action_layers: 1
policy_layers: 1
forward_layers: 0
forward_layers: 1
action_feature_size: 16
action_feature_size: 32
reuse_encoder: true
in_epoch_alter: false
use_op_buffer: false

4
config/ppo_transfer/3DBallHard.yaml


encoder_layers: 1
action_layers: 1
policy_layers: 1
forward_layers: 0
forward_layers: 1
action_feature_size: 16
action_feature_size: 32
reuse_encoder: true
in_epoch_alter: false
use_op_buffer: false

6
config/ppo_transfer/3DBallHardTransfer.yaml


encoder_layers: 1
action_layers: 1
policy_layers: 1
forward_layers: 0
forward_layers: 1
action_feature_size: 16
action_feature_size: 32
reuse_encoder: true
in_epoch_alter: false
use_op_buffer: false

train_model: false
load_model: true
train_action: false
load_action: true
load_action: false
train_policy: true
load_policy: false
train_value: true

35
ml-agents/mlagents/trainers/policy/transfer_policy.py


reuse_encoder,
)
self.action_encoder = self._create_action_encoder(
self.current_action,
self.h_size,
self.action_feature_size,
action_layers,
)
self.action_encoder = self.current_action # self._create_action_encoder(
# self.current_action,
# self.h_size,
# self.action_feature_size,
# action_layers,
# )
if not reuse_encoder:
self.targ_encoder = tf.stop_gradient(self.targ_encoder)

encoding_checkpoint = os.path.join(self.model_path, f"encoding.ckpt")
encoding_saver.save(self.sess, encoding_checkpoint)
action_vars = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, "action_enc"
)
action_saver = tf.train.Saver(action_vars)
action_checkpoint = os.path.join(self.model_path, f"action_enc.ckpt")
action_saver.save(self.sess, action_checkpoint)
# action_vars = tf.get_collection(
# tf.GraphKeys.TRAINABLE_VARIABLES, "action_enc"
# )
# action_saver = tf.train.Saver(action_vars)
# action_checkpoint = os.path.join(self.model_path, f"action_enc.ckpt")
# action_saver.save(self.sess, action_checkpoint)
latent_vars = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, "encoding/latent"

:param encoded_state: Tensor corresponding to encoded current state.
:param encoded_next_state: Tensor corresponding to encoded next state.
"""
if not self.transfer:
encoded_state = tf.stop_gradient(encoded_state)
if not self.transfer:
hidden = tf.stop_gradient(hidden)
for i in range(forward_layers):
hidden = tf.layers.dense(
hidden,

forward_layers: int,
separate_train: bool = False,
):
if not self.transfer:
encoded_state = tf.stop_gradient(encoded_state)
if not self.transfer:
hidden = tf.stop_gradient(hidden)
for i in range(forward_layers):
hidden = tf.layers.dense(

正在加载...
取消
保存