
transfer path

/develop/bisim-review
yanchaosun committed 4 years ago
Current commit: ad95032b
4 files changed, 45 insertions and 19 deletions
  1. config/ppo_transfer/CrawlerStatic.yaml (3 changes)
  2. ml-agents/mlagents/trainers/learn.py (7 changes)
  3. ml-agents/mlagents/trainers/policy/transfer_policy.py (51 changes)
  4. ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (3 changes)

config/ppo_transfer/CrawlerStatic.yaml (3 changes)


  encoder_layers: 2
  policy_layers: 1
  value_layers: 1
- feature_size: 256
+ feature_size: 128
  reuse_encoder: true
  network_settings:
    normalize: true
    hidden_units: 512
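These trainer settings surface later in this commit as fields on the hyperparameters object that ppo_transfer/optimizer.py passes into create_tf_graph. A minimal sketch of how the keys above might sit in a full trainer config and be read back, assuming the usual behaviors/<behavior name>/hyperparameters nesting of ml-agents configs (the trainer_type value is likewise an assumption):

import yaml  # PyYAML

config_text = """
behaviors:
  CrawlerStatic:
    trainer_type: ppo_transfer      # assumed name, matching the config folder
    hyperparameters:
      encoder_layers: 2
      policy_layers: 1
      value_layers: 1
      feature_size: 128
      reuse_encoder: true
    network_settings:
      normalize: true
      hidden_units: 512
"""

settings = yaml.safe_load(config_text)
hp = settings["behaviors"]["CrawlerStatic"]["hyperparameters"]
print(hp["encoder_layers"], hp["feature_size"], hp["reuse_encoder"])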

ml-agents/mlagents/trainers/learn.py (7 changes)


    except Exception:
        print("\n\n\tUnity Technologies\n")
    print(get_version_string())
    print(options)
    print("transfer:", options.behaviors["3DBall"].transfer)
    if options.debug:
        log_level = logging_util.DEBUG

    if options.env_settings.seed == -1:
        run_seed = np.random.randint(0, 10000)
    run_training(run_seed, options)
-   os.system('mlagents-learn config/ppo_transfer/3DBallHard.yaml --run-id=hardball-transfer --env=/unity-volume/3dballhard --num-envs=4 --force')
+   os.system('export SCENE_NAME=3dballhard')
+   os.system('mlagents-learn config/ppo_transfer/3DBallHard.yaml --run-id=hardball-transfer --env=/unity-volume/executable --num-envs=4 --force')
+   # os.system('mlagents-learn config/ppo_transfer/3DBallHard.yaml --run-id=hardball-transfer --env=/unity-volume/3dballhard --num-envs=4 --force')
def main():
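One caveat with the launch block above: os.system('export SCENE_NAME=3dballhard') runs in its own shell, so the exported variable never reaches the mlagents-learn invocation on the next line. A hedged sketch of the same launch with the variable set in the parent process instead, reusing the command-line arguments from the diff:

import os
import subprocess

# Set SCENE_NAME in this process so the child mlagents-learn run inherits it;
# an 'export' issued through os.system() only affects that short-lived shell.
os.environ["SCENE_NAME"] = "3dballhard"
subprocess.run(
    [
        "mlagents-learn",
        "config/ppo_transfer/3DBallHard.yaml",
        "--run-id=hardball-transfer",
        "--env=/unity-volume/executable",
        "--num-envs=4",
        "--force",
    ],
    check=True,
)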

ml-agents/mlagents/trainers/policy/transfer_policy.py (51 changes)


    def create_tf_graph(self,
        encoder_layers = 1,
        policy_layers = 1,
+       forward_layers = 1,
+       inverse_layers = 1,
        feature_size = 16,
        transfer=False,
        separate_train=False,

        if not reuse_encoder:
            self.targ_encoder = tf.stop_gradient(self.targ_encoder)
            self._create_hard_copy()
        if self.inverse_model:
+           with tf.variable_scope("inverse"):
+               self.create_inverse_model(self.encoder, self.targ_encoder, inverse_layers)
-           with tf.variable_scope("inverse"):
-               self.create_inverse_model(self.encoder, self.targ_encoder)
-       self.create_forward_model(self.encoder, self.targ_encoder)
+       self.create_forward_model(self.encoder, self.targ_encoder, forward_layers)
        # if var_predict:
        #     self.predict_distribution, self.predict = self._create_var_world_model(
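About the reuse_encoder branch in this hunk: when the encoder is not shared, the target encoder is cut out of the gradient path with tf.stop_gradient and kept in sync through self._create_hard_copy(), which is not shown in this commit. A hypothetical sketch of the usual hard-copy pattern (assign every online-encoder variable to its target counterpart); the scope names are made up for illustration and may not match transfer_policy.py:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # graph-mode TF1 style, as in the TF trainers

def create_hard_copy_op(source_scope="encoding", target_scope="target_enc"):
    # Pair up the online and target encoder variables by creation order
    # and build one assign op per pair; run the ops in a session to sync.
    source_vars = tf.trainable_variables(scope=source_scope)
    target_vars = tf.trainable_variables(scope=target_scope)
    return [tf.assign(t, s) for s, t in zip(source_vars, target_vars)]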

        return encoded_state, encoded_next_state

    def create_inverse_model(
-       self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor
+       self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor, inverse_layers: int
    ) -> None:
        """
        Creates inverse model TensorFlow ops for Curiosity module.

        """
        combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
        # hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
        hidden = combined_input
+       for i in range(inverse_layers - 1):
+           hidden = tf.layers.dense(
+               hidden,
+               self.h_size,
+               activation=ModelUtils.swish,
+               name="hidden_{}".format(i),
+               kernel_initializer=tf.initializers.variance_scaling(1.0),
+           )
-           combined_input, self.act_size[0], activation=None
+           hidden, self.act_size[0], activation=None, name="pred_action"
        )
        squared_difference = tf.reduce_sum(
            tf.squared_difference(pred_action, self.current_action), axis=1

        pred_action = tf.concat(
            [
                tf.layers.dense(
-                   combined_input, self.act_size[i], activation=tf.nn.softmax
+                   hidden, self.act_size[i], activation=tf.nn.softmax, name="pred_action"
                )
                for i in range(len(self.act_size))
            ],

        )
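Pieced together, the inverse model now takes an inverse_layers argument and stacks inverse_layers - 1 swish hidden layers on the concatenated state encodings before predicting the action; the pred_action = tf.layers.dense( opener and the continuous/discrete branching sit outside this excerpt. A self-contained sketch of the continuous branch after the change, with the function wrapper and argument names invented for illustration and tf.nn.swish standing in for ModelUtils.swish:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # graph-mode TF1 style, as in the TF trainers

def inverse_model_continuous(encoded_state, encoded_next_state,
                             current_action, act_size, h_size, inverse_layers):
    # Both encodings together carry the information needed to infer the action.
    combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
    hidden = combined_input
    # Stack inverse_layers - 1 hidden layers before the prediction head.
    for i in range(inverse_layers - 1):
        hidden = tf.layers.dense(
            hidden,
            h_size,
            activation=tf.nn.swish,
            name="hidden_{}".format(i),
            kernel_initializer=tf.initializers.variance_scaling(1.0),
        )
    # Predict the (continuous) action that links the two encoded states.
    pred_action = tf.layers.dense(
        hidden, act_size, activation=None, name="pred_action"
    )
    # Per-sample squared error against the action actually taken.
    return tf.reduce_sum(
        tf.squared_difference(pred_action, current_action), axis=1
    )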
    def create_forward_model(
-       self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor
+       self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor, forward_layers: int
    ) -> None:
        """
        Creates forward model TensorFlow ops for Curiosity module.

        combined_input = tf.concat(
            [encoded_state, self.current_action], axis=1
        )
        hidden = combined_input
+       for i in range(forward_layers):
+           hidden = tf.layers.dense(
+               hidden,
+               self.h_size
+               * (self.vis_obs_size + int(self.vec_obs_size > 0)),
+               activation=None,
+               name="hidden_{}".format(i)
+           )
-       predict = tf.layers.dense(
-           combined_input,
-           self.h_size
-           * (self.vis_obs_size + int(self.vec_obs_size > 0)),
-           activation=None,
-       )
+       # predict = tf.layers.dense(
+       #     combined_input,
+       #     self.h_size
+       #     * (self.vis_obs_size + int(self.vec_obs_size > 0)),
+       #     activation=None,
+       # )
-           predict,
+           hidden,
            self.feature_size,
            name="latent"
        )
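The forward model gets the matching knob: forward_layers dense layers over the concatenated state encoding and action, with the old single projection left commented out and the final layer now mapping hidden into a feature_size-wide "latent" output. A self-contained sketch of the resulting body, with an invented wrapper and obs_streams standing in for self.vis_obs_size + int(self.vec_obs_size > 0):

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # graph-mode TF1 style, as in the TF trainers

def forward_model(encoded_state, current_action, h_size, obs_streams,
                  feature_size, forward_layers):
    # Current state encoding plus action is the input of the dynamics model.
    combined_input = tf.concat([encoded_state, current_action], axis=1)
    hidden = combined_input
    for i in range(forward_layers):
        # Layer width mirrors the hunk above: h_size per observation stream.
        hidden = tf.layers.dense(
            hidden,
            h_size * obs_streams,
            activation=None,
            name="hidden_{}".format(i),
        )
    # Project back into the shared feature space; this is the predicted
    # encoding of the next state.
    return tf.layers.dense(hidden, feature_size, name="latent")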

ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (3 changes)


        self.model_update_dict: Dict[str, tf.Tensor] = {}
        # Create the graph here to give more granular control of the TF graph to the Optimizer.
-       policy.create_tf_graph(hyperparameters.encoder_layers, hyperparameters.policy_layers, hyperparameters.feature_size,
+       policy.create_tf_graph(hyperparameters.encoder_layers, hyperparameters.policy_layers,
+           hyperparameters.forward_layers, hyperparameters.inverse_layers, hyperparameters.feature_size,
            self.use_transfer, self.separate_policy_train, self.use_var_encoder, self.use_var_predict,
            self.predict_return, self.use_inverse_model, self.reuse_encoder)
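With two more positional arguments threaded through, the call is getting easy to mis-order; one option is to pass the settings by keyword. The sketch below only uses parameter names that appear in the create_tf_graph hunk earlier in this commit and leaves out the trailing flags (self.use_var_encoder and friends), whose parameter names are not visible in this excerpt:

# In-place rewrite of the call above, keyword style (illustrative only).
policy.create_tf_graph(
    encoder_layers=hyperparameters.encoder_layers,
    policy_layers=hyperparameters.policy_layers,
    forward_layers=hyperparameters.forward_layers,
    inverse_layers=hyperparameters.inverse_layers,
    feature_size=hyperparameters.feature_size,
    transfer=self.use_transfer,
    separate_train=self.separate_policy_train,
    # remaining flags omitted here; see the full positional call above
)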
