浏览代码

no normalization

/develop/model-transfer/train
yanchaosun 4 年前
当前提交
1fdbfe65
共有 5 个文件被更改，包括 38 次插入和 32 次删除
  1. 6
      config/ppo_transfer/CrawlerStaticOpbuffer.yaml
  2. 4
      config/ppo_transfer/WalkerStaticSingle.yaml
  3. 40
      ml-agents/mlagents/trainers/policy/transfer_policy.py
  4. 2
      ml-agents/mlagents/trainers/ppo_transfer/optimizer.py
  5. 18
      ml-agents/mlagents/trainers/tests/test_simple_transfer.py

6
config/ppo_transfer/CrawlerStaticOpbuffer.yaml


hyperparameters:
batch_size: 2024
buffer_size: 20240
learning_rate: 0.0003
learning_rate: 0.0002
beta: 0.005
epsilon: 0.2
lambd: 0.95

feature_size: 32
reuse_encoder: true
in_epoch_alter: true
use_op_buffer: true
separate_value_net: true
use_var_predict: true
with_prior: true
network_settings:
normalize: true
hidden_units: 512

4
config/ppo_transfer/WalkerStaticSingle.yaml


hyperparameters:
batch_size: 2048
buffer_size: 20480
learning_rate: 0.0002
learning_rate: 0.0005
beta: 0.005
epsilon: 0.2
lambd: 0.95

forward_layers: 2
inverse_layers: 1
separate_value_net: true
feature_size: 64
feature_size: 128
reuse_encoder: true
in_epoch_alter: true
use_op_buffer: true

40
ml-agents/mlagents/trainers/policy/transfer_policy.py


self.brain.camera_resolutions
)
self.vector_next = ModelUtils.create_vector_input(self.vec_obs_size)
if self.normalize:
self.processed_vector_next = ModelUtils.normalize_vector_obs(
self.vector_next,
self.running_mean,
self.running_variance,
self.normalization_steps,
)
else:
self.processed_vector_next = self.vector_next
# if self.normalize:
# self.processed_vector_next = ModelUtils.normalize_vector_obs(
# self.vector_next,
# self.running_mean,
# self.running_variance,
# self.normalization_steps,
# )
# else:
# self.processed_vector_next = self.vector_next
self.processed_vector_next,
self.vector_next,
1,
h_size,
num_layers,

self.brain.camera_resolutions
)
self.vector_next = ModelUtils.create_vector_input(self.vec_obs_size)
if self.normalize:
self.processed_vector_next = ModelUtils.normalize_vector_obs(
self.vector_next,
self.running_mean,
self.running_variance,
self.normalization_steps,
)
else:
self.processed_vector_next = self.vector_next
# if self.normalize:
# self.processed_vector_next = ModelUtils.normalize_vector_obs(
# self.vector_next,
# self.running_mean,
# self.running_variance,
# self.normalization_steps,
# )
# else:
# self.processed_vector_next = self.vector_next
self.processed_vector_next,
self.vector_next,
1,
h_size,
num_layers,

2
ml-agents/mlagents/trainers/ppo_transfer/optimizer.py


self.policy.mask_input: mini_batch["masks"] * burn_in_mask,
self.advantage: mini_batch["advantages"],
self.all_old_log_probs: mini_batch["action_probs"],
self.policy.processed_vector_next: mini_batch["next_vector_in"],
self.policy.vector_next: mini_batch["next_vector_in"],
# self.policy.next_vector_in: mini_batch["next_vector_in"],
self.policy.current_action: mini_batch["actions"],
self.policy.current_reward: mini_batch["extrinsic_rewards"],

18
ml-agents/mlagents/trainers/tests/test_simple_transfer.py


learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=16,
buffer_size=64,
feature_size=2,
feature_size=4,
reuse_encoder=True,
in_epoch_alter=True,
# in_batch_alter=True,

# assert all(reward > success_threshold for reward in processed_rewards)
def test_2d_model(config=Transfer_CONFIG, obs_spec_type="normal", run_id="model_normal", seed=0):
def test_2d_model(config=Transfer_CONFIG, obs_spec_type="normal", run_id="model_normal_f4_varp-pri-test", seed=0):
config.hyperparameters, batch_size=120, buffer_size=12000, learning_rate=5.0e-3
config.hyperparameters, batch_size=120, buffer_size=12000, learning_rate=5.0e-3,
use_var_predict=True, with_prior=True
def test_2d_transfer(config=Transfer_CONFIG, obs_spec_type="rich2", run_id="transfer_rich2_from-rich1", seed=1337):
def test_2d_transfer(config=Transfer_CONFIG, obs_spec_type="rich1", run_id="transfer_rich1_from-normal_varp-pri_retrain-all_5e-2", seed=1337):
env = SimpleTransferEnvironment(
[BRAIN_NAME], use_discrete=False, action_size=2, step_size=0.1,
num_vector=2, obs_spec_type=obs_spec_type, goal_type="hard"

transfer_path="./transfer_results/model_rich1_s0/Simple",
use_op_buffer=True, in_epoch_alter=True, in_batch_alter=False, learning_rate=5e-4,
train_policy=False, train_value=False, train_model=False, feature_size=2
transfer_path="./transfer_results/model_normal_f4_varp-pri_s0/Simple",
use_op_buffer=True, in_epoch_alter=True, in_batch_alter=False, learning_rate=5.0e-2,
train_policy=True, train_value=True, train_model=True, feature_size=4, learning_rate_schedule=ScheduleType.LINEAR,
use_var_predict=True, with_prior=True
)
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=200000, summary_freq=5000)
_check_environment_trains(env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed)

# test_2d_model(seed=0)
# test_2d_model(config=PPO_CONFIG, run_id="ppo_normal", seed=0)
test_2d_transfer(seed=0)
test_2d_transfer(seed=123)
# for i in range(5):
# test_2d_model(seed=i)
正在加载...
取消
保存