浏览代码

add two envs

/develop/bisim-review
yanchaosun 4 年前
当前提交
caeffa3e
共有 3 个文件被更改,包括 76 次插入、30 次删除
  1. 5
      ml-agents/mlagents/trainers/policy/transfer_policy.py
  2. 79
      ml-agents/mlagents/trainers/tests/test_simple_transfer.py
  3. 22
      ml-agents/mlagents/trainers/tests/transfer_test_envs.py

5
ml-agents/mlagents/trainers/policy/transfer_policy.py


squared_difference = 0.5 * tf.reduce_sum(
tf.squared_difference(self.predict, tf.stop_gradient(encoded_next_state)),
# tf.squared_difference(self.predict, encoded_next_state),
axis=1,
)

hidden,
self.h_size * (self.vis_obs_size + int(self.vec_obs_size > 0)),
name="hidden_{}".format(i),
reuse=True
activation=ModelUtils.swish,
reuse=True,
activation=ModelUtils.swish,
# kernel_initializer=tf.initializers.variance_scaling(1.0),
)
self.bisim_pred_reward = tf.layers.dense(

79
ml-agents/mlagents/trainers/tests/test_simple_transfer.py


# assert all(not math.isnan(reward) for reward in processed_rewards)
# assert all(reward > success_threshold for reward in processed_rewards)
def test_2d_ppo(
    config=PPO_CONFIG, obs_spec_type="rich1", run_id="ppo_rich1", seed=0
):
    """Train plain PPO on the 2-D continuous SimpleTransferEnvironment.

    Builds a hard-goal, 2-action continuous environment whose observation
    layout is chosen by ``obs_spec_type``, overlays test-specific
    hyperparameters on ``config``, and runs the shared training check.

    NOTE(review): depends on module-level ``PPO_CONFIG``, ``BRAIN_NAME``,
    ``SimpleTransferEnvironment`` and ``_check_environment_trains``.
    """
    # Hard-goal 2-D environment; num_vector=2 gives two positional
    # vector observations, expanded further by richer obs_spec types.
    env = SimpleTransferEnvironment(
        [BRAIN_NAME],
        use_discrete=False,
        action_size=2,
        step_size=0.1,
        num_vector=2,
        obs_spec_type=obs_spec_type,
        goal_type="hard",
    )
    # attr.evolve returns fresh copies, so the (module-level) default
    # config object is never mutated across test invocations.
    tuned_hparams = attr.evolve(
        config.hyperparameters,
        batch_size=1200,
        buffer_size=12000,
        learning_rate=5.0e-3,
    )
    run_config = attr.evolve(
        config, hyperparameters=tuned_hparams, max_steps=350000, summary_freq=5000
    )
    # The seed is folded into run_id so repeated seeds write to
    # distinct result directories.
    _check_environment_trains(
        env, {BRAIN_NAME: run_config}, run_id=run_id + "_s" + str(seed), seed=seed
    )
def test_2d_model(
config=Transfer_CONFIG, obs_spec_type="rich1", run_id="model_rich1", seed=0

forward_layers=1,
encoder_layers=2,
feature_size=32,
# use_inverse_model=True
)
config = attr.evolve(
config, hyperparameters=new_hyperparams, max_steps=350000, summary_freq=5000

learning_rate=5.0e-3,
train_policy=True,
train_value=True,
train_model=False,
train_model=True, # YS: I tried retraining model
separate_value_train=True,
separate_policy_train=False,
feature_size=32,

value_layers=1,
encoder_layers=2,
use_bisim=False,
reuse_encoder=True, # YS: I added this
)
config = attr.evolve(
config, hyperparameters=new_hyperparams, max_steps=350000, summary_freq=5000

if __name__ == "__main__":
for seed in range(5):
if seed > -1:
for obs in ["normal", "rich1", "rich2"]:
test_2d_model(seed=seed, obs_spec_type=obs, run_id="model_" + obs)
# for seed in range(5):
# if seed > -1:
# for obs in ["normal", "rich1", "rich2"]:
# test_2d_model(seed=seed, obs_spec_type=obs, run_id="model_" + obs)
# test_2d_model(config=SAC_CONFIG, run_id="sac_rich2_hard", seed=0)
for obs in ["normal", "rich2"]:
test_2d_transfer(
seed=seed,
obs_spec_type="rich1",
transfer_from="./transfer_results/model_" + obs + "_s" + str(seed) + "/Simple",
run_id=obs + "transfer_to_rich1",
)
# # test_2d_model(config=SAC_CONFIG, run_id="sac_rich2_hard", seed=0)
# for obs in ["normal", "rich2"]:
# test_2d_transfer(
# seed=seed,
# obs_spec_type="rich1",
# transfer_from="./transfer_results/model_" + obs + "_s" + str(seed) + "/Simple",
# run_id=obs + "transfer_to_rich1",
# )
for obs in ["normal", "rich1"]:
test_2d_transfer(
seed=seed,
obs_spec_type="rich2",
transfer_from="./transfer_results/model_" + obs + "_s" + str(seed) + "/Simple",
run_id=obs + "transfer_to_rich2",
)
# for obs in ["normal", "rich1"]:
# test_2d_transfer(
# seed=seed,
# obs_spec_type="rich2",
# transfer_from="./transfer_results/model_" + obs + "_s" + str(seed) + "/Simple",
# run_id=obs + "transfer_to_rich2",
# )
# for obs in ["normal"]:
# test_2d_transfer(seed=0, obs_spec_type="rich1",
# transfer_from="./transfer_results/model_"+ obs +"_f4_pv-l0_rew_bisim-nop_newalter_noreuse-soft0.1_s0/Simple",
# run_id="transfer_rich1_retrain-all_f4_pv-l0_rew_bisim-nop_noreuse-soft0.1_from_" + obs)
# for i in range(5):
# test_2d_model(seed=i)
for obs in ["longpre"]:
test_2d_model(seed=0, obs_spec_type=obs, run_id="model_" + obs)
test_2d_ppo(seed=0, obs_spec_type=obs, run_id="ppo_" + obs)
# test_2d_transfer(seed=0, obs_spec_type="longpre",
# transfer_from="./transfer_results/model_normal_s0/Simple",
# run_id="normal_transfer_to_longpre_reuse_trainmod")

22
ml-agents/mlagents/trainers/tests/transfer_test_envs.py


# Reward shaping constants for the transfer test environments.
# Per-step time penalty applied during an episode.
TIME_PENALTY = 0.01
# Minimum number of steps to traverse the unit interval at STEP_SIZE
# (STEP_SIZE is defined elsewhere in this file — not visible here).
MIN_STEPS = int(1.0 / STEP_SIZE) + 1
# NOTE(review): presumably the total reward for solving in minimal time
# (goal reward 1.0 plus refunded time penalties) — confirm against the
# environment's reward computation.
SUCCESS_REWARD = 1.0 + MIN_STEPS * TIME_PENALTY
# Number of constant padding observations appended (or prepended) by the
# "long"/"longpre" observation-spec variants.
EXTRA_OBS_SIZE = 10
def clamp(x, min_val, max_val):

vis_obs_size=VIS_OBS_SIZE,
vec_obs_size=OBS_SIZE,
action_size=1,
obs_spec_type="normal", # normal: (x,y); rich: (x+y, x-y, x*y)
obs_spec_type="normal", # normal: (x,y); rich: (x+y, x-y, x*y); long: (x,y,1,...,1)
extra_obs_size=EXTRA_OBS_SIZE,
):
super().__init__()
self.discrete = use_discrete

self.vec_obs_size = vec_obs_size
self.obs_spec_type = obs_spec_type
self.extra_obs_size = extra_obs_size
self.goal_type = goal_type
action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
self.behavior_spec = BehaviorSpec(

self.step_result: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self.agent_id: Dict[str, int] = {}
self.step_size = step_size # defines the difficulty of the test
for name in self.names:
self.agent_id[name] = 0

if "rich" in self.obs_spec_type:
for _ in range(self.num_vector + 1):
obs_spec.append((self.vec_obs_size,))
if "long" in self.obs_spec_type:
for _ in range(self.num_vector + self.extra_obs_size):
obs_spec.append((self.vec_obs_size,))
print("obs_spec:", obs_spec)
return obs_spec

obs.append(
np.ones((1, self.vec_obs_size), dtype=np.float32) * (2 * i - j)
)
elif self.obs_spec_type == "long":
for name in self.names:
for i in self.positions[name]:
obs.append(np.ones((1, self.vec_obs_size), dtype=np.float32) * i)
for _ in range(self.extra_obs_size):
obs.append(np.ones((1, self.vec_obs_size), dtype=np.float32))
elif self.obs_spec_type == "longpre":
for name in self.names:
for _ in range(self.extra_obs_size):
obs.append(np.ones((1, self.vec_obs_size), dtype=np.float32))
for i in self.positions[name]:
obs.append(np.ones((1, self.vec_obs_size), dtype=np.float32) * i)
return obs
@property

正在加载...
取消
保存