
replace use_discrete with action_sizes in simple_rl

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit: 88b8f4b4
4 files changed, 123 insertions(+), 123 deletions(-)
1. ml-agents/mlagents/trainers/demo_loader.py (5 changes)
2. ml-agents/mlagents/trainers/tests/simple_test_envs.py (30 changes)
3. ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (110 changes)
4. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (101 changes)

ml-agents/mlagents/trainers/demo_loader.py (5 changes)


for i, obs in enumerate(split_obs.visual_observations):
demo_raw_buffer["visual_obs%d" % i].append(obs)
demo_raw_buffer["vector_obs"].append(split_obs.vector_observations)
- if behavior_spec.action_spec.is_continuous():
+ # TODO: update to read from the new proto format
+ if behavior_spec.action_spec.continuous_size > 0:
- else:
+ if behavior_spec.action_spec.discrete_size > 0:
demo_raw_buffer["discrete_action"].append(
current_pair_info.action_info.vector_actions
)
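The loader switches from the mutually exclusive is_continuous() check to two independent size checks, so a demo whose behavior carries both action halves can fill both buffers. A sketch of the relevant fields, assuming the ActionSpec NamedTuple from mlagents_envs.base_env of this era:

    from mlagents_envs.base_env import ActionSpec

    # discrete_branches holds one entry per branch; discrete_size counts branches.
    spec = ActionSpec(continuous_size=2, discrete_branches=(3, 2))
    assert spec.continuous_size == 2
    assert spec.discrete_size == 2
    # A hybrid spec is neither "continuous" nor "discrete" in the old single-type
    # sense, which is why two independent > 0 checks replace the if/else above.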

ml-agents/mlagents/trainers/tests/simple_test_envs.py (30 changes)


OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)
- STEP_SIZE = 0.1
+ STEP_SIZE = 0.2
TIME_PENALTY = 0.01
MIN_STEPS = int(1.0 / STEP_SIZE) + 1

num_vector=1,
vis_obs_size=VIS_OBS_SIZE,
vec_obs_size=OBS_SIZE,
- continuous_action_size=0,
- discrete_action_size=0,
+ action_sizes=(1, 0),
):
super().__init__()
self.num_visual = num_visual

+ continuous_action_size, discrete_action_size = action_sizes
+ discrete_tuple = tuple(2 for _ in range(discrete_action_size))
if continuous_action_size > 0:
if discrete_action_size > 0:
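For reference, a compact sketch of what the unpacked tuple builds; make_test_action_spec is a hypothetical helper, not code from this commit, and the 2-way branches follow the test envs' convention visible above:

    from mlagents_envs.base_env import ActionSpec

    def make_test_action_spec(action_sizes):
        # Mirrors the unpacking in SimpleEnvironment.__init__ above.
        continuous_action_size, discrete_action_size = action_sizes
        discrete_tuple = tuple(2 for _ in range(discrete_action_size))
        return ActionSpec(
            continuous_size=continuous_action_size,
            discrete_branches=discrete_tuple,
        )

    make_test_action_spec((1, 0))  # 1 continuous dim, no discrete branches
    make_test_action_spec((0, 2))  # two 2-way discrete branches
    make_test_action_spec((1, 1))  # hybrid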

class MemoryEnvironment(SimpleEnvironment):
- def __init__(
- self,
- brain_names,
- continuous_action_size=1,
- discrete_action_size=1,
- step_size=0.2,
- ):
- super().__init__(
- brain_names,
- continuous_action_size=continuous_action_size,
- discrete_action_size=discrete_action_size,
- step_size=step_size,
- )
+ def __init__(self, brain_names, action_sizes=(1, 0), step_size=0.2):
+ super().__init__(brain_names, action_sizes=action_sizes, step_size=step_size)
# Number of steps to reveal the goal for. Lower is harder. Should be
# less than 1/step_size to force agent to use memory
self.num_show_steps = 2
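Constructing the memory env now needs only the tuple; a hypothetical usage line matching the new signature:

    # Discrete-only memory task; one tuple replaces two integer kwargs.
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(0, 1), step_size=0.2)

Folding both sizes into one parameter keeps every environment constructor uniform, which is what lets the test files below parametrize on action_sizes alone.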

def __init__(
self,
brain_names,
- use_discrete,
+ action_sizes=(1, 0),
- use_discrete,
+ action_sizes=action_sizes,
)
self.demonstration_protos: Dict[str, List[AgentInfoActionPairProto]] = {}
self.n_demos = n_demos

def step(self) -> None:
super().step()
for name in self.names:
- if self.discrete:
+ if self.action_spec.discrete_size > 0:
action = self.action[name].discrete
else:
action = self.action[name].continuous

self.reset()
for _ in range(self.n_demos):
for name in self.names:
- if self.discrete:
+ if self.action_spec.discrete_size > 0:
self.action[name] = ActionTuple(
np.array([], dtype=np.float32),
np.array(
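The hunk above is truncated, but the pattern it starts is visible: a discrete-only action pairs an empty continuous array with the discrete one. A self-contained sketch, assuming the ActionTuple container from mlagents_envs.base_env used in the hunk:

    import numpy as np
    from mlagents_envs.base_env import ActionTuple

    # Empty continuous half (as in the diff) plus one agent's 2-way branch choice.
    solved_action = ActionTuple(
        np.array([], dtype=np.float32),
        np.array([[1]], dtype=np.int32),
    )
    assert solved_action.discrete.shape == (1, 1)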

ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (110 changes)


assert all(reward > success_threshold for reward in processed_rewards)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(
PPO_TF_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_ppo(num_visual, use_discrete):
def test_visual_ppo(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
- use_discrete=True,
+ action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_ppo(action_sizes):
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
new_network_settings = attr.evolve(
PPO_TF_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_2d_sac(action_sizes):
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
[BRAIN_NAME], action_sizes=action_sizes, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(SAC_TF_CONFIG.hyperparameters, buffer_init_steps=2000)
config = attr.evolve(

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_sac(num_visual, use_discrete):
def test_visual_sac(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
- use_discrete=True,
+ action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_sac(action_sizes):
step_size = 0.2 if action_sizes else 0.5
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TF_CONFIG.network_settings,

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,

_check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the team that is not learning when both have reached
# max step should be executing the initial, untrained policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
- def record_demo(use_discrete, num_visual=0, num_vector=1):
+ def record_demo(action_sizes, num_visual=0, num_vector=1):
- use_discrete=use_discrete,
+ action_sizes=action_sizes,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,

env.solve()
+ continuous_size, discrete_size = action_sizes
+ use_discrete = True if discrete_size > 0 else False
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
brain_param_proto = BrainParametersProto(

return record_demo
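Inside record_demo, the tuple is collapsed back into the legacy flag because the demonstration proto still records a single action-space type. A sketch of that mapping; the vector_action_space_type encoding (0 = discrete, 1 = continuous) comes from the legacy SpaceTypeProto and is an assumption of this sketch, not shown in the diff:

    action_sizes = (0, 1)                                # discrete-only example
    continuous_size, discrete_size = action_sizes
    use_discrete = discrete_size > 0                     # same result as the diff's ternary
    vector_action_space_type = 0 if use_discrete else 1  # assumed SpaceTypeProto values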
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_ppo(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_sac(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (101 changes)


SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
- def test_recurrent_ppo():
- env = MemoryEnvironment(
- [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
- )
- new_network_settings = attr.evolve(
- PPO_TORCH_CONFIG.network_settings,
- memory=NetworkSettings.MemorySettings(memory_size=16),
- )
- new_hyperparams = attr.evolve(
- PPO_TORCH_CONFIG.hyperparameters,
- learning_rate=1.0e-3,
- batch_size=64,
- buffer_size=128,
- )
- config = attr.evolve(
- PPO_TORCH_CONFIG,
- hyperparameters=new_hyperparams,
- network_settings=new_network_settings,
- max_steps=5000,
- )
- check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
- [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1, step_size=0.8
+ [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
- new_hyperparams = attr.evolve(
- PPO_TORCH_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
- )
- config = attr.evolve(
- PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
- )
+ config = attr.evolve(PPO_TORCH_CONFIG)
- [BRAIN_NAME], continuous_action_size=1, discrete_action_size=0, step_size=0.8
+ [BRAIN_NAME], continuous_action_size=1, discrete_action_size=0
)
config = attr.evolve(PPO_TORCH_CONFIG)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)

env = SimpleEnvironment(
- [BRAIN_NAME], continuous_action_size=0, discrete_action_size=1, step_size=0.8
+ [BRAIN_NAME], continuous_action_size=0, discrete_action_size=1
- def test_3cdhybrid_ppo():
+ @pytest.mark.parametrize("num_visual", [1, 2])
+ def test_visual_ppo(num_visual):
- [BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
+ [BRAIN_NAME],
+ num_visual=num_visual,
+ num_vector=0,
+ continuous_action_size=1,
+ discrete_action_size=1,
- PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
+ PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
- config = attr.evolve(
- PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
- )
- check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+ config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
+ check_environment_trains(env, {BRAIN_NAME: config})
- def test_3ddhybrid_ppo():
- env = SimpleEnvironment(
- [BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
+ def test_recurrent_ppo():
+ env = MemoryEnvironment(
+ [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
)
+ new_network_settings = attr.evolve(
+ PPO_TORCH_CONFIG.network_settings,
+ memory=NetworkSettings.MemorySettings(memory_size=16),
- PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
+ PPO_TORCH_CONFIG.hyperparameters,
+ learning_rate=1.0e-3,
+ batch_size=64,
+ buffer_size=128,
- PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
+ PPO_TORCH_CONFIG,
+ hyperparameters=new_hyperparams,
+ network_settings=new_network_settings,
+ max_steps=100000,
- check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+ check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
+ # def test_3cdhybrid_ppo():
+ #     env = SimpleEnvironment(
+ #         [BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
+ #     )
+ #     new_hyperparams = attr.evolve(
+ #         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
+ #     )
+ #     config = attr.evolve(
+ #         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
+ #     )
+ #     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+ #
+ #
+ # def test_3ddhybrid_ppo():
+ #     env = SimpleEnvironment(
+ #         [BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
+ #     )
+ #     new_hyperparams = attr.evolve(
+ #         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
+ #     )
+ #     config = attr.evolve(
+ #         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
+ #     )
+ #     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
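The two commented-out tests cover hybrid spaces with more than one action of each kind. For orientation, a sketch of the space the 3cdhybrid case would exercise, assuming the ActionSpec/ActionTuple API used above (the 2-way branch size is the test envs' convention, not stated here):

    import numpy as np
    from mlagents_envs.base_env import ActionSpec, ActionTuple

    # continuous_action_size=2, discrete_action_size=1 as in the commented test.
    spec = ActionSpec(continuous_size=2, discrete_branches=(2,))

    # A single agent's hybrid action carries both halves at once.
    act = ActionTuple(
        np.array([[0.1, -0.3]], dtype=np.float32),  # (n_agents, continuous_size)
        np.array([[1]], dtype=np.int32),            # (n_agents, number of branches)
    )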