
action buffer passes continuous

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit: bd917c9c
11 files changed, with 564 insertions and 449 deletions
  1. ml-agents-envs/mlagents_envs/base_env.py (66)
  2. ml-agents/mlagents/trainers/agent_processor.py (11)
  3. ml-agents/mlagents/trainers/policy/policy.py (16)
  4. ml-agents/mlagents/trainers/policy/torch_policy.py (20)
  5. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (8)
  6. ml-agents/mlagents/trainers/simple_env_manager.py (5)
  7. ml-agents/mlagents/trainers/subprocess_env_manager.py (5)
  8. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (702)
  9. ml-agents/mlagents/trainers/torch/networks.py (10)
  10. ml-agents/mlagents/trainers/torch/utils.py (147)
  11. ml-agents/mlagents/trainers/trajectory.py (23)

ml-agents-envs/mlagents_envs/base_env.py (66)


continuous: np.ndarray
discrete: np.ndarray
@staticmethod
def from_numpy_dict(action_dict: Dict[str, np.ndarray]) -> "ActionBuffers":
continuous: np.ndarray = []
discrete: np.ndarray = []
if "continuous_action" in action_dict:
continuous = action_dict["continuous_action"]
if "discrete_action" in action_dict:
discrete = action_dict["discrete_action"]
return ActionBuffers(continuous, discrete)
class ActionSpec(NamedTuple):
"""

"""
return len(self.discrete_branches)
def empty_action(self, n_agents: int) -> ActionBuffers:
def empty_action(self, n_agents: int) -> Dict[str, np.ndarray]:
return ActionBuffers(
np.zeros((n_agents, self.continuous_size), dtype=np.float32),
np.zeros((n_agents, self.discrete_size), dtype=np.int32),
)
action_dict: Dict[str, np.ndarray] = {}
if self.continuous_size > 0:
action_dict["continuous_action"] = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
if self.discrete_size > 0:
action_dict["discrete_action"] = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
return action_dict
# return ActionBuffers(
# np.zeros((n_agents, self.continuous_size), dtype=np.float32),
# np.zeros((n_agents, self.discrete_size), dtype=np.int32),
# )
def random_action(self, n_agents: int) -> ActionBuffers:
def random_action(self, n_agents: int) -> Dict[str, np.ndarray]:
continuous_action = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
).astype(np.float32)
action_dict: Dict[str, np.ndarray] = {}
if self.continuous_size > 0:
continuous_action = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
).astype(np.float32)
action_dict["continuous_action"] = continuous_action
discrete_action = np.column_stack(
[
np.random.randint(
0,
self.discrete_branches[i], # type: ignore
size=(n_agents),
dtype=np.int32,
)
for i in range(self.discrete_size)
]
)
return ActionBuffers(continuous_action, discrete_action)
if self.discrete_size > 0:
discrete_action = np.column_stack(
[
np.random.randint(
0,
self.discrete_branches[i], # type: ignore
size=(n_agents),
dtype=np.int32,
)
for i in range(self.discrete_size)
]
)
action_dict["discrete_action"] = discrete_action
return action_dict
#return ActionBuffers(continuous_action, discrete_action)
def _validate_action(
self, actions: ActionBuffers, n_agents: int, name: str
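
Taken together, these base_env.py changes make empty_action and random_action return plain dicts keyed by action type, while ActionBuffers.from_numpy_dict converts such a dict back into the buffers the environment consumes. A minimal usage sketch, assuming an ActionSpec with two continuous dimensions and two discrete branches (the constructor fields shown are assumptions, not taken from this diff):

# Hypothetical usage sketch: only empty_action, random_action and
# ActionBuffers.from_numpy_dict come from the diff; the spec construction is assumed.
import numpy as np
from mlagents_envs.base_env import ActionSpec, ActionBuffers

spec = ActionSpec(continuous_size=2, discrete_branches=(3, 2))  # assumed fields

zeros = spec.empty_action(n_agents=4)    # Dict[str, np.ndarray] keyed by action type
rand = spec.random_action(n_agents=4)
assert zeros["continuous_action"].shape == (4, 2)
assert set(rand) <= {"continuous_action", "discrete_action"}

# Env managers convert the trainer-side dict back into the ActionBuffers the env expects.
buffers = ActionBuffers.from_numpy_dict(rand)
assert buffers.continuous.shape == (4, 2)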

ml-agents/mlagents/trainers/agent_processor.py (11)


done = terminated # Since this is an ongoing step
interrupted = step.interrupted if terminated else False
# Add the outputs of the last eval
action = stored_take_action_outputs["action"][idx]
action_dict = stored_take_action_outputs["action"]
action: Dict[str, np.ndarray] = {}
for act_type, act_array in action_dict.items():
action[act_type] = act_array[idx]
action_probs = stored_take_action_outputs["log_probs"][idx]
action_probs_dict = stored_take_action_outputs["log_probs"]
action_probs: Dict[str, np.ndarray] = {}
for prob_type, prob_array in action_probs_dict.items():
action_probs[prob_type] = prob_array[idx]
action_mask = stored_decision_step.action_mask
prev_action = self.policy.retrieve_previous_action([global_id])#[0, :]
experience = AgentExperience(

ml-agents/mlagents/trainers/policy/policy.py (16)


return self.behavior_spec.action_spec.empty_action(num_agents)
def save_previous_action(
self, agent_ids: List[str], action_buffers: Optional[ActionBuffers]
self, agent_ids: List[str], action_dict: Dict[str, np.ndarray]
if action_buffers is None:
if action_dict is None:
self.previous_action_dict[agent_id] = action_buffers
self.previous_action_dict[agent_id] = action_dict
def retrieve_previous_action(self, agent_ids: List[str]) -> ActionBuffers:
action_buffers = self.behavior_spec.action_spec.create_empty(len(agent_ids))
def retrieve_previous_action(self, agent_ids: List[str]) -> Dict[str, np.ndarray]:
action_dict = self.behavior_spec.action_spec.empty_action(len(agent_ids))
for action, previous_action in zip(action_buffers, self.previous_action_dict[agent_id]):
action[index, :] = previous_action
return action_buffers
for act_type in action_dict:
action_dict[act_type][index, :] = self.previous_action_dict[agent_id][act_type]
return action_dict
def remove_previous_action(self, agent_ids):
for agent_id in agent_ids:
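
The policy.py hunk replaces ActionBuffers-based previous-action storage with the same per-type dicts. A simplified, self-contained sketch of that bookkeeping, with previous_action_dict as a plain module-level dict standing in for the Policy attribute (names and shapes here are illustrative):

from typing import Dict, List, Optional
import numpy as np

previous_action_dict: Dict[str, Dict[str, np.ndarray]] = {}  # stand-in for Policy.previous_action_dict

def save_previous_action(agent_ids: List[str], action_dict: Optional[Dict[str, np.ndarray]]) -> None:
    if action_dict is None:
        return
    for index, agent_id in enumerate(agent_ids):
        # keep this agent's row for every action type ("continuous_action", "discrete_action", ...)
        previous_action_dict[agent_id] = {k: v[index] for k, v in action_dict.items()}

def retrieve_previous_action(agent_ids: List[str], empty: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
    # `empty` plays the role of action_spec.empty_action(len(agent_ids)) in the real code
    for index, agent_id in enumerate(agent_ids):
        if agent_id in previous_action_dict:
            for act_type in empty:
                empty[act_type][index, :] = previous_action_dict[agent_id][act_type]
    return empty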

ml-agents/mlagents/trainers/policy/torch_policy.py (20)


SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
EPSILON = 1e-7 # Small value to avoid divide by zero

vec_obs, vis_obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
log_probs_list, entropies, all_logs = ModelUtils.get_probs_and_entropy(
log_probs = ActionLogProbs.create_action_log_probs(log_probs_list, self.behavior_spec.action_spec)
# actions = torch.stack(action_list, dim=-1)
# if self.use_continuous_act:
# actions = actions[:, :, 0]

self,
vec_obs: torch.Tensor,
vis_obs: torch.Tensor,
actions: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

)
#action_list = [actions[..., i] for i in range(actions.shape[-1])]
#log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(actions, dists)
log_probs_list, entropies, _ = ModelUtils.get_probs_and_entropy(actions, dists)
log_probs = ActionLogProbs.create_action_log_probs(log_probs_list, self.behavior_spec.action_spec)
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
return log_probs, entropy_sum, value_heads

action, log_probs, entropy, memories = self.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
)
run_out["action"] = ModelUtils.to_action_buffers(action, self.behavior_spec.action_spec)
run_out["pre_action"] = ModelUtils.to_action_buffers(action, self.behavior_spec.action_spec)
# Todo - make pre_action difference
run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
run_out["action"] = action.to_numpy_dict()
run_out["pre_action"] = action.to_numpy_dict()["continuous_action"] # Todo - make pre_action difference
run_out["log_probs"] = log_probs.to_numpy_dict()
run_out["entropy"] = ModelUtils.to_numpy(entropy)
run_out["learning_rate"] = 0.0
if self.use_recurrent:
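
For orientation, here is roughly what run_out carries after this change for a purely continuous action spec; the key names follow the diff, but the shapes and values are illustrative only:

import numpy as np

# Illustrative contents only; real values come from the sampled distributions above.
run_out = {
    "action": {"continuous_action": np.zeros((1, 2), dtype=np.float32)},
    "pre_action": np.zeros((1, 2), dtype=np.float32),   # the continuous slice, per the TODO above
    "log_probs": {"continuous_log_probs": np.zeros((1, 2), dtype=np.float32)},
    "entropy": np.zeros((1,), dtype=np.float32),
    "learning_rate": 0.0,
}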

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (8)


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
class TorchPPOOptimizer(TorchOptimizer):

vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = ModelUtils.action_buffers_to_tensor_list(batch["actions"], self.policy.behavior_spec.action_spec)
actions = AgentAction.extract_agent_action(batch)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])

memories=memories,
seq_len=self.policy.sequence_length,
)
old_log_probs = ActionLogProbs.extract_action_log_probs(batch).flatten()
log_probs = log_probs.flatten()
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks

log_probs,
ModelUtils.list_to_tensor(batch["action_probs"]),
old_log_probs,
loss_masks,
)
loss = (

ml-agents/mlagents/trainers/simple_env_manager.py (5)


from typing import Dict, List
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec, ActionBuffers
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents_envs.timers import timed
from mlagents.trainers.action_info import ActionInfo

self.previous_all_action_info = all_action_info
for brain_name, action_info in all_action_info.items():
self.env.set_actions(brain_name, action_info.action)
_action = ActionBuffers.from_numpy_dict(action_info.action)
self.env.set_actions(brain_name, _action)
self.env.step()
all_step_result = self._generate_all_results()

ml-agents/mlagents/trainers/subprocess_env_manager.py (5)


from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from queue import Empty as EmptyQueueException
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec, ActionBuffers
from mlagents_envs import logging_util
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents_envs.timers import (

all_action_info = req.payload
for brain_name, action_info in all_action_info.items():
if len(action_info.action) != 0:
env.set_actions(brain_name, action_info.action)
_action = ActionBuffers.from_numpy_dict(action_info.action)
env.set_actions(brain_name, _action)
env.step()
all_step_result = _generate_all_results()
# The timers in this process are independent from all the processes and the "main" process
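
Both env managers perform the same conversion right before stepping: the dict produced by the policy is wrapped into an ActionBuffers for the environment API. A compressed sketch with the trainer output mocked (the real dict comes from ActionInfo.action):

import numpy as np
from mlagents_envs.base_env import ActionBuffers

# Mocked trainer output for one behavior; shapes are arbitrary here.
action_dict = {
    "continuous_action": np.random.uniform(-1, 1, size=(3, 2)).astype(np.float32),
    "discrete_action": np.zeros((3, 1), dtype=np.int32),
}
_action = ActionBuffers.from_numpy_dict(action_dict)
# env.set_actions(brain_name, _action)  # as in the hunks above; no env is constructed in this sketch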

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (702)


SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("use_discrete", [False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(PPO_TORCH_CONFIG)

@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)
config = attr.evolve(
PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
)
check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual, use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=0,
step_size=0.2,
)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
)
config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,
num_vector=0,
step_size=0.5,
vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
)
new_networksettings = attr.evolve(
SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
)
config = attr.evolve(
PPO_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=900,
summary_freq=100,
)
# The number of steps is pretty small for these encoders
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
new_network_settings = attr.evolve(
PPO_TORCH_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),
)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters,
learning_rate=1.0e-3,
batch_size=64,
buffer_size=128,
)
config = attr.evolve(
PPO_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_network_settings,
max_steps=5000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(SAC_TORCH_CONFIG)
check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000
)
config = attr.evolve(
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_sac(num_visual, use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=0,
step_size=0.2,
)
new_hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
)
config = attr.evolve(SAC_TORCH_CONFIG, hyperparameters=new_hyperparams)
check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,
num_vector=0,
step_size=0.5,
vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
)
new_networksettings = attr.evolve(
SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
)
new_hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters,
batch_size=16,
learning_rate=3e-4,
buffer_init_steps=0,
)
config = attr.evolve(
SAC_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=100,
)
# The number of steps is pretty small for these encoders
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_ppo(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )
# new_hyperparams = attr.evolve(
# PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
# )
# config = attr.evolve(
# PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
# )
# check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TORCH_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
)
new_hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters,
batch_size=256,
learning_rate=1e-3,
buffer_init_steps=1000,
steps_per_update=2,
)
config = attr.evolve(
SAC_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=2000,
)
check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
)
config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=4000
)
config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=None)
processed_rewards = [
default_reward_processor(rewards) for rewards in env.final_rewards.values()
]
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
# Make opponent for asymmetric case
brain_name_opp = BRAIN_NAME + "Opp"
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,
save_steps=10000,
swap_steps=10000,
team_change=400,
)
config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=4000)
check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
# Make opponent for asymmetric case
brain_name_opp = BRAIN_NAME + "Opp"
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
)
# This config should fail because the team that is not learning when both have reached
# max step should be executing the initial, untrained policy.
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=0.0,
save_steps=5000,
swap_steps=5000,
team_change=2000,
)
config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=3000)
check_environment_trains(
env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
)
processed_rewards = [
default_reward_processor(rewards) for rewards in env.final_rewards.values()
]
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)
@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
env = RecordEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,
)
# If we want to use true demos, we can solve the env in the usual way
# Otherwise, we can just call solve to execute the optimal policy
env.solve()
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
brain_param_proto = BrainParametersProto(
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)
action_type = "Discrete" if use_discrete else "Continuous"
demo_path_name = "1DTest" + action_type + ".demo"
demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
return demo_path
return record_demo
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("trainer_config", [PPO_TORCH_CONFIG, SAC_TORCH_CONFIG])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
config = attr.evolve(
trainer_config,
reward_signals=reward_signals,
behavioral_cloning=bc_settings,
max_steps=500,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,
use_discrete=use_discrete,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3e-4)
config = attr.evolve(
PPO_TORCH_CONFIG,
reward_signals=reward_signals,
hyperparameters=hyperparams,
behavioral_cloning=bc_settings,
max_steps=1000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,
use_discrete=use_discrete,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
)
config = attr.evolve(
SAC_TORCH_CONFIG,
reward_signals=reward_signals,
hyperparameters=hyperparams,
behavioral_cloning=bc_settings,
max_steps=500,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("num_visual", [1, 2])
#def test_visual_ppo(num_visual, use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=use_discrete,
# num_visual=num_visual,
# num_vector=0,
# step_size=0.2,
# )
# new_hyperparams = attr.evolve(
# PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
# )
# config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
# check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("num_visual", [1, 2])
#@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
#def test_visual_advanced_ppo(vis_encode_type, num_visual):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=True,
# num_visual=num_visual,
# num_vector=0,
# step_size=0.5,
# vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
# )
# new_networksettings = attr.evolve(
# SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
# )
# new_hyperparams = attr.evolve(
# PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
# )
# config = attr.evolve(
# PPO_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_networksettings,
# max_steps=900,
# summary_freq=100,
# )
# # The number of steps is pretty small for these encoders
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_recurrent_ppo(use_discrete):
# env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
# new_network_settings = attr.evolve(
# PPO_TORCH_CONFIG.network_settings,
# memory=NetworkSettings.MemorySettings(memory_size=16),
# )
# new_hyperparams = attr.evolve(
# PPO_TORCH_CONFIG.hyperparameters,
# learning_rate=1.0e-3,
# batch_size=64,
# buffer_size=128,
# )
# config = attr.evolve(
# PPO_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_network_settings,
# max_steps=5000,
# )
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_sac(use_discrete):
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
# config = attr.evolve(SAC_TORCH_CONFIG)
# check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_sac(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )
# new_hyperparams = attr.evolve(
# SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000
# )
# config = attr.evolve(
# SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
# )
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("num_visual", [1, 2])
#def test_visual_sac(num_visual, use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=use_discrete,
# num_visual=num_visual,
# num_vector=0,
# step_size=0.2,
# )
# new_hyperparams = attr.evolve(
# SAC_TORCH_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
# )
# config = attr.evolve(SAC_TORCH_CONFIG, hyperparameters=new_hyperparams)
# check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("num_visual", [1, 2])
#@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
#def test_visual_advanced_sac(vis_encode_type, num_visual):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=True,
# num_visual=num_visual,
# num_vector=0,
# step_size=0.5,
# vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
# )
# new_networksettings = attr.evolve(
# SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
# )
# new_hyperparams = attr.evolve(
# SAC_TORCH_CONFIG.hyperparameters,
# batch_size=16,
# learning_rate=3e-4,
# buffer_init_steps=0,
# )
# config = attr.evolve(
# SAC_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_networksettings,
# max_steps=100,
# )
# # The number of steps is pretty small for these encoders
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_recurrent_sac(use_discrete):
# step_size = 0.2 if use_discrete else 0.5
# env = MemoryEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
# )
# new_networksettings = attr.evolve(
# SAC_TORCH_CONFIG.network_settings,
# memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
# )
# new_hyperparams = attr.evolve(
# SAC_TORCH_CONFIG.hyperparameters,
# batch_size=256,
# learning_rate=1e-3,
# buffer_init_steps=1000,
# steps_per_update=2,
# )
# config = attr.evolve(
# SAC_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_networksettings,
# max_steps=2000,
# )
# check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ghost(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
# )
# self_play_settings = SelfPlaySettings(
# play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
# )
# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
# check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ghost_fails(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
# )
# # This config should fail because the ghosted policy is never swapped with a competent policy.
# # Swap occurs after max step is reached.
# self_play_settings = SelfPlaySettings(
# play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=4000
# )
# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=None)
# processed_rewards = [
# default_reward_processor(rewards) for rewards in env.final_rewards.values()
# ]
# success_threshold = 0.9
# assert any(reward > success_threshold for reward in processed_rewards) and any(
# reward < success_threshold for reward in processed_rewards
# )
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_asymm_ghost(use_discrete):
# # Make opponent for asymmetric case
# brain_name_opp = BRAIN_NAME + "Opp"
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
# )
# self_play_settings = SelfPlaySettings(
# play_against_latest_model_ratio=1.0,
# save_steps=10000,
# swap_steps=10000,
# team_change=400,
# )
# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=4000)
# check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_asymm_ghost_fails(use_discrete):
# # Make opponent for asymmetric case
# brain_name_opp = BRAIN_NAME + "Opp"
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
# )
# # This config should fail because the team that is not learning when both have reached
# # max step should be executing the initial, untrained policy.
# self_play_settings = SelfPlaySettings(
# play_against_latest_model_ratio=0.0,
# save_steps=5000,
# swap_steps=5000,
# team_change=2000,
# )
# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=3000)
# check_environment_trains(
# env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
# )
# processed_rewards = [
# default_reward_processor(rewards) for rewards in env.final_rewards.values()
# ]
# success_threshold = 0.9
# assert any(reward > success_threshold for reward in processed_rewards) and any(
# reward < success_threshold for reward in processed_rewards
# )
#
#
#@pytest.fixture(scope="session")
#def simple_record(tmpdir_factory):
# def record_demo(use_discrete, num_visual=0, num_vector=1):
# env = RecordEnvironment(
# [BRAIN_NAME],
# use_discrete=use_discrete,
# num_visual=num_visual,
# num_vector=num_vector,
# n_demos=100,
# )
# # If we want to use true demos, we can solve the env in the usual way
# # Otherwise, we can just call solve to execute the optimal policy
# env.solve()
# agent_info_protos = env.demonstration_protos[BRAIN_NAME]
# meta_data_proto = DemonstrationMetaProto()
# brain_param_proto = BrainParametersProto(
# vector_action_size=[2] if use_discrete else [1],
# vector_action_descriptions=[""],
# vector_action_space_type=discrete if use_discrete else continuous,
# brain_name=BRAIN_NAME,
# is_training=True,
# )
# action_type = "Discrete" if use_discrete else "Continuous"
# demo_path_name = "1DTest" + action_type + ".demo"
# demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
# write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
# return demo_path
#
# return record_demo
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("trainer_config", [PPO_TORCH_CONFIG, SAC_TORCH_CONFIG])
#def test_gail(simple_record, use_discrete, trainer_config):
# demo_path = simple_record(use_discrete)
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
# bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
# reward_signals = {
# RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
# }
# config = attr.evolve(
# trainer_config,
# reward_signals=reward_signals,
# behavioral_cloning=bc_settings,
# max_steps=500,
# )
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_gail_visual_ppo(simple_record, use_discrete):
# demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
# env = SimpleEnvironment(
# [BRAIN_NAME],
# num_visual=1,
# num_vector=0,
# use_discrete=use_discrete,
# step_size=0.2,
# )
# bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
# reward_signals = {
# RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
# }
# hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3e-4)
# config = attr.evolve(
# PPO_TORCH_CONFIG,
# reward_signals=reward_signals,
# hyperparameters=hyperparams,
# behavioral_cloning=bc_settings,
# max_steps=1000,
# )
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_gail_visual_sac(simple_record, use_discrete):
# demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
# env = SimpleEnvironment(
# [BRAIN_NAME],
# num_visual=1,
# num_vector=0,
# use_discrete=use_discrete,
# step_size=0.2,
# )
# bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
# reward_signals = {
# RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
# }
# hyperparams = attr.evolve(
# SAC_TORCH_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
# )
# config = attr.evolve(
# SAC_TORCH_CONFIG,
# reward_signals=reward_signals,
# hyperparameters=hyperparams,
# behavioral_cloning=bc_settings,
# max_steps=500,
# )
# check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

ml-agents/mlagents/trainers/torch/networks.py (10)


DistInstance,
)
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.utils import ModelUtils, AgentAction
from mlagents.trainers.torch.decoders import ValueHeads
from mlagents.trainers.torch.layers import LSTM, LinearEncoder
from mlagents.trainers.torch.model_serialization import exporting_to_onnx

def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
self.network_body.update_normalization(vector_obs)
def sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
def sample_action(self, dists: List[DistInstance]) -> AgentAction:
return actions
return AgentAction.create_agent_action(actions, self.action_spec)
def get_dists(
self,

"""
dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
if self.action_spec.is_continuous():
action_list = self.sample_action(dists)
action_out = torch.stack(action_list, dim=-1)
agent_action = self.sample_action(dists)
action_out = agent_action.flatten()#torch.stack(action_list, dim=-1)
else:
action_out = torch.cat([dist.all_log_prob() for dist in dists], dim=1)
return (

ml-agents/mlagents/trainers/torch/utils.py (147)


from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, NamedTuple, Dict
from mlagents.torch_utils import torch, nn
import numpy as np

from mlagents.trainers.torch.distributions import DistInstance, DiscreteDistInstance
class AgentAction(NamedTuple):
continuous: torch.Tensor
discrete: List[torch.Tensor]
def to_numpy_dict(self) -> Dict[str, np.ndarray]:
action_arrays_dict: Dict[str, np.ndarray] = {}
if self.continuous is not None:
action_arrays_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous)
if self.discrete is not None:
action_arrays_dict["discrete_action"] = ModelUtils.to_numpy(self.discrete)
return action_arrays_dict
def to_tensor_list(self) -> List[torch.Tensor]:
tensor_list: List[torch.Tensor] = []
if self.continuous is not None:
tensor_list.append(self.continuous)
if self.discrete is not None:
tensor_list += self.discrete
return tensor_list
def flatten(self) -> torch.Tensor:
return torch.stack(self.to_tensor_list(), dim=-1)
@staticmethod
def extract_agent_action(buff: Dict[str, np.ndarray]) -> "AgentAction":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
if "continuous_action" in buff:
continuous = ModelUtils.list_to_tensor(buff["continuous_action"])
if "discrete_action" in buff:
discrete = ModelUtils.list_to_tensor(buff["discrete_action"])
return AgentAction(continuous, discrete)
@staticmethod
def create_agent_action(action_tensors: List[torch.Tensor], action_spec: ActionSpec) -> "AgentAction":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
_offset = 0
if action_spec.continuous_size > 0:
continuous = action_tensors[0]
_offset = 1
if action_spec.discrete_size > 0:
discrete = action_tensors[_offset:]
return AgentAction(continuous, discrete)
class ActionLogProbs(NamedTuple):
continuous: torch.Tensor
discrete: List[torch.Tensor]
def to_numpy_dict(self) -> Dict[str, np.ndarray]:
log_prob_arrays_dict: Dict[str, np.ndarray] = {}
if self.continuous is not None:
log_prob_arrays_dict["continuous_log_probs"] = ModelUtils.to_numpy(self.continuous)
if self.discrete is not None:
log_prob_arrays_dict["discrete_log_probs"] = ModelUtils.to_numpy(self.discrete)
return log_prob_arrays_dict
def to_tensor_list(self) -> List[torch.Tensor]:
tensor_list: List[torch.Tensor] = []
if self.continuous is not None:
tensor_list.append(self.continuous)
if self.discrete is not None:
tensor_list += self.discrete
return tensor_list
def flatten(self) -> torch.Tensor:
return torch.stack(self.to_tensor_list(), dim=-1)
@staticmethod
def extract_action_log_probs(buff: Dict[str, np.ndarray]) -> "ActionLogProbs":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
if "continuous_action" in buff:
continuous = ModelUtils.list_to_tensor(buff["continuous_log_probs"])
if "discrete_action" in buff:
discrete = ModelUtils.list_to_tensor(buff["discrete_log_probs"])
return ActionLogProbs(continuous, discrete)
@staticmethod
def create_action_log_probs(log_prob_tensors: List[torch.Tensor], action_spec: ActionSpec) -> "ActionLogProbs":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
_offset = 0
if action_spec.continuous_size > 0:
continuous = log_prob_tensors[0]
_offset = 1
if action_spec.discrete_size > 0:
discrete = log_prob_tensors[_offset:]
return ActionLogProbs(continuous, discrete)
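
A small round-trip check of the AgentAction container added above (continuous-only, to keep shapes simple); it assumes this snippet runs with the new utils.py on the import path and that ModelUtils.to_numpy / list_to_tensor are thin numpy/torch converters:

from mlagents.torch_utils import torch
from mlagents.trainers.torch.utils import AgentAction  # class added in this commit

cont = torch.zeros((4, 2))
agent_action = AgentAction(continuous=cont, discrete=None)
as_numpy = agent_action.to_numpy_dict()                 # {"continuous_action": array of shape (4, 2)}
restored = AgentAction.extract_agent_action(as_numpy)
assert tuple(restored.continuous.shape) == (4, 2)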
class ModelUtils:
# Minimum supported side for each encoder type. If refactoring an encoder, please
# adjust these also.

)
@staticmethod
def to_action_buffers(actions: List[torch.Tensor], action_spec: ActionSpec) -> ActionBuffers:
def to_action_buffers(agent_actions: AgentAction, action_spec: ActionSpec) -> ActionBuffers:
continuous_action: np.ndarray = np.array([])
discrete_action_list: List[np.ndarray] = []
discrete_action: np.ndarray = np.array([])
# offset to index discrete actions depending on presence of continuous actions
_offset = 0
if action_spec.continuous_size > 0:
continuous_action = actions[0].detach().cpu().numpy()
_offset = 1
if action_spec.discrete_size > 0:
for _disc in range(action_spec.discrete_size):
discrete_action_list.append(actions[_disc + _offset].detach().cpu().numpy())
#print(discrete_action_list)
discrete_action = np.array(discrete_action_list)
return ActionBuffers(continuous_action, discrete_action)
return ActionBuffers(agent_actions.continuous.detach().cpu().numpy(), agent_actions.discrete.detach().cpu().numpy())
@staticmethod
def action_buffers_to_tensor_list(
action_buffers: ActionBuffers, action_spec: ActionSpec, dtype: Optional[torch.dtype] = None
) -> List[torch.Tensor]:
"""
Converts ActionBuffers fields into a List of tensors.
"""
#print(action_buffers)
action_tensors: List[torch.Tensor] = []
if action_spec.continuous_size > 0:
action_tensors.append(torch.as_tensor(np.asanyarray(action_buffers.continuous), dtype=dtype))
if action_spec.discrete_size > 0:
for _disc in action_buffers.discrete:
action_tensors.append(torch.as_tensor(np.asanyarray(_disc), dtype=dtype))
return action_tensors
#@staticmethod
#def action_buffers_to_agent_action(
# action_buffers: ActionBuffers, dtype: Optional[torch.dtype] = None
#) -> AgentAction:
# """
# Converts ActionBuffers fields into a AgentAction fields
# """
# return AgentAction(torch.as_tensor(np.asanyarray(action_buffers.continuous), dtype=dtype),
#torch.as_tensor(np.asanyarray(_disc), dtype=dtype))
@staticmethod
def list_to_tensor(

@staticmethod
def get_probs_and_entropy(
action_list: List[torch.Tensor], dists: List[DistInstance]
) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
agent_action: AgentAction, dists: List[DistInstance]
) -> Tuple[List[torch.Tensor], torch.Tensor, Optional[torch.Tensor]]:
action_list = agent_action.to_tensor_list()
for action, action_dist in zip(action_list, dists):
log_prob = action_dist.log_prob(action)
log_probs_list.append(log_prob)

log_probs = torch.stack(log_probs_list, dim=-1)
#log_probs = torch.stack(log_probs_list, dim=-1)
log_probs = log_probs.squeeze(-1)
# log_probs = log_probs.squeeze(-1)
return log_probs, entropies, all_probs
return log_probs_list, entropies, all_probs
@staticmethod
def masked_mean(tensor: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:

alpha=tau,
out=target_param.data,
)

ml-agents/mlagents/trainers/trajectory.py (23)


from typing import List, NamedTuple
from typing import List, NamedTuple, Dict
import numpy as np
from mlagents.trainers.buffer import AgentBuffer

obs: List[np.ndarray]
reward: float
done: bool
action: ActionBuffers
action_probs: np.ndarray
action: Dict[str, np.ndarray]
action_probs: Dict[str, np.ndarray]
prev_action: ActionBuffers
prev_action: Dict[str, np.ndarray]
interrupted: bool
memory: np.ndarray

actions_pre = exp.action_pre
agent_buffer_trajectory["actions_pre"].append(actions_pre)
# value is a dictionary from name of reward to value estimate of the value head
agent_buffer_trajectory["actions"].append(exp.action)
agent_buffer_trajectory["action_probs"].append(exp.action_probs)
# Adds the log prob and action of continuous/discrete separately
for act_type, act_array in exp.action.items():
agent_buffer_trajectory[act_type].append(act_array)
for log_type, log_array in exp.action_probs.items():
agent_buffer_trajectory[log_type].append(log_array)
# Store action masks if necessary. Note that 1 means active, while
# in AgentExperience False means active.

# This should never be needed unless the environment somehow doesn't supply the
# action mask in a discrete space.
agent_buffer_trajectory["action_mask"].append(
np.ones(exp.action_probs.shape, dtype=np.float32), padding_value=1
np.ones(exp.action_probs["continuous_log_probs"].shape, dtype=np.float32), padding_value=1
agent_buffer_trajectory["prev_action"].append(exp.prev_action)
#agent_buffer_trajectory["prev_action"].append(exp.prev_action)
for act_type, act_array in exp.prev_action.items():
agent_buffer_trajectory["prev_" + act_type].append(act_array)
agent_buffer_trajectory["environment_rewards"].append(exp.reward)
# Store the next visual obs as the current
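
To make the new buffer layout concrete: instead of single "actions" / "action_probs" entries, the trajectory now writes one buffer key per action type (plus "prev_" variants). A toy, continuous-only sketch with a defaultdict standing in for AgentBuffer:

from collections import defaultdict
import numpy as np

agent_buffer_trajectory = defaultdict(list)  # stand-in for the real AgentBuffer

exp_action = {"continuous_action": np.zeros(2, dtype=np.float32)}
exp_action_probs = {"continuous_log_probs": np.zeros(2, dtype=np.float32)}
exp_prev_action = {"continuous_action": np.zeros(2, dtype=np.float32)}

for act_type, act_array in exp_action.items():
    agent_buffer_trajectory[act_type].append(act_array)
for log_type, log_array in exp_action_probs.items():
    agent_buffer_trajectory[log_type].append(log_array)
for act_type, act_array in exp_prev_action.items():
    agent_buffer_trajectory["prev_" + act_type].append(act_array)

assert set(agent_buffer_trajectory) == {
    "continuous_action", "continuous_log_probs", "prev_continuous_action"
}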
