浏览代码

[feature] Fix TF tests, add --torch CLI option, allow run TF without torch installed (#4305)

/develop/add-fire
GitHub 4 年前
当前提交
93517833
共有 13 个文件被更改,包括 74 次插入24 次删除
  1. 2
      .circleci/config.yml
  2. 4
      experiment_torch.py
  3. 7
      ml-agents/mlagents/trainers/cli_utils.py
  4. 2
      ml-agents/mlagents/trainers/ppo/optimizer_tf.py
  5. 17
      ml-agents/mlagents/trainers/ppo/trainer.py
  6. 12
      ml-agents/mlagents/trainers/sac/trainer.py
  7. 14
      ml-agents/mlagents/trainers/settings.py
  8. 4
      ml-agents/mlagents/trainers/tests/test_ppo.py
  9. 4
      ml-agents/mlagents/trainers/tests/test_reward_signals.py
  10. 5
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  11. 3
      ml-agents/mlagents/trainers/tests/torch/test_networks.py
  12. 21
      ml-agents/mlagents/trainers/trainer/rl_trainer.py
  13. 3
      test_requirements.txt

2
.circleci/config.yml


. venv/bin/activate
mkdir test-reports
pip freeze > test-reports/pip_versions.txt
pytest -n 2 --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
- run:
name: Verify there are no hidden/missing metafiles.

4
experiment_torch.py


evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
else:
if algo == "ppo":
update_total = update["TFPPOOptimizer.update"]["total"]
update_count = update["TFPPOOptimizer.update"]["count"]
update_total = update["PPOOptimizer.update"]["total"]
update_count = update["PPOOptimizer.update"]["count"]
else:
update_total = update["SACTrainer._update_policy"]["total"]
update_count = update["SACTrainer._update_policy"]["count"]

7
ml-agents/mlagents/trainers/cli_utils.py


action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)
argparser.add_argument(
"--torch",
default=False,
action=DetectDefaultStoreTrue,
help="(Experimental) Use the PyTorch framework instead of TensorFlow. Install PyTorch "
"before using this option",
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(

2
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


from mlagents.trainers.settings import TrainerSettings, PPOSettings
class TFPPOOptimizer(TFOptimizer):
class PPOOptimizer(TFOptimizer):
def __init__(self, policy: TFPolicy, trainer_params: TrainerSettings):
"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.

17
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import (

FrameworkType,
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchPPOOptimizer = None # type: ignore
logger = get_logger(__name__)

)
self.load = load
self.seed = seed
self.framework = "torch" if TestingConfiguration.use_torch else "tf"
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self.policy: Policy = None # type: ignore

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TFPPOOptimizer( # type: ignore
self.optimizer = PPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
for _reward_signal in self.optimizer.reward_signals.keys():

12
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
from mlagents.trainers.settings import TrainerSettings, SACSettings
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchSACOptimizer = None # type: ignore
logger = get_logger(__name__)

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchSACOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore

14
ml-agents/mlagents/trainers/settings.py


return _mapping[self]
class FrameworkType(Enum):
TENSORFLOW: str = "tensorflow"
PYTORCH: str = "pytorch"
@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
trainer_type: TrainerType = TrainerType.PPO

threaded: bool = True
self_play: Optional[SelfPlaySettings] = None
behavioral_cloning: Optional[BehavioralCloningSettings] = None
framework: FrameworkType = FrameworkType.TENSORFLOW
cattr.register_structure_hook(
Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure

configured_dict["engine_settings"][key] = val
else: # Base options
configured_dict[key] = val
return RunOptions.from_dict(configured_dict)
# Apply --torch retroactively
final_runoptions = RunOptions.from_dict(configured_dict)
if "torch" in DetectDefault.non_default_args:
for trainer_set in final_runoptions.behaviors.values():
trainer_set.framework = FrameworkType.PYTORCH
return final_runoptions
@staticmethod
def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":

4
ml-agents/mlagents/trainers/tests/test_ppo.py


from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb

policy = TFPolicy(
0, mock_specs, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = TFPPOOptimizer(policy, trainer_settings)
optimizer = PPOOptimizer(policy, trainer_settings)
return optimizer

4
ml-agents/mlagents/trainers/tests/test_reward_signals.py


import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
from mlagents.trainers.settings import (
GAILSettings,

if trainer_settings.trainer_type == TrainerType.SAC:
optimizer = SACOptimizer(policy, trainer_settings)
else:
optimizer = TFPPOOptimizer(policy, trainer_settings)
optimizer = PPOOptimizer(policy, trainer_settings)
return optimizer

5
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


def add_policy(self, mock_behavior_id, mock_policy):
self.policies[mock_behavior_id] = mock_policy
def create_policy(self):
def create_tf_policy(self):
return mock.Mock()
def create_torch_policy(self):
return mock.Mock()
def _process_trajectory(self, trajectory):

3
ml-agents/mlagents/trainers/tests/torch/test_networks.py


def test_networkbody_lstm():
torch.manual_seed(0)
obs_size = 4
seq_len = 16
network_settings = NetworkSettings(

def test_networkbody_visual():
torch.manual_seed(0)
vec_obs_size = 4
obs_size = (84, 84, 3)
network_settings = NetworkSettings()

def test_valuenetwork():
torch.manual_seed(0)
obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()

21
ml-agents/mlagents/trainers/trainer/rl_trainer.py


from mlagents_envs.timers import hierarchical_timer
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.policy.policy import Policy
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.settings import TestingConfiguration
from mlagents.trainers.settings import TestingConfiguration, FrameworkType
from mlagents.trainers.exception import UnityTrainerException
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
RewardSignalResults = Dict[str, RewardSignalResult]

self._stats_reporter.add_property(
StatsPropertyType.HYPERPARAMETERS, self.trainer_settings.as_dict()
)
self.framework = "torch" if TestingConfiguration.use_torch else "tf"
self.framework = self.trainer_settings.framework
logger.debug(f"Using framework {self.framework.value}")
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self._next_save_step = 0

def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> Policy:
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH and TorchPolicy is None:
raise UnityTrainerException(
"To use the experimental PyTorch backend, install the PyTorch Python package first."
)
elif self.framework == FrameworkType.PYTORCH:
return self.create_torch_policy(parsed_behavior_id, behavior_spec)
else:
return self.create_tf_policy(parsed_behavior_id, behavior_spec)

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
policy = list(self.policies.values())[0]
settings = SerializationSettings(policy.model_path, self.brain_name)
model_checkpoint = self._checkpoint()

3
test_requirements.txt


pytest-cov==2.6.1
pytest-xdist
# PyTorch tests are here for the time being, before they are used in the codebase.
torch>=1.5.0
# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'
正在加载...
取消
保存