Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 

321 行
12 KiB

# # Unity ML-Agents Toolkit
import yaml
import os
import numpy as np
import json
from typing import Callable, Optional, List, Dict
import mlagents.trainers
import mlagents_envs
from mlagents import tf_utils
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
from mlagents.trainers.stats import (
TensorboardWriter,
CSVWriter,
StatsReporter,
GaugeWriter,
ConsoleWriter,
)
from mlagents.trainers.cli_utils import parser
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.settings import RunOptions
from mlagents.trainers.training_status import GlobalTrainingStatus
from mlagents_envs.base_env import BaseEnv
from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.timers import (
hierarchical_timer,
get_timer_tree,
add_metadata as add_timer_metadata,
)
from mlagents_envs import logging_util
logger = logging_util.get_logger(__name__)
TRAINING_STATUS_FILE_NAME = "training_status.json"
def get_version_string() -> str:
# pylint: disable=no-member
return f""" Version information:
ml-agents: {mlagents.trainers.__version__},
ml-agents-envs: {mlagents_envs.__version__},
Communicator API: {UnityEnvironment.API_VERSION},
TensorFlow: {tf_utils.tf.__version__}"""
def parse_command_line(argv: Optional[List[str]] = None) -> RunOptions:
args = parser.parse_args(argv)
return RunOptions.from_argparse(args)
def run_training(run_seed: int, options: RunOptions) -> None:
"""
Launches training session.
:param options: parsed command line arguments
:param run_seed: Random seed used for training.
:param run_options: Command line arguments for training.
"""
with hierarchical_timer("run_training.setup"):
checkpoint_settings = options.checkpoint_settings
env_settings = options.env_settings
engine_settings = options.engine_settings
base_path = "results"
write_path = os.path.join(base_path, checkpoint_settings.run_id)
maybe_init_path = (
os.path.join(base_path, checkpoint_settings.initialize_from)
if checkpoint_settings.initialize_from
else None
)
run_logs_dir = os.path.join(write_path, "run_logs")
port: Optional[int] = env_settings.base_port
# Check if directory exists
handle_existing_directories(
write_path,
checkpoint_settings.resume,
checkpoint_settings.force,
maybe_init_path,
)
# Make run logs directory
os.makedirs(run_logs_dir, exist_ok=True)
# Load any needed states
if checkpoint_settings.resume:
GlobalTrainingStatus.load_state(
os.path.join(run_logs_dir, "training_status.json")
)
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
write_path,
required_fields=[
"Environment/Cumulative Reward",
"Environment/Episode Length",
],
)
tb_writer = TensorboardWriter(
write_path, clear_past_data=not checkpoint_settings.resume
)
gauge_write = GaugeWriter()
console_writer = ConsoleWriter()
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(csv_writer)
StatsReporter.add_writer(gauge_write)
StatsReporter.add_writer(console_writer)
if env_settings.env_path is None:
port = None
env_factory = create_environment_factory(
env_settings.env_path,
engine_settings.no_graphics,
run_seed,
port,
env_settings.env_args,
os.path.abspath(run_logs_dir), # Unity environment requires absolute path
)
engine_config = EngineConfig(
width=engine_settings.width,
height=engine_settings.height,
quality_level=engine_settings.quality_level,
time_scale=engine_settings.time_scale,
target_frame_rate=engine_settings.target_frame_rate,
capture_frame_rate=engine_settings.capture_frame_rate,
)
env_manager = SubprocessEnvManager(
env_factory, engine_config, env_settings.num_envs
)
maybe_meta_curriculum = try_create_meta_curriculum(
options.curriculum, env_manager, restore=checkpoint_settings.resume
)
maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
trainer_factory = TrainerFactory(
options.behaviors,
write_path,
not checkpoint_settings.inference,
checkpoint_settings.resume,
run_seed,
maybe_init_path,
maybe_meta_curriculum,
False,
)
# Create controller and begin training.
tc = TrainerController(
trainer_factory,
write_path,
checkpoint_settings.run_id,
maybe_meta_curriculum,
not checkpoint_settings.inference,
run_seed,
)
# Begin training
try:
tc.start_learning(env_manager)
finally:
env_manager.close()
write_run_options(write_path, options)
write_timing_tree(run_logs_dir)
write_training_status(run_logs_dir)
def write_run_options(output_dir: str, run_options: RunOptions) -> None:
run_options_path = os.path.join(output_dir, "configuration.yaml")
try:
with open(run_options_path, "w") as f:
try:
yaml.dump(run_options.as_dict(), f, sort_keys=False)
except TypeError: # Older versions of pyyaml don't support sort_keys
yaml.dump(run_options.as_dict(), f)
except FileNotFoundError:
logger.warning(
f"Unable to save configuration to {run_options_path}. Make sure the directory exists"
)
def write_training_status(output_dir: str) -> None:
GlobalTrainingStatus.save_state(os.path.join(output_dir, TRAINING_STATUS_FILE_NAME))
def write_timing_tree(output_dir: str) -> None:
timing_path = os.path.join(output_dir, "timers.json")
try:
with open(timing_path, "w") as f:
json.dump(get_timer_tree(), f, indent=4)
except FileNotFoundError:
logger.warning(
f"Unable to save to {timing_path}. Make sure the directory exists"
)
def maybe_add_samplers(
sampler_config: Optional[Dict], env: SubprocessEnvManager, run_seed: int
) -> None:
"""
Adds samplers to env if sampler config provided and sets seed if not configured.
:param sampler_config: validated dict of sampler configs. None if not included.
:param env: env manager to pass samplers via reset
:param run_seed: Random seed used for training.
"""
if sampler_config is not None:
# If the seed is not specified in yaml, this will grab the run seed
for offset, v in enumerate(sampler_config.values()):
if v.seed == -1:
v.seed = run_seed + offset
env.reset(config=sampler_config)
def try_create_meta_curriculum(
curriculum_config: Optional[Dict], env: SubprocessEnvManager, restore: bool = False
) -> Optional[MetaCurriculum]:
if curriculum_config is None or len(curriculum_config) <= 0:
return None
else:
meta_curriculum = MetaCurriculum(curriculum_config)
if restore:
meta_curriculum.try_restore_all_curriculum()
return meta_curriculum
def create_environment_factory(
env_path: Optional[str],
no_graphics: bool,
seed: int,
start_port: Optional[int],
env_args: Optional[List[str]],
log_folder: str,
) -> Callable[[int, List[SideChannel]], BaseEnv]:
def create_unity_environment(
worker_id: int, side_channels: List[SideChannel]
) -> UnityEnvironment:
# Make sure that each environment gets a different seed
env_seed = seed + worker_id
return UnityEnvironment(
file_name=env_path,
worker_id=worker_id,
seed=env_seed,
no_graphics=no_graphics,
base_port=start_port,
additional_args=env_args,
side_channels=side_channels,
log_folder=log_folder,
)
return create_unity_environment
def run_cli(options: RunOptions) -> None:
try:
print(
"""
▄▄▄▓▓▓▓
╓▓▓▓▓▓▓█▓▓▓▓▓
,▄▄▄m▀▀▀' ,▓▓▓▀▓▓▄ ▓▓▓ ▓▓▌
▄▓▓▓▀' ▄▓▓▀ ▓▓▓ ▄▄ ▄▄ ,▄▄ ▄▄▄▄ ,▄▄ ▄▓▓▌▄ ▄▄▄ ,▄▄
▄▓▓▓▀ ▄▓▓▀ ▐▓▓▌ ▓▓▌ ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌ ╒▓▓▌
▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓ ▓▀ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▄ ▓▓▌
▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄ ▓▓ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▐▓▓
^█▓▓▓ ▀▓▓▄ ▐▓▓▌ ▓▓▓▓▄▓▓▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▓▄ ▓▓▓▓`
'▀▓▓▓▄ ^▓▓▓ ▓▓▓ └▀▀▀▀ ▀▀ ^▀▀ `▀▀ `▀▀ '▀▀ ▐▓▓▌
▀▀▀▀▓▄▄▄ ▓▓▓▓▓▓, ▓▓▓▓▀
`▀█▓▓▓▓▓▓▓▓▓▌
¬`▀▀▀█▓
"""
)
except Exception:
print("\n\n\tUnity Technologies\n")
print(get_version_string())
if options.debug:
log_level = logging_util.DEBUG
else:
log_level = logging_util.INFO
# disable noisy warnings from tensorflow
tf_utils.set_warnings_enabled(False)
logging_util.set_log_level(log_level)
logger.debug("Configuration for this run:")
logger.debug(json.dumps(options.as_dict(), indent=4))
# Options deprecation warnings
if options.checkpoint_settings.load_model:
logger.warning(
"The --load option has been deprecated. Please use the --resume option instead."
)
if options.checkpoint_settings.train_model:
logger.warning(
"The --train option has been deprecated. Train mode is now the default. Use "
"--inference to run in inference mode."
)
run_seed = options.env_settings.seed
# Add some timer metadata
add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)
if options.env_settings.seed == -1:
run_seed = np.random.randint(0, 10000)
run_training(run_seed, options)
# if options.behaviors["CrawlerStatic"].transfer:
# os.system('export SCENE_NAME=crawlerstatictarget')
# os.system('mlagents-learn config/ppo_transfer/CrawlerStatic.yaml --run-id=cs-transfer --env=/unity-volume/executable --num-envs=4 --force')
# os.system('mlagents-learn config/ppo_transfer/3DBallHard.yaml --run-id=hardball-transfer --env=/unity-volume/3dballhard --num-envs=4 --force')
def main():
run_cli(parse_command_line())
# For python debugger to directly run this script
if __name__ == "__main__":
main()