浏览代码

Yamato inference tests (#4066)

* better errors for missing constants

* run inference in yamato after training

* add extension

* debug subprocess args

* fix exe path

* search for executable

* fix dumb bug

* -batchmode

* fail if inference fails

* install tf2onnx on yamato

* allow onnx for overrides (expect to fail now)

* enable logs

* fix commandline arg

* catch exception from SetModel and exit

* cleanup error message

* model artifacts, logs as artifacts, fix pip

* don't run onnx

* cleanup and comment

* update extension handling
/MLA-1734-demo-provider
GitHub 4 年前
当前提交
7f0bb6bd
共有 6 个文件被更改,包括 166 次插入和 21 次删除
  1. 3
      .yamato/training-int-tests.yml
  2. 61
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
  3. 10
      com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
  4. 5
      com.unity.ml-agents/Runtime/Inference/TensorNames.cs
  5. 90
      ml-agents/tests/yamato/training_int_tests.py
  6. 18
      ml-agents/tests/yamato/yamato_utils.py

3
.yamato/training-int-tests.yml


logs:
paths:
- "artifacts/standalone_build.txt"
- "artifacts/inference.nn.txt"
- "artifacts/inference.onnx.txt"
- "artifacts/models/**"
{% endfor %}

61
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


/// Utility class to allow the NNModel file for an agent to be overridden during inference.
/// This is used internally to validate the file after training is done.
/// The behavior name to override and file path are specified on the commandline, e.g.
/// player.exe --mlagents-override-model behavior1 /path/to/model1.nn --mlagents-override-model behavior2 /path/to/model2.nn
/// player.exe --mlagents-override-model-directory /path/to/models
///
/// Additionally, a number of episodes to run can be specified; after this, the application will quit.
/// Note this will only work with example scenes that have 1:1 Agent:Behaviors. More complicated scenes like WallJump

{
HashSet<string> k_SupportedExtensions = new HashSet<string>{"nn", "onnx"};
const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";

Dictionary<string, string> m_BehaviorNameOverrides = new Dictionary<string, string>();
string m_BehaviorNameOverrideDirectory;
string m_OverrideExtension = "nn";
// Cached loaded NNModels, with the behavior name as the key.
Dictionary<string, NNModel> m_CachedModels = new Dictionary<string, NNModel>();

{
m_BehaviorNameOverrideDirectory = args[i + 1].Trim();
}
else if (args[i] == k_CommandLineModelOverrideExtensionFlag && i < args.Length-1)
{
m_OverrideExtension = args[i + 1].Trim().ToLower();
var isKnownExtension = k_SupportedExtensions.Contains(m_OverrideExtension);
// Not supported yet - need to update the model loading code to support
var isOnnx = m_OverrideExtension.Equals("onnx");
if (!isKnownExtension || isOnnx)
{
Debug.LogError($"loading unsupported format: {m_OverrideExtension}");
Application.Quit(1);
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
}
}
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length-1)
{
Int32.TryParse(args[i + 1], out maxEpisodes);

}
else if(!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
{
assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.nn");
assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.{m_OverrideExtension}");
}
if (string.IsNullOrEmpty(assetPath))

return null;
}
// Note - this approach doesn't work for onnx files. Need to replace with
// the equivalent of ONNXModelImporter.OnImportAsset()
var asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
asset.modelData.Value = model;

/// </summary>
void OverrideModel()
{
bool overrideOk = false;
string overrideError = null;
if (nnModel == null && m_QuitOnLoadFailure)
if (nnModel == null)
Debug.Log(
overrideError =
$"and that the model file exists"
);
$"and that the model file exists";
}
else
{
var modelName = nnModel != null ? nnModel.name : "<null>";
Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}");
try
{
m_Agent.SetModel(GetOverrideBehaviorName(behaviorName), nnModel);
overrideOk = true;
}
catch (Exception e)
{
overrideError = $"Exception calling Agent.SetModel: {e}";
}
}
if (!overrideOk && m_QuitOnLoadFailure)
{
if(!string.IsNullOrEmpty(overrideError))
{
Debug.LogWarning(overrideError);
}
var modelName = nnModel != null ? nnModel.name : "<null>";
Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}");
// This might give a null model; that's better because we'll fall back to the Heuristic
m_Agent.SetModel(GetOverrideBehaviorName(behaviorName), nnModel);
}
}

10
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


return failedModelChecks;
}
foreach (var constantName in TensorNames.RequiredConstants)
{
var tensor = model.GetTensorByName(constantName);
if (tensor == null)
{
failedModelChecks.Add($"Required constant \"{constantName}\" was not found in the model file.");
return failedModelChecks;
}
}
var modelApiVersion = (int)model.GetTensorByName(TensorNames.VersionNumber)[0];
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0];

5
com.unity.ml-agents/Runtime/Inference/TensorNames.cs


public const string IsContinuousControl = "is_continuous_control";
public const string ActionOutputShape = "action_output_shape";
public const string ActionOutput = "action";
public static readonly string[] RequiredConstants =
{
VersionNumber, MemorySize, IsContinuousControl, ActionOutputShape
};
}
}

90
ml-agents/tests/yamato/training_int_tests.py


import argparse
import os
import shutil
from typing import Any
find_executables,
get_base_path,
get_base_output_path,
run_standalone_build,

)
def run_training(python_version, csharp_version):
def run_training(python_version: str, csharp_version: str) -> bool:
latest = "latest"
run_id = int(time.time() * 1000.0)
print(

nn_file_expected = f"./{output_dir}/{run_id}/3DBall.nn"
onnx_file_expected = f"./{output_dir}/{run_id}/3DBall.onnx"
frozen_graph_file_expected = f"./{output_dir}/{run_id}/3DBall/frozen_graph_def.pb"
sys.exit(1)
return False
base_path = get_base_path()
print(f"Running in base path {base_path}")

build_returncode = run_standalone_build(base_path)
if build_returncode != 0:
print("Standalone build FAILED!")
sys.exit(build_returncode)
print(f"Standalone build FAILED! with return code {build_returncode}")
return False
# Now rename the newly-built executable, and restore the old one
os.rename(full_player_path, final_player_path)

# and reduce the batch_size and buffer_size enough to ensure an update step happens.
yaml_out = "override.yaml"
if python_version:
overrides = {"max_steps": 100, "batch_size": 10, "buffer_size": 10}
overrides: Any = {"max_steps": 100, "batch_size": 10, "buffer_size": 10}
override_legacy_config_file(
python_version, "config/trainer_config.yaml", yaml_out, **overrides
)

}
override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides)
env_path = os.path.join(get_base_output_path(), standalone_player_path + ".app")
f"mlagents-learn {yaml_out} --force --env="
f"{os.path.join(get_base_output_path(), standalone_player_path)} "
f"mlagents-learn {yaml_out} --force --env={env_path} "
f"--run-id={run_id} --no-graphics --env-args -logFile -"
) # noqa
res = subprocess.run(

if res.returncode != 0 or not os.path.exists(nn_file_expected):
# Save models as artifacts (only if we're using latest python and C#)
if csharp_version is None and python_version is None:
model_artifacts_dir = os.path.join(get_base_output_path(), "models")
os.makedirs(model_artifacts_dir, exist_ok=True)
shutil.copy(nn_file_expected, model_artifacts_dir)
shutil.copy(onnx_file_expected, model_artifacts_dir)
shutil.copy(frozen_graph_file_expected, model_artifacts_dir)
if (
res.returncode != 0
or not os.path.exists(nn_file_expected)
or not os.path.exists(onnx_file_expected)
):
sys.exit(1)
return False
if csharp_version is None and python_version is None:
# Use abs path so that loading doesn't get confused
model_path = os.path.abspath(os.path.dirname(nn_file_expected))
# Onnx loading for overrides not currently supported, but this is
# where to add it in when it is.
for extension in ["nn"]:
inference_ok = run_inference(env_path, model_path, extension)
if not inference_ok:
return False
sys.exit(0)
return True
def run_inference(env_path: str, output_path: str, model_extension: str) -> bool:
    """
    Launch the standalone player in inference mode against previously trained models.

    :param env_path: Directory containing the standalone player build.
    :param output_path: Directory containing the trained model files to override with.
    :param model_extension: Model file extension to load (e.g. "nn").
    :return: True if exactly one player executable was found and it exited cleanly,
        False otherwise.
    """
    start_time = time.time()
    exes = find_executables(env_path)
    # The build should produce exactly one launchable binary; anything else is ambiguous.
    if len(exes) != 1:
        print(f"Can't determine the player executable in {env_path}. Found {exes}.")
        return False
    log_output_path = f"{get_base_output_path()}/inference.{model_extension}.txt"
    exe_path = exes[0]
    args = [
        exe_path,
        "-nographics",
        "-batchmode",
        "-logfile",
        log_output_path,
        "--mlagents-override-model-directory",
        output_path,
        "--mlagents-quit-on-load-failure",
        "--mlagents-quit-after-episodes",
        "1",
        "--mlagents-override-model-extension",
        model_extension,
    ]
    res = subprocess.run(args)
    end_time = time.time()
    if res.returncode != 0:
        print("Error running inference!")
        print("Command line: " + " ".join(args))
        # Dump the player log so the failure cause shows up in CI output.
        # Read the file directly instead of shelling out to "cat" so this
        # also works on Windows and doesn't silently no-op if cat is missing.
        try:
            with open(log_output_path) as log_file:
                print(log_file.read())
        except OSError:
            print(f"Couldn't read log file {log_output_path}")
        return False
    else:
        print(f"Inference succeeded! Took {end_time - start_time} seconds")
        return True
def main():

args = parser.parse_args()
try:
run_training(args.python, args.csharp)
ok = run_training(args.python, args.csharp)
if not ok:
sys.exit(1)
finally:
# Cleanup - this gets executed even if we hit sys.exit()
undo_git_checkout()

18
ml-agents/tests/yamato/yamato_utils.py


return res.returncode
def find_executables(root_dir: str) -> List[str]:
    """
    Try to find the player executable. This seems to vary between Unity versions.

    :param root_dir: Directory tree to search (e.g. the built player directory).
    :return: Paths of all files under root_dir that have the executable bit set
        and whose extension is not a known library/plugin extension.
    """
    # Shared libraries and plugin bundles carry the executable bit on disk but
    # are not the player binary we want to launch.
    ignored_extension = frozenset([".dll", ".dylib", ".bundle"])
    exes = []
    for root, _, files in os.walk(root_dir):
        for filename in files:
            # Only the extension matters for filtering; the basename is unused.
            _, ext = os.path.splitext(filename)
            if ext in ignored_extension:
                continue
            file_path = os.path.join(root, filename)
            # os.X_OK checks the executable permission for the current user.
            if os.access(file_path, os.X_OK):
                exes.append(file_path)
    return exes
def init_venv(
mlagents_python_version: str = None, extra_packages: Optional[List[str]] = None
) -> str:

"--upgrade setuptools",
# TODO build these and publish to internal pypi
"~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl",
"tf2onnx==1.6.1",
]
if mlagents_python_version:
# install from pypi

正在加载...
取消
保存