
Merge branch 'master' into develop-adjust-cpu-settings

Branch: develop/jit/experiments
Ervin Teng, 4 years ago
Commit 60eacc0d
9 files changed, 146 insertions(+), 73 deletions(-)
1. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/CameraFollow.cs (18 lines changed)
2. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (42 lines changed)
3. com.unity.ml-agents/CHANGELOG.md (4 lines changed)
4. com.unity.ml-agents/package.json (2 lines changed)
5. ml-agents-envs/mlagents_envs/rpc_utils.py (47 lines changed)
6. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (2 lines changed)
7. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (99 lines changed)
8. ml-agents/mlagents/trainers/torch/networks.py (1 line changed)
9. ml-agents/tests/yamato/training_int_tests.py (4 lines changed)

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/CameraFollow.cs (18 lines changed)

 public class CameraFollow : MonoBehaviour
 {
-    public Transform target;
-    Vector3 m_Offset;
+    [Tooltip("The target to follow")] public Transform target;
+    [Tooltip("The time it takes to move to the new position")]
+    public float smoothingTime; // The time it takes to move to the new position
+    private Vector3 m_Offset;
+    private Vector3 m_CamVelocity; // Camera's velocity (used by SmoothDamp)

     // Use this for initialization
     void Start()
     ...
-    // Update is called once per frame
-    void Update()
+    void FixedUpdate()
     ...
-        // gameObject.transform.position = target.position + offset;
-        gameObject.transform.position = newPosition;
+        gameObject.transform.position =
+            Vector3.SmoothDamp(transform.position, newPosition, ref m_CamVelocity, smoothingTime, Mathf.Infinity,
+                Time.fixedDeltaTime);
     }
 }
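The change above replaces per-frame snapping in Update() with Vector3.SmoothDamp in FixedUpdate(), which moves the camera along a critically damped spring toward the target instead of teleporting it. For readers without Unity at hand, here is a rough per-component Python sketch of SmoothDamp-style smoothing (the constants follow the well-known Game Programming Gems approximation; this is an illustrative sketch, not code from the patch):

def smooth_damp(current, target, velocity, smooth_time, dt):
    """Critically damped spring step toward `target` (one float component).

    Mirrors the semantics of Unity's SmoothDamp: `velocity` is carried
    between calls, and `smooth_time` is roughly the time to converge.
    Returns (new_position, new_velocity).
    """
    smooth_time = max(1e-4, smooth_time)
    omega = 2.0 / smooth_time
    x = omega * dt
    # Polynomial approximation of exp(-x), stable for typical frame times.
    decay = 1.0 / (1.0 + x + 0.48 * x * x + 0.235 * x * x * x)
    change = current - target
    temp = (velocity + omega * change) * dt
    new_velocity = (velocity - omega * temp) * decay
    new_position = target + (change + temp) * decay
    # Clamp overshoot so the camera never passes the target.
    if (target - current > 0.0) == (new_position > target):
        new_position = target
        new_velocity = (new_position - target) / dt
    return new_position, new_velocity

# Called once per fixed timestep, like FixedUpdate() above:
pos, vel = 0.0, 0.0
for _ in range(120):
    pos, vel = smooth_damp(pos, 5.0, vel, smooth_time=0.5, dt=0.02)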

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (42 lines changed)

 using UnityEngine;
 using Unity.Barracuda;
 using System.IO;
+using Unity.Barracuda.ONNX;
 using Unity.MLAgents;
 using Unity.MLAgents.Policies;
 #if UNITY_EDITOR
 ...
     m_OverrideExtension = args[i + 1].Trim().ToLower();
     var isKnownExtension = k_SupportedExtensions.Contains(m_OverrideExtension);
-    // Not supported yet - need to update the model loading code to support
-    var isOnnx = m_OverrideExtension.Equals("onnx");
-    if (!isKnownExtension || isOnnx)
+    if (!isKnownExtension)
     {
         Debug.LogError($"loading unsupported format: {m_OverrideExtension}");
         Application.Quit(1);
         return null;
     }
 ...
-    byte[] model = null;
+    byte[] rawModel = null;
     try
     {
-        model = File.ReadAllBytes(assetPath);
+        rawModel = File.ReadAllBytes(assetPath);
     }
     catch (IOException)
     {
         return null;
     }

-    // Note - this approach doesn't work for onnx files. Need to replace with
-    // the equivalent of ONNXModelImporter.OnImportAsset()
-    var asset = ScriptableObject.CreateInstance<NNModel>();
-    asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
-    asset.modelData.Value = model;
+    NNModel asset;
+    var isOnnx = m_OverrideExtension.Equals("onnx");
+    if (isOnnx)
+    {
+        var converter = new ONNXModelConverter(true);
+        var onnxModel = converter.Convert(rawModel);
+
+        NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
+        using (var memoryStream = new MemoryStream())
+        using (var writer = new BinaryWriter(memoryStream))
+        {
+            ModelWriter.Save(writer, onnxModel);
+            assetData.Value = memoryStream.ToArray();
+        }
+        assetData.name = "Data";
+        assetData.hideFlags = HideFlags.HideInHierarchy;
+
+        asset = ScriptableObject.CreateInstance<NNModel>();
+        asset.modelData = assetData;
+    }
+    else
+    {
+        // Note - this approach doesn't work for onnx files. Need to replace with
+        // the equivalent of ONNXModelImporter.OnImportAsset()
+        asset = ScriptableObject.CreateInstance<NNModel>();
+        asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
+        asset.modelData.Value = rawModel;
+    }
     asset.name = "Override - " + Path.GetFileName(assetPath);
     m_CachedModels[behaviorName] = asset;

com.unity.ml-agents/CHANGELOG.md (4 lines changed)

 ### Minor Changes
 #### com.unity.ml-agents (C#)
-- Update Barracuda to 1.0.2.
-- Enabled C# formatting using `dotnet-format`.
+- Update Barracuda to 1.1.0-preview (#4208)
+- Enabled C# formatting using `dotnet-format`. (#4362)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - Experimental PyTorch support has been added. Use `--torch` when running `mlagents-learn`, or add
   `framework: pytorch` to your trainer configuration (under the behavior name) to enable it.

com.unity.ml-agents/package.json (2 lines changed)

"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.0.2",
"com.unity.barracuda": "1.1.0-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

ml-agents-envs/mlagents_envs/rpc_utils.py (47 lines changed)

 return BehaviorSpec(observation_shape, action_type, action_shape)

+class OffsetBytesIO:
+    """
+    Simple file-like class that wraps a bytes object, and allows moving its "start"
+    position in the bytes. This is only used for reading concatenated PNGs,
+    because Pillow always calls seek(0) at the start of reading.
+    """
+
+    __slots__ = ["fp", "offset"]
+
+    def __init__(self, data: bytes):
+        self.fp = io.BytesIO(data)
+        self.offset = 0
+
+    def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
+        if whence == io.SEEK_SET:
+            res = self.fp.seek(offset + self.offset)
+            return res - self.offset
+        raise NotImplementedError()
+
+    def tell(self) -> int:
+        return self.fp.tell() - self.offset
+
+    def read(self, size: int = -1) -> bytes:
+        return self.fp.read(size)
+
+    def original_tell(self) -> int:
+        """
+        Returns the offset into the original byte array
+        """
+        return self.fp.tell()

 @timed
 def process_pixels(image_bytes: bytes, expected_channels: int) -> np.ndarray:
     """
     ...
     :param expected_channels: Expected output channels
     :return: processed numpy array of observation from environment
     """
-    image_bytearray = bytearray(image_bytes)
-    image = Image.open(io.BytesIO(image_bytearray))
+    image_fp = OffsetBytesIO(image_bytes)
+    image = Image.open(image_fp)
     # Normally Image loads lazily, load() forces it to do loading in the timer scope.
     image.load()
     s = np.array(image, dtype=np.float32) / 255.0
     ...
     image_arrays = []
-    bytes_read = 0
     while True:
-        # TODO avoid creating a new array here. Unfortunately, Pillow doesn't respect the current state of the buffer
-        # and always starts with seek(0), but we should be able to wrap BytesIO with something that lets us adjust
-        # the "start" offset.
-        buffer = io.BytesIO(image_bytearray[bytes_read:])
-        image = Image.open(buffer)
+        image = Image.open(image_fp)
         ...
         try:
-            offset = buffer.getvalue().index(PNG_HEADER, buffer.tell())
-            bytes_read += offset
+            new_offset = image_bytes.index(PNG_HEADER, image_fp.original_tell())
+            image_fp.offset = new_offset
         except ValueError:
             # Didn't find the header, so must be at the end.
             break
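OffsetBytesIO exists because Pillow unconditionally seeks to 0 when opening an image, which breaks reading several PNGs concatenated in one buffer; moving `offset` makes position 0 point at the next image. A standalone sketch of the read loop the new process_pixels code follows (the wrapper mirrors the class in the diff; the generator around it is illustrative, not part of the patch):

import io
import numpy as np
from PIL import Image

PNG_HEADER = b"\x89PNG\r\n\x1a\n"


class OffsetBytesIO:
    """File-like wrapper whose seek(0) maps to a movable start offset."""

    def __init__(self, data: bytes):
        self.fp = io.BytesIO(data)
        self.offset = 0

    def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
        if whence == io.SEEK_SET:
            return self.fp.seek(offset + self.offset) - self.offset
        raise NotImplementedError()

    def tell(self) -> int:
        return self.fp.tell() - self.offset

    def read(self, size: int = -1) -> bytes:
        return self.fp.read(size)

    def original_tell(self) -> int:
        return self.fp.tell()


def read_concatenated_pngs(data: bytes):
    """Yield one float32 array in [0, 1] per PNG in a concatenated buffer."""
    fp = OffsetBytesIO(data)
    while True:
        image = Image.open(fp)
        image.load()  # Image.open is lazy; force decoding here
        yield np.array(image, dtype=np.float32) / 255.0
        try:
            # Move the wrapper's origin to the next PNG signature, so that
            # Pillow's unconditional seek(0) lands on the next image.
            fp.offset = data.index(PNG_HEADER, fp.original_tell())
        except ValueError:
            break  # no further signature: the last image has been read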

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (2 lines changed)

     RewardSignalType.GAIL, behavior_spec, gail_settings
 )
-for _ in range(100):
+for _ in range(200):
     gail_rp.update(buffer_policy)
 reward_expert = gail_rp.evaluate(buffer_expert)[0]
 reward_policy = gail_rp.evaluate(buffer_policy)[0]

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (99 lines changed)

-from typing import Optional, Dict
+from typing import Optional, Dict, List, Tuple
 import numpy as np
 from mlagents.torch_utils import torch, default_device

 from mlagents_envs.base_env import BehaviorSpec
+from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.networks import NetworkBody
-from mlagents.trainers.torch.layers import linear_layer, Swish, Initialization
+from mlagents.trainers.torch.layers import linear_layer, Initialization
 from mlagents.trainers.settings import NetworkSettings, EncoderType
 from mlagents.trainers.demo_loader import demo_to_buffer

 self._use_vail = settings.use_vail
 self._settings = settings

-state_encoder_settings = NetworkSettings(
+encoder_settings = NetworkSettings(
     normalize=False,
     hidden_units=settings.encoding_size,
     num_layers=2,
 ...
-self._state_encoder = NetworkBody(
-    specs.observation_shapes, state_encoder_settings
-)
-encoder_input_size = settings.encoding_size
-if settings.use_actions:
-    encoder_input_size += (
-        self._action_flattener.flattened_size + 1
-    )  # + 1 is for done
-self.encoder = torch.nn.Sequential(
-    linear_layer(encoder_input_size, settings.encoding_size),
-    Swish(),
-    linear_layer(settings.encoding_size, settings.encoding_size),
-    Swish(),
-)
+unencoded_size = (
+    self._action_flattener.flattened_size + 1 if settings.use_actions else 0
+)  # +1 is for dones
+self.encoder = NetworkBody(
+    specs.observation_shapes, encoder_settings, unencoded_size
+)
 estimator_input_size = settings.encoding_size

         torch.as_tensor(mini_batch["actions"], dtype=torch.float)
     )

-    def get_state_encoding(self, mini_batch: AgentBuffer) -> torch.Tensor:
-        n_vis = len(self._state_encoder.visual_processors)
-        hidden, _ = self._state_encoder.forward(
-            vec_inputs=[torch.as_tensor(mini_batch["vector_obs"], dtype=torch.float)],
-            vis_inputs=[
-                torch.as_tensor(mini_batch["visual_obs%d" % i], dtype=torch.float)
-                for i in range(n_vis)
-            ],
-        )
-        return hidden
+    def get_state_inputs(
+        self, mini_batch: AgentBuffer
+    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
+        n_vis = len(self.encoder.visual_processors)
+        n_vec = len(self.encoder.vector_processors)
+        vec_inputs = (
+            [ModelUtils.list_to_tensor(mini_batch["vector_obs"], dtype=torch.float)]
+            if n_vec > 0
+            else []
+        )
+        vis_inputs = [
+            ModelUtils.list_to_tensor(mini_batch["visual_obs%d" % i], dtype=torch.float)
+            for i in range(n_vis)
+        ]
+        return vec_inputs, vis_inputs
     def compute_estimate(
         self, mini_batch: AgentBuffer, use_vail_noise: bool = False
     ...
         :param use_vail_noise: Only when using VAIL : If true, will sample the code, if
             false, will return the mean of the code.
         """
-        encoder_input = self.get_state_encoding(mini_batch)
+        vec_inputs, vis_inputs = self.get_state_inputs(mini_batch)
         if self._settings.use_actions:
             ...
-            encoder_input = torch.cat([encoder_input, actions, dones], dim=1)
-        hidden = self.encoder(encoder_input)
+            action_inputs = torch.cat([actions, dones], dim=1)
+            hidden, _ = self.encoder(vec_inputs, vis_inputs, action_inputs)
+        else:
+            hidden, _ = self.encoder(vec_inputs, vis_inputs)
         z_mu: Optional[torch.Tensor] = None
         if self._settings.use_vail:
             z_mu = self._z_mu_layer(hidden)

     ...
     Gradient penalty from https://arxiv.org/pdf/1704.00028. Adds stability esp.
     for off-policy. Compute gradients w.r.t randomly interpolated input.
     """
-    policy_obs = self.get_state_encoding(policy_batch)
-    expert_obs = self.get_state_encoding(expert_batch)
-    obs_epsilon = torch.rand(policy_obs.shape)
-    encoder_input = obs_epsilon * policy_obs + (1 - obs_epsilon) * expert_obs
+    policy_vec_inputs, policy_vis_inputs = self.get_state_inputs(policy_batch)
+    expert_vec_inputs, expert_vis_inputs = self.get_state_inputs(expert_batch)
+    interp_vec_inputs = []
+    for policy_vec_input, expert_vec_input in zip(
+        policy_vec_inputs, expert_vec_inputs
+    ):
+        obs_epsilon = torch.rand(policy_vec_input.shape)
+        interp_vec_input = (
+            obs_epsilon * policy_vec_input + (1 - obs_epsilon) * expert_vec_input
+        )
+        interp_vec_input.requires_grad = True  # For gradient calculation
+        interp_vec_inputs.append(interp_vec_input)
+    interp_vis_inputs = []
+    for policy_vis_input, expert_vis_input in zip(
+        policy_vis_inputs, expert_vis_inputs
+    ):
+        obs_epsilon = torch.rand(policy_vis_input.shape)
+        interp_vis_input = (
+            obs_epsilon * policy_vis_input + (1 - obs_epsilon) * expert_vis_input
+        )
+        interp_vis_input.requires_grad = True  # For gradient calculation
+        interp_vis_inputs.append(interp_vis_input)
     if self._settings.use_actions:
         policy_action = self.get_action_input(policy_batch)
         expert_action = self.get_action_input(expert_batch)
         ...
         expert_dones = torch.as_tensor(
             expert_batch["done"], dtype=torch.float
         ).unsqueeze(1)
         dones_epsilon = torch.rand(policy_dones.shape)
-        encoder_input = torch.cat(
+        action_inputs = torch.cat(
             [
-                encoder_input,
                 action_epsilon * policy_action
                 + (1 - action_epsilon) * expert_action,
                 dones_epsilon * policy_dones + (1 - dones_epsilon) * expert_dones,
             ],
             dim=1,
         )
-    hidden = self.encoder(encoder_input)
+        action_inputs.requires_grad = True
+        hidden, _ = self.encoder(
+            interp_vec_inputs, interp_vis_inputs, action_inputs
+        )
+        encoder_input = tuple(
+            interp_vec_inputs + interp_vis_inputs + [action_inputs]
+        )
+    else:
+        hidden, _ = self.encoder(interp_vec_inputs, interp_vis_inputs)
+        encoder_input = tuple(interp_vec_inputs + interp_vis_inputs)
     ...
     gradient = torch.autograd.grad(estimate, encoder_input, create_graph=True)[0]
     # Norm's gradient could be NaN at 0. Use our own safe_norm
     safe_norm = (torch.sum(gradient ** 2, dim=1) + self.EPSILON).sqrt()
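The gradient penalty (from WGAN-GP, arXiv:1704.00028) now interpolates each discriminator input separately (every vector and visual observation, plus the action/done vector) rather than one pre-encoded state, so the gradient can also flow back through the visual encoders. A minimal self-contained sketch of a multi-input penalty in that style, with a toy linear discriminator standing in for the NetworkBody encoder (all names here are illustrative, not from the patch):

import torch

EPSILON = 1e-7


def gradient_penalty(discriminator, policy_inputs, expert_inputs):
    """WGAN-GP penalty over several input tensors interpolated independently."""
    interp_inputs = []
    for policy_input, expert_input in zip(policy_inputs, expert_inputs):
        eps = torch.rand(policy_input.shape)  # fresh epsilon per input
        interp = eps * policy_input + (1 - eps) * expert_input
        interp.requires_grad = True  # leaf tensor, so autograd can reach it
        interp_inputs.append(interp)
    estimate = discriminator(torch.cat(interp_inputs, dim=1)).squeeze(1).sum()
    # grad() returns one tensor per input; like the patch, penalize the first.
    gradient = torch.autograd.grad(estimate, tuple(interp_inputs), create_graph=True)[0]
    # A plain .norm() has a NaN gradient at exactly 0, hence the safe version.
    safe_norm = (torch.sum(gradient ** 2, dim=1) + EPSILON).sqrt()
    return torch.mean((safe_norm - 1) ** 2)


# Toy usage: an 8-sample batch with a 4-dim and a 2-dim input.
disc = torch.nn.Sequential(
    torch.nn.Linear(6, 32), torch.nn.ReLU(), torch.nn.Linear(32, 1)
)
policy = [torch.randn(8, 4), torch.randn(8, 2)]
expert = [torch.randn(8, 4), torch.randn(8, 2)]
gradient_penalty(disc, policy, expert).backward()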

ml-agents/mlagents/trainers/torch/networks.py (1 line changed)

     sequence_length: int = 1,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
     encodes = []
     for idx, processor in enumerate(self.vector_processors):
         vec_input = vec_inputs[idx]
         processed_vec = processor(vec_input)

ml-agents/tests/yamato/training_int_tests.py (4 lines changed)

 if csharp_version is None and python_version is None:
     # Use abs path so that loading doesn't get confused
     model_path = os.path.abspath(os.path.dirname(nn_file_expected))
-    # Onnx loading for overrides not currently supported, but this is
-    # where to add it in when it is.
-    for extension in ["nn"]:
+    for extension in ["nn", "onnx"]:
         inference_ok = run_inference(env_path, model_path, extension)
         if not inference_ok:
             return False
