GitHub
6 年前
当前提交
3c9603d6
共有 57 个文件被更改,包括 4009 次插入 和 412 次删除
-
4.gitignore
-
9UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
-
77UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
-
152UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs
-
47UnitySDK/Assets/ML-Agents/Scripts/Brain.cs
-
71UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs
-
25config/trainer_config.yaml
-
59docs/Training-Imitation-Learning.md
-
1ml-agents/mlagents/envs/communicator_objects/__init__.py
-
9ml-agents/mlagents/envs/communicator_objects/unity_to_external_pb2.py
-
18ml-agents/mlagents/envs/environment.py
-
4ml-agents/mlagents/trainers/__init__.py
-
3ml-agents/mlagents/trainers/bc/__init__.py
-
2ml-agents/mlagents/trainers/bc/policy.py
-
106ml-agents/mlagents/trainers/bc/trainer.py
-
2ml-agents/mlagents/trainers/buffer.py
-
16ml-agents/mlagents/trainers/ppo/trainer.py
-
25ml-agents/mlagents/trainers/trainer.py
-
12ml-agents/mlagents/trainers/trainer_controller.py
-
16ml-agents/tests/mock_communicator.py
-
1ml-agents/tests/trainers/test_meta_curriculum.py
-
104ml-agents/tests/trainers/test_trainer_controller.py
-
95UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs
-
11UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs.meta
-
60UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs
-
11UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs.meta
-
66UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
-
11UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs.meta
-
1001UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll
-
30UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll.meta
-
623UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll
-
30UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll.meta
-
8UnitySDK/Assets/ML-Agents/Resources.meta
-
289UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs
-
11UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs.meta
-
76UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs
-
11UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs.meta
-
65UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs
-
11UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs.meta
-
138UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs.meta
-
73UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs.meta
-
55config/bc_config.yaml
-
102docs/images/demo_component.png
-
198docs/images/demo_inspector.png
-
98ml-agents/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py
-
19ml-agents/mlagents/envs/utilities.py
-
53ml-agents/mlagents/trainers/bc/offline_trainer.py
-
116ml-agents/mlagents/trainers/bc/online_trainer.py
-
151ml-agents/mlagents/trainers/demo_loader.py
-
60ml-agents/tests/trainers/test.demo
-
14ml-agents/tests/trainers/test_demo_loader.py
-
12protobuf-definitions/proto/mlagents/envs/communicator_objects/demonstration_meta_proto.proto
-
68UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png
-
86UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png.meta
|
|||
from .models import * |
|||
from .trainer import * |
|||
from .online_trainer import * |
|||
from .offline_trainer import * |
|||
from .policy import * |
|
|||
using System.Text; |
|||
using MLAgents; |
|||
using UnityEditor; |
|||
|
|||
/// <summary>
/// Custom inspector for the Demonstration scriptable object. Renders the
/// recorded meta-data and brain parameters as read-only labels.
/// </summary>
[CustomEditor(typeof(Demonstration))]
[CanEditMultipleObjects]
public class DemonstrationEditor : Editor
{
    SerializedProperty brainParameters;
    SerializedProperty demoMetaData;

    void OnEnable()
    {
        demoMetaData = serializedObject.FindProperty("metaData");
        brainParameters = serializedObject.FindProperty("brainParameters");
    }

    /// <summary>
    /// Draws one read-only label per demonstration meta-data field.
    /// </summary>
    void MakeMetaDataProperty(SerializedProperty property)
    {
        var nameField = property.FindPropertyRelative("demonstrationName");
        var experiencesField = property.FindPropertyRelative("numberExperiences");
        var episodesField = property.FindPropertyRelative("numberEpisodes");
        var rewardField = property.FindPropertyRelative("meanReward");

        EditorGUILayout.LabelField(nameField.displayName + ": " + nameField.stringValue);
        EditorGUILayout.LabelField(experiencesField.displayName + ": " + experiencesField.intValue);
        EditorGUILayout.LabelField(episodesField.displayName + ": " + episodesField.intValue);
        EditorGUILayout.LabelField(rewardField.displayName + ": " + rewardField.floatValue);
    }

    /// <summary>
    /// Formats the action size array as "[ a, b, c ]".
    /// </summary>
    static string BuildActionArrayLabel(SerializedProperty actionSizeProperty)
    {
        var count = actionSizeProperty.arraySize;
        var label = new StringBuilder("[ ");
        for (var index = 0; index < count; index++)
        {
            if (index > 0)
            {
                label.Append(", ");
            }
            label.Append(actionSizeProperty.GetArrayElementAtIndex(index).intValue);
        }
        return label.Append(" ]").ToString();
    }

    /// <summary>
    /// Draws one read-only label per recorded brain parameter.
    /// </summary>
    void MakeBrainParametersProperty(SerializedProperty property)
    {
        var obsSizeField = property.FindPropertyRelative("vectorObservationSize");
        var stackedField = property.FindPropertyRelative("numStackedVectorObservations");
        var actSizeField = property.FindPropertyRelative("vectorActionSize");
        var camResField = property.FindPropertyRelative("cameraResolutions");
        var actTypeField = property.FindPropertyRelative("vectorActionSpaceType");

        EditorGUILayout.LabelField(obsSizeField.displayName + ": " + obsSizeField.intValue);
        EditorGUILayout.LabelField(stackedField.displayName + ": " + stackedField.intValue);
        EditorGUILayout.LabelField(actSizeField.displayName + ": " + BuildActionArrayLabel(actSizeField));
        // Only the number of camera resolutions is shown, not their contents.
        EditorGUILayout.LabelField(camResField.displayName + ": " + camResField.arraySize);
        // enumValueIndex maps directly onto the SpaceType enum ordering.
        EditorGUILayout.LabelField(actTypeField.displayName + ": " + (SpaceType) actTypeField.enumValueIndex);
    }

    public override void OnInspectorGUI()
    {
        serializedObject.Update();
        EditorGUILayout.LabelField("Meta Data", EditorStyles.boldLabel);
        MakeMetaDataProperty(demoMetaData);
        EditorGUILayout.LabelField("Brain Parameters", EditorStyles.boldLabel);
        MakeBrainParametersProperty(brainParameters);
        serializedObject.ApplyModifiedProperties();
    }
}
|
|||
fileFormatVersion: 2 |
|||
guid: 84f9cd83f56c74790a51444a6cfe4945 |
|||
MonoImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
defaultReferences: [] |
|||
executionOrder: 0 |
|||
icon: {instanceID: 0} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
using System; |
|||
using System.IO; |
|||
using MLAgents.CommunicatorObjects; |
|||
using UnityEditor; |
|||
using UnityEngine; |
|||
using UnityEditor.Experimental.AssetImporters; |
|||
|
|||
namespace MLAgents
{
    /// <summary>
    /// Asset Importer used to parse demonstration (.demo) files.
    /// Reads the meta-data and brain parameters written by DemonstrationStore
    /// and registers them as a Demonstration asset.
    /// </summary>
    [ScriptedImporter(1, new[] {"demo"})]
    public class DemonstrationImporter : ScriptedImporter
    {
        private const string IconPath = "Assets/ML-Agents/Resources/DemoIcon.png";

        /// <summary>
        /// Imports the demonstration file at <c>ctx.assetPath</c>.
        /// </summary>
        /// <exception cref="Exception">Thrown when the asset path has no extension.</exception>
        public override void OnImportAsset(AssetImportContext ctx)
        {
            var inputType = Path.GetExtension(ctx.assetPath);
            if (inputType == null)
            {
                throw new Exception("Demonstration import error.");
            }

            try
            {
                DemonstrationMetaData metaData;
                BrainParameters brainParameters;

                // Read the first two delimited proto objects: the meta-data,
                // then the brain parameters, which DemonstrationStore writes at
                // a fixed offset past the reserved meta-data region.
                // Fix: use a using block so the stream is closed even when
                // parsing throws (previously it leaked on failure).
                using (Stream reader = File.OpenRead(ctx.assetPath))
                {
                    var metaDataProto = DemonstrationMetaProto.Parser.ParseDelimitedFrom(reader);
                    metaData = new DemonstrationMetaData(metaDataProto);

                    reader.Seek(DemonstrationStore.MetaDataBytes + 1, 0);
                    var brainParamsProto = BrainParametersProto.Parser.ParseDelimitedFrom(reader);
                    brainParameters = new BrainParameters(brainParamsProto);
                }

                var demonstration = ScriptableObject.CreateInstance<Demonstration>();
                demonstration.Initialize(brainParameters, metaData);
                userData = demonstration.ToString();

                var texture = (Texture2D)
                    AssetDatabase.LoadAssetAtPath(IconPath, typeof(Texture2D));

#if UNITY_2017_3_OR_NEWER
                ctx.AddObjectToAsset(ctx.assetPath, demonstration, texture);
                ctx.SetMainObject(demonstration);
#else
                // Fix: previously referenced an undefined `model` variable
                // (copy-paste error); the imported demonstration is the main asset.
                ctx.SetMainAsset(ctx.assetPath, demonstration);
#endif
            }
            catch (Exception e)
            {
                // Fix: the exception was previously swallowed silently. Keep the
                // import non-fatal (best-effort), but surface the failure so a
                // corrupt or incompatible .demo file is diagnosable.
                Debug.LogError("Failed to import demonstration at " + ctx.assetPath + ": " + e);
            }
        }
    }
}
|
|||
fileFormatVersion: 2 |
|||
guid: 7bd65ce151aaa4a41a45312543c56be1 |
|||
MonoImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
defaultReferences: [] |
|||
executionOrder: 0 |
|||
icon: {instanceID: 0} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
using System.Collections.Generic; |
|||
using NUnit.Framework; |
|||
using UnityEngine; |
|||
using System.IO.Abstractions.TestingHelpers; |
|||
|
|||
namespace MLAgents.Tests
{
    /// <summary>
    /// Tests for demonstration name sanitization and the DemonstrationStore
    /// write path, exercised against an in-memory mock file system.
    /// </summary>
    public class DemonstrationTests : MonoBehaviour
    {
        private const string DemonstrationDirectory = "Assets/Demonstrations/";
        private const string DemonstrationExtension = ".demo";
        private const string RecordingName = "Test";

        [Test]
        public void TestSanitization()
        {
            const string rawName = "abc123&!@";
            const string expectedName = "abc123";
            var sanitized = DemonstrationRecorder.SanitizeName(rawName);
            Assert.AreNotEqual(rawName, sanitized);
            Assert.AreEqual(sanitized, expectedName);
        }

        [Test]
        public void TestStoreInitalize()
        {
            var fileSystem = new MockFileSystem();
            var store = new DemonstrationStore(fileSystem);

            // Nothing should exist before Initialize runs.
            Assert.IsFalse(fileSystem.Directory.Exists(DemonstrationDirectory));

            var brainParameters = new BrainParameters
            {
                vectorObservationSize = 3,
                numStackedVectorObservations = 2,
                cameraResolutions = new[] {new resolution()},
                vectorActionDescriptions = new[] {"TestActionA", "TestActionB"},
                vectorActionSize = new[] {2, 2},
                vectorActionSpaceType = SpaceType.discrete
            };

            store.Initialize(RecordingName, brainParameters, "TestBrain");

            // Initialize must create both the directory and the .demo file.
            Assert.IsTrue(fileSystem.Directory.Exists(DemonstrationDirectory));
            Assert.IsTrue(
                fileSystem.FileExists(DemonstrationDirectory + RecordingName + DemonstrationExtension));

            var agentInfo = new AgentInfo
            {
                reward = 1f,
                visualObservations = new List<Texture2D>(),
                actionMasks = new[] {false, true},
                done = true,
                id = 5,
                maxStepReached = true,
                memories = new List<float>(),
                stackedVectorObservation = new List<float>() {1f, 1f, 1f},
                storedTextActions = "TestAction",
                storedVectorActions = new[] {0f, 1f},
                textObservation = "TestAction",
            };

            // Recording and closing should complete without throwing.
            store.Record(agentInfo);
            store.Close();
        }
    }
}
|
|||
fileFormatVersion: 2 |
|||
guid: 4c5a970f5b6be4b57b3bd7a5f84c3623 |
|||
MonoImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
defaultReferences: [] |
|||
executionOrder: 0 |
|||
icon: {instanceID: 0} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
1001
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll
文件差异内容过多而无法显示
查看文件
文件差异内容过多而无法显示
查看文件
|
|||
fileFormatVersion: 2 |
|||
guid: 2d7ba4e1037b64de5b860bcbe15755b3 |
|||
PluginImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
iconMap: {} |
|||
executionOrder: {} |
|||
isPreloaded: 0 |
|||
isOverridable: 0 |
|||
platformData: |
|||
- first: |
|||
Any: |
|||
second: |
|||
enabled: 1 |
|||
settings: {} |
|||
- first: |
|||
Editor: Editor |
|||
second: |
|||
enabled: 0 |
|||
settings: |
|||
DefaultValueInitialized: true |
|||
- first: |
|||
Windows Store Apps: WindowsStoreApps |
|||
second: |
|||
enabled: 0 |
|||
settings: |
|||
CPU: AnyCPU |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
623
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll
文件差异内容过多而无法显示
查看文件
文件差异内容过多而无法显示
查看文件
|
|||
fileFormatVersion: 2 |
|||
guid: b01205587773841ad95e8ceda347e8bd |
|||
PluginImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
iconMap: {} |
|||
executionOrder: {} |
|||
isPreloaded: 0 |
|||
isOverridable: 0 |
|||
platformData: |
|||
- first: |
|||
Any: |
|||
second: |
|||
enabled: 1 |
|||
settings: {} |
|||
- first: |
|||
Editor: Editor |
|||
second: |
|||
enabled: 0 |
|||
settings: |
|||
DefaultValueInitialized: true |
|||
- first: |
|||
Windows Store Apps: WindowsStoreApps |
|||
second: |
|||
enabled: 0 |
|||
settings: |
|||
CPU: AnyCPU |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
fileFormatVersion: 2 |
|||
guid: 1b3ab22264a5447df9e52684598ac3b0 |
|||
folderAsset: yes |
|||
DefaultImporter: |
|||
externalObjects: {} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
// <auto-generated>
|
|||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
|||
// source: mlagents/envs/communicator_objects/demonstration_meta_proto.proto
|
|||
// </auto-generated>
|
|||
#pragma warning disable 1591, 0612, 3021
|
|||
#region Designer generated code
|
|||
|
|||
using pb = global::Google.Protobuf; |
|||
using pbc = global::Google.Protobuf.Collections; |
|||
using pbr = global::Google.Protobuf.Reflection; |
|||
using scg = global::System.Collections.Generic; |
|||
namespace MLAgents.CommunicatorObjects { |
|||
|
|||
/// <summary>Holder for reflection information generated from mlagents/envs/communicator_objects/demonstration_meta_proto.proto</summary>
public static partial class DemonstrationMetaProtoReflection {

  #region Descriptor
  /// <summary>File descriptor for mlagents/envs/communicator_objects/demonstration_meta_proto.proto</summary>
  public static pbr::FileDescriptor Descriptor {
    get { return descriptor; }
  }
  private static pbr::FileDescriptor descriptor;

  // NOTE(review): protoc-generated code — do not edit by hand; regenerate from
  // the .proto definition if the message schema changes. The base64 payload
  // below is the serialized FileDescriptorProto for this .proto file.
  static DemonstrationMetaProtoReflection() {
    byte[] descriptorData = global::System.Convert.FromBase64String(
        string.Concat(
          "CkFtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2RlbW9uc3Ry",
          "YXRpb25fbWV0YV9wcm90by5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi",
          "jQEKFkRlbW9uc3RyYXRpb25NZXRhUHJvdG8SEwoLYXBpX3ZlcnNpb24YASAB",
          "KAUSGgoSZGVtb25zdHJhdGlvbl9uYW1lGAIgASgJEhQKDG51bWJlcl9zdGVw",
          "cxgDIAEoBRIXCg9udW1iZXJfZXBpc29kZXMYBCABKAUSEwoLbWVhbl9yZXdh",
          "cmQYBSABKAJCH6oCHE1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
          "b3RvMw=="));
    descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData,
        new pbr::FileDescriptor[] { },
        new pbr::GeneratedClrTypeInfo(null, new pbr::GeneratedClrTypeInfo[] {
          new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.DemonstrationMetaProto), global::MLAgents.CommunicatorObjects.DemonstrationMetaProto.Parser, new[]{ "ApiVersion", "DemonstrationName", "NumberSteps", "NumberEpisodes", "MeanReward" }, null, null, null)
        }));
  }
  #endregion

}
|||
#region Messages
|
|||
// NOTE(review): protoc-generated message class — do not edit by hand;
// regenerate from demonstration_meta_proto.proto instead.
public sealed partial class DemonstrationMetaProto : pb::IMessage<DemonstrationMetaProto> {
  private static readonly pb::MessageParser<DemonstrationMetaProto> _parser = new pb::MessageParser<DemonstrationMetaProto>(() => new DemonstrationMetaProto());
  private pb::UnknownFieldSet _unknownFields;
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public static pb::MessageParser<DemonstrationMetaProto> Parser { get { return _parser; } }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public static pbr::MessageDescriptor Descriptor {
    get { return global::MLAgents.CommunicatorObjects.DemonstrationMetaProtoReflection.Descriptor.MessageTypes[0]; }
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  pbr::MessageDescriptor pb::IMessage.Descriptor {
    get { return Descriptor; }
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public DemonstrationMetaProto() {
    OnConstruction();
  }

  partial void OnConstruction();

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public DemonstrationMetaProto(DemonstrationMetaProto other) : this() {
    apiVersion_ = other.apiVersion_;
    demonstrationName_ = other.demonstrationName_;
    numberSteps_ = other.numberSteps_;
    numberEpisodes_ = other.numberEpisodes_;
    meanReward_ = other.meanReward_;
    _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public DemonstrationMetaProto Clone() {
    return new DemonstrationMetaProto(this);
  }

  /// <summary>Field number for the "api_version" field.</summary>
  public const int ApiVersionFieldNumber = 1;
  private int apiVersion_;
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public int ApiVersion {
    get { return apiVersion_; }
    set {
      apiVersion_ = value;
    }
  }

  /// <summary>Field number for the "demonstration_name" field.</summary>
  public const int DemonstrationNameFieldNumber = 2;
  private string demonstrationName_ = "";
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public string DemonstrationName {
    get { return demonstrationName_; }
    set {
      demonstrationName_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
    }
  }

  /// <summary>Field number for the "number_steps" field.</summary>
  public const int NumberStepsFieldNumber = 3;
  private int numberSteps_;
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public int NumberSteps {
    get { return numberSteps_; }
    set {
      numberSteps_ = value;
    }
  }

  /// <summary>Field number for the "number_episodes" field.</summary>
  public const int NumberEpisodesFieldNumber = 4;
  private int numberEpisodes_;
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public int NumberEpisodes {
    get { return numberEpisodes_; }
    set {
      numberEpisodes_ = value;
    }
  }

  /// <summary>Field number for the "mean_reward" field.</summary>
  public const int MeanRewardFieldNumber = 5;
  private float meanReward_;
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public float MeanReward {
    get { return meanReward_; }
    set {
      meanReward_ = value;
    }
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public override bool Equals(object other) {
    return Equals(other as DemonstrationMetaProto);
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public bool Equals(DemonstrationMetaProto other) {
    if (ReferenceEquals(other, null)) {
      return false;
    }
    if (ReferenceEquals(other, this)) {
      return true;
    }
    if (ApiVersion != other.ApiVersion) return false;
    if (DemonstrationName != other.DemonstrationName) return false;
    if (NumberSteps != other.NumberSteps) return false;
    if (NumberEpisodes != other.NumberEpisodes) return false;
    // Bitwise comparison so NaN == NaN and -0 != +0, per protobuf semantics.
    if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(MeanReward, other.MeanReward)) return false;
    return Equals(_unknownFields, other._unknownFields);
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public override int GetHashCode() {
    int hash = 1;
    if (ApiVersion != 0) hash ^= ApiVersion.GetHashCode();
    if (DemonstrationName.Length != 0) hash ^= DemonstrationName.GetHashCode();
    if (NumberSteps != 0) hash ^= NumberSteps.GetHashCode();
    if (NumberEpisodes != 0) hash ^= NumberEpisodes.GetHashCode();
    if (MeanReward != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(MeanReward);
    if (_unknownFields != null) {
      hash ^= _unknownFields.GetHashCode();
    }
    return hash;
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public override string ToString() {
    return pb::JsonFormatter.ToDiagnosticString(this);
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public void WriteTo(pb::CodedOutputStream output) {
    // Fields equal to their proto3 default value are skipped on the wire.
    if (ApiVersion != 0) {
      output.WriteRawTag(8);
      output.WriteInt32(ApiVersion);
    }
    if (DemonstrationName.Length != 0) {
      output.WriteRawTag(18);
      output.WriteString(DemonstrationName);
    }
    if (NumberSteps != 0) {
      output.WriteRawTag(24);
      output.WriteInt32(NumberSteps);
    }
    if (NumberEpisodes != 0) {
      output.WriteRawTag(32);
      output.WriteInt32(NumberEpisodes);
    }
    if (MeanReward != 0F) {
      output.WriteRawTag(45);
      output.WriteFloat(MeanReward);
    }
    if (_unknownFields != null) {
      _unknownFields.WriteTo(output);
    }
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public int CalculateSize() {
    int size = 0;
    if (ApiVersion != 0) {
      size += 1 + pb::CodedOutputStream.ComputeInt32Size(ApiVersion);
    }
    if (DemonstrationName.Length != 0) {
      size += 1 + pb::CodedOutputStream.ComputeStringSize(DemonstrationName);
    }
    if (NumberSteps != 0) {
      size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumberSteps);
    }
    if (NumberEpisodes != 0) {
      size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumberEpisodes);
    }
    if (MeanReward != 0F) {
      size += 1 + 4;
    }
    if (_unknownFields != null) {
      size += _unknownFields.CalculateSize();
    }
    return size;
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public void MergeFrom(DemonstrationMetaProto other) {
    if (other == null) {
      return;
    }
    if (other.ApiVersion != 0) {
      ApiVersion = other.ApiVersion;
    }
    if (other.DemonstrationName.Length != 0) {
      DemonstrationName = other.DemonstrationName;
    }
    if (other.NumberSteps != 0) {
      NumberSteps = other.NumberSteps;
    }
    if (other.NumberEpisodes != 0) {
      NumberEpisodes = other.NumberEpisodes;
    }
    if (other.MeanReward != 0F) {
      MeanReward = other.MeanReward;
    }
    _unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  public void MergeFrom(pb::CodedInputStream input) {
    uint tag;
    while ((tag = input.ReadTag()) != 0) {
      switch(tag) {
        default:
          // Unknown field numbers are preserved rather than dropped.
          _unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
          break;
        case 8: {
          ApiVersion = input.ReadInt32();
          break;
        }
        case 18: {
          DemonstrationName = input.ReadString();
          break;
        }
        case 24: {
          NumberSteps = input.ReadInt32();
          break;
        }
        case 32: {
          NumberEpisodes = input.ReadInt32();
          break;
        }
        case 45: {
          MeanReward = input.ReadFloat();
          break;
        }
      }
    }
  }

}
|||
|
|||
#endregion
|
|||
|
|||
} |
|||
|
|||
#endregion Designer generated code
|
|
|||
fileFormatVersion: 2 |
|||
guid: f7abfeda342414e059423ef90ede4c30 |
|||
MonoImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
defaultReferences: [] |
|||
executionOrder: 0 |
|||
icon: {instanceID: 0} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
using System; |
|||
using MLAgents.CommunicatorObjects; |
|||
using UnityEngine; |
|||
|
|||
namespace MLAgents |
|||
{ |
|||
/// <summary>
|
|||
/// Demonstration Object. Contains meta-data regarding demonstration.
|
|||
/// Used for imitation learning, or other forms of learning from data.
|
|||
/// </summary>
|
|||
[Serializable] |
|||
public class Demonstration : ScriptableObject |
|||
{ |
|||
public DemonstrationMetaData metaData; |
|||
public BrainParameters brainParameters; |
|||
|
|||
public void Initialize(BrainParameters brainParameters, |
|||
DemonstrationMetaData demonstrationMetaData) |
|||
{ |
|||
this.brainParameters = brainParameters; |
|||
metaData = demonstrationMetaData; |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Demonstration meta-data.
|
|||
/// Kept in a struct for easy serialization and deserialization.
|
|||
/// </summary>
|
|||
[Serializable] |
|||
public class DemonstrationMetaData |
|||
{ |
|||
public int numberExperiences; |
|||
public int numberEpisodes; |
|||
public float meanReward; |
|||
public string demonstrationName; |
|||
public const int ApiVersion = 1; |
|||
|
|||
/// <summary>
|
|||
/// Constructor for initializing metadata to default values.
|
|||
/// </summary>
|
|||
public DemonstrationMetaData() |
|||
{ |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Initialize metadata values based on proto object.
|
|||
/// </summary>
|
|||
public DemonstrationMetaData(DemonstrationMetaProto demoProto) |
|||
{ |
|||
numberEpisodes = demoProto.NumberEpisodes; |
|||
numberExperiences = demoProto.NumberSteps; |
|||
meanReward = demoProto.MeanReward; |
|||
demonstrationName = demoProto.DemonstrationName; |
|||
if (demoProto.ApiVersion != ApiVersion) |
|||
{ |
|||
throw new Exception("API versions of demonstration are incompatible."); |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Convert metadata object to proto object.
|
|||
/// </summary>
|
|||
public DemonstrationMetaProto ToProto() |
|||
{ |
|||
var demoProto = new DemonstrationMetaProto |
|||
{ |
|||
ApiVersion = ApiVersion, |
|||
MeanReward = meanReward, |
|||
NumberSteps = numberExperiences, |
|||
NumberEpisodes = numberEpisodes, |
|||
DemonstrationName = demonstrationName |
|||
}; |
|||
return demoProto; |
|||
} |
|||
} |
|||
} |
|
|||
fileFormatVersion: 2 |
|||
guid: b651f66c75a1646c6ab48de06d0e13ef |
|||
MonoImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
defaultReferences: [] |
|||
executionOrder: 0 |
|||
icon: {instanceID: 0} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
using UnityEngine; |
|||
using System.Text.RegularExpressions; |
|||
|
|||
namespace MLAgents
{
    /// <summary>
    /// Demonstration Recorder Component. When attached to an Agent with
    /// <c>record</c> enabled (editor only), streams the agent's experiences
    /// to a .demo file via DemonstrationStore.
    /// </summary>
    [RequireComponent(typeof(Agent))]
    public class DemonstrationRecorder : MonoBehaviour
    {
        public bool record;
        public string demonstrationName;
        private Agent recordingAgent;
        private string filePath;
        private DemonstrationStore demoStore;

        /// <summary>
        /// Initializes the Demonstration store. Only runs in the editor and
        /// only when recording is enabled; otherwise demoStore stays null.
        /// </summary>
        private void Start()
        {
            if (Application.isEditor && record)
            {
                recordingAgent = GetComponent<Agent>();
                demoStore = new DemonstrationStore();
                demonstrationName = SanitizeName(demonstrationName);
                demoStore.Initialize(
                    demonstrationName,
                    recordingAgent.brain.brainParameters,
                    recordingAgent.brain.name);
                Monitor.Log("Recording Demonstration of Agent: ", recordingAgent.name);
            }
        }

        /// <summary>
        /// Removes all characters except alphanumerics, spaces, and hyphens
        /// from the demonstration name.
        /// </summary>
        public static string SanitizeName(string demoName)
        {
            var rgx = new Regex("[^a-zA-Z0-9 -]");
            demoName = rgx.Replace(demoName, "");
            return demoName;
        }

        /// <summary>
        /// Forwards AgentInfo to Demonstration Store.
        /// Fix: guard against a null store — previously this threw a
        /// NullReferenceException whenever recording was disabled, since the
        /// store is only created in Start when <c>record</c> is on.
        /// </summary>
        public void WriteExperience(AgentInfo info)
        {
            if (demoStore != null)
            {
                demoStore.Record(info);
            }
        }

        /// <summary>
        /// Closes the Demonstration store on application exit, if one was
        /// actually opened (record could have been toggled after Start).
        /// </summary>
        private void OnApplicationQuit()
        {
            if (Application.isEditor && record && demoStore != null)
            {
                demoStore.Close();
            }
        }
    }
}
|
|||
fileFormatVersion: 2 |
|||
guid: 50f710d360a49461cad67ff5e6bcefe1 |
|||
MonoImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
defaultReferences: [] |
|||
executionOrder: 0 |
|||
icon: {instanceID: 0} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
using System.IO; |
|||
using System.IO.Abstractions; |
|||
using Google.Protobuf; |
|||
using MLAgents.CommunicatorObjects; |
|||
|
|||
namespace MLAgents |
|||
{ |
|||
/// <summary>
|
|||
/// Responsible for writing demonstration data to file.
|
|||
/// </summary>
|
|||
public class DemonstrationStore |
|||
{ |
|||
public const int MetaDataBytes = 32; // Number of bytes allocated to metadata in demo file.
|
|||
private readonly IFileSystem fileSystem; |
|||
private const string DemoDirecory = "Assets/Demonstrations/"; |
|||
private const string ExtensionType = ".demo"; |
|||
|
|||
private string filePath; |
|||
private DemonstrationMetaData metaData; |
|||
private Stream writer; |
|||
private float cumulativeReward; |
|||
|
|||
public DemonstrationStore(IFileSystem fileSystem) |
|||
{ |
|||
this.fileSystem = fileSystem; |
|||
} |
|||
|
|||
public DemonstrationStore() |
|||
{ |
|||
fileSystem = new FileSystem(); |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Initializes the Demonstration Store, and writes initial data.
|
|||
/// </summary>
|
|||
public void Initialize( |
|||
string demonstrationName, BrainParameters brainParameters, string brainName) |
|||
{ |
|||
CreateDirectory(); |
|||
CreateDemonstrationFile(demonstrationName); |
|||
WriteBrainParameters(brainName, brainParameters); |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Checks for the existence of the Demonstrations directory
|
|||
/// and creates it if it does not exist.
|
|||
/// </summary>
|
|||
private void CreateDirectory() |
|||
{ |
|||
if (!fileSystem.Directory.Exists(DemoDirecory)) |
|||
{ |
|||
fileSystem.Directory.CreateDirectory(DemoDirecory); |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Creates demonstration file.
|
|||
/// </summary>
|
|||
private void CreateDemonstrationFile(string demonstrationName) |
|||
{ |
|||
// Creates demonstration file.
|
|||
var literalName = demonstrationName; |
|||
filePath = DemoDirecory + literalName + ExtensionType; |
|||
var uniqueNameCounter = 0; |
|||
while (fileSystem.File.Exists(filePath)) |
|||
{ |
|||
literalName = demonstrationName + "_" + uniqueNameCounter; |
|||
filePath = DemoDirecory + literalName + ExtensionType; |
|||
uniqueNameCounter++; |
|||
} |
|||
|
|||
writer = fileSystem.File.Create(filePath); |
|||
metaData = new DemonstrationMetaData {demonstrationName = demonstrationName}; |
|||
var metaProto = metaData.ToProto(); |
|||
metaProto.WriteDelimitedTo(writer); |
|||
} |
|||
|
|||
        /// <summary>
        /// Writes brain parameters to the demonstration file, just past the
        /// fixed-size region reserved for the meta-data message.
        /// </summary>
        private void WriteBrainParameters(string brainName, BrainParameters brainParameters)
        {
            // Seek past the reserved meta-data region; WriteMetadata() later
            // seeks back to offset 0 to fill that region in.
            writer.Seek(MetaDataBytes + 1, 0);
            var brainProto = brainParameters.ToProto(brainName, BrainTypeProto.Player);
            brainProto.WriteDelimitedTo(writer);
        }
|||
|
|||
        /// <summary>
        /// Write AgentInfo experience to file, updating the meta-data
        /// counters (experience count, cumulative reward, and the episode
        /// count whenever the agent reports done).
        /// </summary>
        /// <param name="info">The agent experience to record.</param>
        public void Record(AgentInfo info)
        {
            // Increment meta-data counters.
            metaData.numberExperiences++;
            cumulativeReward += info.reward;
            if (info.done)
            {
                EndEpisode();
            }

            // Write AgentInfo to file.
            var agentProto = info.ToProto();
            agentProto.WriteDelimitedTo(writer);
        }
|||
|
|||
        /// <summary>
        /// Performs all clean-up necessary: finalizes the episode count,
        /// computes the mean reward, writes the meta-data into the file and
        /// closes the underlying stream.
        /// </summary>
        public void Close()
        {
            // The in-progress episode is counted even if it did not finish;
            // this also guarantees numberEpisodes >= 1 for the division below.
            EndEpisode();
            metaData.meanReward = cumulativeReward / metaData.numberEpisodes;
            WriteMetadata();
            writer.Close();
        }
|||
|
|||
        /// <summary>
        /// Performs necessary episode-completion steps: increments the
        /// episode counter in the meta-data.
        /// </summary>
        private void EndEpisode()
        {
            metaData.numberEpisodes += 1;
        }
|||
|
|||
        /// <summary>
        /// Writes meta-data: appends the raw message bytes at the current
        /// (end) position, then writes the length-delimited message into the
        /// reserved region at the start of the file.
        /// </summary>
        private void WriteMetadata()
        {
            var metaProto = metaData.ToProto();
            var metaProtoBytes = metaProto.ToByteArray();
            // NOTE(review): the undelimited bytes appended here look redundant
            // with the delimited write at offset 0 below — confirm which of
            // the two the demonstration loader actually relies on.
            writer.Write(metaProtoBytes, 0, metaProtoBytes.Length);
            writer.Seek(0, 0);
            metaProto.WriteDelimitedTo(writer);
        }
|||
} |
|||
} |
|
|||
fileFormatVersion: 2 |
|||
guid: a79c7ccb2cd042b5b1e710b9588d921b |
|||
timeCreated: 1537388072 |
|
|||
using UnityEngine; |
|||
using System.Collections; |
|||
using System.Collections.Generic; |
|||
|
|||
namespace MLAgents |
|||
{ |
|||
public class Utilities |
|||
{ |
|||
/// <summary>
|
|||
/// Converts a list of Texture2D into a Tensor.
|
|||
/// </summary>
|
|||
/// <returns>
|
|||
/// A 4 dimensional float Tensor of dimension
|
|||
/// [batch_size, height, width, channel].
|
|||
/// Where batch_size is the number of input textures,
|
|||
/// height corresponds to the height of the texture,
|
|||
/// width corresponds to the width of the texture,
|
|||
/// channel corresponds to the number of channels extracted from the
|
|||
/// input textures (based on the input blackAndWhite flag
|
|||
/// (3 if the flag is false, 1 otherwise).
|
|||
/// The values of the Tensor are between 0 and 1.
|
|||
/// </returns>
|
|||
/// <param name="textures">
|
|||
/// The list of textures to be put into the tensor.
|
|||
/// Note that the textures must have same width and height.
|
|||
/// </param>
|
|||
/// <param name="blackAndWhite">
|
|||
/// If set to <c>true</c> the textures
|
|||
/// will be converted to grayscale before being stored in the tensor.
|
|||
/// </param>
|
|||
public static float[,,,] TextureToFloatArray( |
|||
List<Texture2D> textures, bool blackAndWhite) |
|||
{ |
|||
int batchSize = textures.Count; |
|||
int width = textures[0].width; |
|||
int height = textures[0].height; |
|||
var pixels = blackAndWhite ? 1 : 3; |
|||
float[,,,] result = new float[batchSize, height, width, pixels]; |
|||
float[] resultTemp = new float[batchSize * height * width * pixels]; |
|||
int hwp = height * width * pixels; |
|||
int wp = width * pixels; |
|||
|
|||
for (int b = 0; b < batchSize; b++) |
|||
{ |
|||
Color32[] cc = textures[b].GetPixels32(); |
|||
for (int h = height - 1; h >= 0; h--) |
|||
{ |
|||
for (int w = 0; w < width; w++) |
|||
{ |
|||
Color32 currentPixel = cc[(height - h - 1) * width + w]; |
|||
if (!blackAndWhite) |
|||
{ |
|||
// For Color32, the r, g and b values are between
|
|||
// 0 and 255.
|
|||
resultTemp[b * hwp + h * wp + w * pixels] = currentPixel.r / 255.0f; |
|||
resultTemp[b * hwp + h * wp + w * pixels + 1] = currentPixel.g / 255.0f; |
|||
resultTemp[b * hwp + h * wp + w * pixels + 2] = currentPixel.b / 255.0f; |
|||
} |
|||
else |
|||
{ |
|||
resultTemp[b * hwp + h * wp + w * pixels] = |
|||
(currentPixel.r + currentPixel.g + currentPixel.b) |
|||
/ 3f / 255.0f; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
System.Buffer.BlockCopy(resultTemp, 0, result, 0, batchSize * hwp * sizeof(float)); |
|||
return result; |
|||
} |
|||
} |
|||
} |
|
|||
fileFormatVersion: 2 |
|||
guid: 0e664c25f496478c9c26df6688379f7e |
|||
timeCreated: 1537468595 |
|
|||
default: |
|||
trainer: offline_bc |
|||
batch_size: 64 |
|||
beta: 5.0e-3 |
|||
hidden_units: 128 |
|||
learning_rate: 3.0e-4 |
|||
max_steps: 5.0e4 |
|||
memory_size: 256 |
|||
batches_per_epoch: 10 |
|||
num_epoch: 5 |
|||
num_layers: 2 |
|||
summary_freq: 1000 |
|||
use_recurrent: false |
|||
sequence_length: 32 |
|||
demo_path: ./UnitySDK/Assets/Demonstrations/Crawler_test.demo |
|||
|
|||
HallwayBrain: |
|||
trainer: offline_bc |
|||
max_steps: 5.0e5 |
|||
num_epoch: 5 |
|||
batch_size: 64 |
|||
batches_per_epoch: 5 |
|||
num_layers: 2 |
|||
hidden_units: 128 |
|||
    # duplicate "sequence_length" key removed; the effective value (32) is set below
|||
buffer_size: 512 |
|||
use_recurrent: true |
|||
memory_size: 256 |
|||
sequence_length: 32 |
|||
demo_path: ./UnitySDK/Assets/Demonstrations/Hallway.demo |
|||
|
|||
StudentBrain: |
|||
trainer: online_bc |
|||
max_steps: 10000 |
|||
summary_freq: 1000 |
|||
brain_to_imitate: TeacherBrain |
|||
batch_size: 16 |
|||
batches_per_epoch: 5 |
|||
num_layers: 4 |
|||
hidden_units: 64 |
|||
sequence_length: 16 |
|||
buffer_size: 128 |
|||
|
|||
StudentRecurrentBrain: |
|||
trainer: online_bc |
|||
max_steps: 10000 |
|||
summary_freq: 1000 |
|||
brain_to_imitate: TeacherBrain |
|||
batch_size: 16 |
|||
batches_per_epoch: 5 |
|||
num_layers: 4 |
|||
hidden_units: 64 |
|||
use_recurrent: true |
|||
sequence_length: 32 |
|||
buffer_size: 128 |
|
|||
# Generated by the protocol buffer compiler. DO NOT EDIT! |
|||
# source: mlagents/envs/communicator_objects/demonstration_meta_proto.proto |
|||
|
|||
import sys |
|||
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) |
|||
from google.protobuf import descriptor as _descriptor |
|||
from google.protobuf import message as _message |
|||
from google.protobuf import reflection as _reflection |
|||
from google.protobuf import symbol_database as _symbol_database |
|||
# @@protoc_insertion_point(imports) |
|||
|
|||
_sym_db = _symbol_database.Default() |
|||
|
|||
|
|||
|
|||
|
|||
# Machine-generated descriptor for the DemonstrationMetaProto message
# (fields: api_version, demonstration_name, number_steps, number_episodes,
# mean_reward). Regenerate with protoc instead of hand-editing.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='mlagents/envs/communicator_objects/demonstration_meta_proto.proto',
  package='communicator_objects',
  syntax='proto3',
  serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
  serialized_pb=_b('\nAmlagents/envs/communicator_objects/demonstration_meta_proto.proto\x12\x14\x63ommunicator_objects\"\x8d\x01\n\x16\x44\x65monstrationMetaProto\x12\x13\n\x0b\x61pi_version\x18\x01 \x01(\x05\x12\x1a\n\x12\x64\x65monstration_name\x18\x02 \x01(\t\x12\x14\n\x0cnumber_steps\x18\x03 \x01(\x05\x12\x17\n\x0fnumber_episodes\x18\x04 \x01(\x05\x12\x13\n\x0bmean_reward\x18\x05 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
)




_DEMONSTRATIONMETAPROTO = _descriptor.Descriptor(
  name='DemonstrationMetaProto',
  full_name='communicator_objects.DemonstrationMetaProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='api_version', full_name='communicator_objects.DemonstrationMetaProto.api_version', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='demonstration_name', full_name='communicator_objects.DemonstrationMetaProto.demonstration_name', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='number_steps', full_name='communicator_objects.DemonstrationMetaProto.number_steps', index=2,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='number_episodes', full_name='communicator_objects.DemonstrationMetaProto.number_episodes', index=3,
      number=4, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='mean_reward', full_name='communicator_objects.DemonstrationMetaProto.mean_reward', index=4,
      number=5, type=2, cpp_type=6, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=92,
  serialized_end=233,
)

DESCRIPTOR.message_types_by_name['DemonstrationMetaProto'] = _DEMONSTRATIONMETAPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete message class generated from the descriptor above.
DemonstrationMetaProto = _reflection.GeneratedProtocolMessageType('DemonstrationMetaProto', (_message.Message,), dict(
  DESCRIPTOR = _DEMONSTRATIONMETAPROTO,
  __module__ = 'mlagents.envs.communicator_objects.demonstration_meta_proto_pb2'
  # @@protoc_insertion_point(class_scope:communicator_objects.DemonstrationMetaProto)
  ))
_sym_db.RegisterMessage(DemonstrationMetaProto)


DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)
|||
# @@protoc_insertion_point(module_scope) |
|
|||
from PIL import Image |
|||
import numpy as np |
|||
import io |
|||
|
|||
|
|||
def process_pixels(image_bytes, gray_scale):
    """
    Converts a byte array observation image into a numpy array, scaled to
    [0, 1], and optionally converts it to grey scale.
    :param image_bytes: input byte array corresponding to image
    :param gray_scale: if True, average the colour channels into one channel
    :return: processed numpy array of observation from environment
    """
    buffer = io.BytesIO(bytearray(image_bytes))
    obs = np.array(Image.open(buffer)) / 255.0
    if gray_scale:
        obs = np.mean(obs, axis=2)
        obs = np.reshape(obs, [obs.shape[0], obs.shape[1], 1])
    return obs
|
|||
# # Unity ML-Agents Toolkit |
|||
# ## ML-Agent Learning (Behavioral Cloning) |
|||
# Contains an implementation of Behavioral Cloning Algorithm |
|||
|
|||
import logging |
|||
|
|||
from mlagents.trainers.bc.trainer import BCTrainer |
|||
from mlagents.trainers.demo_loader import demo_to_buffer |
|||
from mlagents.trainers.trainer import UnityTrainerException |
|||
|
|||
logger = logging.getLogger("mlagents.trainers") |
|||
|
|||
|
|||
class OfflineBCTrainer(BCTrainer):
    """The OfflineBCTrainer is an implementation of Offline Behavioral Cloning."""

    def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
        """
        Responsible for loading demonstrations and training a behavioral
        cloning model from them.
        :param brain: The brain this trainer is training.
        :param trainer_parameters: The parameters for the trainer (dictionary).
        :param training: Whether the trainer is set for training.
        :param load: Whether the model should be loaded.
        :param seed: The seed the model will be initialized with
        :param run_id: The identifier of the current run
        """
        super(OfflineBCTrainer, self).__init__(
            brain, trainer_parameters, training, load, seed, run_id)

        self.param_keys = ['batch_size', 'summary_freq', 'max_steps',
                           'batches_per_epoch', 'use_recurrent',
                           'hidden_units', 'learning_rate', 'num_layers',
                           'sequence_length', 'memory_size', 'model_path',
                           'demo_path']

        self.check_param_keys()
        self.batches_per_epoch = trainer_parameters['batches_per_epoch']
        # Number of sequences drawn per training update; at least one.
        self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
                               1)

        brain_params, self.demonstration_buffer = demo_to_buffer(
            trainer_parameters['demo_path'],
            self.policy.sequence_length)

        # The demonstration must have been recorded with a brain whose
        # parameters match the brain being evaluated. (Leftover debug
        # print(...) statements of both __dict__s were removed here.)
        if brain.__dict__ != brain_params.__dict__:
            raise UnityTrainerException("The provided demonstration is not compatible with the "
                                        "brain being used for performance evaluation.")

    def __str__(self):
        return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
            self.brain_name, '\n'.join(
                ['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
|
|||
# # Unity ML-Agents Toolkit |
|||
# ## ML-Agent Learning (Behavioral Cloning) |
|||
# Contains an implementation of Behavioral Cloning Algorithm |
|||
|
|||
import logging |
|||
import numpy as np |
|||
|
|||
from mlagents.envs import AllBrainInfo |
|||
from mlagents.trainers.bc.trainer import BCTrainer |
|||
|
|||
logger = logging.getLogger("mlagents.trainers") |
|||
|
|||
|
|||
class OnlineBCTrainer(BCTrainer):
    """The OnlineBCTrainer is an implementation of Online Behavioral Cloning."""

    def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
        """
        Responsible for collecting teacher experiences and training a
        behavioral cloning model online from them.
        :param brain: The brain this trainer is training.
        :param trainer_parameters: The parameters for the trainer (dictionary).
        :param training: Whether the trainer is set for training.
        :param load: Whether the model should be loaded.
        :param seed: The seed the model will be initialized with
        :param run_id: The identifier of the current run
        """
        super(OnlineBCTrainer, self).__init__(brain, trainer_parameters, training, load, seed,
                                              run_id)

        self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
                           'summary_freq', 'max_steps',
                           'batches_per_epoch', 'use_recurrent',
                           'hidden_units', 'learning_rate', 'num_layers',
                           'sequence_length', 'memory_size', 'model_path']

        self.check_param_keys()
        # Name of the teacher brain whose actions this trainer imitates.
        self.brain_to_imitate = trainer_parameters['brain_to_imitate']
        self.batches_per_epoch = trainer_parameters['batches_per_epoch']
        # Number of sequences drawn per training update; at least one.
        self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
                               1)

    def __str__(self):
        return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
            self.brain_name, '\n'.join(
                ['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))

    def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
                        take_action_outputs):
        """
        Adds experiences to each agent's experience history.
        :param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
        :param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
        :param take_action_outputs: The outputs of the take action method.
        """

        # Used to collect teacher experience into training buffer
        info_teacher = curr_info[self.brain_to_imitate]
        next_info_teacher = next_info[self.brain_to_imitate]
        for agent_id in info_teacher.agents:
            self.demonstration_buffer[agent_id].last_brain_info = info_teacher

        for agent_id in next_info_teacher.agents:
            stored_info_teacher = self.demonstration_buffer[agent_id].last_brain_info
            if stored_info_teacher is None:
                continue
            else:
                idx = stored_info_teacher.agents.index(agent_id)
                next_idx = next_info_teacher.agents.index(agent_id)
                # Teacher text observations carry "<record>,<reset>" flags;
                # an empty string is treated as "record, no reset".
                if stored_info_teacher.text_observations[idx] != "":
                    info_teacher_record, info_teacher_reset = \
                        stored_info_teacher.text_observations[idx].lower().split(",")
                    next_info_teacher_record, next_info_teacher_reset = \
                        next_info_teacher.text_observations[idx]. \
                            lower().split(",")
                    if next_info_teacher_reset == "true":
                        self.demonstration_buffer.reset_update_buffer()
                else:
                    info_teacher_record, next_info_teacher_record = "true", "true"
                if info_teacher_record == "true" and next_info_teacher_record == "true":
                    if not stored_info_teacher.local_done[idx]:
                        # Store visual/vector observations and (when the
                        # policy is recurrent) memories for this step.
                        for i in range(self.policy.vis_obs_size):
                            self.demonstration_buffer[agent_id]['visual_obs%d' % i] \
                                .append(stored_info_teacher.visual_observations[i][idx])
                        if self.policy.use_vec_obs:
                            self.demonstration_buffer[agent_id]['vector_obs'] \
                                .append(stored_info_teacher.vector_observations[idx])
                        if self.policy.use_recurrent:
                            # Lazily allocate zero memories if none stored.
                            if stored_info_teacher.memories.shape[1] == 0:
                                stored_info_teacher.memories = np.zeros(
                                    (len(stored_info_teacher.agents),
                                     self.policy.m_size))
                            self.demonstration_buffer[agent_id]['memory'].append(
                                stored_info_teacher.memories[idx])
                        # The teacher's action for this step is read from the
                        # *next* step's previous_vector_actions.
                        self.demonstration_buffer[agent_id]['actions'].append(
                            next_info_teacher.previous_vector_actions[next_idx])

        super(OnlineBCTrainer, self).add_experiences(curr_info, next_info, take_action_outputs)

    def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
        """
        Checks agent histories for processing condition, and processes them as necessary.
        Processing involves calculating value and advantage targets for model updating step.
        :param current_info: Current AllBrainInfo
        :param next_info: Next AllBrainInfo
        """
        info_teacher = next_info[self.brain_to_imitate]
        for l in range(len(info_teacher.agents)):
            # Number of actions buffered for this teacher agent so far.
            teacher_action_list = len(self.demonstration_buffer[info_teacher.agents[l]]['actions'])
            horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
            teacher_filled = len(self.demonstration_buffer[info_teacher.agents[l]]['actions']) > 0
            if (info_teacher.local_done[l] or horizon_reached) and teacher_filled:
                # Episode ended or horizon exceeded: flush this agent's local
                # buffer into the update buffer and reset it.
                agent_id = info_teacher.agents[l]
                self.demonstration_buffer.append_update_buffer(
                    agent_id, batch_size=None, training_length=self.policy.sequence_length)
                self.demonstration_buffer[agent_id].reset_agent()

        super(OnlineBCTrainer, self).process_experiences(current_info, next_info)
|
|||
import numpy as np |
|||
import pathlib |
|||
import logging |
|||
from mlagents.trainers.buffer import Buffer |
|||
from mlagents.envs.brain import BrainParameters, BrainInfo |
|||
from mlagents.envs.utilities import process_pixels |
|||
from mlagents.envs.communicator_objects import * |
|||
from google.protobuf.internal.decoder import _DecodeVarint32 |
|||
|
|||
logger = logging.getLogger("mlagents.trainers") |
|||
|
|||
|
|||
def brain_param_proto_to_obj(brain_param_proto):
    """Convert a BrainParametersProto message into a BrainParameters object."""
    camera_resolutions = []
    for res in brain_param_proto.camera_resolutions:
        camera_resolutions.append({
            "height": res.height,
            "width": res.width,
            "blackAndWhite": res.gray_scale,
        })
    param_dict = {
        "vectorObservationSize": brain_param_proto.vector_observation_size,
        "numStackedVectorObservations": brain_param_proto.num_stacked_vector_observations,
        "cameraResolutions": camera_resolutions,
        "vectorActionSize": brain_param_proto.vector_action_size,
        "vectorActionDescriptions": brain_param_proto.vector_action_descriptions,
        "vectorActionSpaceType": brain_param_proto.vector_action_space_type,
    }
    return BrainParameters(brain_param_proto.brain_name, param_dict)
|||
|
|||
|
|||
def agent_info_proto_to_brain_info(agent_info, brain_params):
    """
    Convert a single AgentInfoProto into a BrainInfo containing a batch of
    one agent, decoding visual observations via brain_params.
    """
    vis_obs = []
    # The proto carries one agent; downstream code expects batched arrays.
    agent_info_list = [agent_info]
    for i in range(brain_params.number_visual_observations):
        obs = [process_pixels(x.visual_observations[i],
                              brain_params.camera_resolutions[i]['blackAndWhite'])
               for x in agent_info_list]
        vis_obs += [np.array(obs)]
    # NOTE(review): agent_info_list always has exactly one element here, so
    # the empty-list branch below is currently unreachable.
    if len(agent_info_list) == 0:
        memory_size = 0
    else:
        memory_size = max([len(x.memories) for x in agent_info_list])
    if memory_size == 0:
        memory = np.zeros((0, 0))
    else:
        # Pad every agent's memories with zeros to the longest length.
        [x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
        memory = np.array([x.memories for x in agent_info_list])
    total_num_actions = sum(brain_params.vector_action_space_size)
    # Default mask allows all actions (1 = allowed); a stored mask of the
    # right length overrides it (mask flag set -> 0 = disallowed).
    mask_actions = np.ones((len(agent_info_list), total_num_actions))
    for agent_index, agent_info in enumerate(agent_info_list):
        if agent_info.action_mask is not None:
            if len(agent_info.action_mask) == total_num_actions:
                mask_actions[agent_index, :] = [
                    0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]
    if any([np.isnan(x.reward) for x in agent_info_list]):
        logger.warning("An agent had a NaN reward.")
    if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
        logger.warning("An agent had a NaN observation.")
    # NaN rewards and observations are replaced with zeros (after warning).
    brain_info = BrainInfo(
        visual_observation=vis_obs,
        vector_observation=np.nan_to_num(
            np.array([x.stacked_vector_observation for x in agent_info_list])),
        text_observations=[x.text_observation for x in agent_info_list],
        memory=memory,
        reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
        agents=[x.id for x in agent_info_list],
        local_done=[x.done for x in agent_info_list],
        vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
        text_action=[x.stored_text_actions for x in agent_info_list],
        max_reached=[x.max_step_reached for x in agent_info_list],
        action_mask=mask_actions
    )
    return brain_info
|||
|
|||
|
|||
def make_demo_buffer(brain_infos, brain_params, sequence_length):
    """
    Build a training Buffer from an ordered list of BrainInfos, pairing each
    step's observations with the following step's action.
    :param brain_infos: Ordered list of BrainInfos from a demonstration.
    :param brain_params: BrainParameters describing the recorded brain.
    :param sequence_length: Length of trajectories stored in the buffer.
    :return: The populated Buffer.
    """
    # Create and populate buffer using experiences
    demo_buffer = Buffer()
    for idx, experience in enumerate(brain_infos):
        # Stop one short of the end: each step needs a successor to supply
        # the action taken.
        if idx > len(brain_infos) - 2:
            break
        current_brain_info = brain_infos[idx]
        next_brain_info = brain_infos[idx + 1]
        demo_buffer[0].last_brain_info = current_brain_info
        for i in range(brain_params.number_visual_observations):
            demo_buffer[0]['visual_obs%d' % i] \
                .append(current_brain_info.visual_observations[i][0])
        if brain_params.vector_observation_space_size > 0:
            demo_buffer[0]['vector_obs'] \
                .append(current_brain_info.vector_observations[0])
        # The action for this step is stored in the next step's
        # previous_vector_actions.
        demo_buffer[0]['actions'].append(next_brain_info.previous_vector_actions[0])
        if next_brain_info.local_done[0]:
            # Episode boundary: flush the local buffer to the update buffer.
            demo_buffer.append_update_buffer(0, batch_size=None,
                                             training_length=sequence_length)
            demo_buffer.reset_local_buffers()
    # Flush whatever remains of the final (possibly unfinished) episode.
    demo_buffer.append_update_buffer(0, batch_size=None,
                                     training_length=sequence_length)
    return demo_buffer
|||
|
|||
|
|||
def demo_to_buffer(file_path, sequence_length):
    """
    Loads demonstration file and uses it to fill training buffer.
    :param file_path: Location of demonstration file (.demo).
    :param sequence_length: Length of trajectories to fill buffer.
    :return: Tuple of (BrainParameters, populated Buffer).
    """
    brain_params, brain_infos, _ = load_demonstration(file_path)
    demo_buffer = make_demo_buffer(brain_infos, brain_params, sequence_length)
    return brain_params, demo_buffer
|||
|
|||
|
|||
def load_demonstration(file_path):
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: Tuple of (BrainParameters, list of BrainInfos, expected number
             of steps recorded in the meta-data).
    :raises ValueError: If file_path does not end in '.demo'.
    """
    # Byte offset of the brain-parameters message: the meta-data message
    # occupies a fixed-size region at the start of the file.
    INITIAL_POS = 33

    file_extension = pathlib.Path(file_path).suffix
    if file_extension != '.demo':
        raise ValueError("The file is not a '.demo' file. Please provide a file with the "
                         "correct extension.")

    brain_params = None
    brain_infos = []
    # Read the whole file up front; 'with' guarantees the handle is closed
    # (the previous implementation leaked the file object).
    with open(file_path, "rb") as demo_file:
        data = demo_file.read()
    next_pos, pos, obs_decoded = 0, 0, 0
    total_expected = 0
    while pos < len(data):
        # _DecodeVarint32 returns (message length, position after varint).
        next_pos, pos = _DecodeVarint32(data, pos)
        if obs_decoded == 0:
            # First message: demonstration meta-data.
            meta_data_proto = DemonstrationMetaProto()
            meta_data_proto.ParseFromString(data[pos:pos + next_pos])
            total_expected = meta_data_proto.number_steps
            # Skip the fixed-size meta-data region rather than advancing by
            # the actual message length.
            pos = INITIAL_POS
        if obs_decoded == 1:
            # Second message: the recorded brain's parameters.
            brain_param_proto = BrainParametersProto()
            brain_param_proto.ParseFromString(data[pos:pos + next_pos])
            brain_params = brain_param_proto_to_obj(brain_param_proto)
            pos += next_pos
        if obs_decoded > 1:
            # Remaining messages: one AgentInfo per recorded step.
            agent_info = AgentInfoProto()
            agent_info.ParseFromString(data[pos:pos + next_pos])
            brain_info = agent_info_proto_to_brain_info(agent_info, brain_params)
            brain_infos.append(brain_info)
            if len(brain_infos) == total_expected:
                break
            pos += next_pos
        obs_decoded += 1
    return brain_params, brain_infos, total_expected
|
|||
Test9 -��@ * * 0:Ball3DBrain7 |
|||
�k?����<�
�� �@HZ�� " P���������< |
|||
�k?����<�
���;|@HZ�� �"{� " =���=P���������< |
|||
�k?����<�
�� 0r@HZ�� �"�� " =���=P���������< |
|||
�k?����<�
���a@HZ�� Z<� " =���=P���������< |
|||
�k?����<�
���BK@HZ�� �"{� " =���=P���������< |
|||
�k?����<�
��|a.@HZ�� ���� " =���=P���������< |
|||
�k?����<�
���8@HZ�� Z�� " =���=P���������< |
|||
�k?����<�
�����?HZ�� r��� " =���=P���������< |
|||
�k?����<�
��0FH?HZ�� �"�� " =���=P���������< |
|||
�k?����<�뵾�+?D#���>����7-�>" =���=P���������< |
|||
�k?����<`��P�*?� |
|||
����">ש��`Ъ>" =���=P���������< |
|||
�k?����<@w���*?����A�3>b���_L�>" =���=P���������< |
|||
�k?����< ���0s)?8����ID> |
|||
hƼ���>" =���=P���������< |
|||
�k?����<����`�(?�!����T>H�R�>" =���=P���������< |
|||
�k?����<���� (?�I����e>.9�����>" =���=P���������< |
|||
�k?����<@u�PV'?�v��Rv>����U$?" =���=P���������< |
|||
�k?����<�r[��&?��h��>]��x� ?" =���=P���������< |
|||
�k?����<�%@�@�%?hBZ�8>�Y
�g�?" =���=P���������< |
|||
�k?����<@.#��$?�K�A(�>����Y?" =���=P���������< |
|||
�k?����< |