浏览代码

Refactor DemonstrationStore/Recorder (#3354)

/asymm-envs
GitHub 5 年前
当前提交
a5d0cf3c
共有 7 个文件被更改,包括 212 次插入108 次删除
  1. 24
      com.unity.ml-agents/Runtime/Agent.cs
  2. 4
      com.unity.ml-agents/Runtime/Demonstration.cs
  3. 130
      com.unity.ml-agents/Runtime/DemonstrationRecorder.cs
  4. 102
      com.unity.ml-agents/Runtime/DemonstrationStore.cs
  5. 14
      com.unity.ml-agents/Runtime/DemonstrationStore.cs.meta
  6. 44
      com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs
  7. 2
      docs/Training-Imitation-Learning.md

24
com.unity.ml-agents/Runtime/Agent.cs


/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status, that is sent to the Brain.
/// </summary>
public struct AgentInfo
internal struct AgentInfo
{
/// <summary>
/// Keeps track of the last vector action taken by the Brain.

/// Whether or not the agent requests a decision.
bool m_RequestDecision;
/// Keeps track of the number of steps taken by the agent in this episode.
/// Note that this value is different for each agent, and may not overlap
/// with the step counter in the Academy, since agents reset based on

ActionMasker m_ActionMasker;
/// <summary>
/// Demonstration recorder.
/// Set of DemonstrationStores that the Agent will write its step information to.
/// If you use a DemonstrationRecorder component, this will automatically register its DemonstrationStore.
/// You can also add your own DemonstrationStore by calling DemonstrationRecorder.AddDemonstrationStoreToAgent()
DemonstrationRecorder m_Recorder;
internal ISet<DemonstrationStore> DemonstrationStores = new HashSet<DemonstrationStore>();
/// <summary>
/// List of sensors used to generate observations.

// Grab the "static" properties for the Agent.
m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Recorder = GetComponent<DemonstrationRecorder>();
m_Info = new AgentInfo();
m_Action = new AgentAction();

/// becomes disabled or inactive.
void OnDisable()
{
DemonstrationStores.Clear();
// If Academy.Dispose has already been called, we don't need to unregister with it.
// We don't want to even try, because this will lazily create a new Academy!
if (Academy.IsInitialized)

// We request a decision so Python knows the Agent is done immediately
m_Brain?.RequestDecision(m_Info, sensors);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
// We also have to write to any DemonstrationStores so that they get the "done" flag.
foreach(var demoWriter in DemonstrationStores)
m_Recorder.WriteExperience(m_Info, sensors);
demoWriter.Record(m_Info, sensors);
}
UpdateRewardStats();

m_Brain.RequestDecision(m_Info, sensors);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
// If we have any DemonstrationStores, write the AgentInfo and sensors to them.
foreach(var demoWriter in DemonstrationStores)
m_Recorder.WriteExperience(m_Info, sensors);
demoWriter.Record(m_Info, sensors);
}
}

sensors[i].Update();
}
}
/// <summary>
/// Collects the vector observations of the agent.

4
com.unity.ml-agents/Runtime/Demonstration.cs


/// Used for imitation learning, or other forms of learning from data.
/// </summary>
[Serializable]
public class Demonstration : ScriptableObject
internal class Demonstration : ScriptableObject
{
public DemonstrationMetaData metaData;
public BrainParameters brainParameters;

/// Kept in a separate class for easy serialization and deserialization.
/// </summary>
[Serializable]
public class DemonstrationMetaData
internal class DemonstrationMetaData
{
public int numberExperiences;
public int numberEpisodes;

130
com.unity.ml-agents/Runtime/DemonstrationRecorder.cs


using System.IO.Abstractions;
using System.Text.RegularExpressions;
using UnityEngine;
using System.Collections.Generic;
using System.IO;
namespace MLAgents
{

[AddComponentMenu("ML Agents/Demonstration Recorder", (int)MenuGroup.Default)]
public class DemonstrationRecorder : MonoBehaviour
{
[Tooltip("Whether or not to record demonstrations.")]
[Tooltip("Base demonstration file name. Will have numbers appended to make unique.")]
string m_FilePath;
[Tooltip("Base directory to write the demo files. If null, will use {Application.dataPath}/Demonstrations.")]
public string demonstrationDirectory;
public const int MaxNameLength = 16;
internal const int MaxNameLength = 16;
const string k_ExtensionType = ".demo";
IFileSystem m_FileSystem;
Agent m_Agent;
void Start()
void OnEnable()
if (Application.isEditor && record)
{
InitializeDemoStore();
}
m_Agent = GetComponent<Agent>();
if (Application.isEditor && record && m_DemoStore == null)
if (record)
InitializeDemoStore();
LazyInitialize();
/// Has no effect if the demonstration store was already created.
public void InitializeDemoStore(IFileSystem fileSystem = null)
internal DemonstrationStore LazyInitialize(IFileSystem fileSystem = null)
m_DemoStore = new DemonstrationStore(fileSystem);
if (m_DemoStore != null)
{
return m_DemoStore;
}
if (m_Agent == null)
{
m_Agent = GetComponent<Agent>();
}
m_FileSystem = fileSystem ?? new FileSystem();
if (string.IsNullOrEmpty(demonstrationName))
{
demonstrationName = behaviorParams.behaviorName;
}
if (string.IsNullOrEmpty(demonstrationDirectory))
{
demonstrationDirectory = Path.Combine(Application.dataPath, "Demonstrations");
}
var filePath = MakeDemonstrationFilePath(m_FileSystem, demonstrationDirectory, demonstrationName);
var stream = m_FileSystem.File.Create(filePath);
m_DemoStore = new DemonstrationStore(stream);
behaviorParams.fullyQualifiedBehaviorName);
behaviorParams.fullyQualifiedBehaviorName
);
AddDemonstrationStoreToAgent(m_DemoStore);
return m_DemoStore;
}
/// <summary>

public static string SanitizeName(string demoName, int maxNameLength)
internal static string SanitizeName(string demoName, int maxNameLength)
{
var rgx = new Regex("[^a-zA-Z0-9 -]");
demoName = rgx.Replace(demoName, "");

}
/// <summary>
/// Forwards AgentInfo to Demonstration Store.
/// Gets a unique path for the demonstrationName in the demonstrationDirectory.
public void WriteExperience(AgentInfo info, List<ISensor> sensors)
/// <param name="fileSystem"></param>
/// <param name="demonstrationDirectory"></param>
/// <param name="demonstrationName"></param>
/// <returns></returns>
internal static string MakeDemonstrationFilePath(
IFileSystem fileSystem, string demonstrationDirectory, string demonstrationName
)
m_DemoStore?.Record(info, sensors);
// Create the directory if it doesn't already exist
if (!fileSystem.Directory.Exists(demonstrationDirectory))
{
fileSystem.Directory.CreateDirectory(demonstrationDirectory);
}
var literalName = demonstrationName;
var filePath = Path.Combine(demonstrationDirectory, literalName + k_ExtensionType);
var uniqueNameCounter = 0;
while (fileSystem.File.Exists(filePath))
{
// TODO should we use a timestamp instead of a counter here? This loops an increasing number of times
// as the number of demos increases.
literalName = demonstrationName + "_" + uniqueNameCounter;
filePath = Path.Combine(demonstrationDirectory, literalName + k_ExtensionType);
uniqueNameCounter++;
}
return filePath;
/// <summary>
/// Close the DemonstrationStore and remove it from the Agent.
/// Has no effect if the DemonstrationStore is already closed (or wasn't opened)
/// </summary>
RemoveDemonstrationStoreFromAgent(m_DemoStore);
m_DemoStore.Close();
m_DemoStore = null;
}

/// Closes Demonstration store.
/// Clean up the DemonstrationStore when shutting down or destroying the Agent.
void OnApplicationQuit()
void OnDestroy()
if (Application.isEditor && record)
{
Close();
}
Close();
}
/// <summary>
/// Adds an additional DemonstrationStore to the Agent, so that the Agent writes its step
/// information to it. It is still up to the caller to Close this DemonstrationStore when
/// recording is done.
/// </summary>
/// <param name="demoStore">The DemonstrationStore to register with the Agent.</param>
public void AddDemonstrationStoreToAgent(DemonstrationStore demoStore)
{
    // The cached Agent reference may not have been resolved yet if this is called before
    // the recorder was initialized; look it up on demand, as LazyInitialize does.
    if (m_Agent == null)
    {
        m_Agent = GetComponent<Agent>();
    }
    m_Agent.DemonstrationStores.Add(demoStore);
}
/// <summary>
/// Removes a DemonstrationStore from the Agent, so that the Agent no longer writes to it.
/// It is still up to the caller to Close this DemonstrationStore when recording is done.
/// Has no effect if no Agent is available on this GameObject.
/// </summary>
/// <param name="demoStore">The DemonstrationStore to unregister from the Agent.</param>
public void RemoveDemonstrationStoreFromAgent(DemonstrationStore demoStore)
{
    // Resolve the Agent on demand, mirroring LazyInitialize.
    if (m_Agent == null)
    {
        m_Agent = GetComponent<Agent>();
    }
    // This runs on the cleanup path (Close), so a missing or already-destroyed Agent must
    // not raise: without an Agent there is no registration left to undo.
    if (m_Agent == null)
    {
        return;
    }
    m_Agent.DemonstrationStores.Remove(demoStore);
}
}
}

102
com.unity.ml-agents/Runtime/DemonstrationStore.cs


using System.IO;
using System.IO.Abstractions;
using Google.Protobuf;
using System.Collections.Generic;

/// Responsible for writing demonstration data to file.
/// Responsible for writing demonstration data to stream (usually a file stream).
readonly IFileSystem m_FileSystem;
const string k_DemoDirectory = "Assets/Demonstrations/";
const string k_ExtensionType = ".demo";
string m_FilePath;
public DemonstrationStore(IFileSystem fileSystem)
/// <summary>
/// Create a DemonstrationStore that will write to the specified stream.
/// The stream must support writes and seeking.
/// </summary>
/// <param name="stream"></param>
public DemonstrationStore(Stream stream)
if (fileSystem != null)
{
m_FileSystem = fileSystem;
}
else
{
m_FileSystem = new FileSystem();
}
m_Writer = stream;
/// Initializes the Demonstration Store, and writes initial data.
/// Writes the initial data to the stream.
CreateDirectory();
CreateDemonstrationFile(demonstrationName);
WriteBrainParameters(brainName, brainParameters);
}
/// <summary>
/// Checks for the existence of the Demonstrations directory
/// and creates it if it does not exist.
/// </summary>
void CreateDirectory()
{
if (!m_FileSystem.Directory.Exists(k_DemoDirectory))
if (m_Writer == null)
m_FileSystem.Directory.CreateDirectory(k_DemoDirectory);
// Already closed
return;
m_MetaData = new DemonstrationMetaData { demonstrationName = demonstrationName };
var metaProto = m_MetaData.ToProto();
metaProto.WriteDelimitedTo(m_Writer);
WriteBrainParameters(brainName, brainParameters);
/// Creates demonstration file.
/// Writes meta-data. Note that this is called at the *end* of recording, but writes to the
/// beginning of the file.
void CreateDemonstrationFile(string demonstrationName)
void WriteMetadata()
// Creates demonstration file.
var literalName = demonstrationName;
m_FilePath = k_DemoDirectory + literalName + k_ExtensionType;
var uniqueNameCounter = 0;
while (m_FileSystem.File.Exists(m_FilePath))
if (m_Writer == null)
literalName = demonstrationName + "_" + uniqueNameCounter;
m_FilePath = k_DemoDirectory + literalName + k_ExtensionType;
uniqueNameCounter++;
// Already closed
return;
m_Writer = m_FileSystem.File.Create(m_FilePath);
m_MetaData = new DemonstrationMetaData { demonstrationName = demonstrationName };
var metaProtoBytes = metaProto.ToByteArray();
m_Writer.Write(metaProtoBytes, 0, metaProtoBytes.Length);
m_Writer.Seek(0, 0);
metaProto.WriteDelimitedTo(m_Writer);
}

void WriteBrainParameters(string brainName, BrainParameters brainParameters)
{
if (m_Writer == null)
{
// Already closed
return;
}
// Writes BrainParameters to file.
m_Writer.Seek(MetaDataBytes + 1, 0);
var brainProto = brainParameters.ToProto(brainName, false);

/// <summary>
/// Write AgentInfo experience to file.
/// </summary>
public void Record(AgentInfo info, List<ISensor> sensors)
internal void Record(AgentInfo info, List<ISensor> sensors)
if (m_Writer == null)
{
// Already closed
return;
}
// Increment meta-data counters.
m_MetaData.numberExperiences++;
m_CumulativeReward += info.reward;

agentProto.WriteDelimitedTo(m_Writer);
}
if (m_Writer == null)
{
// Already closed
return;
}
m_Writer = null;
}
/// <summary>

{
m_MetaData.numberEpisodes += 1;
}
/// <summary>
/// Writes meta-data. The serialized meta-data is first written at the stream's current
/// position, then the stream is rewound and the meta-data is written again,
/// length-delimited, at the beginning of the stream.
/// Has no effect if the stream has already been closed.
/// </summary>
void WriteMetadata()
{
    if (m_Writer == null)
    {
        // Already closed
        return;
    }
    var metaProto = m_MetaData.ToProto();
    var metaProtoBytes = metaProto.ToByteArray();
    // NOTE(review): this first write lands wherever the stream currently is (presumably the
    // end of the recorded data) - confirm that it is still required.
    m_Writer.Write(metaProtoBytes, 0, metaProtoBytes.Length);
    // Rewind to the start of the stream (origin 0 is SeekOrigin.Begin) so the up-to-date
    // meta-data is stored at the beginning.
    m_Writer.Seek(0, 0);
    metaProto.WriteDelimitedTo(m_Writer);
}
}
}

14
com.unity.ml-agents/Runtime/DemonstrationStore.cs.meta


fileFormatVersion: 2
guid: a79c7ccb2cd042b5b1e710b9588d921b
timeCreated: 1537388072
fileFormatVersion: 2
guid: a79c7ccb2cd042b5b1e710b9588d921b
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

44
com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs


[TestFixture]
public class DemonstrationTests : MonoBehaviour
{
const string k_DemoDirecory = "Assets/Demonstrations/";
const string k_DemoDirectory = "Assets/Demonstrations/";
const string k_ExtensionType = ".demo";
const string k_DemoName = "Test";

public void TestStoreInitalize()
{
var fileSystem = new MockFileSystem();
var demoStore = new DemonstrationStore(fileSystem);
Assert.IsFalse(fileSystem.Directory.Exists(k_DemoDirecory));
var gameobj = new GameObject("gameObj");
var brainParameters = new BrainParameters
{
vectorObservationSize = 3,
numStackedVectorObservations = 2,
vectorActionDescriptions = new[] { "TestActionA", "TestActionB" },
vectorActionSize = new[] { 2, 2 },
vectorActionSpaceType = SpaceType.Discrete
};
var bp = gameobj.AddComponent<BehaviorParameters>();
bp.brainParameters.vectorObservationSize = 3;
bp.brainParameters.numStackedVectorObservations = 2;
bp.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bp.brainParameters.vectorActionSize = new[] { 2, 2 };
bp.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
demoStore.Initialize(k_DemoName, brainParameters, "TestBrain");
var agent = gameobj.AddComponent<TestAgent>();
Assert.IsTrue(fileSystem.Directory.Exists(k_DemoDirecory));
Assert.IsTrue(fileSystem.FileExists(k_DemoDirecory + k_DemoName + k_ExtensionType));
Assert.IsFalse(fileSystem.Directory.Exists(k_DemoDirectory));
var demoRec = gameobj.AddComponent<DemonstrationRecorder>();
demoRec.record = true;
demoRec.demonstrationName = k_DemoName;
demoRec.demonstrationDirectory = k_DemoDirectory;
var demoStore = demoRec.LazyInitialize(fileSystem);
Assert.IsTrue(fileSystem.Directory.Exists(k_DemoDirectory));
Assert.IsTrue(fileSystem.FileExists(k_DemoDirectory + k_DemoName + k_ExtensionType));
var agentInfo = new AgentInfo
{

storedVectorActions = new[] { 0f, 1f },
};
demoRec.Close();
// Make sure close can be called multiple times
demoRec.Close();
// Make sure trying to write after closing doesn't raise an error.
demoStore.Record(agentInfo, new System.Collections.Generic.List<ISensor>());
}
public class ObservationAgent : TestAgent

agentGo1.AddComponent<DemonstrationRecorder>();
var demoRecorder = agentGo1.GetComponent<DemonstrationRecorder>();
var fileSystem = new MockFileSystem();
demoRecorder.demonstrationDirectory = k_DemoDirectory;
demoRecorder.InitializeDemoStore(fileSystem);
demoRecorder.LazyInitialize(fileSystem);
var agentEnableMethod = typeof(Agent).GetMethod("OnEnable",
BindingFlags.Instance | BindingFlags.NonPublic);

2
docs/Training-Imitation-Learning.md


from a few minutes or a few hours of demonstration data may be necessary to
be useful for imitation learning. When you have recorded enough data, end
the Editor play session, and a `.demo` file will be created in the
`Assets/Demonstrations` folder. This file contains the demonstrations.
`Assets/Demonstrations` folder (by default). This file contains the demonstrations.
Clicking on the file will provide metadata about the demonstration in the
inspector.

正在加载...
取消
保存