浏览代码

[MLA-1767] Refactor communicator connection exceptions (#4935)

/bullet-hell-barracuda-test-1.3.1
GitHub 3 年前
当前提交
9d2f16cf
共有 6 个文件被更改,包括 143 次插入和 115 次删除
  1. 4
      com.unity.ml-agents/CHANGELOG.md
  2. 49
      com.unity.ml-agents/Runtime/Academy.cs
  3. 5
      com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs
  4. 167
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  5. 14
      com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs
  6. 19
      com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs

4
com.unity.ml-agents/CHANGELOG.md


reduced the amount of memory allocated by approximately 25%. (#4887)
- Removed several memory allocations that happened during inference with discrete actions. (#4922)
- Properly catch permission errors when writing timer files. (#4921)
- Unexpected gRPC exceptions during training are now logged before stopping training. If you see
"noisy" logs, please let us know! (#4930)
- Unexpected exceptions during training initialization and shutdown are now logged. If you see
"noisy" logs, please let us know! (#4930, #4935)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)

49
com.unity.ml-agents/Runtime/Academy.cs


{
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.
// environment must use Inference.
bool initSuccessful = false;
var communicatorInitParams = new CommunicatorInitParameters
{
unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",
CSharpCapabilities = new UnityRLCapabilities()
};
var unityRlInitParameters = Communicator.Initialize(
new CommunicatorInitParameters
{
unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",
CSharpCapabilities = new UnityRLCapabilities()
});
UnityEngine.Random.InitState(unityRlInitParameters.seed);
// We might have inference-only Agents, so set the seed for them too.
m_InferenceSeed = unityRlInitParameters.seed;
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
initSuccessful = Communicator.Initialize(
communicatorInitParams,
out var unityRlInitParameters
);
if (initSuccessful)
{
UnityEngine.Random.InitState(unityRlInitParameters.seed);
// We might have inference-only Agents, so set the seed for them too.
m_InferenceSeed = unityRlInitParameters.seed;
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
}
else
{
Debug.Log($"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. Will perform inference instead.");
Communicator = null;
}
catch
catch (Exception ex)
Debug.Log($"" +
$"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. " +
"Will perform inference instead."
);
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}\nWill perform inference instead.");
if (Communicator != null)
{
Communicator.QuitCommandReceived += OnQuitCommandReceived;

5
com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs


/// Sends the academy parameters through the Communicator.
/// Is used by the academy to send the AcademyParameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <returns>Whether the connection was successful.</returns>
UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters);
/// <param name="initParametersOut">The External Initialization Parameters received.</param>
bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut);
/// <summary>
/// Registers a new Brain to the Communicator.

167
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


internal static bool CheckCommunicationVersionsAreCompatible(
string unityCommunicationVersion,
string pythonApiVersion,
string pythonLibraryVersion)
string pythonApiVersion
)
{
var unityVersion = new Version(unityCommunicationVersion);
var pythonVersion = new Version(pythonApiVersion);

/// Sends the initialization parameters through the Communicator.
/// Is used by the academy to send initialization parameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <returns>Whether the connection was successful.</returns>
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters)
/// <param name="initParametersOut">The External Initialization Parameters received.</param>
public bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut)
{
var academyParameters = new UnityRLInitializationOutputProto
{

{
RlInitializationOutput = academyParameters
},
out input);
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
var unityCommunicationVersion = initParameters.unityCommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
out input
);
}
catch (Exception ex)
{
if (ex is RpcException rpcException)
{
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(unityCommunicationVersion,
pythonCommunicationVersion,
pythonPackageVersion);
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
// API strings, so log an explicit warning if that's the case.
if (initializationInput != null && input == null)
{
if (!communicationIsCompatible)
switch (rpcException.Status.StatusCode)
Debug.LogWarningFormat(
"Communication protocol between python ({0}) and Unity ({1}) have different " +
"versions which make them incompatible. Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
case StatusCode.Unavailable:
// This is the common case where there's no trainer to connect to.
break;
case StatusCode.DeadlineExceeded:
// We don't currently set a deadline for connection, but likely will in the future.
break;
default:
Debug.Log($"Unexpected gRPC exception when trying to initialize communication: {rpcException}");
break;
else
{
Debug.LogWarningFormat(
"Unknown communication error between Python. Python communication protocol: {0}, " +
"Python library version: {1}.",
pythonCommunicationVersion,
pythonPackageVersion
);
}
throw new UnityAgentsException("ICommunicator.Initialize() failed.");
else
{
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}");
}
initParametersOut = new UnityRLInitParameters();
return false;
catch
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(
initParameters.unityCommunicationVersion,
pythonCommunicationVersion
);
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
// API strings, so log an explicit warning if that's the case.
if (initializationInput != null && input == null)
var exceptionMessage = "The Communicator was unable to connect. Please make sure the External " +
"process is ready to accept communication with Unity.";
// Check for common error condition and add details to the exception message.
var httpProxy = Environment.GetEnvironmentVariable("HTTP_PROXY");
var httpsProxy = Environment.GetEnvironmentVariable("HTTPS_PROXY");
if (httpProxy != null || httpsProxy != null)
if (!communicationIsCompatible)
{
Debug.LogWarningFormat(
"Communication protocol between python ({0}) and Unity ({1}) have different " +
"versions which make them incompatible. Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
}
else
exceptionMessage += " Try removing HTTP_PROXY and HTTPS_PROXY from the" +
"environment variables and try again.";
Debug.LogWarningFormat(
"Unknown communication error between Python. Python communication protocol: {0}, " +
"Python library version: {1}.",
pythonCommunicationVersion,
pythonPackageVersion
);
throw new UnityAgentsException(exceptionMessage);
initParametersOut = new UnityRLInitParameters();
return false;
return initializationInput.RlInitializationInput.ToUnityRLInitParameters();
initParametersOut = initializationInput.RlInitializationInput.ToUnityRLInitParameters();
return true;
}
/// <summary>

SendCommandEvent(rlInput.Command);
}
UnityInputProto Initialize(UnityOutputProto unityOutput,
out UnityInputProto unityInput)
UnityInputProto Initialize(UnityOutputProto unityOutput, out UnityInputProto unityInput)
{
#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
m_IsOpen = true;

}
return result.UnityInput;
#else
throw new UnityAgentsException(
"You cannot perform training on this platform.");
throw new UnityAgentsException("You cannot perform training on this platform.");
#endif
}

QuitCommandReceived?.Invoke();
return message.UnityInput;
}
catch (RpcException rpcException)
catch (Exception ex)
// Log more verbose errors if they're something the user can possibly do something about.
switch (rpcException.Status.StatusCode)
if (ex is RpcException rpcException)
{
// Log more verbose errors if they're something the user can possibly do something about.
switch (rpcException.Status.StatusCode)
{
case StatusCode.Unavailable:
// This can happen when python disconnects. Ignore it to avoid noisy logs.
break;
case StatusCode.ResourceExhausted:
// This happens if the message body is too large. There's no way to
// gracefully handle this, but at least we can show the message and the
// user can try to reduce the number of agents or observation sizes.
Debug.LogError($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
default:
// Other unknown errors. Log at INFO level.
Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
}
}
else
case StatusCode.Unavailable:
// This can happen when python disconnects. Ignore it to avoid noisy logs.
break;
case StatusCode.ResourceExhausted:
// This happens if the message body is too large. There's no way to
// gracefully handle this, but at least we can show the message and the
// user can try to reduce the number of agents or observation sizes.
Debug.LogError($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
default:
// Other unknown errors. Log at INFO level.
Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
// Fall-through for other error types
Debug.LogError($"Communication Exception: {ex.Message}. Disconnecting from trainer.");
m_IsOpen = false;
QuitCommandReceived?.Invoke();
return null;
}
catch (Exception ex)
{
// Fall-through for other error types
Debug.LogError($"GRPC Exception: {ex.Message}. Disconnecting from trainer.");
m_IsOpen = false;
QuitCommandReceived?.Invoke();
return null;

14
com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs


using System.Collections.Generic;
using System;
using UnityEngine;
namespace Unity.MLAgents.SideChannels
{

internal void ProcessMessage(byte[] msg)
{
using (var incomingMsg = new IncomingMessage(msg))
try
{
using (var incomingMsg = new IncomingMessage(msg))
{
OnMessageReceived(incomingMsg);
}
}
catch (Exception ex)
OnMessageReceived(incomingMsg);
// Catch all errors in the sidechannel processing, so that a single
// bad SideChannel implementation doesn't take everything down with it.
Debug.LogError($"Error processing SideChannel message: {ex}.\nThe message will be skipped.");
}
}

19
com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs


{
var unityVerStr = "1.0.0";
var pythonVerStr = "1.0.0";
var pythonPackageVerStr = "0.16.0";
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
}
}
正在加载...
取消
保存