浏览代码

Merge branch 'master' into soccer-fives

/soccer-fives
Andrew Cohen 4 年前
当前提交
b7d77740
共有 8 个文件被更改,包括 38 次插入30 次删除
  1. 2
      Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs
  2. 3
      com.unity.ml-agents/CHANGELOG.md
  3. 5
      com.unity.ml-agents/Runtime/Academy.cs
  4. 7
      com.unity.ml-agents/Runtime/Agent.cs
  5. 8
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  6. 27
      config/gail_config.yaml
  7. 2
      docs/Migrating.md
  8. 14
      ml-agents-envs/mlagents_envs/environment.py

2
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


{
if (useVectorObs)
{
sensor.AddObservation(GetStepCount() / (float)maxStep);
sensor.AddObservation(StepCount / (float)maxStep);
}
}

3
com.unity.ml-agents/CHANGELOG.md


- The stepping logic for the Agent and the Academy has been simplified (#3448)
- Updated Barracuda to 0.6.0-preview
- The checkpoint file suffix was changed from `.cptk` to `.ckpt` (#3470)
- The command-line argument used to determine the port that an environment will listen on was changed from `--port` to `--mlagents-port`.
- The method `GetStepCount()` on the Agent class has been replaced with the property getter `StepCount`.
- Updated the `gail_config.yaml` to work with per-Agent steps (#3475)
## [0.14.0-preview] - 2020-02-13

5
com.unity.ml-agents/Runtime/Academy.cs


{
const string k_ApiVersion = "API-15-dev0";
const int k_EditorTrainingPort = 5004;
internal const string k_portCommandLineFlag = "--mlagents-port";
// Lazy initializer pattern, see https://csharpindepth.com/articles/singleton#lazy
static Lazy<Academy> s_Lazy = new Lazy<Academy>(() => new Academy());

// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;
// Signals the Agent that a new step is about to start.
// Signals the Agent that a new step is about to start.
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;

var inputPort = "";
for (var i = 0; i < args.Length; i++)
{
if (args[i] == "--port")
if (args[i] == k_portCommandLineFlag)
{
inputPort = args[i + 1];
}

7
com.unity.ml-agents/Runtime/Agent.cs


m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
}
/// <summary>
/// Current episode number.
/// Current step count.
public int GetStepCount()
public int StepCount
return m_StepCount;
get { return m_StepCount; }
}
/// <summary>

8
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


Assert.AreEqual(i, aca.TotalStepCount);
Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
Assert.AreEqual(agent2StepSinceReset, agent2.StepCount);
Assert.AreEqual(numberAgent1Reset, agent1.agentResetCalls);
Assert.AreEqual(numberAgent2Reset, agent2.agentResetCalls);

expectedAgentStepCount += 1;
// If the next step will put the agent at maxSteps, we expect it to reset
if (agent1.GetStepCount() == maxStep - 1 || (i == 0))
if (agent1.StepCount == maxStep - 1 || (i == 0))
if (agent1.GetStepCount() == maxStep - 1)
if (agent1.StepCount == maxStep - 1)
{
expectedAgentActionSinceReset = 0;
expectedCollectObsCallsSinceReset = 0;

Assert.AreEqual(expectedAgentStepCount, agent1.GetStepCount());
Assert.AreEqual(expectedAgentStepCount, agent1.StepCount);
Assert.AreEqual(expectedResets, agent1.agentResetCalls);
Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls);
Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset);

27
config/gail_config.yaml


num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
summary_freq: 10000
use_recurrent: false
reward_signals:
extrinsic:

Pyramids:
summary_freq: 2000
summary_freq: 30000
time_horizon: 128
batch_size: 128
buffer_size: 2048

max_steps: 5.0e5
max_steps: 1.0e7
steps: 10000
steps: 150000
reward_signals:
extrinsic:
strength: 1.0

time_horizon: 1000
batch_size: 2024
buffer_size: 20240
max_steps: 1e6
summary_freq: 3000
max_steps: 1e7
summary_freq: 30000
steps: 5000
steps: 50000
reward_signals:
gail:
strength: 1.0

PushBlock:
max_steps: 5.0e4
max_steps: 1.5e7
summary_freq: 2000
summary_freq: 60000
time_horizon: 64
num_layers: 2
reward_signals:

encoding_size: 128
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
demo_path: Project/Assets/Demonstrations/PushblockDemo.demo
Hallway:
use_recurrent: true

num_epoch: 3
buffer_size: 1024
batch_size: 128
max_steps: 5.0e5
summary_freq: 1000
max_steps: 1.0e7
summary_freq: 10000
time_horizon: 64
reward_signals:
extrinsic:

FoodCollector:
batch_size: 64
summary_freq: 1000
max_steps: 5.0e4
max_steps: 2.0e6
use_recurrent: false
hidden_units: 128
learning_rate: 3.0e-4

2
docs/Migrating.md


* The `Monitor` class has been moved to the Examples Project. (It was prone to errors during testing)
* The `MLAgents.Sensor` namespace has been removed. All sensors now belong to the `MLAgents` namespace.
* The `SetActionMask` method must now be called on the optional `ActionMasker` argument of the `CollectObservations` method. (We now consider an action mask as a type of observation)
* The method `GetStepCount()` on the Agent class has been replaced with the property getter `StepCount`.
* Replace all calls to `Agent.GetStepCount()` with `Agent.StepCount`.
## Migrating from 0.13 to 0.14

14
ml-agents-envs/mlagents_envs/environment.py


SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
API_VERSION = "API-15-dev0"
DEFAULT_EDITOR_PORT = 5004
PORT_COMMAND_LINE_ARG = "--mlagents-port"
def __init__(
self,

subprocess_args = [launch_string]
if no_graphics:
subprocess_args += ["-nographics", "-batchmode"]
subprocess_args += ["--port", str(self.port)]
subprocess_args += [
UnityEnvironment.PORT_COMMAND_LINE_ARG,
str(self.port),
]
subprocess_args += args
try:
self.proc1 = subprocess.Popen(

# we created with `xvfb`.
#
docker_ls = (
"exec xvfb-run --auto-servernum"
" --server-args='-screen 0 640x480x24'"
" {0} --port {1}"
).format(launch_string, str(self.port))
f"exec xvfb-run --auto-servernum --server-args='-screen 0 640x480x24'"
f" {launch_string} {UnityEnvironment.PORT_COMMAND_LINE_ARG} {self.port}"
)
self.proc1 = subprocess.Popen(
docker_ls,
stdout=subprocess.PIPE,

正在加载...
取消
保存