浏览代码

Merge branch 'master' into develop-var-len-obs-feature

/bullet-hell-barracuda-test-1.3.1
GitHub 3 年前
当前提交
4d32857d
共有 15 个文件被更改,包括 95 次插入57 次删除
  1. 2
      DevProject/Packages/manifest.json
  2. 2
      Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
  3. 2
      Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
  4. 2
      com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
  5. 5
      com.unity.ml-agents/CHANGELOG.md
  6. 14
      com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs
  7. 14
      com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
  8. 2
      com.unity.ml-agents/Runtime/Agent.cs
  9. 7
      com.unity.ml-agents/Runtime/StatsRecorder.cs
  10. 9
      docs/Migrating.md
  11. 3
      ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
  12. 32
      ml-agents/mlagents/trainers/stats.py
  13. 18
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  14. 34
      ml-agents/mlagents/trainers/tests/test_stats.py
  15. 6
      ml-agents/mlagents/trainers/trainer/rl_trainer.py

2
DevProject/Packages/manifest.json


"com.unity.purchasing": "2.1.0",
"com.unity.test-framework": "1.1.16",
"com.unity.test-framework.performance": "2.2.0-preview",
"com.unity.testtools.codecoverage": "0.2.2-preview",
"com.unity.testtools.codecoverage": "1.0.0-pre.3",
"com.unity.textmeshpro": "2.0.1",
"com.unity.timeline": "1.2.12",
"com.unity.ugui": "1.0.0",

2
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs


/// Creates a BasicActuator.
/// </summary>
/// <returns></returns>
#pragma warning disable 672
#pragma warning restore 672
{
return new BasicActuator(basicController);
}

2
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs


public class Match3ExampleActuatorComponent : Match3ActuatorComponent
{
/// <inheritdoc/>
#pragma warning disable 672
#pragma warning restore 672
{
var board = GetComponent<Match3Board>();
var agent = GetComponentInParent<Agent>();

2
com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs


public bool ForceHeuristic;
/// <inheritdoc/>
#pragma warning disable 672
#pragma warning restore 672
{
var board = GetComponent<AbstractBoard>();
var agent = GetComponentInParent<Agent>();

5
com.unity.ml-agents/CHANGELOG.md


will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
- Added the IHeuristicProvider interface to allow IActuators as well as Agents to implement the Heuristic function to generate actions.
Updated the Basic example and the Match3 Example to use Actuators.
Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)

- Added `ObservationWriter.AddList()` and deprecated `ObservationWriter.AddRange()`.
`AddList()` is recommended, as it does not generate any additional memory allocations. (#4887)
- The Barracuda dependency was upgraded to 1.3.0. (#4898)
- Added `ActuatorComponent.CreateActuators`, and deprecated `ActuatorComponent.CreateActuator`. The
default implementation will wrap `ActuatorComponent.CreateActuator` in an array and return that. (#4899)
- Tensorboard now logs the Environment Reward as both a scalar and a histogram. (#4878)
- Added a `--torch-device` commandline option to `mlagents-learn`, which sets the default
[`torch.device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) used for training. (#4888)
- The `--cpu` commandline option had no effect and was removed. Use `--torch-device=cpu` to force CPU training. (#4888)

14
com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs


using System;
using UnityEngine;
namespace Unity.MLAgents.Actuators

/// Create the IActuator. This is called by the Agent when it is initialized.
/// </summary>
/// <returns>Created IActuator object.</returns>
[Obsolete("Use CreateActuators instead.")]
/// <summary>
/// Builds the <see cref="IActuator"/>s provided by this component. The <see cref="Agent"/> invokes
/// this while it is being initialized.
/// </summary>
/// <returns>An array holding the created <see cref="IActuator"/>s.</returns>
public virtual IActuator[] CreateActuators()
{
    // Default behaviour: wrap the single actuator produced by the obsolete CreateActuator().
    // Warning 618 (use of an obsolete member) is silenced for that one call only.
#pragma warning disable 618
    var actuator = CreateActuator();
#pragma warning restore 618
    return new[] { actuator };
}
/// <summary>
/// The specification of the possible actions for this ActuatorComponent.

14
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


NumContinuousActions = NumDiscreteActions = SumOfDiscreteBranchSizes = 0;
}
/// <summary>
/// Passes every <see cref="IActuator"/> in the given array to <c>Add</c>, in array order.
/// </summary>
/// <param name="actuators">The <see cref="IActuator"/>s to add.</param>
public void AddActuators(IActuator[] actuators)
{
    foreach (var actuator in actuators)
    {
        Add(actuator);
    }
}
/*********************************************************************************
* IList implementation that delegates to m_Actuators List. *
*********************************************************************************/

public int Count => m_Actuators.Count;
/// <inheritdoc/>
public bool IsReadOnly => m_Actuators.IsReadOnly;
public bool IsReadOnly => false;
/// <inheritdoc/>
public int IndexOf(IActuator item)

2
com.unity.ml-agents/Runtime/Agent.cs


foreach (var actuatorComponent in attachedActuators)
{
m_ActuatorManager.Add(actuatorComponent.CreateActuator());
m_ActuatorManager.AddActuators(actuatorComponent.CreateActuators());
}
}

7
com.unity.ml-agents/Runtime/StatsRecorder.cs


/// <summary>
/// Values within the summary period are summed up before reporting.
/// </summary>
Sum = 2
Sum = 2,
/// <summary>
/// Values within the summary period are reported as a histogram.
/// </summary>
Histogram = 3
}
/// <summary>

9
docs/Migrating.md


- `VectorSensor.AddObservation(IEnumerable<float>)` is deprecated. Use `VectorSensor.AddObservation(IList<float>)`
instead.
- `ObservationWriter.AddRange()` is deprecated. Use `ObservationWriter.AddList()` instead.
- `ActuatorComponent.CreateActuator()` is deprecated. Please override `ActuatorComponent.CreateActuators`
instead. Since `ActuatorComponent.CreateActuator()` is abstract, you will still need to override it in your
class until it is removed. It is only ever called if you don't override `ActuatorComponent.CreateActuators`.
You can suppress the warnings by surrounding the method with the following pragma:
```c#
#pragma warning disable 672
public IActuator CreateActuator() { ... }
#pragma warning restore 672
```
# Migrating

3
ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py


# Values within the summary period are summed up before reporting.
SUM = 2
# All values within a summary period are reported as a histogram.
HISTOGRAM = 3
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]

32
ml-agents/mlagents/trainers/stats.py


class StatsSummary(NamedTuple):
mean: float
std: float
num: int
sum: float
full_dist: List[float]
return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
return StatsSummary([], StatsAggregationMethod.AVERAGE)
@property
def aggregated_value(self):

return self.mean
# Arithmetic mean of the values in full_dist, computed with np.mean.
@property
def mean(self):
return np.mean(self.full_dist)
# Standard deviation of the values in full_dist, computed with np.std.
@property
def std(self):
return np.std(self.full_dist)
# Number of values held in full_dist.
@property
def num(self):
return len(self.full_dist)
# Sum of the values in full_dist, computed with np.sum.
@property
def sum(self):
return np.sum(self.full_dist)
class StatsPropertyType(Enum):
HYPERPARAMETERS = "hyperparameters"

self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
if value.aggregation_method == StatsAggregationMethod.HISTOGRAM:
self.summary_writers[category].add_histogram(
f"{key}_hist", np.array(value.full_dist), step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

return StatsSummary.empty()
return StatsSummary(
mean=np.mean(stat_values),
std=np.std(stat_values),
num=len(stat_values),
sum=np.sum(stat_values),
full_dist=stat_values,
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)

18
ml-agents/mlagents/trainers/tests/test_agent_processor.py


expected_stats = {
"averaged": StatsSummary(
mean=2.0,
std=mock.ANY,
num=2,
sum=4.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0, 3.0], aggregation_method=StatsAggregationMethod.AVERAGE
mean=4.0,
std=0.0,
num=1,
sum=4.0,
aggregation_method=StatsAggregationMethod.MOST_RECENT,
full_dist=[4.0], aggregation_method=StatsAggregationMethod.MOST_RECENT
mean=2.1,
std=mock.ANY,
num=2,
sum=4.2,
aggregation_method=StatsAggregationMethod.SUM,
full_dist=[3.1, 1.1], aggregation_method=StatsAggregationMethod.SUM
),
}
stats_reporter.write_stats(123)

34
ml-agents/mlagents/trainers/tests/test_stats.py


with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)

def test_tensorboard_writer_clear(tmp_path):
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# TB has some sort of timeout before making a new file

category = "category1"
console_writer = ConsoleWriter()
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
console_writer.write_stats(
category,

10,
)
statssummary2 = StatsSummary(
mean=0.0,
std=0.0,
num=1,
sum=0.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[0.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
console_writer.write_stats(
category,

)
self.assertIn(
"Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0]
"Mean Reward: 1.000. Std of Reward: 0.000. Training.", cm.output[0]
)
self.assertIn("Not Training.", cm.output[1])

console_writer = ConsoleWriter()
console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
console_writer.write_stats(
category,

)
self.assertIn(
"Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0]
"Mean Reward: 1.000. Std of Reward: 0.000. Training.", cm.output[0]
)

6
ml-agents/mlagents/trainers/trainer/rl_trainer.py


import abc
import time
import attr
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.policy.checkpoint_manager import (
ModelCheckpoint,
ModelCheckpointManager,

for name, rewards in self.collected_rewards.items():
if name == "environment":
self.stats_reporter.add_stat(
"Environment/Cumulative Reward", rewards.get(agent_id, 0)
"Environment/Cumulative Reward",
rewards.get(agent_id, 0),
aggregation=StatsAggregationMethod.HISTOGRAM,
)
self.cumulative_returns_since_policy_update.append(
rewards.get(agent_id, 0)

正在加载...
取消
保存