
Merge pull request #2346 from Unity-Technologies/release-0.9.0

Merge latest fixes from release into develop
/develop-generalizationTraining-TrainerController
GitHub 5 years ago
Current commit
4991d83f
15 files changed, with 92 insertions and 56 deletions
  1. README.md (6)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs (9)
  3. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (32)
  4. UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs (19)
  5. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (2)
  6. config/trainer_config.yaml (2)
  7. docs/ML-Agents-Overview.md (11)
  8. docs/Training-Generalization-Learning.md (51)
  9. docs/Training-ML-Agents.md (2)
  10. gym-unity/setup.py (4)
  11. ml-agents-envs/mlagents/envs/environment.py (2)
  12. ml-agents-envs/mlagents/envs/mock_communicator.py (2)
  13. ml-agents-envs/setup.py (2)
  14. ml-agents/setup.py (4)
  15. /config/3dball_generalize.yaml (0)

README.md (6)


* 10+ sample Unity environments
* Support for multiple environment configurations and training scenarios
* Train memory-enhanced agents using deep reinforcement learning
* Easily definable Curriculum Learning scenarios
* Easily definable Curriculum Learning and Generalization scenarios
* Broadcasting of agent behavior for supervised learning
* Built-in support for Imitation Learning
* Flexible agent control with On Demand Decision Making

[submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and
make sure to include as much detail as possible.
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
team at ml-agents@unity3d.com.
## Translations

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs (9)


public void Contruction()
{
var bp = new BrainParameters();
var tensorGenerator = new TensorApplier(bp, 0, new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var tensorGenerator = new TensorApplier(bp, 0, alloc);
alloc.Dispose();
}
[Test]

4f, 5f, 6f, 7f, 8f})
};
var agentInfos = GetFakeAgentInfos();
var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, alloc);
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agent = agents[0] as TestAgent;

action = agent.GetAction();
Assert.AreEqual(action.vectorActions[0], 1);
Assert.AreEqual(action.vectorActions[1], 2);
alloc.Dispose();
}
[Test]

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (32)


public void Contruction()
{
var bp = new BrainParameters();
var tensorGenerator = new TensorGenerator(bp, 0, new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var tensorGenerator = new TensorGenerator(bp, 0, alloc);
alloc.Dispose();
}
[Test]

var alloc = new TensorCachingAllocator();
var generator = new BatchSizeGenerator(new TensorCachingAllocator());
var generator = new BatchSizeGenerator(alloc);
alloc.Dispose();
}
[Test]

var alloc = new TensorCachingAllocator();
var generator = new SequenceLengthGenerator(new TensorCachingAllocator());
var generator = new SequenceLengthGenerator(alloc);
alloc.Dispose();
}
[Test]

};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var generator = new VectorObservationGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new VectorObservationGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0, 0], 1);

alloc.Dispose();
}
[Test]

};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var generator = new RecurrentInputGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new RecurrentInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0, 0], 0);

alloc.Dispose();
}
[Test]

};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var generator = new PreviousActionInputGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new PreviousActionInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);

Assert.AreEqual(inputTensor.Data[1, 1], 4);
alloc.Dispose();
}
[Test]

};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var generator = new ActionMaskInputGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new ActionMaskInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0, 0], 1);

alloc.Dispose();
}
}
}

UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs (19)


}
[Test]
public void TestDstWrongShape()
{
Multinomial m = new Multinomial(2018);
TensorProxy src = new TensorProxy
{
ValueType = TensorProxy.TensorType.FloatingPoint,
Data = new Tensor(0,1)
};
TensorProxy dst = new TensorProxy
{
ValueType = TensorProxy.TensorType.FloatingPoint,
Data = new Tensor(0,2)
};
Assert.Throws<ArgumentException>(() => m.Eval(src, dst));
}
[Test]
public void TestUnequalBatchSize()
{
Multinomial m = new Multinomial(2018);

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (2)


[SerializeField]
public BroadcastHub broadcastHub = new BroadcastHub();
private const string kApiVersion = "API-8";
private const string kApiVersion = "API-9";
/// Temporary storage for global gravity value
/// Used to restore original value when deriving Academy modifies it

config/trainer_config.yaml (2)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
vis_encode_type: simple
reward_signals:
extrinsic:
strength: 1.0

docs/ML-Agents-Overview.md (11)


Link](https://youtu.be/kpb8ZkMBFYs).
ML-Agents provides ways to both learn directly from demonstrations as well as
use demonstrations to help speed up reward-based training. The
use demonstrations to help speed up reward-based training, and two algorithms to do
so (Generative Adversarial Imitation Learning and Behavioral Cloning). The
[Training with Imitation Learning](Training-Imitation-Learning.md) tutorial
covers these features in more depth.

particularly when debugging agent behaviors. You can learn more about using
the broadcasting feature
[here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature).
- **Training with Environment Parameter Sampling** - To train an agent to be robust
to changes in its environment (i.e., generalization), the agent should be exposed
to a variety of environment variations. Similarly to Curriculum Learning, which
allows environments to get more difficult as the agent learns, we also provide
a way to randomly resample aspects of the environment during training. See
[Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
to learn more about this feature.
- **Docker Set-up (Experimental)** - To facilitate setting up ML-Agents without
installing Python or TensorFlow directly, we provide a

docs/Training-Generalization-Learning.md (51)


_Variations of the 3D Ball environment._
To vary environments, we first decide what parameters to vary in an
environment. These parameters are known as `Reset Parameters`. In the 3D ball
environment example displayed in the figure above, the reset parameters are `gravity`, `ball_mass` and `ball_scale`.
environment. We call these parameters `Reset Parameters`. In the 3D ball
environment example displayed in the figure above, the reset parameters are
`gravity`, `ball_mass` and `ball_scale`.
## How-to

This is done by assigning each reset parameter a sampler, which samples a reset
parameter value (such as a uniform sampler). If a sampler isn't provided for a
reset parameter, the parameter maintains the default value throughout the
training, remaining unchanged. The samplers for all the reset parameters are
handled by a **Sampler Manager**, which also handles the generation of new
training procedure, remaining unchanged. The samplers for all the reset parameters
are handled by a **Sampler Manager**, which also handles the generation of new
`resampling-duration` (number of simulation steps after which reset parameters are
`resampling-interval` (number of simulation steps after which reset parameters are
episode-length: 5000
resampling-interval: 5000
mass:
    sampler-type: "uniform"

```
* `resampling-duration` (int) - Specifies the number of steps for agent to
* `resampling-interval` (int) - Specifies the number of steps for agent to
train under a particular environment configuration before resetting the
environment with a new sample of reset parameters.

key under the `multirange_uniform` sampler for the gravity reset parameter.
The key name should match the name of the corresponding argument in the sampler definition (see the "Defining a new sampler method" section below).
#### Possible Sampler Types
The currently implemented samplers that can be used with the `sampler-type` arguments are:
* `uniform` - Uniform sampler
* Uniformly samples a single float value between defined endpoints.
The sub-arguments for this sampler to specify the interval
endpoints are as below. The sampling is done in the range of
[`min_value`, `max_value`).
* **sub-arguments** - `min_value`, `max_value`
* `gaussian` - Gaussian sampler
* Samples a single float value from the distribution characterized by
the mean and standard deviation. The sub-arguments to specify the
gaussian distribution to use are as below.
* **sub-arguments** - `mean`, `st_dev`
* `multirange_uniform` - Multirange Uniform sampler
* Uniformly samples a single float value between the specified intervals.
Samples by first performing a weight pick of an interval from the list
of intervals (weighted based on interval width) and samples uniformly
from the selected interval (half-closed interval, same as the uniform
sampler). This sampler can take an arbitrary number of intervals in a
list in the following format:
[[`interval_1_min`, `interval_1_max`], [`interval_2_min`, `interval_2_max`], ...]
* **sub-arguments** - `intervals`
The implementation of the samplers can be found at `ml-agents-envs/mlagents/envs/sampler_class.py`.
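Putting the above together, a complete sampler file for the 3D Ball example might look like the following sketch. Only the structure comes from this page (`resampling-interval` at the top level, plus one block per reset parameter naming a `sampler-type` and the sub-arguments listed above); the concrete reset parameter names and numeric values here are illustrative assumptions rather than the contents of the shipped `config/3dball_generalize.yaml`.
```
resampling-interval: 5000

mass:
    sampler-type: "uniform"
    min_value: 0.5
    max_value: 10

gravity:
    sampler-type: "multirange_uniform"
    intervals: [[7, 10], [15, 20]]

scale:
    sampler-type: "gaussian"
    mean: 2
    st_dev: 0.3
```
Any reset parameter left out of this file keeps its default value for the entire training run, as described above.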
### Defining a new sampler method

### Training with Generalization Learning
We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/generalization-test.yaml` sampling setup, we can run
We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/3dball_generalize.yaml` sampling setup, we can run
mlagents-learn config/trainer_config.yaml --sampler=config/generalize_test.yaml --run-id=3D-Ball-generalization --train
mlagents-learn config/trainer_config.yaml --sampler=config/3dball_generalize.yaml --run-id=3D-Ball-generalization --train
```
We can observe progress and metrics via Tensorboard.
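If TensorBoard is installed, this typically means pointing it at the training summaries directory, e.g. `tensorboard --logdir=summaries` (a sketch assuming the default output directory written by `mlagents-learn`), and opening the reported local URL in a browser.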

docs/Training-ML-Agents.md (2)


* [Training with PPO](Training-PPO.md)
* [Using Recurrent Neural Networks](Feature-Memory.md)
* [Training with Curriculum Learning](Training-Curriculum-Learning.md)
* [Training with Generalization](Training-Generalization-Learning.md)
* [Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
* [Training with Imitation Learning](Training-Imitation-Learning.md)
You can also compare the

gym-unity/setup.py (4)


setup(
name="gym_unity",
version="0.4.2",
version="0.4.3",
description="Unity Machine Learning Agents Gym Interface",
license="Apache License 2.0",
author="Unity Technologies",

install_requires=["gym", "mlagents_envs==0.8.2"],
install_requires=["gym", "mlagents_envs==0.9.0"],
)

ml-agents-envs/mlagents/envs/environment.py (2)


atexit.register(self._close)
self.port = base_port + worker_id
self._buffer_size = 12000
self._version_ = "API-8"
self._version_ = "API-9"
self._loaded = (
False
) # If true, this means the environment was successfully loaded

ml-agents-envs/mlagents/envs/mock_communicator.py (2)


is_training=True,
)
rl_init = UnityRLInitializationOutput(
name="RealFakeAcademy", version="API-8", log_path="", brain_parameters=[bp]
name="RealFakeAcademy", version="API-9", log_path="", brain_parameters=[bp]
)
return UnityOutput(rl_initialization_output=rl_init)

ml-agents-envs/setup.py (2)


setup(
name="mlagents_envs",
version="0.8.2",
version="0.9.0",
description="Unity Machine Learning Agents Interface",
url="https://github.com/Unity-Technologies/ml-agents",
author="Unity Technologies",

ml-agents/setup.py (4)


setup(
name="mlagents",
version="0.8.2",
version="0.9.0",
description="Unity Machine Learning Agents",
long_description=long_description,
long_description_content_type="text/markdown",

),
zip_safe=False,
install_requires=[
"mlagents_envs==0.8.2",
"mlagents_envs==0.9.0",
"tensorflow>=1.7,<1.8",
"Pillow>=4.2.1",
"matplotlib",

/config/generalize_test.yaml → /config/3dball_generalize.yaml
