浏览代码

Don't drop multiple stats from the same step (#4236)

/MLA-1734-demo-provider
GitHub 5 年前
当前提交
20f1386a
共有 10 个文件被更改,包括 82 次插入和 64 次删除
  1. 73
      Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab
  2. 8
      com.unity.ml-agents/CHANGELOG.md
  3. 1
      com.unity.ml-agents/Runtime/StatsRecorder.cs
  4. 2
      docs/Python-API.md
  5. 18
      ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
  6. 2
      ml-agents-envs/mlagents_envs/tests/test_side_channel.py
  7. 24
      ml-agents/mlagents/trainers/agent_processor.py
  8. 4
      ml-agents/mlagents/trainers/env_manager.py
  9. 6
      ml-agents/mlagents/trainers/subprocess_env_manager.py
  10. 8
      ml-agents/mlagents/trainers/tests/test_agent_processor.py

73
Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114176228333253036
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

frozenMaterial: {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
myLaser: {fileID: 1081721624670010}
contribute: 0
contribute: 1
useVectorObs: 1
--- !u!114 &114725457980523372
MonoBehaviour:

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &1222199865870203693
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1482701732800114
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114711827726849508
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1528397385587768
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114542632553128056
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1617924810425504
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114189751434580810
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1688105343773098
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114235147148547996
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1729825611722018
GameObject:
m_ObjectHideFlags: 0

8
com.unity.ml-agents/CHANGELOG.md


## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- StatsSideChannel now stores multiple values per key. This means that multiple
calls to `StatsRecorder.Add()` with the same key in the same step will no
longer overwrite each other. (#4236)
- Model checkpoints are now also saved as .nn files during training. (#4127)
- Model checkpoint info is saved in TrainingStatus.json after training is concluded. (#4127)

recursively (for example, by an Agent's CollectObservations method).
Previously, this would result in an infinite loop and cause the editor to hang.
(#4226)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.2.0-preview] - 2020-07-15

1
com.unity.ml-agents/Runtime/StatsRecorder.cs


{
/// <summary>
/// Values within the summary period are averaged before reporting.
/// Note that values from the same C# environment in the same step may replace each other.
/// </summary>
Average = 0,

2
docs/Python-API.md


from mlagents_envs.environment import UnityEnvironment
# This is a non-blocking call that only loads the environment.
env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
# Start interacting with the evironment.
# Start interacting with the environment.
env.reset()
behavior_names = env.behavior_specs.keys()
...

18
ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py


from mlagents_envs.side_channel import SideChannel, IncomingMessage
from typing import Dict, Tuple
from typing import Tuple, List, Mapping
from collections import defaultdict
from mlagents_envs.side_channel import SideChannel, IncomingMessage
# Determines the behavior of how multiple stats within the same summary period are combined.

MOST_RECENT = 1
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]
class StatsSideChannel(SideChannel):
"""
Side channel that receives (string, float) pairs from the environment, so that they can eventually

# UUID('a1d8f7b7-cec8-50f9-b78b-d3e165a78520')
super().__init__(uuid.UUID("a1d8f7b7-cec8-50f9-b78b-d3e165a78520"))
self.stats: Dict[str, Tuple[float, StatsAggregationMethod]] = {}
self.stats: EnvironmentStats = defaultdict(list)
def on_message_received(self, msg: IncomingMessage) -> None:
"""

val = msg.read_float32()
agg_type = StatsAggregationMethod(msg.read_int32())
self.stats[key] = (val, agg_type)
self.stats[key].append((val, agg_type))
def get_and_reset_stats(self) -> Dict[str, Tuple[float, StatsAggregationMethod]]:
def get_and_reset_stats(self) -> EnvironmentStats:
self.stats = {}
self.stats = defaultdict(list)
return s

2
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


stats = receiver.get_and_reset_stats()
assert len(stats) == 1
val, method = stats["stats-1"]
val, method = stats["stats-1"][0]
assert val - 42.0 < 1e-8
assert method == StatsAggregationMethod.MOST_RECENT

24
ml-agents/mlagents/trainers/agent_processor.py


TerminalSteps,
TerminalStep,
)
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.side_channel.stats_side_channel import (
StatsAggregationMethod,
EnvironmentStats,
)
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.policy import Policy

self.publish_trajectory_queue(self.trajectory_queue)
def record_environment_stats(
self, env_stats: Dict[str, Tuple[float, StatsAggregationMethod]], worker_id: int
self, env_stats: EnvironmentStats, worker_id: int
) -> None:
"""
Pass stats from the environment to the StatsReporter.

:param worker_id:
:return:
"""
for stat_name, (val, agg_type) in env_stats.items():
if agg_type == StatsAggregationMethod.AVERAGE:
self.stats_reporter.add_stat(stat_name, val)
elif agg_type == StatsAggregationMethod.MOST_RECENT:
# In order to prevent conflicts between multiple environments,
# only stats from the first environment are recorded.
if worker_id == 0:
self.stats_reporter.set_stat(stat_name, val)
for stat_name, value_list in env_stats.items():
for val, agg_type in value_list:
if agg_type == StatsAggregationMethod.AVERAGE:
self.stats_reporter.add_stat(stat_name, val)
elif agg_type == StatsAggregationMethod.MOST_RECENT:
# In order to prevent conflicts between multiple environments,
# only stats from the first environment are recorded.
if worker_id == 0:
self.stats_reporter.set_stat(stat_name, val)

4
ml-agents/mlagents/trainers/env_manager.py


BehaviorSpec,
BehaviorName,
)
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue

current_all_step_result: AllStepResult
worker_id: int
brain_name_to_action_info: Dict[BehaviorName, ActionInfo]
environment_stats: Dict[str, Tuple[float, StatsAggregationMethod]]
environment_stats: EnvironmentStats
@property
def name_behavior_ids(self) -> Iterable[BehaviorName]:

6
ml-agents/mlagents/trainers/subprocess_env_manager.py


from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set, Tuple
from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set
import cloudpickle
import enum

)
from mlagents_envs.side_channel.stats_side_channel import (
StatsSideChannel,
StatsAggregationMethod,
EnvironmentStats,
)
from mlagents_envs.side_channel.side_channel import SideChannel

class StepResponse(NamedTuple):
all_step_result: AllStepResult
timer_root: Optional[TimerNode]
environment_stats: Dict[str, Tuple[float, StatsAggregationMethod]]
environment_stats: EnvironmentStats
class UnityEnvWorker:

8
ml-agents/mlagents/trainers/tests/test_agent_processor.py


all_env_stats = [
{
"averaged": (1.0, StatsAggregationMethod.AVERAGE),
"most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
"averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
"most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
"averaged": (3.0, StatsAggregationMethod.AVERAGE),
"most_recent": (4.0, StatsAggregationMethod.MOST_RECENT),
"averaged": [(3.0, StatsAggregationMethod.AVERAGE)],
"most_recent": [(4.0, StatsAggregationMethod.MOST_RECENT)],
},
]
for env_stats in all_env_stats:

正在加载...
取消
保存