
try new reward falloff

/active-variablespeed
HH · 5 years ago
Current commit 9e6edb6c
3 files changed, 108 insertions(+), 0 deletions(-)
  1. Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab (30 additions)
  2. config/ppo/WalkerDynamic.yaml (27 additions)
  3. ml-agents/mlagents/trainers/ppo/trainer.py (51 additions)

Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab (30 additions)


    propertyPath: targetToLookAt
    value:
    objectReference: {fileID: 4714470935848893865}
  - target: {fileID: 895268871264836333, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: agentDoneOnGroundContact
    value: 1
    objectReference: {fileID: 0}
  - target: {fileID: 895268871264836333, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: penalizeGroundContact
    value: 1
    objectReference: {fileID: 0}
  - target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: m_Name

    propertyPath: m_ConnectedAnchor.x
    value: -0.39999408
    objectReference: {fileID: 0}
  - target: {fileID: 7933235353228551181, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: agentDoneOnGroundContact
    value: 1
    objectReference: {fileID: 0}
  - target: {fileID: 7933235353228551181, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: penalizeGroundContact
    value: 1
    objectReference: {fileID: 0}
  - target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: m_ConnectedAnchor.x

    type: 3}
    propertyPath: m_ConnectedAnchor.x
    value: 0.39999408
    objectReference: {fileID: 0}
  - target: {fileID: 7933235353713167637, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: agentDoneOnGroundContact
    value: 1
    objectReference: {fileID: 0}
  - target: {fileID: 7933235353713167637, guid: 765582efd9dda46ed98564603316353f, type: 3}
    propertyPath: penalizeGroundContact
    value: 1
    objectReference: {fileID: 0}
  - target: {fileID: 7933235354616748523, guid: 765582efd9dda46ed98564603316353f, type: 3}
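
The overrides above set agentDoneOnGroundContact and penalizeGroundContact to 1 on several body-part components of the walker, so any ground contact by those parts both penalizes the agent and ends the episode. As a purely illustrative sketch of that behaviour (the real logic lives in the Walker example's C# scripts; the class, function names, and the -1.0 penalty below are assumptions, not part of this commit):

# Illustrative Python pseudocode for what the two prefab flags enable.
# NOT the actual Unity C# implementation; names and the penalty value are assumptions.

class WalkerAgentStub:
    """Minimal stand-in for the agent so the sketch runs on its own."""

    def __init__(self):
        self.cumulative_reward = 0.0
        self.episode_done = False

    def add_reward(self, value):
        self.cumulative_reward += value

    def end_episode(self):
        self.episode_done = True


def on_ground_contact(agent, penalize_ground_contact=True,
                      agent_done_on_ground_contact=True,
                      ground_contact_penalty=-1.0):
    # penalizeGroundContact = 1  -> apply a negative reward on contact
    if penalize_ground_contact:
        agent.add_reward(ground_contact_penalty)
    # agentDoneOnGroundContact = 1 -> terminate the episode on contact
    if agent_done_on_ground_contact:
        agent.end_episode()


agent = WalkerAgentStub()
on_ground_contact(agent)
print(agent.cumulative_reward, agent.episode_done)  # -1.0 True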

config/ppo/WalkerDynamic.yaml (27 additions)


# behaviors:
#   WalkerDynamic:
#     trainer_type: ppo
#     hyperparameters:
#       batch_size: 1024
#       buffer_size: 20480
#       learning_rate: 0.0003
#       beta: 0.005
#       epsilon: 0.2
#       lambd: 0.95
#       num_epoch: 10 #3
#       learning_rate_schedule: constant
#     network_settings:
#       normalize: true
#       hidden_units: 512
#       num_layers: 3
#       vis_encode_type: simple
#     reward_signals:
#       extrinsic:
#         gamma: 0.995
#         strength: 1.0
#     keep_checkpoints: 5
#     max_steps: 20000000
#     time_horizon: 1000
#     summary_freq: 30000
#     threaded: true
behaviors:
  WalkerDynamic:
    trainer_type: ppo
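
The new (uncommented) behaviors block is truncated in this view; the commented block above preserves the previous WalkerDynamic hyperparameters. To sanity-check what a config like this resolves to, it can simply be loaded with PyYAML, for example (a minimal sketch; the path and behavior name come from this diff, everything else is generic, and commented-out lines are ignored by the parser):

import yaml

# Minimal sketch: print the trainer settings for one behavior from an
# ML-Agents-style config file.
CONFIG_PATH = "config/ppo/WalkerDynamic.yaml"
BEHAVIOR = "WalkerDynamic"

with open(CONFIG_PATH) as f:
    config = yaml.safe_load(f)

settings = config["behaviors"][BEHAVIOR]
print("trainer_type:", settings["trainer_type"])
for key, value in settings.get("hyperparameters", {}).items():
    print(f"  {key}: {value}")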

ml-agents/mlagents/trainers/ppo/trainer.py (51 additions)


        for stat, val in update_stats.items():
            self._stats_reporter.add_stat(stat, val)
        self._clear_update_buffer()
        # # # DYNAMIC BUFFER TEST
        # # if self.get_step in range(0, 50000):
        # # # self.expand_buffer(64, 64, 640)
        # # # self.expand_buffer(64, 2048, 2048)
        # # self.expand_buffer(64, 2048, 512)
        # batchBase = 256 #512
        # bufferBase = 2560 #5120
        # if self.get_step in range(0, 100000):
        # # self.expand_buffer(64, 128, 1280)
        # # self.expand_buffer(64, 512, 5120)
        # self.expand_buffer(64, batchBase * 1, bufferBase * 1)
        # # self.expand_buffer(64, 512, 1536)
        # # self.expand_buffer(64, 2048, 4096)
        # # self.expand_buffer(64, 2048, 1024)
        # elif self.get_step in range(100000, 300000):
        # # self.expand_buffer(64, 1024, 4096)
        # self.expand_buffer(64, batchBase * 2, bufferBase * 2)
        # # self.expand_buffer(128, 256, 2560)
        # elif self.get_step in range(300000, 2000000):
        # # self.expand_buffer(64, 2048, 8192)
        # self.expand_buffer(64, batchBase * 4, bufferBase * 4)
        # # self.expand_buffer(256, 512, 5120)
        # elif self.get_step in range(2000000, 4500000):
        # # self.expand_buffer(64, 2048, 16384)
        # # self.expand_buffer(64, 2048, 4096)
        # # elif self.get_step in range(2000000, 3500000):
        # # self.expand_buffer(512, 1024, 10240)
        # self.expand_buffer(1000, batchBase * 8, bufferBase * 8)
        # else:
        # # self.expand_buffer(64, 2048, 32768)
        # # self.expand_buffer(256, 512, 5120)
        # # self.expand_buffer(512, 1024, 10240)
        # self.expand_buffer(1000, batchBase * 8, bufferBase * 8)
        # # self.expand_buffer(1000, 2048, 20480)

    def expand_buffer(self, th, batch, buffer):
        # Experimental hook: override the trainer's sizes mid-training.
        # Only batch_size and buffer_size are currently applied; th is unused here.
        # setattr(self.hyperparameters, "buffer_size", buffer)
        # self.hyperparameters["buffer_size"] = buffer
        # print(f'updating policy: self.get_step: {self.get_step} | buffer_size: {self.hyperparameters.buffer_size}.')
        # print(f'updating policy: self.get_step: {self.get_step} | buffer_size: {self.hyperparameters["buffer_size"]}.')
        # print(f'updating policy: self.get_step: {self.get_step} | batch_size: {self.trainer_parameters["batch_size"]} | buffer_size: {self.trainer_parameters["buffer_size"]}.')
        # print(self.trainer_parameters["time_horizon"])
        # print(self.trainer_parameters["batch_size"])
        # print(self.trainer_parameters["buffer_size"])
        # self.trainer_parameters["time_horizon"] = th
        # self.trainer_parameters["batch_size"] = batch
        # self.trainer_settings["buffer_size"] = buffer
        self.hyperparameters.batch_size = batch
        self.hyperparameters.buffer_size = buffer

    def create_policy(
        self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec

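The commented-out DYNAMIC BUFFER TEST block above experiments with growing time_horizon, batch_size, and buffer_size as training progresses, with expand_buffer as the new hook that applies the values. The chained `in range(...)` checks can be expressed as a simple lookup table; the sketch below mirrors the thresholds and multipliers from those comments and is an illustration, not part of the commit:

# Sketch of the step-based schedule the commented-out code experiments with.
BATCH_BASE = 256    # batchBase in the comments (alternative noted there: 512)
BUFFER_BASE = 2560  # bufferBase in the comments (alternative noted there: 5120)

# (upper step bound, time_horizon, batch multiplier, buffer multiplier)
SCHEDULE = [
    (100_000, 64, 1, 1),
    (300_000, 64, 2, 2),
    (2_000_000, 64, 4, 4),
    (4_500_000, 1000, 8, 8),
]


def buffer_settings_for_step(step):
    """Return (time_horizon, batch_size, buffer_size) for a given training step."""
    for upper_bound, horizon, batch_mult, buffer_mult in SCHEDULE:
        if step < upper_bound:
            return horizon, BATCH_BASE * batch_mult, BUFFER_BASE * buffer_mult
    # Beyond the last threshold the comments keep the largest settings.
    return 1000, BATCH_BASE * 8, BUFFER_BASE * 8


# Inside the trainer this would be wired up roughly as:
#   th, batch, buffer = buffer_settings_for_step(self.get_step)
#   self.expand_buffer(th, batch, buffer)
print(buffer_settings_for_step(50_000))     # (64, 256, 2560)
print(buffer_settings_for_step(1_000_000))  # (64, 1024, 10240)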