
cleanup

/active-variablespeed
HH 4 years ago
Current commit c3c83920
2 changed files with 13 additions and 91 deletions
  1. config/ppo/WalkerDynamic.yaml (27 changes)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (77 changes)

config/ppo/WalkerDynamic.yaml (27 changes)


# behaviors:
#   WalkerDynamic:
#     trainer_type: ppo
#     hyperparameters:
#       batch_size: 1024
#       buffer_size: 20480
#       learning_rate: 0.0003
#       beta: 0.005
#       epsilon: 0.2
#       lambd: 0.95
#       num_epoch: 10 #3
#       learning_rate_schedule: constant
#     network_settings:
#       normalize: true
#       hidden_units: 512
#       num_layers: 3
#       vis_encode_type: simple
#     reward_signals:
#       extrinsic:
#         gamma: 0.995
#         strength: 1.0
#     keep_checkpoints: 5
#     max_steps: 20000000
#     time_horizon: 1000
#     summary_freq: 30000
#     threaded: true
behaviors:
  WalkerDynamic:
    trainer_type: ppo
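
For reference, a minimal sketch of inspecting this trainer config with PyYAML; the file path and key names match the config above, but the snippet itself is illustrative and not how ml-agents loads settings (it uses its own TrainerSettings machinery).

# Illustrative only: quick inspection of the config this commit touches.
# Assumes PyYAML is installed.
import yaml

with open("config/ppo/WalkerDynamic.yaml") as f:
    config = yaml.safe_load(f)

walker = config["behaviors"]["WalkerDynamic"]
print(walker["trainer_type"])  # "ppo"
# The hyperparameters block is commented out in this revision, so guard the lookup.
hyper = walker.get("hyperparameters", {})
print(hyper.get("batch_size"), hyper.get("buffer_size"))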

ml-agents/mlagents/trainers/ppo/trainer.py (77 changes)


"""The PPOTrainer is an implementation of the PPO algorithm."""
    def __init__(
        self,
        brain_name: str,
        reward_buff_cap: int,
        trainer_settings: TrainerSettings,
        training: bool,
        load: bool,
        seed: int,
        artifact_path: str,
    ):
        """
        Responsible for collecting experiences and training PPO model.

            agent_buffer_trajectory,
            trajectory.next_obs,
            trajectory.done_reached and not trajectory.interrupted,
        )
        for name, v in value_estimates.items():
            agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
            self._stats_reporter.add_stat(

        # Make sure batch_size is a multiple of sequence length. During training, we
        # will need to reshape the data into a batch_size x sequence_length tensor.
        batch_size = (
            self.hyperparameters.batch_size
            - self.hyperparameters.batch_size % self.policy.sequence_length
        )
        # Make sure there is at least one sequence
        batch_size = max(batch_size, self.policy.sequence_length)
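
As a standalone illustration of the rounding above; the helper name effective_batch_size and the example numbers are stand-ins, not ml-agents API.

# Hypothetical numbers to show the rounding-down behaviour of the snippet above.
def effective_batch_size(batch_size: int, sequence_length: int) -> int:
    # Drop the remainder so the batch reshapes cleanly into
    # (batch_size // sequence_length) x sequence_length sequences.
    rounded = batch_size - batch_size % sequence_length
    # Always keep at least one full sequence.
    return max(rounded, sequence_length)

print(effective_batch_size(1024, 64))  # 1024 (already a multiple)
print(effective_batch_size(1024, 48))  # 1008 (1024 - 1024 % 48 = 1024 - 16)
print(effective_batch_size(30, 64))    # 64   (clamped up to one sequence)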

        for stat, val in update_stats.items():
            self._stats_reporter.add_stat(stat, val)
        self._clear_update_buffer()

        # # # DYNAMIC BUFFER TEST
        # # if self.get_step in range(0, 50000):
        # # #     self.expand_buffer(64, 64, 640)
        # # #     self.expand_buffer(64, 2048, 2048)
        # #     self.expand_buffer(64, 2048, 512)
        # batchBase = 256 #512
        # bufferBase = 2560 #5120
        # if self.get_step in range(0, 100000):
        #     # self.expand_buffer(64, 128, 1280)
        #     # self.expand_buffer(64, 512, 5120)
        #     self.expand_buffer(64, batchBase * 1, bufferBase * 1)
        #     # self.expand_buffer(64, 512, 1536)
        #     # self.expand_buffer(64, 2048, 4096)
        #     # self.expand_buffer(64, 2048, 1024)
        # elif self.get_step in range(100000, 300000):
        #     # self.expand_buffer(64, 1024, 4096)
        #     self.expand_buffer(64, batchBase * 2, bufferBase * 2)
        #     # self.expand_buffer(128, 256, 2560)
        # elif self.get_step in range(300000, 2000000):
        #     # self.expand_buffer(64, 2048, 8192)
        #     self.expand_buffer(64, batchBase * 4, bufferBase * 4)
        #     # self.expand_buffer(256, 512, 5120)
        # elif self.get_step in range(2000000, 4500000):
        #     # self.expand_buffer(64, 2048, 16384)
        #     # self.expand_buffer(64, 2048, 4096)
        #     # elif self.get_step in range(2000000, 3500000):
        #     #     self.expand_buffer(512, 1024, 10240)
        #     self.expand_buffer(1000, batchBase * 8, bufferBase * 8)
        # else:
        #     # self.expand_buffer(64, 2048, 32768)
        #     # self.expand_buffer(256, 512, 5120)
        #     # self.expand_buffer(512, 1024, 10240)
        #     self.expand_buffer(1000, batchBase * 8, bufferBase * 8)
        #     # self.expand_buffer(1000, 2048, 20480)
    def expand_buffer(self, th, batch, buffer):
        # Grows the PPO batch/buffer sizes at runtime. The time-horizon argument (th)
        # is currently unused: the line that applied it remains commented out below.
        # setattr(self.hyperparameters, "buffer_size", buffer)
        # self.hyperparameters["buffer_size"] = buffer
        # print(f'updating policy: self.get_step: {self.get_step} | buffer_size: {self.hyperparameters.buffer_size}.')
        # print(f'updating policy: self.get_step: {self.get_step} | buffer_size: {self.hyperparameters["buffer_size"]}.')
        # print(f'updating policy: self.get_step: {self.get_step} | batch_size: {self.trainer_parameters["batch_size"]} | buffer_size: {self.trainer_parameters["buffer_size"]}.')
        # print(self.trainer_parameters["time_horizon"])
        # print(self.trainer_parameters["batch_size"])
        # print(self.trainer_parameters["buffer_size"])
        # self.trainer_parameters["time_horizon"] = th
        # self.trainer_parameters["batch_size"] = batch
        # self.trainer_settings["buffer_size"] = buffer
        self.hyperparameters.batch_size = batch
        self.hyperparameters.buffer_size = buffer
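
For clarity, a minimal standalone sketch of the step-based schedule that the commented-out "DYNAMIC BUFFER TEST" block above was experimenting with. The thresholds and multipliers mirror those comments (batchBase = 256, bufferBase = 2560), but the function and its name, dynamic_buffer_schedule, are illustrative and not part of the trainer.

# Illustrative sketch only: returns the (time_horizon, batch_size, buffer_size)
# that expand_buffer would be called with at a given training step.
# The 2M-4.5M band and the final band used the same call, so they are merged here.
def dynamic_buffer_schedule(step: int, batch_base: int = 256, buffer_base: int = 2560):
    if step < 100_000:
        return 64, batch_base * 1, buffer_base * 1
    elif step < 300_000:
        return 64, batch_base * 2, buffer_base * 2
    elif step < 2_000_000:
        return 64, batch_base * 4, buffer_base * 4
    else:
        return 1000, batch_base * 8, buffer_base * 8

# Example usage: trainer.expand_buffer(*dynamic_buffer_schedule(trainer.get_step))
print(dynamic_buffer_schedule(50_000))     # (64, 256, 2560)
print(dynamic_buffer_schedule(500_000))    # (64, 1024, 10240)
print(dynamic_buffer_schedule(5_000_000))  # (1000, 2048, 20480)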
    def create_policy(
        self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
    ) -> TFPolicy:
        """
        Creates a PPO policy to trainers list of policies.

        return policy

    def add_policy(
        self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
    ) -> None:
        """
        Adds policy to trainer.
