
Add back next_obs

Branch: develop-newnormalization
Ervin Teng, 5 years ago
Commit 77ff4822
6 files changed, 17 insertions(+), 13 deletions(-)
  1. ml-agents/mlagents/trainers/agent_processor.py (4 changes)
  2. ml-agents/mlagents/trainers/ppo/policy.py (8 changes)
  3. ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)
  4. ml-agents/mlagents/trainers/sac/trainer.py (2 changes)
  5. ml-agents/mlagents/trainers/trainer_controller.py (4 changes)
  6. ml-agents/mlagents/trainers/trajectory.py (10 changes)

ml-agents/mlagents/trainers/agent_processor.py (4 changes)


         if self.policy.use_vec_obs:
             next_obs.append(next_info.vector_observations[next_idx])
         trajectory = Trajectory(
-            steps=self.experience_buffers[agent_id], agent_id=agent_id
+            steps=self.experience_buffers[agent_id],
+            agent_id=agent_id,
+            next_obs=next_obs,
         )
         # This will eventually be replaced with a queue
         self.trainer.process_trajectory(trajectory)
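For orientation, the pattern this hunk implements can be sketched in isolation: the processor gathers the observation that follows the final step and hands it to the Trajectory along with the buffered experiences. The AgentExperience/Trajectory definitions and the finish_trajectory helper below are simplified stand-ins for illustration, not the actual mlagents classes.

from typing import List, NamedTuple
import numpy as np

class AgentExperience(NamedTuple):  # simplified stand-in for mlagents' AgentExperience
    obs: List[np.ndarray]
    reward: float
    done: bool

class Trajectory(NamedTuple):  # simplified stand-in for mlagents' Trajectory
    steps: List[AgentExperience]
    next_obs: List[np.ndarray]  # observation following the last step, for bootstrapping
    agent_id: str

def finish_trajectory(
    steps: List[AgentExperience],
    agent_id: str,
    next_visual_obs: List[np.ndarray],
    next_vector_obs: np.ndarray,
    use_vec_obs: bool = True,
) -> Trajectory:
    # Mirrors the hunk: visual observations go in first, then (if the policy
    # uses one) the vector observation, and the list rides on the Trajectory.
    next_obs: List[np.ndarray] = list(next_visual_obs)
    if use_vec_obs:
        next_obs.append(next_vector_obs)
    return Trajectory(steps=steps, next_obs=next_obs, agent_id=agent_id)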

ml-agents/mlagents/trainers/ppo/policy.py (8 changes)


 import logging
 import numpy as np
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, List
 from mlagents.tf_utils import tf
 
-from mlagents.trainers.trajectory import split_obs, AgentExperience
+from mlagents.trainers.trajectory import split_obs
 from mlagents.trainers.ppo.models import PPOModel
 from mlagents.trainers.tf_policy import TFPolicy
 from mlagents.trainers.buffer import AgentBuffer

         return value_estimates
 
     def get_value_estimates(
-        self, experience: AgentExperience, agent_id: str, done: bool
+        self, next_obs: List[np.ndarray], agent_id: str, done: bool
     ) -> Dict[str, float]:
         """
         Generates value estimates for bootstrapping.

             self.model.batch_size: 1,
             self.model.sequence_length: 1,
         }
-        vec_vis_obs = split_obs(experience.obs)
+        vec_vis_obs = split_obs(next_obs)
         for i in range(len(vec_vis_obs.visual_observations)):
             feed_dict[self.model.visual_in[i]] = [vec_vis_obs.visual_observations[i]]
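With the new signature, callers pass the raw observation list rather than an AgentExperience, and split_obs separates it into vector and visual parts before the feed dict is filled. The sketch below shows one plausible way such a splitter could behave; it is an assumption for illustration, not the real mlagents.trainers.trajectory.split_obs.

from typing import List, NamedTuple
import numpy as np

class SplitObservations(NamedTuple):
    vector_observations: np.ndarray
    visual_observations: List[np.ndarray]

def split_obs(obs: List[np.ndarray]) -> SplitObservations:
    # Assumption: visual observations are multi-dimensional (H x W x C),
    # while 1-D arrays belong to the concatenated vector observation.
    vis = [o for o in obs if o.ndim > 1]
    vec = [o for o in obs if o.ndim == 1]
    return SplitObservations(
        vector_observations=np.concatenate(vec) if vec else np.zeros(0, dtype=np.float32),
        visual_observations=vis,
    )

# Example: one 84x84x3 visual observation plus an 8-dim vector observation.
example = split_obs([np.zeros((84, 84, 3)), np.zeros(8)])
assert example.vector_observations.shape == (8,)
assert len(example.visual_observations) == 1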

ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)


             agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
         value_next = self.policy.get_value_estimates(
-            trajectory.steps[-1],
+            trajectory.next_obs,
             agent_id,
             trajectory.steps[-1].done and not trajectory.steps[-1].max_step,
         )
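Here value_next is the value estimate of the observation that follows the trajectory; unless the episode genuinely terminated, it stands in for all rewards beyond the cut point. The generic bootstrapped-return recursion it feeds into looks roughly like the sketch below (not the trainer's actual GAE code).

import numpy as np

def bootstrapped_returns(rewards: np.ndarray, value_next: float, gamma: float = 0.99) -> np.ndarray:
    # Work backwards from the end of the trajectory, seeding the recursion
    # with the value estimate of the observation after the last step.
    returns = np.zeros_like(rewards, dtype=np.float32)
    running = value_next
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# Example: three steps of reward 1.0, bootstrapped with value_next = 0.5.
print(bootstrapped_returns(np.array([1.0, 1.0, 1.0]), value_next=0.5))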

ml-agents/mlagents/trainers/sac/trainer.py (2 changes)


         last_step = trajectory.steps[-1]
         agent_id = trajectory.agent_id  # All the agents should have the same ID
-        # Note that this agent buffer version of the traj. is one less than the len of the raw trajectory
-        # for bootstrapping purposes.
         agent_buffer_trajectory = trajectory_to_agentbuffer(trajectory)
         # Update the normalization

ml-agents/mlagents/trainers/trainer_controller.py (4 changes)


             processor=AgentProcessor(
                 trainer,
                 trainer.policy,
-                trainer.parameters["time_horizon"] + 1,
-                # Note: for a trajectory to be useful for bootstrapping, we need the next state.
-                # Hence we have the processor give us 1 more than the time horizon.
+                trainer.parameters["time_horizon"],
             )
             self.managers[name] = agent_manager
         last_brain_names = external_brains
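The removed comment explains the old + 1: the final collected experience used to double as the bootstrap state, so the processor had to run one step past the horizon. With next_obs carried on the Trajectory, the configured horizon can be used directly. A small illustration of the intent, with 64 as an arbitrary example value:

time_horizon = 64  # example value; in the codebase this comes from trainer.parameters["time_horizon"]

# Before: trajectories carried one extra experience so that steps[-1]
# could serve as the bootstrap state.
old_max_trajectory_length = time_horizon + 1

# After: the bootstrap observation lives in trajectory.next_obs, so the
# processor can cut trajectories at exactly the configured horizon.
new_max_trajectory_length = time_horizon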

ml-agents/mlagents/trainers/trajectory.py (10 changes)


 class Trajectory(NamedTuple):
     steps: List[AgentExperience]
+    next_obs: List[
+        np.ndarray
+    ]  # Observation following the trajectory, for bootstrapping
     agent_id: str
 
     step of the trajectory.
     """
     agent_buffer_trajectory = AgentBuffer()
-    for step, exp in enumerate(trajectory.steps[:-1]):
-        next_vec_vis_obs = split_obs(trajectory.steps[step + 1].obs)
+    for step, exp in enumerate(trajectory.steps):
+        if step < len(trajectory.steps) - 1:
+            next_vec_vis_obs = split_obs(trajectory.steps[step + 1].obs)
+        else:
+            next_vec_vis_obs = split_obs(trajectory.next_obs)
         for i, _ in enumerate(vec_vis_obs.visual_observations):
             agent_buffer_trajectory["visual_obs%d" % i].append(
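The heart of the trajectory_to_agentbuffer change is the branch choosing where each step's next observation comes from: every step but the last looks at the following step, and the last step uses the trajectory-level next_obs, so no experience has to be dropped to enable bootstrapping. Isolated below as a small hypothetical helper (next_obs_for_step is not part of the codebase), with simplified stand-in types:

from typing import List, NamedTuple
import numpy as np

class AgentExperience(NamedTuple):  # simplified stand-in
    obs: List[np.ndarray]

class Trajectory(NamedTuple):  # simplified stand-in
    steps: List[AgentExperience]
    next_obs: List[np.ndarray]
    agent_id: str

def next_obs_for_step(trajectory: Trajectory, step: int) -> List[np.ndarray]:
    # All steps except the last take their "next" observation from the
    # following step; the last step falls back to trajectory.next_obs.
    if step < len(trajectory.steps) - 1:
        return trajectory.steps[step + 1].obs
    return trajectory.next_obs

# Example: a two-step trajectory over a 3-dim vector observation.
traj = Trajectory(
    steps=[AgentExperience(obs=[np.zeros(3)]), AgentExperience(obs=[np.ones(3)])],
    next_obs=[np.full(3, 2.0)],
    agent_id="agent_0",
)
assert next_obs_for_step(traj, 0)[0][0] == 1.0  # next obs of step 0 is step 1's obs
assert next_obs_for_step(traj, 1)[0][0] == 2.0  # last step uses trajectory.next_obs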
