# Reconstructed from a damaged extraction: the imports, class name, __init__
# signature, and the names of the two "make_empty_*" helpers are inferred
# from the surviving bodies and should be treated as assumptions.
from typing import Dict, List, Optional

import numpy as np


class Policy:
    def __init__(self, seed: int, brain, trainer_parameters: Dict) -> None:
        self.seed = seed
        self.brain = brain
        self.use_recurrent = trainer_parameters["use_recurrent"]
        # Assumed: the recurrent memory width comes from trainer_parameters;
        # self.m_size is needed by the memory helpers below.
        self.m_size = trainer_parameters["memory_size"] if self.use_recurrent else 0
        # Keyed by agent id (a string); one saved row per agent.
        self.memory_dict: Dict[str, np.ndarray] = {}
        self.previous_action_dict: Dict[str, np.ndarray] = {}
        self.normalize = trainer_parameters.get("normalize", False)
        self.use_continuous_act = brain.vector_action_space_type == "continuous"
        # Assumed: one branch per discrete action branch; continuous control
        # collapses to a single branch the width of the action vector.
        self.num_branches = len(self.brain.vector_action_space_size)
        if self.use_continuous_act:
            self.num_branches = self.brain.vector_action_space_size[0]

    def make_empty_memory(self, num_agents: int) -> np.ndarray:
        """Create an all-zero memory matrix, one row per agent."""
        return np.zeros((num_agents, self.m_size), dtype=np.float32)

    def save_memories(
        self, agent_ids: List[str], memory_matrix: Optional[np.ndarray]
    ) -> None:
        """Store one memory row per agent; None means there is nothing to save."""
        if memory_matrix is None:
            return
        for index, agent_id in enumerate(agent_ids):
            self.memory_dict[agent_id] = memory_matrix[index, :]

    def retrieve_memories(self, agent_ids: List[str]) -> np.ndarray:
        """Fetch each agent's saved memory row; unseen agents get a zero row."""
        memory_matrix = np.zeros((len(agent_ids), self.m_size), dtype=np.float32)
        for index, agent_id in enumerate(agent_ids):
            if agent_id in self.memory_dict:
                memory_matrix[index, :] = self.memory_dict[agent_id]
        return memory_matrix

    def make_empty_previous_action(self, num_agents: int) -> np.ndarray:
        """Create an all-zero previous-action matrix, one row per agent."""
        return np.zeros((num_agents, self.num_branches), dtype=int)

    def save_previous_action(
        self, agent_ids: List[str], action_matrix: Optional[np.ndarray]
    ) -> None:
        """Store one previous-action row per agent; None means nothing to save."""
        if action_matrix is None:
            return
        for index, agent_id in enumerate(agent_ids):
            self.previous_action_dict[agent_id] = action_matrix[index, :]

    def retrieve_previous_action(self, agent_ids: List[str]) -> np.ndarray:
        """Fetch each agent's previous-action row; unseen agents get zeros."""
        action_matrix = np.zeros((len(agent_ids), self.num_branches), dtype=int)
        for index, agent_id in enumerate(agent_ids):
            if agent_id in self.previous_action_dict:
                action_matrix[index, :] = self.previous_action_dict[agent_id]
        return action_matrix
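

# Minimal usage sketch (not part of the original file): it exercises the
# save/retrieve round-trips above. DummyBrain and the parameter dict are
# hypothetical stand-ins covering only the fields that __init__ reads.
if __name__ == "__main__":

    class DummyBrain:
        vector_action_space_type = "discrete"
        vector_action_space_size = [3, 2]  # two discrete action branches

    params = {"use_recurrent": True, "memory_size": 4, "normalize": False}
    policy = Policy(seed=0, brain=DummyBrain(), trainer_parameters=params)

    agent_ids = ["agent-0", "agent-1"]
    policy.save_memories(agent_ids, np.ones((2, 4), dtype=np.float32))
    retrieved = policy.retrieve_memories(["agent-0", "agent-2"])
    # agent-0 gets its saved row back; unseen agent-2 falls back to zeros.
    assert retrieved[0].sum() == 4.0 and retrieved[1].sum() == 0.0

    policy.save_previous_action(agent_ids, np.array([[1, 0], [2, 1]]))
    print(policy.retrieve_previous_action(["agent-1", "agent-2"]))
    # -> [[2 1]
    #     [0 0]]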