# Source: OpenI / PARL
# (mirror of https://gitee.com/paddlepaddle/PARL.git)

 
			
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from powernet_model import PowerNetModel
from es import ES
from es_agent import ESAgent
from tqdm import tqdm
import copy
import numpy as np
from copy import deepcopy
from utils import process
import parl
import paddle.fluid as fluid
from parl import layers


class Track2PowerNetAgent(object):
    """Power-grid agent mixing hand-written rules with ES-ranked actions.

    On every step `act` tries, in this order:

    1. to reconnect a disconnected line that is out of cooldown and not
       about to be maintained,
    2. to move a substation that uses bus 2 back to bus 1 while the grid
       is lightly loaded,
    3. to pick, among the candidate actions ranked by the ES model, the
       one whose simulation gives the lowest maximum `rho`
       (see `avoid_overflow`).
    """

    # Number of top-ranked candidate actions simulated by `avoid_overflow`.
    NUM_CANDIDATE_ACTIONS = 80
    # Maximum number of unitary actions kept from the saved action file.
    MAX_ACTIONS = 1000

    def __init__(self, action_space):
        """Initialize a new agent.

        Loads the candidate actions, the substation id of each candidate
        and the ES model checkpoint from `./saved_files`.

        Args:
            action_space: object used to build actions. It is called with
                a dict, and its `from_vect`, `n_line` and `sub_info`
                attributes are read.
        """
        self.action_space = action_space

        # `np.load` on an .npz returns an archive object that keeps the
        # file open; the context manager closes it once the array is read.
        with np.load("./saved_files/top1000_actions.npz") as data:
            actions_vec = data["actions"]
        # Truncate before converting so that discarded rows are never
        # turned into action objects.
        self.actions = [
            action_space.from_vect(vec)
            for vec in actions_vec[:self.MAX_ACTIONS]
        ]
        self.act_num = len(self.actions)
        with np.load('./saved_files/sub_id_info.npz') as data:
            self.sub_ids = data['sub_ids']
        self.do_nothing_action = action_space({})
        self.origin_ids = range(len(self.actions))

        # Map each substation id to its (start, end) slice inside an
        # action vector; the first slice starts at `n_line`.
        offset = action_space.n_line
        self.action_to_sub_topo = {}
        for sub_id, sub_elem_num in enumerate(action_space.sub_info):
            self.action_to_sub_topo[sub_id] = (offset, offset + sub_elem_num)
            offset += sub_elem_num
        self.step = 0

        model = PowerNetModel()
        algorithm = ES(model)
        self.es_agent = ESAgent(algorithm)
        self.es_agent.restore(save_path='./saved_files', filename='model.ckpt')

        self.to_print_data = []

        self.last_disconnect_step = -100
        # NOTE: the attribute name is misspelled ("diconnect"); it is kept
        # unchanged because code outside this class may read it.
        self.last_diconnect_line = None
        self.simulation_times = 0

    @staticmethod
    def _simulate_and_reset(observation, action):
        """Simulate `action` from `observation`, then reset the sim env.

        Returns:
            The tuple returned by `observation.simulate(action)`.
        """
        result = observation.simulate(action)
        observation._obs_env._reset_to_orig_state()
        return result

    @staticmethod
    def _has_valid_rho(obs):
        """Return True when `obs` is not None and its `rho` has no NaN."""
        return obs is not None and not np.isnan(obs.rho).any()

    def simulate_do_nothing(self, observation):
        """Simulate the default action for this step.

        The default action is "do nothing", unless a connected line has a
        maintenance scheduled within the next 8 steps; in that case the
        action disconnects that line, provided more than 3 steps passed
        since `self.last_disconnect_step`.

        Args:
            observation: current observation.

        Returns:
            A tuple `(obs_simulate, done_simulate, to_check_action,
            to_maintain_lines)`: the simulated observation, the simulated
            done flag, the action that was simulated and the list of line
            ids selected for early disconnection (empty or one element).
        """
        next_maintenance = observation.time_next_maintenance
        candidate_lines = np.where((next_maintenance > 0)
                                   & (next_maintenance < 9))[0]
        # Only lines that are still connected can be disconnected.
        to_maintain_lines = [
            line_id for line_id in candidate_lines
            if observation.line_status[line_id]
        ]
        if len(to_maintain_lines) == 1:
            # we do not disconnect the only line in advance
            if next_maintenance[to_maintain_lines[0]] > 1:
                to_maintain_lines = []
        else:
            # we only maintain the first line in `to_maintain_lines`
            to_maintain_lines = to_maintain_lines[:1]

        to_check_action = self.do_nothing_action
        if to_maintain_lines and self.step - self.last_disconnect_step > 3:
            to_check_action = self.action_space({
                'set_line_status':
                [(line_id, -1) for line_id in to_maintain_lines]
            })

        obs_simulate, _, done_simulate, _ = self._simulate_and_reset(
            observation, to_check_action)
        return obs_simulate, done_simulate, to_check_action, to_maintain_lines

    def find_unaccessible_pos(self, to_check_action):
        """Return the topology positions touched by the disconnected lines.

        Args:
            to_check_action: action that is either "do nothing" or a line
                disconnection.

        Returns:
            An empty list for the do-nothing action; otherwise the indices
            where `_set_topo_vect` is non-zero for an action that sets
            both ends of every disconnected line to bus 1.
        """
        if to_check_action == self.do_nothing_action:
            return []
        lines = to_check_action.as_dict()['set_line_status']['disconnected_id']
        line_bus_pairs = [(line_id, 1) for line_id in lines]
        probe_action = self.action_space({
            "set_bus": {
                "lines_ex_id": line_bus_pairs,
                "lines_or_id": line_bus_pairs
            }
        })
        return np.where(probe_action._set_topo_vect != 0)[0]

    def avoid_overflow(self, observation, reset_action=None):
        """Pick the candidate action that gives the lowest maximum `rho`.

        Args:
            observation: current observation.
            reset_action: optional action (a line reconnection when called
                from `act`) that is combined with every candidate. When
                None, the baseline action comes from `simulate_do_nothing`.

        Returns:
            A tuple `(action, sub_id)`. `sub_id` is the entry of
            `self.sub_ids` for the chosen candidate, or -1 when the
            baseline action or the do-nothing action is returned.
        """
        if reset_action is None:
            (obs_simulate, done_simulate, to_check_action,
             to_maintain_lines) = self.simulate_do_nothing(observation)
        else:
            to_check_action = reset_action
            to_maintain_lines = []
            obs_simulate, _, done_simulate, _ = self._simulate_and_reset(
                observation, to_check_action)

        has_overflow = False
        if self._has_valid_rho(observation):
            has_overflow = bool(
                np.any(observation.rho > 1.0)
                or np.any(obs_simulate.rho > 1.0))

        # The comparison does not change below, so it is evaluated once.
        has_pending_action = to_check_action != self.do_nothing_action

        if not (done_simulate or has_overflow) and not has_pending_action:
            return self.do_nothing_action, -1
        if (has_pending_action and obs_simulate.rho.max() < 1.0
                and not done_simulate):
            return to_check_action, -1

        # action selection and rerank
        extracted_obs = process(observation).astype(np.float32)
        top_idx, _ = self.es_agent.predict_unitary_actions_rho(extracted_obs)

        # Mark the top-ranked candidates. Slicing keeps this safe when
        # fewer than NUM_CANDIDATE_ACTIONS indices are returned.
        action_selected = [False] * len(self.actions)
        for idx in top_idx[:self.NUM_CANDIDATE_ACTIONS]:
            action_selected[idx] = True

        best_idx = -1
        least_overflow_action = self.do_nothing_action
        # Baseline to beat: max rho of the baseline simulation.
        # NOTE(review): the baseline is used even when the baseline
        # simulation ended with `done_simulate`; confirm rho is meaningful
        # in that case.
        least_overflow = 10.0
        if self._has_valid_rho(obs_simulate):
            least_overflow = float(np.max(obs_simulate.rho))

        if reset_action is None:
            illegal_pos = self.find_unaccessible_pos(to_check_action)
        else:
            illegal_pos = []
        pending_vec = to_check_action.to_vect() if has_pending_action else None

        self.simulation_times += 1
        # Candidates are visited in index order, not in ranking order.
        for idx, selected in enumerate(action_selected):
            if not selected:
                continue
            to_simulate_action = self.actions[idx]
            if has_pending_action:
                # check conflict: skip candidates that touch a position
                # used by the pending line action
                if np.any(to_simulate_action._set_topo_vect[illegal_pos]):
                    continue
                to_simulate_action = self.action_space.from_vect(
                    to_simulate_action.to_vect() + pending_vec)
            legal_action = self.correct_action(observation, to_simulate_action,
                                               self.sub_ids[idx])
            if legal_action == self.do_nothing_action:
                continue

            obs_candidate, _, done_candidate, info_candidate = \
                self._simulate_and_reset(observation, legal_action)

            assert not info_candidate['is_illegal'] and not info_candidate[
                'is_ambiguous']

            if done_candidate or not self._has_valid_rho(obs_candidate):
                continue

            overflow_value = float(np.max(obs_candidate.rho))
            if overflow_value < least_overflow:
                least_overflow = overflow_value
                least_overflow_action = legal_action
                best_idx = idx
            # Stop searching once the best known max rho is low enough.
            if least_overflow < 0.95:
                break

        if best_idx == -1:
            return self.do_nothing_action, -1

        least_overflow_action = self.correct_action(
            observation, least_overflow_action, self.sub_ids[best_idx])
        if (has_pending_action
                and least_overflow_action != self.do_nothing_action
                and reset_action is None):
            # Remember the early disconnection so that
            # `simulate_do_nothing` does not immediately repeat it.
            self.last_disconnect_step = self.step - 1
            self.last_diconnect_line = to_maintain_lines[0]
        return least_overflow_action, self.sub_ids[best_idx]

    def correct_action(self, observation, to_simulate_action, sub_id):
        """Adapt a candidate action to the current substation state.

        Requires `self.sub_topo_dict`, which `act` rebuilds on every step.

        Args:
            observation: current observation.
            to_simulate_action: candidate action.
            sub_id: substation the candidate acts on, or -1 for none.

        Returns:
            `to_simulate_action` unchanged when `sub_id` is -1; the
            do-nothing action when the substation is in cooldown;
            otherwise a new action in which the entries of the
            substation's slice that correspond to elements with bus -1 in
            the current topology are set to 0.
        """
        if sub_id == -1:
            return to_simulate_action
        if observation.time_before_cooldown_sub[sub_id] != 0:
            return self.do_nothing_action

        legal_action_vec = deepcopy(to_simulate_action.to_vect())
        sub_topo = self.sub_topo_dict[sub_id]
        if np.any(sub_topo == -1):  # line disconnected
            start, end = self.action_to_sub_topo[sub_id]
            action_topo = legal_action_vec[start:end].astype("int")
            # don't change elements whose bus is -1
            action_topo[np.where(sub_topo == -1)[0]] = 0
            legal_action_vec[start:end] = action_topo
        return self.action_space.from_vect(legal_action_vec)

    def act(self, observation, reward, done):
        """Choose the action for the current step.

        Args:
            observation: current observation.
            reward: unused.
            done: unused.

        Returns:
            The selected action.
        """
        self.step += 1

        # Split the topology vector into one slice per substation.
        offset = 0
        self.sub_topo_dict = {}
        for sub_id, sub_elem_num in enumerate(observation.sub_info):
            self.sub_topo_dict[sub_id] = observation.topo_vect[
                offset:offset + sub_elem_num]
            offset += sub_elem_num

        # 1) Try to reconnect a disconnected line.
        disconnected = np.where(np.logical_not(
            observation.line_status))[0].tolist()
        next_maintenance = observation.time_next_maintenance
        soon_maintained = set(
            np.where((next_maintenance > 0)
                     & (next_maintenance < 15))[0].tolist())
        for line_id in disconnected:
            if (observation.time_before_cooldown_line[line_id] != 0
                    or line_id in soon_maintained):
                continue
            reset_action = self.action_space({
                "set_line_status": [(line_id, +1)]
            })
            obs_simulate, _, _, _ = self._simulate_and_reset(
                observation, reset_action)
            # Skip a reconnection that would create an overflow on a grid
            # that currently has none.
            if np.max(observation.rho) < 1.0 and np.max(
                    obs_simulate.rho) >= 1.0:
                continue
            combined_action, _ = self.avoid_overflow(observation, reset_action)
            return combined_action

        # 2) With a lightly loaded grid, move bus-2 elements back to bus 1.
        if (self._has_valid_rho(observation)
                and np.max(observation.rho) < 0.94
                and np.any(observation.topo_vect == 2)):
            for sub_id, sub_topo in self.sub_topo_dict.items():
                if not np.any(sub_topo == 2):
                    continue
                if observation.time_before_cooldown_sub[sub_id] != 0:
                    continue
                sub_topo = np.where(sub_topo == 2, 1,
                                    sub_topo)  # bus 2 to bus 1
                sub_topo = np.where(sub_topo == -1, 0,
                                    sub_topo)  # don't do action in bus=-1
                reconfig_sub = self.action_space({
                    "set_bus": {
                        "substations_id": [(sub_id, sub_topo)]
                    }
                })

                obs_simulate, _, done_simulate, info_simulate = \
                    self._simulate_and_reset(observation, reconfig_sub)
                assert not info_simulate[
                    'is_illegal'] and not info_simulate['is_ambiguous']

                if (not done_simulate and self._has_valid_rho(obs_simulate)
                        and np.max(obs_simulate.rho) < 0.95):
                    return reconfig_sub

        # 3) Fall back to the ranked search.
        action, _ = self.avoid_overflow(observation)
        return action