- import collections
- from glob import glob
- import os, yaml
- import numpy as np
- from scipy import stats
- from matplotlib import pyplot as plt
- import pandas as pd
-
- from matplotlib import rcParams
-
- from xbbo.utils.util import loadJson, loadOBJ
- from xbbo.core.constants import Key
-
-
- class Analyse():
- def __init__(self,
- exp_dir_root='./exp',
- benchmark='countingones',
- methods=None,
- marks=None,
- limit=1e7,
- **kwargs) -> None:
- self.exp_dir_root = exp_dir_root
- self.out_dir = os.path.join(exp_dir_root, benchmark)
- if not os.path.exists(self.out_dir):
- os.mkdir(self.out_dir)
- self.methods = methods
- self.marks = marks
- self.benchmark = benchmark
- self._set_plot()
- plt.clf()
- hashset = set()
- min_cost = np.inf
- max_cost = 0
- self.min_regret = np.inf
- self.max_regret = -np.inf
- self.regret_key = Key.REGRET_TEST
- # new first
- cfg_paths = sorted(glob(exp_dir_root + '/*/*.yaml'), reverse=True)
- mean_df = {}
- std_df = {}
- index = -1
- replace_nan = 1
- for cfg_path in cfg_paths: # for every method
- with open(cfg_path, 'r') as f:
- cfg = yaml.safe_load(f)
- # hash_name = cfg_path.split('/')[-2].split('_')[-1]
- mark_label = cfg["mark_label"]
- method_name = cfg["OPTM"]["name"] # tmp[-1][:-5]
- if cfg["TEST_PROBLEM"]["name"] != self.benchmark or (
- (self.methods is not None) and
- (method_name not in self.methods)) or (
- (self.marks is not None) and
- (mark_label not in self.marks)) or mark_label in hashset:
- continue
- jfiles = glob('{}/{}/*.json'.format(cfg["GENERAL"]["exp_dir"],
- method_name))
- if len(jfiles) == 0:
- continue
- hashset.add(mark_label)
- index += 1
-
- regret = []
- costs = []
- use_log = False
- for jfile in jfiles:
- j = loadJson(jfile)
- curr_regret = np.array(j[self.regret_key])
- kwargs["title"] = self.benchmark
- if method_name == 'dehb_':
- curr_cost = np.array(j['runtime'])
- else:
- curr_cost = np.array(j[Key.COST])
-                 if self.benchmark == "countingones":
-                     d = (cfg["TEST_PROBLEM"].get("n_continuous", 4) +
-                          cfg["TEST_PROBLEM"].get("n_categorical", 4))
-                     if method_name == 'dehb_':
-                         curr_regret = curr_regret * d - d
-                     # shift the regret from [-d, 0] into [0, 1]
-                     curr_regret = (curr_regret + d) / d
-                     # express cost in units of the maximum per-run budget
-                     max_budget = 93312 / d
-                     curr_cost /= max_budget
- elif self.benchmark == "Rosenbrock":
- d = cfg["TEST_PROBLEM"].get("dim", 2)
-                     curr_regret = np.log(curr_regret)
- kwargs["use_log"] = True
- kwargs["plot_type"] = "fevals"
- curr_cost = np.arange(len(curr_regret)) + 1
- elif self.benchmark == "FCNet":
- dataset_name = cfg["TEST_PROBLEM"]["kwargs"].get("dataset_name")
- # curr_regret = np.log() # 0-1
- kwargs["use_log"] = True
- kwargs["plot_type"] = "fevals"
- kwargs['title'] = kwargs['title'] + '-{}'.format(dataset_name)
- curr_cost = np.arange(len(curr_regret)) + 1
-                     # subtract what is presumably the best known validation
-                     # error for the FCNet "protein" dataset so the curve
-                     # becomes a regret (only "protein" is handled here)
-                     curr_regret -= {"protein": 0.2213788479566574}[dataset_name]
-
-                 # keep only the first occurrence of each regret value so the
-                 # trajectory records when each value was first reached
-                 _, idx = np.unique(curr_regret, return_index=True)
-                 idx.sort()
-
- regret.append(curr_regret[idx])
- costs.append(curr_cost[idx])
- # regret = np.array(regret)
- # costs = np.array(costs)
-
- # finds the latest time where the first measurement was made across runs
- t = np.max([costs[i][0] for i in range(len(costs))])
- # t = costs[:, 0].max()
- min_cost = min(min_cost, t)
- te, cost = self._fill_trajectory(regret, costs, replace_nan=replace_nan)
-
- idx = cost.tolist().index(t)
- te = te[idx:, :]
- # log_te = np.log(te) if use_log else te
- # te = np.log(te) if kwargs["use_log"] else te
- cost = cost[idx:]
-
-             # Clip off all measurements beyond the cost limit (1e7 by default)
- idx = np.where(cost <= limit)[0]
- # if hash_to_label_map is not None:
- # label_name = hash_to_label_map.get(hash_name, method_name+'-'+hash_name)
- # else:
- # label_name = method_name+'-'+hash_name
- print("{}. Plotting for {}".format(index, mark_label))
- print(len(regret), len(costs))
-             print("\nMean: {}; SEM: {}\n".format(
-                 np.mean(te, axis=1)[idx][-1],
-                 stats.sem(te, axis=1)[idx][-1]))
- # The mean plot
- plt.plot(cost[idx],
- np.mean(te, axis=1)[idx],
- color='C{}'.format(index),
- linewidth=4,
- label=mark_label,
- linestyle=self.linestyles[index % len(self.linestyles)],
- marker=self.marker[index % len(self.marker)],
- markevery=(0.1, 0.1),
- markersize=15)
-             # The error band: mean regret +/- 2 standard errors across runs
-             plt.fill_between(
-                 cost[idx],
-                 np.mean(te, axis=1)[idx] + 2 * stats.sem(te, axis=1)[idx],
-                 np.mean(te, axis=1)[idx] - 2 * stats.sem(te, axis=1)[idx],
-                 color="C%d" % index,
-                 alpha=0.2)
-
- # Stats to dynamically impose limits on the axes of the plots
- max_cost = max(max_cost, cost[idx][-1])
- self.min_regret = min(self.min_regret,
- np.mean(te, axis=1)[idx][-1])
- self.max_regret = max(self.max_regret, np.mean(te, axis=1)[idx][0])
-
- # For final score table
- mean_df[mark_label] = pd.Series(data=np.mean(te, axis=1)[idx],
- index=cost[idx])
- std_df[mark_label] = pd.Series(data=np.std(te, axis=1)[idx],
- index=cost[idx])
- mean_df = pd.DataFrame(mean_df)
- all_mean_df = mean_df.copy()
- all_mean_df.ffill().to_pickle(
- os.path.join(self.out_dir, 'all_mean_df.pkl'))
- std_df = pd.DataFrame(std_df)
-         # cut the table at the earliest point where any method stops
-         # recording values (minimum over methods of the last non-NaN index)
-         cutoff_idx = min(
-             np.where(mean_df[col].notna())[0][-1] for col in mean_df.columns)
- mean_df = mean_df.iloc[:cutoff_idx + 1].ffill()
- std_df = std_df.iloc[:cutoff_idx + 1].ffill()
- if len(hashset) > 1:
- rank_df = mean_df.apply(stats.rankdata,
- axis=1,
- result_type='broadcast')
- rank_df.to_pickle(os.path.join(self.out_dir, 'rank_df.pkl'))
- mean_df.iloc[-1].to_pickle(os.path.join(self.out_dir, 'mean_df.pkl'))
- std_df.iloc[-1].to_pickle(os.path.join(self.out_dir, 'std_df.pkl'))
-
- # self.plt = plt
- self.min_cost = min_cost
- self.max_cost = max_cost
- # self.min_regret = self.min_regret
- # self.max_regret = self.max_regret
- self._regret_plot(**kwargs)
-
- def _set_plot(self, fix_colors=False):
- rcParams["font.size"] = "25"
- rcParams['text.usetex'] = False
- rcParams['font.family'] = 'serif'
- rcParams['figure.figsize'] = (16.0, 9.0)
- rcParams['figure.frameon'] = True
- rcParams['figure.edgecolor'] = 'k'
- rcParams['grid.color'] = 'k'
- rcParams['grid.linestyle'] = ':'
- rcParams['grid.linewidth'] = 0.5
- rcParams['axes.linewidth'] = 1
- rcParams['axes.edgecolor'] = 'k'
- rcParams['axes.grid.which'] = 'both'
-         rcParams['legend.frameon'] = True
- rcParams['legend.framealpha'] = 1
-
- rcParams['ytick.major.size'] = 12
- rcParams['ytick.major.width'] = 1.5
- rcParams['ytick.minor.size'] = 6
- rcParams['ytick.minor.width'] = 1
- rcParams['xtick.major.size'] = 12
- rcParams['xtick.major.width'] = 1.5
- rcParams['xtick.minor.size'] = 6
- rcParams['xtick.minor.width'] = 1
- self.marker = ['x', '^', 'D', 'o', 's', 'h', '*', 'v', '<', ">"]
- self.linestyles = ['-', '--', '-.', ':']
- # plot setup
- # self.colors = ["C%d" % i for i in range(len(self.methods))]
- # if fix_colors and len(self.methods) <= 8:
- # _colors = dict()
- # _colors["RS"] = "C0"
- # _colors["HB"] = "C7"
- # _colors["BOHB"] = "C1"
- # _colors["TPE"] = "C3"
- # _colors["SMAC"] = "C4"
- # _colors["RE"] = "C5"
- # _colors["DE"] = "C6"
- # _colors["DEHB"] = "C2"
- # self.colors = []
- # for (_, l) in self.methods:
- # self.colors.append(_colors[l])
-
- def _fill_trajectory(self,
- performance_list,
- cost_list,
-                          replace_nan=np.nan, use_log=False):
- frame_dict = collections.OrderedDict()
- counter = np.arange(0, len(performance_list))
- for p, t, c in zip(performance_list, cost_list, counter):
- if len(p) != len(t):
- raise ValueError("(%d) Array length mismatch: %d != %d" %
- (c, len(p), len(t)))
- frame_dict[str(c)] = pd.Series(data=p, index=t)
-
- # creates a dataframe where the rows are indexed based on time
- # fills with NA for missing values for the respective timesteps
- merged = pd.DataFrame(frame_dict)
-         # ffill() forward-fills missing values, so the only NAs left are at
-         # the beginning of a run, before its first value is recorded
-         merged = merged.ffill()
-
- performance = merged.to_numpy() # converts to a 2D numpy array
- cost_ = merged.index.values # retrieves the timestamps
-
- performance[np.isnan(performance)] = replace_nan
-
- if not np.isfinite(performance).all():
- raise ValueError(
- "\nCould not merge lists, because \n"
- "\t(a) one list is empty?\n"
- "\t(b) the lists do not start with the same times and"
- " replace_nan is not set?\n"
- "\t(c) any other reason.")
-
- return performance, cost_
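-         # Illustrative sketch (hypothetical inputs, not part of the original
-         # file): two runs measured at different cost points are merged onto
-         # the union of their cost grids and forward-filled, e.g.
-         #   perf, cost = self._fill_trajectory(
-         #       performance_list=[np.array([1.0, 0.5]), np.array([0.9, 0.4])],
-         #       cost_list=[np.array([1, 3]), np.array([2, 4])],
-         #       replace_nan=1)
-         #   # cost -> [1, 2, 3, 4]
-         #   # run 0 -> [1.0, 1.0, 0.5, 0.5]
-         #   # run 1 -> [1.0, 0.9, 0.9, 0.4]  (leading NaN set to replace_nan)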
-
- def _regret_plot(self, **kwargs):
- plot_name = kwargs.get("plot_name", "comparison")
- output_type = kwargs.get("output_type", "pdf")
- plot_type = kwargs.get("plot_type", "wallclock")
- # bench_type = kwargs.get("bench_type", 'protein')
- title = kwargs.get("title", "benchmark")
- legend_size = kwargs.get("legend_size", 40)
- if self.benchmark != 'cc18' or kwargs.get('use_log_x'):
- plt.xscale("log")
- if (self.benchmark != 'svm' and self.benchmark != 'bnn') or kwargs.get('use_log'):
- plt.yscale("log")
- plt.tick_params(which='both', direction="in")
- if self.benchmark == 'svm' or self.benchmark == 'bnn' or self.benchmark == "cc18" or self.benchmark == "paramnet":
- plt.legend(loc='upper right',
- framealpha=1,
- prop={
- 'size': legend_size,
- 'weight': 'normal'
- })
- elif self.benchmark == "rl":
- plt.legend(loc='lower left',
- framealpha=1,
- prop={
- 'size': legend_size,
- 'weight': 'normal'
- },
- ncol=1)
- else: #elif self.benchmark == "countingones":
- plt.legend(loc='lower left',
- framealpha=1,
- prop={
- 'size': legend_size,
- 'weight': 'normal'
- })
- plt.title(title, size=40)
-
- if self.benchmark == 'rl':
- plt.xlabel("time $[s]$", fontsize=45)
- elif self.benchmark == 'bnn':
- plt.xlabel("MCMC steps", fontsize=45)
- elif self.benchmark == 'countingones':
-             plt.xlabel("cumulative budget / $b_{max}$", fontsize=45)
-         elif self.benchmark == 'speed':
-             plt.xlabel("runtime excluding function evaluations", fontsize=45)
- elif plot_type == "wallclock":
- plt.xlabel("estimated wallclock time $[s]$", fontsize=45)
- elif plot_type == "fevals":
- plt.xlabel("number of function evaluations", fontsize=45)
-
- if self.benchmark == 'svm':
- plt.ylabel("{} error".format(self.regret_key), fontsize=45)
- elif self.benchmark == 'rl':
- plt.ylabel("epochs until convergence", fontsize=45)
- elif self.benchmark == 'bnn':
- plt.ylabel("negative log-likelihood", fontsize=45)
- elif self.benchmark == 'countingones':
- plt.ylabel("normalized {} regret".format(self.regret_key),
- fontsize=40)
- else:
- plt.ylabel("{} regret".format(self.regret_key), fontsize=45)
-
- if self.benchmark == 'rl':
- # plt.xlim(1e2, 1e5)
- plt.xlim(1e2, self.max_cost)
- # plt.xlim(self.min_cost, self.max_cost)
- elif self.benchmark == 'bnn':
- # plt.xlim(min_limit, self.max_cost)
- plt.xlim(50000, self.max_cost) # min(self.max_cost*10, limit))
- elif self.benchmark == 'countingones':
- plt.xlim(self.min_cost, self.max_cost)
- # plt.xlim(self.min_cost, 1e4)
- elif self.benchmark == 'cc18':
- plt.xlim(0.1, self.max_cost)
- elif self.benchmark == "paramnet":
- print("Max time: {}".format(self.max_cost))
- plt.xlim(self.min_cost, self.max_cost)
- elif self.benchmark == "nas_101_cifar10":
- plt.xlim(1e2, self.max_cost)
- else:
- plt.xlim(self.min_cost, self.max_cost)
- # plt.xlim(max(self.min_cost/10, 1e0), min(self.max_cost*10, 1e7))
-
- if self.benchmark == 'bnn':
- plt.ylim(3, 10) # 75)
- elif self.benchmark == 'rl':
- plt.ylim(1e2, 1e4)
- elif self.benchmark == 'cc18':
- plt.ylim(0, self.max_regret)
- elif self.benchmark == 'svm':
- plt.ylim(self.min_regret, 0.5)
- else:
- plt.ylim(self.min_regret, self.max_regret)
-
- plt.grid(which='both', alpha=0.2, linewidth=0.5)
- print(
- os.path.join(self.out_dir, '{}.{}'.format(plot_name, output_type)))
- plt.savefig(os.path.join(self.out_dir,
- '{}.{}'.format(plot_name, output_type)),
- bbox_inches='tight')
- plt.savefig(os.path.join(self.out_dir,
- '{}.{}'.format(plot_name, "png")),
- bbox_inches='tight')
-
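- # Hedged usage sketch (assumptions: results live under ./exp, each run
- # directory holds the experiment's .yaml config plus per-seed .json files,
- # and the method/mark names below are purely illustrative):
- #
- #   Analyse(exp_dir_root='./exp',
- #           benchmark='countingones',
- #           methods=['DEHB', 'BOHB'],  # optional filter on cfg["OPTM"]["name"]
- #           marks=None,                # optional filter on cfg["mark_label"]
- #           limit=1e7)                 # clip costs beyond this budget
- #
- # Instantiation draws the regret-vs-cost comparison plot and writes
- # comparison.pdf/.png together with the *_df.pkl score tables into
- # ./exp/countingones.
-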
-
- class Analyse_multi_benchmark():
- def __init__(self, exp_dir_root='./exp', **kwargs) -> None:
- self.exp_dir_root = exp_dir_root
- list_of_mean_files = glob(
- os.path.join(self.exp_dir_root, '*/mean_df.pkl'))
-
- list_of_std_files = glob(
- os.path.join(self.exp_dir_root, '*/std_df.pkl'))
- self.mean_dfs = {}
- for filename in list_of_mean_files:
- benchname = filename.split('/')[-2]
- self.mean_dfs[benchname] = loadOBJ(filename)
- self.mean_dfs = pd.DataFrame(self.mean_dfs).transpose()
- self.mean_dfs.to_pickle(
- os.path.join(self.exp_dir_root, "all_mean_dfs.pkl"))
-
- std_dfs = {}
- for filename in list_of_std_files:
- benchname = filename.split('/')[-2]
- std_dfs[benchname] = loadOBJ(filename)
- std_dfs = pd.DataFrame(std_dfs).transpose()
- std_dfs.to_pickle(os.path.join(self.exp_dir_root, "all_std_dfs.pkl"))
-
- # Load run statistics to create a relative ranking plot over time
-
- rank_list_candidates = glob(
- os.path.join(self.exp_dir_root, '*/rank_df.pkl'))
- list_of_rank_files = []
-         for name in rank_list_candidates:
-             # optionally skip benchmarks whose runtime is not wallclock time
-             # in seconds (filter currently disabled)
-             # if "countingones" in name or "bnn" in name or "svm" in name:
-             #     continue
-             list_of_rank_files.append(name)
-
- # load rankings per benchmark
- rank_dfs = []
- for filename in list_of_rank_files:
- rank_dfs.append(loadOBJ(filename))
- # reorganize data to have algorithms as the top hierarchy, followed by every benchmark for the algo
- avg_rank = {}
-         for i, bench_rank_df in enumerate(rank_dfs):
-             for name in bench_rank_df.columns:
-                 avg_rank.setdefault(name, {})[i] = pd.Series(
-                     data=bench_rank_df[name], index=bench_rank_df.index)
-
- # assigning mean rank to all algorithms at start
- starting_rank = np.mean(np.arange(1, 1 + len(avg_rank.keys())))
- for name, v in avg_rank.items():
- avg_rank[name] = pd.DataFrame(v)
- avg_rank[name].iloc[0] = [starting_rank] * avg_rank[name].shape[1]
-
- # compute mean relative rank of each algorithm across all benchmarks
- self.rank_lists = {}
- for name, v in avg_rank.items():
- self.rank_lists[name] = pd.Series(data=np.mean(
- avg_rank[name].ffill(), axis=1),
- index=avg_rank[name].index)
- self.rank_lists = pd.DataFrame(self.rank_lists)
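-         # e.g. an algorithm ranked 1st on one benchmark and 3rd on another at
-         # the same time step gets a mean relative rank of 2.0 at that step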
-
- self.linestyles = [
- (0, (1, 5)), # loosely dotted
- (0, (5, 5)), # loosely dashed
- 'dotted',
- (0, (3, 2, 1, 2, 1, 2)), # dash dot dotted
- 'dashed',
- 'dashdot',
- (0, (3, 1, 1, 1, 1, 1)),
- 'solid'
- ]
-
- self.colors = ["C%d" % i for i in range(len(self.rank_lists.columns))]
- # if len(rank_lists.columns) <= 8:
- # _colors = dict()
- # _colors["RS"] = "C0"
- # _colors["HB"] = "C7"
- # _colors["BOHB"] = "C1"
- # _colors["TPE"] = "C3"
- # _colors["SMAC"] = "C4"
- # _colors["RE"] = "C5"
- # _colors["DE"] = "C6"
- # _colors["DEHB"] = "C2"
- # colors = []
- # for l in rank_lists.columns:
- # colors.append(_colors[l])
-
- self.landmarks = np.arange(start=0,
- stop=self.rank_lists.shape[0],
- step=5) # for smoothing
- self._set_plot()
- self.rank_plot()
-
- def _set_plot(self, ):
- rcParams['font.family'] = 'serif'
-
- def rank_plot(self, ):
- plt.clf()
- xlims = [np.inf, -np.inf]
- for i, name in enumerate(self.rank_lists.columns):
- if name == 'DEHB':
- lw, a = (1.75, 1)
- else:
- lw, a = (1.5, 0.7)
- plt.plot(self.rank_lists[name].index.to_numpy()[self.landmarks],
- self.rank_lists[name].to_numpy()[self.landmarks],
- label=name,
- alpha=a,
-                      linestyle=self.linestyles[i % len(self.linestyles)],
-                      linewidth=lw,
- color=self.colors[i])
- xlims[0] = min(xlims[0], self.rank_lists[name].index.to_numpy()[0])
- xlims[1] = max(xlims[1],
- self.rank_lists[name].index.to_numpy()[-1])
-
- plt.xscale('log')
- plt.legend(loc='upper left', framealpha=1, prop={'size': 12}, ncol=4)
- # plt.fill_between(
- # rank_lists['DEHB'].index.to_numpy()[landmarks],
- # 0, rank_lists['DEHB'].to_numpy()[landmarks],
- # alpha=0.5, color=_colors["DEHB"]
- # )
- # plt.fill_between(
- # rank_lists['DEHB'].index.to_numpy()[landmarks],
- # 0, starting_rank,
- # alpha=0.3, color='gray'
- # )
- # plt.hlines(starting_rank, 0, 1e7)
- plt.xlim(xlims[0], xlims[1])
- plt.ylim(1, self.rank_lists.shape[1])
- plt.xlabel('estimated wallclock time $[s]$', fontsize=15)
- plt.ylabel('average relative rank', fontsize=15)
- plt.savefig(os.path.join(self.exp_dir_root, 'rank_plot.pdf'),
- bbox_inches='tight')
-
- rank_stats = {}
- rank_stats['minimum'] = np.min(self.rank_lists, axis=0)
- rank_stats['maximum'] = np.max(self.rank_lists, axis=0)
- rank_stats['variance'] = np.var(self.rank_lists, axis=0)
- rank_stats = pd.DataFrame(rank_stats)
- # Ranks based on final numbers
- rank_df = {}
- for idx in self.mean_dfs.index:
- rank_df[idx] = pd.Series(data=stats.rankdata(
- self.mean_dfs.loc[idx]),
- index=self.mean_dfs.loc[idx].index)
- rank_df = pd.DataFrame(rank_df)
- print(rank_df.mean(axis=1))
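-
-
- if __name__ == "__main__":
-     # Minimal driver sketch (assumptions: experiment outputs already exist
-     # under ./exp and the benchmark names match the on-disk configs).
-     for benchmark in ["countingones", "Rosenbrock"]:
-         Analyse(exp_dir_root="./exp", benchmark=benchmark)
-     # Aggregate the per-benchmark pickles written above into a rank plot.
-     Analyse_multi_benchmark(exp_dir_root="./exp")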