|
| 1 | +import seaborn as sns |
| 2 | +import pandas as pd |
| 3 | +import numpy as np |
| 4 | +import matplotlib.pyplot as plt |
| 5 | + |
| 6 | +from utils import get_plot_folder |
| 7 | +from functools import reduce |
| 8 | +from pathlib import Path |
| 9 | +import json |
| 10 | +import re |
| 11 | + |
| 12 | + |
def build_info(file_name, info, i):
    """Flatten one result record into a single row dictionary.

    Args:
        file_name: Value stored under the ``'dataset'`` key of the row.
        info: Mapping loaded from a result file. Only its non-list values
            are copied into the row.
        i: Value stored under the ``'i'`` key of the row.

    Returns:
        dict: The non-list entries of ``info`` plus ``'dataset'``, ``'i'``
        and ``'max_idx'``. When ``info`` carries no scalar ``'max_idx'``,
        it is set to the position of ``info['best_fn']`` inside
        ``info['scores']``, or to ``None`` when that position cannot be
        determined.
    """
    data = {key: value for key, value in info.items() if not isinstance(value, list)}
    data['dataset'] = file_name
    data['i'] = i
    if 'max_idx' not in data:
        try:
            max_idx = info['scores'].index(data['best_fn'])
        except (KeyError, ValueError, AttributeError, TypeError):
            # Missing keys, a best score absent from the list, or a
            # malformed 'scores' entry: the lookup is best-effort only.
            # Unrelated errors (e.g. KeyboardInterrupt) still propagate.
            max_idx = None
        data['max_idx'] = max_idx
    return data
| 25 | + |
def extract_scores(metalearner_path: Path):
    """Load the result files of one metalearner, grouped by run index.

    Every ``*.json`` file in ``metalearner_path`` whose name matches
    ``<prefix>_<digits>_<digits>.json`` is parsed. The first digit group is
    used as the dataset identifier and the second as the run index; each
    file becomes one row built by ``build_info``. JSON files whose name
    does not follow that pattern are skipped.

    Args:
        metalearner_path: Directory that holds the ``*.json`` result files.

    Returns:
        list[pd.DataFrame]: One frame per run index, in the order in which
        each index was first encountered; every row describes one file.
    """
    # Raw string: '\w' and '\d' are regex escapes, not string escapes.
    file_re = re.compile(r'\w+_(\d+)_(\d+)\.json')
    rows_by_iteration = {}
    for fn in metalearner_path.glob('*.json'):
        m = file_re.match(fn.name)
        if m is None:
            # Not a result file; without this guard the lookup below would
            # fail with AttributeError on None.
            continue
        dataset_name = m.group(1)
        iteration = int(m.group(2))

        # Read-only access is enough, and the context manager guarantees
        # the handle is closed even if parsing fails.
        with open(fn, 'r', encoding='utf-8') as fp:
            info = json.load(fp)

        rows_by_iteration.setdefault(iteration, []).append(
            build_info(dataset_name, info, iteration)
        )

    return [pd.DataFrame(rows) for rows in rows_by_iteration.values()]
| 42 | + |
| 43 | + |
def build_average_dataframe(dfs, metalearner):
    """Average every metric of each dataset across several frames.

    Args:
        dfs: Iterable of DataFrames that have a ``'dataset'`` column. Every
            column other than ``'dataset'`` and ``'i'`` is treated as a
            metric and must hold values ``np.mean`` can average.
        metalearner: Label written to the ``'i'`` column of every output
            row.

    Returns:
        pd.DataFrame: One row per dataset, in first-seen order, with the
        columns ``'dataset'``, ``'i'`` and the mean of each metric.
    """
    key_columns = ('dataset', 'i')
    # dataset -> metric name -> values collected over all frames
    collected = {}
    for df in dfs:
        metric_columns = [column for column in df.columns if column not in key_columns]
        for _, row in df.iterrows():
            dataset = row['dataset']
            for column in metric_columns:
                # setdefault creates the nested containers on first use,
                # replacing exception-driven dictionary construction.
                collected.setdefault(dataset, {}).setdefault(column, []).append(row[column])

    aggregate_datasets = [
        {'dataset': dataset, 'i': metalearner,
         **{prop: np.mean(values) for prop, values in metrics.items()}}
        for dataset, metrics in collected.items()
    ]
    return pd.DataFrame(aggregate_datasets)
| 64 | + |
| 65 | + |
def plot_boxplot(data, prop_name, folder):
    """Draw a box plot of one column and save it to disk.

    Args:
        data: Dataset handed to ``sns.boxplot`` (callers in this file pass
            a DataFrame).
        prop_name: Column drawn on the y axis; also used as the figure
            label and as its title.
        folder: Output path handed to ``plt.savefig``.
    """
    figure = plt.figure(prop_name)
    figure.suptitle(prop_name)
    sns.boxplot(y=prop_name, data=data)
    plt.savefig(folder)
    # Close the current figure once it has been written.
    plt.close()
| 71 | + |
| 72 | + |
def plot_multiple_boxplot(data, prop_name, folder):
    """Draw one box per value of the ``'i'`` column and save the figure.

    Args:
        data: Dataset handed to ``sns.boxplot``; it needs an ``'i'`` column
            and the ``prop_name`` column.
        prop_name: Column drawn on the y axis; also used as the figure
            label and as its title.
        folder: Output path handed to ``plt.savefig``.
    """
    figure = plt.figure(prop_name)
    figure.suptitle(prop_name)
    axes = sns.boxplot(x='i', y=prop_name, hue='i', dodge=False, data=data)
    plt.legend(loc='best', title='Estrategias')
    # The legend identifies each box, so the x axis carries no tick labels
    # and no axis label.
    axes.set_xticklabels([])
    axes.set_xlabel(None)
    plt.savefig(folder)
    plt.close()
| 81 | + |
| 82 | + |
def plot_histogram(data, metalearners, folder):
    """Save stacked histograms comparing each strategy with the first one.

    One subplot is drawn per entry of ``metalearners[1:]``; each shows the
    ``'best_fn'`` distribution of that entry next to the distribution of
    ``metalearners[0]``. The figure is written to ``folder / 'histogram'``.

    Args:
        data: DataFrame with an ``'i'`` column holding strategy names and a
            ``'best_fn'`` column.
        metalearners: Strategy names; the first one is the baseline every
            other entry is compared with.
        folder: Path-like object supporting ``/`` that receives the image.

    Returns:
        None. Nothing is drawn when fewer than two strategies are given.
    """
    if len(metalearners) < 2:
        # No pair to compare; plt.subplots would also reject nrows=0.
        return

    autogoal = metalearners[0]
    contenders = metalearners[1:]
    # squeeze=False always yields a 2-D array of axes, so indexing also
    # works when there is a single row (exactly two strategies).
    _, axs = plt.subplots(nrows=len(contenders), squeeze=False)
    for ax, metalearner in zip(axs[:, 0], contenders):
        df = data[(data['i'] == autogoal) | (data['i'] == metalearner)]
        ax.set_xlabel('Accuracy Obtenido')
        ax.set_ylabel('Datasets')
        sns.histplot(data=df, x='best_fn', hue='i',
                     hue_order=[autogoal, metalearner], bins=20, ax=ax)
        # Keep the legend seaborn builds from the hue levels: its entries
        # are tied to their own colours, whereas a bare ``labels=`` list
        # is matched to artists purely by order.
        legend = ax.get_legend()
        if legend is not None:
            legend.set_title('Estrategias')
    plt.savefig(folder / 'histogram')
    plt.close()
| 94 | + |
| 95 | + |
def plot_results(metalearners, metalearners_path: Path, plot_folder: Path):
    """Produce every plot for a set of strategies.

    For each strategy, box plots of every metric are saved per run and for
    the per-dataset average. Afterwards the averages of all strategies are
    combined for the comparison box plots, the histogram and the
    performance curves.

    Args:
        metalearners: Strategy names; each one is also the name of its
            sub-folder under ``metalearners_path`` and ``plot_folder``.
        metalearners_path: Directory holding one result folder per
            strategy.
        plot_folder: Directory that receives the generated images.
    """
    key_columns = ['i', 'dataset']
    avg_results = []
    for metalearner in metalearners:
        # NOTE(review): get_plot_folder comes from utils; presumably it
        # prepares and returns the output directory - confirm.
        metalearner_folder = get_plot_folder(plot_folder / metalearner)
        frames = extract_scores(metalearners_path / metalearner)
        avg = build_average_dataframe(frames, metalearner)
        # The average is plotted as one more frame after the runs.
        frames.append(avg)
        avg_results.append(avg)
        for index, frame in enumerate(frames):
            for column in frame.columns:
                if column not in key_columns:
                    plot_boxplot(frame, column, metalearner_folder / f'{column}_{index}')

    summary = pd.concat(avg_results)
    # Negative best scores are clamped to zero before comparing strategies.
    summary.loc[summary['best_fn'] < 0, 'best_fn'] = 0
    for column in summary.columns:
        if column not in key_columns:
            plot_multiple_boxplot(summary, column, plot_folder / f'{column}')

    plot_histogram(summary, metalearners, plot_folder)

    dfs = build_performance_info(metalearners, metalearners_path)
    plotting_performance(dfs, plot_folder / 'performance')
| 120 | + |
| 121 | + |
def fix_performance_info(performance: list):
    """Return the running maximum of a score sequence.

    Element ``k`` of the result is the best score among the first ``k + 1``
    inputs, so the result has exactly one entry per input score and never
    decreases.

    Args:
        performance: Scores in the order they were obtained.

    Returns:
        list: Best-so-far scores, the same length as ``performance``; an
        empty list for empty input.
    """
    best_so_far = []
    for score in performance:
        # Seeding the list with performance[0] and then iterating over the
        # whole input would repeat the first score and make the result one
        # element too long; start empty instead.
        if best_so_far and not score > best_so_far[-1]:
            score = best_so_far[-1]
        best_so_far.append(score)
    return best_so_far
| 130 | + |
| 131 | + |
def build_performance_info(metalearners, metalearners_path: Path):
    """Collect the score history of every strategy.

    For each strategy, every ``*.json`` file in its folder is read, the
    non-positive entries of its ``'scores'`` list are replaced by 0, and
    the list is passed through ``fix_performance_info``. Files with an
    empty score list are skipped.

    Args:
        metalearners: Strategy names; each one is also the name of its
            sub-folder under ``metalearners_path``.
        metalearners_path: Directory holding one result folder per
            strategy.

    Returns:
        dict: Strategy name -> DataFrame with an ``'i'`` column (position
        of the score within its file) and a column named after the
        strategy holding the processed scores of all its files.
    """
    dataframes = {}
    for metalearner in metalearners:
        positions = []
        scores = []
        for fn in (metalearners_path / metalearner).glob('*.json'):
            # Read-only access is enough, and the context manager closes
            # the handle instead of leaking one per file.
            with open(fn, 'r', encoding='utf-8') as fp:
                info = json.load(fp)

            performance = [p if p > 0 else 0 for p in info['scores']]
            if not performance:
                continue
            performance = fix_performance_info(performance)
            scores.extend(performance)
            positions.extend(range(len(performance)))
        dataframes[metalearner] = pd.DataFrame({'i': positions, metalearner: scores})
    return dataframes
| 151 | + |
| 152 | + |
def plotting_performance(data, folder, xlim=(0, 200)):
    """Plot the score curve of every strategy on one figure and save it.

    Args:
        data: Mapping of strategy name -> DataFrame with an ``'i'`` column
            (x axis) and a column named after the strategy (y axis).
        folder: Output path handed to ``plt.savefig``.
        xlim: ``(left, right)`` limits of the x axis. Defaults to
            ``(0, 200)``.
    """
    plt.figure()
    for metalearner, df in data.items():
        # Labelling each line directly ties the legend entry to its own
        # artist; a separate ``labels=`` list is matched only by order,
        # which is fragile when a line is drawn together with other
        # artists such as an error band.
        sns.lineplot(data=df, x='i', y=metalearner, label=metalearner)
    plt.legend(loc='best', title='Estrategias')
    plt.ylabel(None)
    plt.xlabel('Iteraciones')
    plt.xlim(xlim)
    plt.savefig(folder)
    plt.close()
| 164 | + |
| 165 | + |
def main():
    """Generate the plots for the ``paper`` result set.

    Earlier experiments (l1 distance, l2 distance, xgb_metalearner_v2 and
    new_paper) were plotted with the same ``plot_results`` call, pointing
    it at their own result folders and strategy names.
    """
    plot_folder = get_plot_folder('plots/results/paper')
    metalearners = [
        'Autogoal',
        'Vecinos Cercanos Simple',
        'Vecinos Cercanos Ponderado',
        'XGBRanker',
    ]
    results_root = Path('results/paper/results')
    plot_results(metalearners, results_root, plot_folder)


if __name__ == '__main__':
    main()
0 commit comments