1
1
""" Processors transform Evaluations to be more useful when visualized. """
2
2
import math
3
3
from typing import Any , Callable , Dict , List , Union
4
+ from enum import Enum
4
5
5
6
import numpy as np
6
7
import pandas as pd
7
8
from ftpvl .evaluation import Evaluation
8
9
from scipy import stats
9
10
11
class Direction(Enum):
    """
    Optimization goal of a test metric.

    Some metrics are better when they are small (runtime, for example) and
    others are better when they are large (frequency, for example). Members
    of this enum name which of the two applies.
    """

    # Smaller values of the metric are considered better.
    MINIMIZE = -1
    # Larger values of the metric are considered better.
    MAXIMIZE = 1
18
+
10
19
11
20
class Processor :
12
21
"""
@@ -141,27 +150,30 @@ class AddNormalizedColumn(Processor):
141
150
output_col_name : str
142
151
the column to write the normalized values to
143
152
144
- direction : int (either 1 or -1)
145
- the direction to find the best value to normalize against. If 1, then
146
- the best value is the max and all other values are compared to it. If -1,
147
- the best value is the min and all other values are compared to it .
153
+ direction : Direction
154
+ specifies how to find the 'best' value to normalize against. By default
155
+ MAXIMIZE, all values will be compared to the max value of the input
156
+ column .
148
157
"""
149
158
150
def __init__(
    self,
    groupby: str,
    input_col_name: str,
    output_col_name: str,
    direction: Direction = Direction.MAXIMIZE,
):
    """
    Store the grouping column, the input/output column names and the
    direction used to pick the value that rows are normalized against.

    Parameters
    ----------
    groupby : str
        the column name stored as the grouping key
    input_col_name : str
        the column whose values are normalized
    output_col_name : str
        the column to write the normalized values to
    direction : Direction
        MAXIMIZE (default) compares against the max of the input column,
        MINIMIZE against the min. The legacy integer flags ``1`` and
        ``-1`` are still accepted and mapped to MAXIMIZE and MINIMIZE.

    Raises
    ------
    ValueError
        if `direction` is neither a Direction member nor one of the
        legacy integer flags.
    """
    self._groupby = groupby
    self._input_col_name = input_col_name
    self._output_col_name = output_col_name

    # Direction(...) returns a member unchanged and looks up raw values,
    # so old callers passing 1 / -1 keep their meaning and anything else
    # fails loudly. Without this an int would never compare equal to
    # Direction.MAXIMIZE and would silently select the min.
    self._direction = Direction(direction)
158
170
159
171
def _normalize (self , input_df : pd .DataFrame ):
160
172
"""
161
173
Given a dataframe, find the max value of the input col name and
162
174
create a new column with the normalized value of each row
163
175
"""
164
- if self ._direction == 1 :
176
+ if self ._direction == Direction . MAXIMIZE :
165
177
max_val = input_df [self ._input_col_name ].max ()
166
178
else :
167
179
max_val = input_df [self ._input_col_name ].min ()
@@ -284,11 +296,10 @@ class NormalizeAround(Processor):
284
296
285
297
Parameters
286
298
----------
287
- normalize_direction : dict
288
- a dictionary mapping column names to 1 or -1. If a value is
289
- optimized when smaller, set the negation to 1. If it is optimized
290
- when larger, set the negation to -1. If there is no entry,
291
- normalization is skipped.
299
+ normalize_direction : Dict[str, Direction]
300
+ a dictionary mapping column names to the optimization direction of
301
+ the column. Used to determine if increases or decreases to baseline are
302
+ perceived to be 'better'.
292
303
293
304
group_by : str
294
305
the column name used to group results before finding the baseline
@@ -303,16 +314,25 @@ class NormalizeAround(Processor):
303
314
the value of the baseline result at idx_name
304
315
"""
305
316
306
def __init__(
    self,
    normalize_direction: Dict[str, Direction],
    group_by: str,
    idx_name: str,
    idx_value: str,
):
    """
    Record the grouping/baseline settings and translate each column's
    optimization direction into a numeric negation flag.

    Parameters
    ----------
    normalize_direction : Dict[str, Direction]
        maps column names to their optimization direction
    group_by : str
        the column name used to group results
    idx_name : str
        the name of the column identifying the baseline result
    idx_value : str
        the value of the baseline result at idx_name
    """
    self._groupby = group_by
    self._idx_name = idx_name
    self._idx_value = idx_value

    entries = list(normalize_direction.items())
    self._column_names = [column for column, _ in entries]
    # MINIMIZE is stored as 1; every other value is stored as -1.
    self._column_negations = [
        1 if goal == Direction.MINIMIZE else -1 for _, goal in entries
    ]
316
336
317
337
def _normalize_around (self , input_df ):
318
338
"""
@@ -357,19 +377,22 @@ class Normalize(Processor):
357
377
358
378
Parameters
359
379
----------
360
- normalize_direction : dict
361
- a dictionary mapping column names to 1 or -1. If a value is optimized
362
- when smaller, set the negation to 1. If it is optimized when larger, set
363
- the negation to -1. If there is no entry, normalization is skipped .
380
+ normalize_direction : Dict[str, Direction]
381
+ a dictionary mapping column names to the optimization direction of
382
+ the column. Used to determine if increases or decreases to baseline are
383
+ perceived to be 'better' .
364
384
"""
365
385
366
def __init__(self, normalize_direction: Dict[str, Direction]):
    """
    Translate each column's optimization direction into a numeric
    negation flag, keeping column names and flags in matching order.

    Parameters
    ----------
    normalize_direction : Dict[str, Direction]
        maps column names to their optimization direction
    """
    entries = list(normalize_direction.items())
    self._column_names = [column for column, _ in entries]
    # MINIMIZE is stored as 1; every other value is stored as -1.
    self._column_negations = [
        1 if goal == Direction.MINIMIZE else -1 for _, goal in entries
    ]
373
396
374
397
def _normalize (self , input_df ):
375
398
"""
@@ -543,3 +566,82 @@ def geomean(x):
543
566
x = x .dropna ()
544
567
return stats .gmean (x ) if not x .empty else math .nan
545
568
super ().__init__ (geomean )
569
+
570
+
571
class CompareToFirst(Processor):
    """
    Processor that compares numeric rows in an evaluation to the first row by
    adding columns that specify the relative difference between the first row
    and all other rows.

    You can specify the direction that improvements should be outputted. For
    example, a change from `100` to `50` may be a 2x change if the objective is
    minimization, while it may be a 0.5x change if the objective is
    maximization.

    Only the columns named in `normalize_direction`, each followed by its
    comparison column, are kept in the output.

    Parameters
    ----------
    normalize_direction : Dict[str, Direction]
        a dictionary mapping column names to the optimization direction of
        the column. Used to determine if increases or decreases to baseline are
        perceived to be 'better'.
    suffix : str
        the suffix to use when creating new columns that contain the relative
        comparison to the first row, by default ".relative"

    Examples
    --------
    >>> a = Evaluation(pd.DataFrame(
    ...     data=[
    ...         {"x": 1, "y": 8},
    ...         {"x": 4, "y": 8}
    ...     ]))
    >>> direction = {"x": Direction.MAXIMIZE, "y": Direction.MAXIMIZE}
    >>> a.process([CompareToFirst(direction, suffix=".diff")]).get_df()
       x  x.diff  y  y.diff
    0  1     1.0  8     1.0
    1  4     4.0  8     1.0

    >>> a = Evaluation(pd.DataFrame(
    ...     data=[
    ...         {"x": 1, "y": 8},
    ...         {"x": 4, "y": 8}
    ...     ]))
    >>> direction = {"x": Direction.MINIMIZE, "y": Direction.MINIMIZE}
    >>> a.process([CompareToFirst(direction, suffix=".diff")]).get_df()
       x  x.diff  y  y.diff
    0  1    1.00  8     1.0
    1  4    0.25  8     1.0
    """

    def __init__(
        self,
        normalize_direction: Dict[str, Direction],
        suffix: str = ".relative",
    ):
        entries = list(normalize_direction.items())
        self._column_names = [name for name, _ in entries]
        # Despite the attribute name these are exponents applied to the
        # value/baseline ratio: -1 inverts the ratio for MINIMIZE columns so
        # that a decrease reads as an improvement; every other direction
        # keeps the ratio as-is.
        self._column_negations = [
            -1 if direction == Direction.MINIMIZE else 1
            for _, direction in entries
        ]

        self._suffix = suffix

    def _compare_to_first(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """
        Given a dataframe, sets the first row as the baseline and compares all
        rows to the first row, outputting the relative difference between each
        value as a new column.

        The baseline is taken by position, so the result does not depend on
        the dataframe's index labels. A dataframe without rows yields empty
        comparison columns.
        """
        output_cols = []
        for col in self._column_names:
            output_cols.extend([col, col + self._suffix])

        values = input_df[self._column_names]
        if values.empty:
            # No baseline row to divide by; keep the shape, emit no ratios.
            ratios = values.astype(float)
        else:
            # .iloc[0] is the first row by position; .loc[0] would look up
            # the index *label* 0, which may be missing or not the first row.
            baseline = values.iloc[0]
            exponents = pd.Series(
                self._column_negations, index=self._column_names
            )
            ratios = (values / baseline) ** exponents

        ratios = ratios.add_suffix(self._suffix)
        return pd.concat([input_df, ratios], axis=1)[output_cols]

    def process(self, input_eval: Evaluation) -> Evaluation:
        """
        Return a new Evaluation holding the compared dataframe and carrying
        over the eval id of `input_eval`.
        """
        input_df = input_eval.get_df()
        new_df = self._compare_to_first(input_df)
        return Evaluation(new_df, input_eval.get_eval_id())
0 commit comments