Skip to content

Commit d847093

Browse files
TypingKoala and Johnny Bui authored
Implement Evaluation Comparison features and Direction (#18)
* Add support for evaluation concatenation * Add support for evaluation concatenation * Implement direction and comparetofirst * update documentation with info about direction and comparetofirst * Increment version to 0.2.0 * update comparetofirst naming and tests * rename baseline_ratio in comparetofirst Co-authored-by: Johnny Bui <[email protected]>
1 parent dd2a438 commit d847093

File tree

7 files changed

+384
-31
lines changed

7 files changed

+384
-31
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ Take a look at the notebooks below to demonstrate the functionality of FTPVL.
1212

1313
1. [Using `HydraFetcher` and Processors](https://colab.research.google.com/drive/1BIQ-iulDFpzcve7lGJPwLePJ5ETBJ6Ut?usp=sharing)
1414
2. [Styling tables with `SingleTableVisualizer`](https://colab.research.google.com/drive/1u3EnmIYnTBk-LXZhqNHt_h4aMuq-_cWq?usp=sharing)
15-
3. [Comparing two different Evaluations](https://colab.research.google.com/drive/1I7InmA6210vIIwdQ7TGHE6aF_WwIm1dM?usp=sharing)
15+
3. [Comparing two Evaluations using the internal dataframe](https://colab.research.google.com/drive/1I7InmA6210vIIwdQ7TGHE6aF_WwIm1dM?usp=sharing)
1616
4. [Filtering and Aggregating an Evaluation](https://colab.research.google.com/drive/1DDwlQFS81RGLL-q8DsgICF-HOC5ir6oS?usp=sharing)
17+
5. [Comparing multiple Evaluations](https://colab.research.google.com/drive/1kSF3bEjG_c6bLh9PLus9GPk5aLrtYhly?usp=sharing)
1718

1819
## Documentation
1920
Extensive documentation, including a *Getting Started* guide, is available on
@@ -45,6 +46,10 @@ make html
4546
* `sphinx-rtd-theme`: for documentation generation (theme) ([website](https://github.com/readthedocs/sphinx_rtd_theme))
4647

4748
## Changes
49+
### 0.2.0
50+
* Added evaluation concatenation and the `CompareToFirst` processor for relative comparisons between evaluations.
51+
* Fixed `HydraFetcher` issues when fetching an older evaluation that contains more than one build artifact.
52+
4853
### 0.1.6
4954
* Added support for filter and aggregator processors, fixes [#9](https://github.com/SymbiFlow/FPGA-Tool-Performance-Visualization-Library/issues/9)
5055

docs/topics/api.rst

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ Processors API
7676
.. autoclass:: ftpvl.processors.GeomeanAggregate
7777
:members:
7878

79+
.. autoclass:: ftpvl.processors.CompareToFirst
80+
7981
.. _topics-api-styles:
8082

8183
Styles API
@@ -99,4 +101,21 @@ Visualizers API
99101
:members:
100102

101103
.. autoclass:: ftpvl.visualizers.SingleTableVisualizer
102-
:members:
104+
:members:
105+
106+
Enums
107+
=====
108+
Direction
109+
*********
110+
.. autoclass:: ftpvl.processors.Direction
111+
:members:
112+
113+
.. autoattribute:: MAXIMIZE
114+
115+
Indicates that the corresponding metric is optimized by
116+
*maximizing* the value
117+
118+
.. autoattribute:: MINIMIZE
119+
120+
Indicates that the corresponding metric is optimized by
121+
*minimizing* the value

ftpvl/evaluation.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ def get_eval_id(self) -> Union[int, None]:
3434
Returns the ID number of the evaluation if specified, otherwise None
3535
"""
3636
return self._eval_id
37+
38+
def get_copy(self) -> 'Evaluation':
    """
    Builds a new Evaluation from this instance's dataframe and evaluation ID.

    Returns:
        a new Evaluation constructed from the values returned by `get_df()`
        and `get_eval_id()`
    """
    # NOTE(review): whether the result is fully independent of this instance
    # (a "deep" copy) depends on get_df() / the constructor copying the
    # dataframe -- confirm against their implementations.
    frame = self.get_df()
    eval_id = self.get_eval_id()
    return Evaluation(frame, eval_id)
3743

3844
def process(self, pipeline: List['Processor']) -> 'Evaluation':
3945
"""
@@ -47,3 +53,37 @@ def process(self, pipeline: List['Processor']) -> 'Evaluation':
4753
an Evaluation instance that was processed by the pipeline
4854
"""
4955
return reduce(lambda r, p: p.process(r), pipeline, self)
56+
57+
def __add__(self, other: 'Evaluation') -> 'Evaluation':
    """
    Concatenates two Evaluations with the `+` operator, stacking the rows of
    `other` below the rows of `self`.

    Args:
        other: the Evaluation whose rows are appended after this one's

    Returns:
        a new Evaluation wrapping the concatenated dataframe; the row index
        is renumbered and no evaluation ID is passed to the new instance

    Raises:
        TypeError: if `other` is not an Evaluation
    """
    if isinstance(other, Evaluation):
        frames = [self.get_df(), other.get_df()]
        # ignore_index=True discards both original indexes and renumbers rows
        combined = pd.concat(frames, ignore_index=True)
        return Evaluation(combined)
    raise TypeError(f"can only concatenate Evaluation (not {type(other).__name__}) to Evaluation")
73+
74+
def __radd__(self, other: 'Evaluation') -> 'Evaluation':
    """
    Magic method for reverse concatenating two Evaluations, returning an
    Evaluation with dataframe (other + self).

    This is what makes `sum([eval_a, eval_b, ...])` work: `sum()` starts from
    the integer `0`, so the first addition performed is `0 + eval_a`, which
    Python dispatches to `eval_a.__radd__(0)`.

    Args:
        other: the left-hand operand; either the numeric start value `0` or
            another Evaluation

    Returns:
        a copy of this Evaluation if `other` is the number zero, otherwise a
        new Evaluation with the rows of `other` followed by the rows of `self`

    Raises:
        TypeError: if `other` is neither zero nor an Evaluation
    """
    # Handle the default start value of sum(), which is `0`. The type check
    # comes first so that array-like operands (whose `== 0` is elementwise
    # and has no single truth value) fall through to the TypeError below.
    if isinstance(other, (int, float)) and other == 0:
        return self.get_copy()
    if isinstance(other, Evaluation):
        # Reflected operation: `other` is the left operand, so its rows
        # must come first, as documented.
        return other.__add__(self)
    # Not an Evaluation: delegate to __add__ so the same TypeError is raised.
    return self.__add__(other)

ftpvl/processors.py

Lines changed: 126 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
11
""" Processors transform Evaluations to be more useful when visualized. """
22
import math
33
from typing import Any, Callable, Dict, List, Union
4+
from enum import Enum
45

56
import numpy as np
67
import pandas as pd
78
from ftpvl.evaluation import Evaluation
89
from scipy import stats
910

11+
class Direction(Enum):
    """
    Optimization direction of a test metric, i.e. whether smaller or larger
    values are considered better. Runtime, for instance, is usually
    minimized, whereas frequency is maximized.
    """
    # Smaller values of the metric are better.
    MINIMIZE = -1
    # Larger values of the metric are better.
    MAXIMIZE = 1
18+
1019

1120
class Processor:
1221
"""
@@ -141,27 +150,30 @@ class AddNormalizedColumn(Processor):
141150
output_col_name : str
142151
the column to write the normalized values to
143152
144-
direction : int (either 1 or -1)
145-
the direction to find the best value to normalize against. If 1, then
146-
the best value is the max and all other values are compared to it. If -1,
147-
the best value is the min and all other values are compared to it.
153+
direction : Direction
154+
specifies how to find the 'best' value to normalize against. By default
155+
MAXIMIZE, all values will be compared to the max value of the input
156+
column.
148157
"""
149158

150-
def __init__(self, groupby: str, input_col_name: str, output_col_name: str, direction: int = 1):
151-
159+
def __init__(
160+
self,
161+
groupby: str,
162+
input_col_name: str,
163+
output_col_name: str,
164+
direction: Direction = Direction.MAXIMIZE
165+
):
152166
self._groupby = groupby
153167
self._input_col_name = input_col_name
154168
self._output_col_name = output_col_name
155-
156-
assert direction in [1, -1], "direction must be either 1 or -1"
157169
self._direction = direction
158170

159171
def _normalize(self, input_df: pd.DataFrame):
160172
"""
161173
Given a dataframe, find the max value of the input col name and
162174
create a new column with the normalized value of each row
163175
"""
164-
if self._direction == 1:
176+
if self._direction == Direction.MAXIMIZE:
165177
max_val = input_df[self._input_col_name].max()
166178
else:
167179
max_val = input_df[self._input_col_name].min()
@@ -284,11 +296,10 @@ class NormalizeAround(Processor):
284296
285297
Parameters
286298
----------
287-
normalize_direction : dict
288-
a dictionary mapping column names to 1 or -1. If a value is
289-
optimized when smaller, set the negation to 1. If it is optimized
290-
when larger, set the negation to -1. If there is no entry,
291-
normalization is skipped.
299+
normalize_direction : Dict[str, Direction]
300+
a dictionary mapping column names to the optimization direction of
301+
the column. Used to determine if increases or decreases to baseline are
302+
perceived to be 'better'.
292303
293304
group_by : str
294305
the column name used to group results before finding the baseline
@@ -303,16 +314,25 @@ class NormalizeAround(Processor):
303314
the value of the baseline result at idx_name
304315
"""
305316

306-
def __init__(self, normalize_direction: dict, group_by: str, idx_name: str, idx_value: str):
317+
def __init__(
318+
self,
319+
normalize_direction: Dict[str, Direction],
320+
group_by: str,
321+
idx_name: str,
322+
idx_value: str
323+
):
307324
self._groupby = group_by
308325
self._idx_name = idx_name
309326
self._idx_value = idx_value
310327

311328
self._column_names = []
312329
self._column_negations = []
313-
for name, negation in normalize_direction.items():
330+
for name, direction in normalize_direction.items():
314331
self._column_names.append(name)
315-
self._column_negations.append(negation)
332+
if direction == Direction.MINIMIZE:
333+
self._column_negations.append(1)
334+
else:
335+
self._column_negations.append(-1)
316336

317337
def _normalize_around(self, input_df):
318338
"""
@@ -357,19 +377,22 @@ class Normalize(Processor):
357377
358378
Parameters
359379
----------
360-
normalize_direction : dict
361-
a dictionary mapping column names to 1 or -1. If a value is optimized
362-
when smaller, set the negation to 1. If it is optimized when larger, set
363-
the negation to -1. If there is no entry, normalization is skipped.
380+
normalize_direction : Dict[str, Direction]
381+
a dictionary mapping column names to the optimization direction of
382+
the column. Used to determine if increases or decreases to baseline are
383+
perceived to be 'better'.
364384
"""
365385

366-
def __init__(self, normalize_direction: dict):
386+
def __init__(self, normalize_direction: Dict[str, Direction]):
367387

368388
self._column_names = []
369389
self._column_negations = []
370-
for name, negation in normalize_direction.items():
390+
for name, direction in normalize_direction.items():
371391
self._column_names.append(name)
372-
self._column_negations.append(negation)
392+
if direction == Direction.MINIMIZE:
393+
self._column_negations.append(1)
394+
else:
395+
self._column_negations.append(-1)
373396

374397
def _normalize(self, input_df):
375398
"""
@@ -543,3 +566,82 @@ def geomean(x):
543566
x = x.dropna()
544567
return stats.gmean(x) if not x.empty else math.nan
545568
super().__init__(geomean)
569+
570+
571+
class CompareToFirst(Processor):
    """
    Processor that compares numeric rows in an evaluation to the first row by
    adding columns that specify the relative difference between the first row
    and all other rows.

    You can specify the direction that improvements should be outputted. For
    example, a change from `100` to `50` may be a 2x change if the objective is
    minimization, while it may be a 0.5x change if the objective is
    maximization.

    The resulting dataframe contains only the columns named in
    `normalize_direction`, each immediately followed by its relative column.

    Parameters
    ----------
    normalize_direction : Dict[str, Direction]
        a dictionary mapping column names to the optimization direction of
        the column. Used to determine if increases or decreases to baseline are
        perceived to be 'better'.
    suffix : str
        the suffix to use when creating new columns that contain the relative
        comparison to the first row, by default ".relative"

    Examples
    --------
    >>> a = Evaluation(pd.DataFrame(
    ...     data=[
    ...         {"x": 1, "y": 8},
    ...         {"x": 4, "y": 8}
    ...     ]))
    >>> direction = {"x": Direction.MAXIMIZE, "y": Direction.MAXIMIZE}
    >>> a.process([CompareToFirst(direction, suffix=".diff")]).get_df()
       x  x.diff  y  y.diff
    0  1    1.00  8     1.0
    1  4    4.00  8     1.0

    >>> a = Evaluation(pd.DataFrame(
    ...     data=[
    ...         {"x": 1, "y": 8},
    ...         {"x": 4, "y": 8}
    ...     ]))
    >>> direction = {"x": Direction.MINIMIZE, "y": Direction.MINIMIZE}
    >>> a.process([CompareToFirst(direction, suffix=".diff")]).get_df()
       x  x.diff  y  y.diff
    0  1    1.00  8     1.0
    1  4    0.25  8     1.0
    """

    def __init__(self, normalize_direction: Dict[str, Direction], suffix: str = ".relative"):
        self._column_names = list(normalize_direction)
        # Exponent applied to (value / baseline) for each column: 1 keeps the
        # ratio as-is (larger is better), -1 inverts it (smaller is better).
        self._column_negations = [
            -1 if direction == Direction.MINIMIZE else 1
            for direction in normalize_direction.values()
        ]
        self._suffix = suffix

    def _compare_to_first(self, input_df):
        """
        Given a dataframe, sets the first row as the baseline and compares all
        rows to the first row, outputting the relative difference between each
        value as a new column.
        """
        # Output order: each compared column followed by its relative column.
        new_cols = []
        for col in self._column_names:
            new_cols.extend([col, col + self._suffix])

        compared = input_df[self._column_names]
        if compared.shape[0] == 0:
            # No rows means no baseline; produce the relative columns empty
            # instead of failing on the baseline lookup.
            baseline_ratio = compared.astype(float)
        else:
            # Select the baseline by position so the first row is used even
            # when the index does not start at label 0 (e.g. after filtering).
            base = compared.iloc[0]
            baseline_ratio = (compared / base) ** self._column_negations

        renamed_ratio = baseline_ratio.rename(lambda col: col + self._suffix, axis=1)
        return pd.concat([input_df, renamed_ratio], axis=1)[new_cols]

    def process(self, input_eval: Evaluation) -> Evaluation:
        """
        Returns a new Evaluation, carrying over the input's evaluation ID,
        whose dataframe holds the compared columns and their relative columns.
        """
        input_df = input_eval.get_df()
        new_df = self._compare_to_first(input_df)
        return Evaluation(new_df, input_eval.get_eval_id())

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
1919
AUTHOR = 'Johnny Bui'
2020
REQUIRES_PYTHON = '>=3.6.0'
21-
VERSION = '0.1.6'
21+
VERSION = '0.2.0'
2222

2323
# What packages are required for this module to be executed?
2424
REQUIRED = [

0 commit comments

Comments
 (0)