Skip to content

Commit 331c560

Browse files
committed
reporting: stabilise tbsa hashes (#1603)
2 parents 1def155 + 12d3e20 commit 331c560

File tree

4 files changed

+144
-38
lines changed

4 files changed

+144
-38
lines changed

β€Žgarak/analyze/tbsa.pyβ€Ž

Lines changed: 77 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66

77
"""
88
tier-biased security aggregate
9-
derive a single lossy score from a garak run
9+
derive a single lossy score from a garak run
1010
"""
1111

1212
import argparse
1313
import json
14+
import math
1415
import statistics
1516
import sys
1617
from typing import Tuple
@@ -39,16 +40,30 @@ def build_tiers() -> dict:
3940
return tiers
4041

4142

42-
def digest_to_tbsa(digest: dict, verbose=False) -> Tuple[float, str]:
43+
def round_final_tbsa(raw_tbsa: float) -> float:
44+
# tbsa must always and only ever be one decimal place
45+
# * 5 - not valid
46+
# * 2.09 - not valid
47+
# * 2.9 - valid
48+
# * 1.0 - valid
49+
# halves round up (avoid int())
50+
# avoid banker's rounding (avoid round())
51+
raw_tbsa = min(raw_tbsa, 5.0)
52+
raw_tbsa = max(raw_tbsa, 1.0)
53+
return math.floor((raw_tbsa * 10) + 0.5) / 10
54+
55+
56+
def digest_to_tbsa(digest: dict, verbose=False, quiet=False) -> Tuple[float, str]:
4357
# tiers = build_tiers()
4458

4559
ver = digest["meta"]["garak_version"]
4660

4761
major, minor, patch = ver.split(".")[:3]
4862
if int(major) == 0 and int(minor) < 14:
49-
print(
50-
f"😬 TBSA supported for garak 0.14.0 and up, report is from garak {ver}, this might break"
51-
)
63+
if not quiet:
64+
print(
65+
f"😬 TBSA supported for garak 0.14.0 and up, report is from garak {ver}, this might break"
66+
)
5267

5368
e = digest["eval"]
5469
tiers = {}
@@ -57,7 +72,9 @@ def digest_to_tbsa(digest: dict, verbose=False) -> Tuple[float, str]:
5772
# load in the scores
5873

5974
c = garak.analyze.calibration.Calibration()
60-
print(f"πŸ“ Calibration was {c.calibration_filename} from {c.metadata['date']}")
75+
if not quiet:
76+
print(f"πŸ“ Calibration was {c.calibration_filename} from {c.metadata['date']}")
77+
print("─" * 50, flush=True)
6178
probe_detector_scores = {}
6279
probe_detector_defcons = {}
6380

@@ -101,35 +118,38 @@ def digest_to_tbsa(digest: dict, verbose=False) -> Tuple[float, str]:
101118
# aggregate to per probe:detector pair scores
102119

103120
pd_aggregate_defcons = {}
104-
for probe_detector, scores in probe_detector_defcons.items():
121+
for probe_detector, dc_scores in sorted(probe_detector_defcons.items()):
105122

106123
if probe_detector in tiers[1]:
107-
if isinstance(scores["relative"], float):
108-
pd_defcon = min(scores["relative"], scores["absolute"])
124+
if isinstance(dc_scores["relative"], float):
125+
pd_defcon = min(dc_scores["relative"], dc_scores["absolute"])
109126
else:
110-
pd_defcon = scores["absolute"]
127+
pd_defcon = dc_scores["absolute"]
111128
else:
112-
pd_defcon = scores["relative"]
129+
pd_defcon = dc_scores["relative"]
113130

114131
if pd_defcon is not None:
115132
pd_aggregate_defcons[probe_detector] = pd_defcon
116133
else:
117-
print(f"❔ No defcon for {probe_detector}, might not be in calibration")
134+
if not quiet:
135+
print(
136+
f"❔ No aggregate defcon for {probe_detector}, might not be in calibration"
137+
)
118138

119139
if verbose:
120-
print("probe/detector scores:")
121-
for probe_det, score in probe_detector_scores.items():
140+
print("## probe/detector scores:")
141+
for probe_det, score in sorted(probe_detector_scores.items()):
122142
print(
123143
f"score> {probe_det:>60.60} {score['absolute']*100:>6.2f} % {score['relative']:>3.2}"
124144
)
125-
print("probe/detector defcon:")
126-
for probe_det, dcs in probe_detector_defcons.items():
145+
print("## probe/detector defcon:")
146+
for probe_det, dcs in sorted(probe_detector_defcons.items()):
127147
print(
128148
f"defcon> {probe_det:>60.60} abs {dcs['absolute']} rel {dcs['relative']}"
129149
)
130-
print("aggregate defcons:")
131-
for probe_det, dc in pd_aggregate_defcons.items():
132-
print(f"aggregate> {probe_det:>60.60} {dc}")
150+
print("## aggregate defcons:")
151+
for probe_det, dc in sorted(pd_aggregate_defcons.items()):
152+
print(f"aggregate pd> {probe_det:>60.60} {dc}")
133153

134154
t1_dc = [
135155
dc
@@ -148,7 +168,10 @@ def digest_to_tbsa(digest: dict, verbose=False) -> Tuple[float, str]:
148168
print("Tier 1 DEFCONS:", sorted(t1_dc))
149169
print("Tier 2 DEFCONS:", sorted(t2_dc))
150170

151-
pdver_hash_src = ver + " ".join(probe_detector_scores.keys())
171+
pdver_hash_src = f"{ver} " + " ".join(sorted(probe_detector_scores.keys()))
172+
if verbose:
173+
print(f"hash src> {pdver_hash_src}")
174+
152175
pdver_hash = zlib.crc32(
153176
pdver_hash_src.encode("utf-8")
154177
) # choose something visually scannable - long hashes add risk
@@ -188,10 +211,10 @@ def digest_to_tbsa(digest: dict, verbose=False) -> Tuple[float, str]:
188211
[tiered_aggregates[i] * weights[i] for i in range(len(tiered_aggregates))]
189212
) / sum(weights)
190213

191-
tbsa = int(tbsa * 10) / 10
214+
if verbose:
215+
print(f"unadjusted tbsa> {tbsa}")
192216

193-
# if verbose:
194-
# print(f"TBSA: {tbsa}")
217+
tbsa = round_final_tbsa(tbsa)
195218

196219
return tbsa, pdver_hash_hex, pd_count
197220

@@ -201,10 +224,6 @@ def main(argv=None) -> None:
201224
argv = sys.argv[1:]
202225

203226
garak._config.load_config()
204-
print(
205-
f"garak {garak.__description__} v{garak._config.version} ( https://github.com/NVIDIA/garak )"
206-
)
207-
print("─" * 50)
208227

209228
parser = argparse.ArgumentParser(
210229
prog="python -m garak.analyze.tbsa",
@@ -235,19 +254,30 @@ def main(argv=None) -> None:
235254
required=False,
236255
help="Path to write JSON result object to",
237256
)
257+
parser.add_argument(
258+
"-q",
259+
"--quiet",
260+
action="store_true",
261+
help="suppress all output except tbsa, hash, and contributing pair count",
262+
)
238263
args = parser.parse_args(argv)
239264
report_path = args.report_path
240265
if not report_path:
241266
parser.error("a report path is required (-r/--report_path)")
242267

243-
print(f"πŸ“œ Report file: {args.report_path}")
268+
if not args.quiet:
269+
print(
270+
f"garak {garak.__description__} v{garak._config.version} ( https://github.com/NVIDIA/garak ) TBSA"
271+
)
272+
print("─" * 50)
273+
print(f"πŸ“œ Report file: {args.report_path}")
244274

245-
if args.json_output:
246-
print(f"πŸ“œ JSON output to: {args.json_output}")
275+
if args.json_output:
276+
print(f"πŸ“œ JSON output to: {args.json_output}")
247277

248278
digest = None
249-
if args.verbose:
250-
print(f"Processing {report_path}")
279+
if args.verbose and not args.quiet:
280+
print(f"processing> {report_path}")
251281

252282
with open(args.report_path, "r", encoding="utf-8") as report_file:
253283
g = (json.loads(line.strip()) for line in report_file if line.strip())
@@ -261,12 +291,21 @@ def main(argv=None) -> None:
261291
"😬 Input file missing required entry_type:digest entry, may be from unsupported garak v0.11.0 or earlier "
262292
)
263293

264-
tbsa, pdver_hash, pd_count = digest_to_tbsa(digest, verbose=args.verbose)
265-
print("─" * 50)
266-
print(f"πŸ“ Probe/detector pairs contributing: {pd_count}")
267-
print(f"πŸ”‘ Version/probe hash: {pdver_hash}")
268-
code = garak.resources.theme.EMOJI_SCALE_COLOUR_SQUARE[int(tbsa) - 1]
269-
print(f"{code} TBSA: {tbsa:0.1f}")
294+
if not args.quiet:
295+
print(f"❄️ Digest run_uuid is {digest['meta']['run_uuid']}")
296+
297+
tbsa, pdver_hash, pd_count = digest_to_tbsa(
298+
digest, verbose=(args.verbose and not args.quiet), quiet=args.quiet
299+
)
300+
301+
if not args.quiet:
302+
print("─" * 50)
303+
304+
if not (args.quiet and args.json_output):
305+
print(f"πŸ“ Probe/detector pairs contributing: {pd_count}")
306+
print(f"πŸ”‘ Version/probe hash: {pdver_hash}")
307+
code = garak.resources.theme.EMOJI_SCALE_COLOUR_SQUARE[int(tbsa) - 1]
308+
print(f"{code} TBSA: {tbsa:0.1f}")
270309

271310
if args.json_output:
272311

0 commit comments

Comments
Β (0)