Skip to content

Commit 4e33f96

Browse files
mihowclaude
andcommitted
fix(ml): create null detection markers only after real saves succeed
Issue #1310: null detections (empty-bbox sentinels marking "image processed, nothing found") were created before create_detections / create_classifications / create_and_update_occurrences_for_detections ran. Two consequences: 1. If any of those downstream steps failed, the image was already flagged as processed via the null marker — filter_processed_images would skip it on the next run, leaving the image permanently in a "processed but no detections" state. Observed on project 171 (400 captures with only null detections). 2. create_and_update_occurrences_for_detections iterated every detection including nulls, so each null marker spawned a phantom Occurrence with determination=NULL. Fix in ami/ml/models/pipeline.py save_results: - Run create_detections / create_classifications / create_and_update_occurrences on the real DetectionResponses only. - After those succeed, build null DetectionResponses for images that ended up without any detections and persist them via a second create_detections call. - Null responses never enter the classification / occurrence loops, so no phantom Occurrence is created even in the happy path. Tests in ami/ml/tests.py TestPipeline: - test_null_detection_does_not_create_phantom_occurrence: asserts the happy path "pipeline found nothing" creates the null marker but no Occurrence. - test_captures_not_marked_processed_after_failure: asserts that when a downstream step (create_classifications) raises, the image without a real detection is left unmarked and filter_processed_images re-yields it. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent f4fb6f1 commit 4e33f96

2 files changed

Lines changed: 89 additions & 9 deletions

File tree

ami/ml/models/pipeline.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -952,15 +952,6 @@ def save_results(
952952
"Algorithms and category maps must be registered before processing, using /info endpoint."
953953
)
954954

955-
# Ensure all images have detections
956-
# if not, add a NULL detection (empty bbox) to the results
957-
null_detections = create_null_detections_for_undetected_images(
958-
results=results,
959-
detection_algorithm=detection_algorithm,
960-
logger=job_logger,
961-
)
962-
results.detections = results.detections + null_detections
963-
964955
detections = create_detections(
965956
detections=results.detections,
966957
algorithms_known=algorithms_known,
@@ -981,6 +972,22 @@ def save_results(
981972
logger=job_logger,
982973
)
983974

975+
# Mark images with no real detections as processed by creating null-bbox sentinels.
976+
# Issue #1310: must run AFTER the real-detection / classification / occurrence steps
977+
# so a failure earlier in the pipeline leaves the image unmarked (and therefore
978+
# re-processed by filter_processed_images on the next run). Null DetectionResponses
979+
# are kept out of the real-detection list so they bypass occurrence creation entirely.
980+
null_detection_responses = create_null_detections_for_undetected_images(
981+
results=results,
982+
detection_algorithm=detection_algorithm,
983+
logger=job_logger,
984+
)
985+
create_detections(
986+
detections=null_detection_responses,
987+
algorithms_known=algorithms_known,
988+
logger=job_logger,
989+
)
990+
984991
# Update precalculated counts on source images and events
985992
source_images = list(source_images)
986993
logger.info(f"Updating calculated fields for {len(source_images)} source images")

ami/ml/tests.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
Deployment,
1414
Detection,
1515
Event,
16+
Occurrence,
1617
Project,
1718
SourceImage,
1819
SourceImageCollection,
@@ -1024,6 +1025,78 @@ def test_null_detection_deduplication_same_pipeline(self):
10241025
null_detections = image.detections.filter(bbox__isnull=True)
10251026
self.assertEqual(null_detections.count(), 1, "Same pipeline should not create duplicate null detections")
10261027

1028+
def test_null_detection_does_not_create_phantom_occurrence(self):
1029+
"""
1030+
Issue #1310: a null detection (empty-bbox sentinel marking "image processed,
1031+
nothing found") must NOT spawn an Occurrence. Occurrences with no
1032+
determination and no real detections leak to the API as ghost rows.
1033+
"""
1034+
image = self.test_images[0]
1035+
results = self.fake_pipeline_results([image], self.pipeline)
1036+
results.detections = [] # pipeline found nothing
1037+
1038+
save_results(results)
1039+
1040+
null_dets = image.detections.filter(bbox__isnull=True)
1041+
self.assertEqual(null_dets.count(), 1, "Null marker should still be created")
1042+
self.assertIsNone(
1043+
null_dets.first().occurrence,
1044+
"Null detection must NOT be associated with an Occurrence",
1045+
)
1046+
# No phantom Occurrence in DB tied to this image at all
1047+
phantom_occs = Occurrence.objects.filter(detections__source_image=image, determination__isnull=True)
1048+
self.assertEqual(
1049+
phantom_occs.count(),
1050+
0,
1051+
"No Occurrence with NULL determination should exist for an image that had no detections",
1052+
)
1053+
1054+
def test_captures_not_marked_processed_after_failure(self):
1055+
"""
1056+
Issue #1310: null markers should only flag images as processed AFTER all
1057+
downstream save steps (classifications, occurrences) succeed. If any
1058+
downstream step raises, the image must remain unmarked so the next run
1059+
re-processes it.
1060+
1061+
Reproduces the field bug where 400 images ended up with null markers but
1062+
no real detections — created when null-creation ran ahead of a later step
1063+
that failed.
1064+
"""
1065+
from unittest.mock import patch
1066+
1067+
from ami.ml.models.pipeline import filter_processed_images
1068+
1069+
# Mix: image_with_real has a detection in the response, image_without_real does not.
1070+
# The without-real image is the one that would get a null marker.
1071+
image_with_real, image_without_real = self.test_images
1072+
results = self.fake_pipeline_results(self.test_images, self.pipeline)
1073+
# Trim detections to only the first image so the second qualifies for null-marker creation
1074+
results.detections = [d for d in results.detections if str(d.source_image_id) == str(image_with_real.pk)]
1075+
1076+
# Inject failure in a step that runs AFTER detection bulk_create
1077+
with patch(
1078+
"ami.ml.models.pipeline.create_classifications",
1079+
side_effect=RuntimeError("simulated classification failure"),
1080+
):
1081+
with self.assertRaises(RuntimeError):
1082+
save_results(results)
1083+
1084+
# The image with no real detection must NOT have a null marker —
1085+
# the run failed, so it should be re-tried.
1086+
null_dets = image_without_real.detections.filter(bbox__isnull=True)
1087+
self.assertEqual(
1088+
null_dets.count(),
1089+
0,
1090+
"Image without real detections must not be marked processed when downstream step fails",
1091+
)
1092+
# filter_processed_images should still yield it for the next run
1093+
retry_yield = list(filter_processed_images([image_without_real], self.pipeline))
1094+
self.assertEqual(
1095+
retry_yield,
1096+
[image_without_real],
1097+
"Image with failed run must be re-yielded for processing",
1098+
)
1099+
10271100

10281101
class TestAlgorithmCategoryMaps(TestCase):
10291102
def setUp(self):

0 commit comments

Comments
 (0)