refactor(occurrence-stats): rename to model-agreement + push aggregation to SQL

mihow · claude · mihow · commit ea1e498378f0 · 2026-05-14T23:41:17.000-07:00
Addresses review feedback on PR #1307:

Rename (drop "human"):
- URL: /occurrences/stats/human-model-agreement/ -> /occurrences/stats/model-agreement/
- Function: human_model_agreement_for_project -> model_agreement_for_project
- Serializer: HumanModelAgreementSerializer -> ModelAgreementSerializer
- Viewset action + url_path: human_model_agreement -> model_agreement
- FE hook: useHumanModelAgreement -> useModelAgreement (file + symbol)
- FE type: Response -> ModelAgreementResponse (fixes DOM Response shadow)
- Test class: TestHumanModelAgreementForProject -> TestModelAgreementForProject

SQL push-down (Copilot+CodeRabbit perf flag):
- Replace list(qs) full-row materialization with annotated aggregate().
- Annotate best_user_taxon_id via Subquery over Identification (BEST_IDENTIFICATION_ORDER). Drop the prefetch + select_related("taxon") on identifications since only taxon_id is read.
- aggregate() Count(filter=Q(...)) for total/verified/exact/no-prediction.
- For under-order disagreement: group disagreement set by distinct (user_taxon, machine_taxon) pair before LCA. Each pair's LCA runs once.
- Bench against project 18 (43,149 occurrences): pre-rework apply_defaults=false curl timed out at 159s; post-rework 1.96s unfiltered / 3.4s with bypass (93,019 occurrences post-filter).

Denominator fix (Copilot):
- agreed_*_pct now divides by verified_with_prediction_count instead of verified_count. A verified occurrence with no machine prediction can't agree or disagree; including it in the denominator drags the rate down without representing actual model disagreement.
- Surface no_prediction_count + verified_with_prediction_count as sibling fields so consumers can see how many such occurrences exist.

UNKNOWN rank bug (Copilot):
- TaxonRank.UNKNOWN sorts after SPECIES in OrderedEnum definition order, so without explicit exclusion UNKNOWN >= ORDER is True and a shared UNKNOWN ancestor would wrongly count as under-order agreement. Filter UNKNOWN out of lca_rank_between's candidate ranks. Add regression test.

Tests:
- New: test_unknown_rank_excluded_from_lca (LCA regression)
- New: test_agreement_under_order_bucket (HTTP coverage for sister-species case, previously only exact-match shortcut was exercised)
- Updated: happy-path asserts verified_with_prediction_count and no_prediction_count.

22/22 backend tests green:
docker compose exec django python manage.py test ami.main.tests.TestLcaRankBetween ami.main.tests.TestModelAgreementForProject ami.main.tests.TestOccurrenceStatsViewSet

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py
@@ -1751,19 +1751,34 @@ class TopIdentifiersResponseSerializer(serializers.Serializer):
     top_identifiers = UserIdentificationCountSerializer(many=True)
 
 
-class HumanModelAgreementSerializer(serializers.Serializer):
+class ModelAgreementSerializer(serializers.Serializer):
     """Verified / agreement rates over the filtered Occurrence set.
 
     `agreed_exact_count` is a subset of `agreed_under_order_count` by
     construction — an exact match implies an LCA at SPECIES, which is
     deeper than ORDER. `*_pct` percentages are 0.0..1.0 (not 0..100).
+
+    Denominator note: `agreed_*_pct` divide by `verified_with_prediction_count`
+    (verified occurrences that *also* have a machine prediction), NOT by
+    `verified_count`. A verified occurrence with no machine prediction can't
+    agree or disagree — including it in the denominator would drag the rate
+    down without representing actual model disagreement. `no_prediction_count`
+    is surfaced so the consumer can see how many such occurrences exist.
     """
 
     project_id = serializers.IntegerField()
     total_occurrences = serializers.IntegerField()
-    verified_count = serializers.IntegerField()
+    verified_count = serializers.IntegerField(help_text="Occurrences with at least one non-withdrawn identification.")
     verified_pct = serializers.FloatField(help_text="verified_count / total_occurrences")
+    verified_with_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences that also have a machine prediction (denominator for agreed_*_pct)."
+    )
+    no_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences with no machine prediction (excluded from agreement denominator)."
+    )
     agreed_exact_count = serializers.IntegerField()
-    agreed_exact_pct = serializers.FloatField(help_text="agreed_exact_count / verified_count")
+    agreed_exact_pct = serializers.FloatField(help_text="agreed_exact_count / verified_with_prediction_count")
     agreed_under_order_count = serializers.IntegerField()
-    agreed_under_order_pct = serializers.FloatField(help_text="agreed_under_order_count / verified_count")
+    agreed_under_order_pct = serializers.FloatField(
+        help_text="agreed_under_order_count / verified_with_prediction_count"
+    )
diff --git a/ami/main/api/views.py b/ami/main/api/views.py
@@ -32,7 +32,7 @@
 from ami.base.views import ProjectMixin
 from ami.main.api.schemas import limit_doc_param, project_id_doc_param
 from ami.main.api.serializers import TagSerializer
-from ami.main.models_future.occurrence import human_model_agreement_for_project, top_identifiers_for_project
+from ami.main.models_future.occurrence import model_agreement_for_project, top_identifiers_for_project
 from ami.utils.requests import get_default_classification_threshold
 from ami.utils.storages import ConnectionTestResult
 
@@ -71,8 +71,8 @@
     EventListSerializer,
     EventSerializer,
     EventTimelineSerializer,
-    HumanModelAgreementSerializer,
     IdentificationSerializer,
+    ModelAgreementSerializer,
     OccurrenceListSerializer,
     OccurrenceSerializer,
     PageListSerializer,
@@ -1332,10 +1332,10 @@ def top_identifiers(self, request):
 
     @extend_schema(
         parameters=[project_id_doc_param],
-        responses=HumanModelAgreementSerializer,
+        responses=ModelAgreementSerializer,
     )
-    @action(detail=False, methods=["get"], url_path="human-model-agreement")
-    def human_model_agreement(self, request):
+    @action(detail=False, methods=["get"], url_path="model-agreement")
+    def model_agreement(self, request):
         """Verified / human↔model agreement rates over the filtered occurrence set.
 
         Accepts every query param the `/occurrences/` list endpoint accepts.
@@ -1349,9 +1349,9 @@ def human_model_agreement(self, request):
 
         base_qs = Occurrence.objects.filter(project=project).valid().apply_default_filters(project, request)
         filtered_qs = self.filter_queryset(base_qs)
-        payload = human_model_agreement_for_project(filtered_qs)
+        payload = model_agreement_for_project(filtered_qs)
         payload["project_id"] = project.pk
-        return Response(HumanModelAgreementSerializer(payload, context={"request": request}).data)
+        return Response(ModelAgreementSerializer(payload, context={"request": request}).data)
 
 
 class TaxonTaxaListFilter(filters.BaseFilterBackend):
diff --git a/ami/main/models_future/occurrence.py b/ami/main/models_future/occurrence.py
@@ -12,7 +12,7 @@
 
 from typing import TYPE_CHECKING
 
-from django.db.models import Count, Prefetch, Q, QuerySet
+from django.db.models import Count, F, OuterRef, Prefetch, Q, QuerySet, Subquery
 
 from ami.main.models import Project, TaxonRank, User
 
@@ -30,14 +30,20 @@ def lca_rank_between(a: TaxonTuple, b: TaxonTuple) -> TaxonRank | None:
 
     The taxon itself counts as part of its own ancestor chain — passing the
     same taxon twice returns that taxon's rank. Returns ``None`` when the two
-    chains share no ancestor (e.g. one has an empty parents_json and the other
-    doesn't include it).
+    chains share no ancestor at a real taxonomic rank.
+
+    ``TaxonRank.UNKNOWN`` is excluded from the candidate set even though it
+    sorts after SPECIES in OrderedEnum definition order — it isn't a real
+    taxonomic rank and treating it as deeper-than-ORDER produces false
+    under-order agreements when an UNKNOWN ancestor happens to be shared.
     """
     chain_a = [(p["id"], TaxonRank(p["rank"])) for p in a[2]] + [(a[0], TaxonRank(a[1]))]
     chain_b_ids = {p["id"] for p in b[2]} | {b[0]}
 
     deepest: TaxonRank | None = None
     for tid, rank in chain_a:
+        if rank == TaxonRank.UNKNOWN:
+            continue
         if tid in chain_b_ids:
             if deepest is None or rank > deepest:
                 deepest = rank
@@ -157,74 +163,83 @@ def detection_image_urls_from_prefetch(occurrence: Occurrence, limit: int | None
     return [get_media_url(det.path) for det in detections]
 
 
-def human_model_agreement_for_project(queryset: QuerySet[Occurrence]) -> dict:
+def model_agreement_for_project(queryset: QuerySet[Occurrence]) -> dict:
     """Verified / agreement stats over a pre-filtered Occurrence queryset.
 
     The queryset MUST already be filtered to the project + user-supplied
     filters (caller wires apply_default_filters + OccurrenceFilter). This
-    function adds the prefetches/annotations it needs and returns a dict
-    matching HumanModelAgreementSerializer's field set (without project_id —
-    the view layer adds that).
+    function adds the annotations it needs and returns a dict matching
+    ModelAgreementSerializer's field set (without project_id — the view
+    layer adds that).
 
     "Verified" means the occurrence has at least one non-withdrawn
     Identification. "Model prediction" means the Classification chosen by
     BEST_MACHINE_PREDICTION_ORDER. "Under-order" agreement means the user's
     taxon and the model's prediction share an ancestor at rank >= ORDER
     (inclusive of ORDER itself).
+
+    Aggregation is SQL-side. Only the disagreement set (verified occurrences
+    whose user taxon id differs from the machine-predicted taxon id) reaches
+    Python, already grouped in SQL into distinct (user_taxon, machine_taxon)
+    pairs so LCA runs once per pair, not once per occurrence.
     """
-    from ami.main.models import Identification, Taxon
+    from ami.main.models import BEST_IDENTIFICATION_ORDER, Identification, Taxon
 
-    qs = queryset.with_best_machine_prediction().prefetch_related(  # type: ignore[attr-defined]
-        Prefetch(
-            "identifications",
-            queryset=Identification.objects.filter(withdrawn=False)
-            .select_related("taxon")
-            .order_by("-created_at", "-pk"),
-            to_attr="_non_withdrawn_idents",
-        )
+    best_user_ident = Identification.objects.filter(occurrence=OuterRef("pk"), withdrawn=False).order_by(
+        *BEST_IDENTIFICATION_ORDER
+    )
+
+    qs = queryset.with_best_machine_prediction().annotate(  # type: ignore[attr-defined]
+        best_user_taxon_id=Subquery(best_user_ident.values("taxon_id")[:1]),
     )
 
-    occurrences = list(qs)
+    verified_q = Q(best_user_taxon_id__isnull=False)
+    has_pred_q = Q(best_machine_prediction_taxon_id__isnull=False)
+    exact_q = verified_q & has_pred_q & Q(best_user_taxon_id=F("best_machine_prediction_taxon_id"))
 
-    needed_taxa_ids: set[int] = set()
-    for occ in occurrences:
-        machine_id = getattr(occ, "best_machine_prediction_taxon_id", None)
-        if machine_id:
-            needed_taxa_ids.add(machine_id)
-        idents = getattr(occ, "_non_withdrawn_idents", [])
-        if idents:
-            needed_taxa_ids.add(idents[0].taxon_id)
+    aggregates = qs.aggregate(
+        total_occurrences=Count("pk"),
+        verified_count=Count("pk", filter=verified_q),
+        verified_with_prediction_count=Count("pk", filter=verified_q & has_pred_q),
+        no_prediction_count=Count("pk", filter=verified_q & ~has_pred_q),
+        agreed_exact_count=Count("pk", filter=exact_q),
+    )
+
+    # Under-order: only the disagreement set hits Python, grouped by distinct
+    # (user_taxon, machine_taxon) pair so each pair's LCA is computed once.
+    disagreement_pairs = (
+        qs.filter(verified_q & has_pred_q)
+        .exclude(best_user_taxon_id=F("best_machine_prediction_taxon_id"))
+        .values("best_user_taxon_id", "best_machine_prediction_taxon_id")
+        .annotate(occurrence_count=Count("pk"))
+    )
+
+    pairs = list(disagreement_pairs)
+    needed_taxa_ids = {p["best_user_taxon_id"] for p in pairs} | {p["best_machine_prediction_taxon_id"] for p in pairs}
 
     taxa_by_id: dict[int, TaxonTuple] = {}
-    for t in Taxon.objects.filter(pk__in=needed_taxa_ids):
-        parents = [{"id": p.id, "rank": p.rank.name if hasattr(p.rank, "name") else p.rank} for p in t.parents_json]
-        taxa_by_id[t.pk] = (t.pk, t.rank, parents)
-
-    total = len(occurrences)
-    verified = 0
-    agreed_exact = 0
-    agreed_under_order = 0
-
-    for occ in occurrences:
-        idents = getattr(occ, "_non_withdrawn_idents", [])
-        if not idents:
-            continue
-        verified += 1
-        user_taxon_id = idents[0].taxon_id
-        machine_taxon_id = getattr(occ, "best_machine_prediction_taxon_id", None)
-        if not machine_taxon_id or not user_taxon_id:
+    if needed_taxa_ids:
+        for t in Taxon.objects.filter(pk__in=needed_taxa_ids):
+            parents = [
+                {"id": p.id, "rank": p.rank.name if hasattr(p.rank, "name") else p.rank} for p in t.parents_json
+            ]
+            taxa_by_id[t.pk] = (t.pk, t.rank, parents)
+
+    under_order_disagreement_count = 0
+    for pair in pairs:
+        u = taxa_by_id.get(pair["best_user_taxon_id"])
+        m = taxa_by_id.get(pair["best_machine_prediction_taxon_id"])
+        if not u or not m:
             continue
-        if user_taxon_id == machine_taxon_id:
-            agreed_exact += 1
-            agreed_under_order += 1
-            continue
-        user_tuple = taxa_by_id.get(user_taxon_id)
-        machine_tuple = taxa_by_id.get(machine_taxon_id)
-        if not user_tuple or not machine_tuple:
-            continue
-        lca = lca_rank_between(user_tuple, machine_tuple)
+        lca = lca_rank_between(u, m)
         if lca is not None and lca >= TaxonRank.ORDER:
-            agreed_under_order += 1
+            under_order_disagreement_count += pair["occurrence_count"]
+
+    agreed_exact = aggregates["agreed_exact_count"]
+    agreed_under_order = agreed_exact + under_order_disagreement_count
+    total = aggregates["total_occurrences"]
+    verified = aggregates["verified_count"]
+    verified_with_pred = aggregates["verified_with_prediction_count"]
 
     def _pct(num: int, denom: int) -> float:
         return round(num / denom, 4) if denom else 0.0
@@ -233,10 +248,12 @@ def _pct(num: int, denom: int) -> float:
         "total_occurrences": total,
         "verified_count": verified,
         "verified_pct": _pct(verified, total),
+        "verified_with_prediction_count": verified_with_pred,
+        "no_prediction_count": aggregates["no_prediction_count"],
         "agreed_exact_count": agreed_exact,
-        "agreed_exact_pct": _pct(agreed_exact, verified),
+        "agreed_exact_pct": _pct(agreed_exact, verified_with_pred),
         "agreed_under_order_count": agreed_under_order,
-        "agreed_under_order_pct": _pct(agreed_under_order, verified),
+        "agreed_under_order_pct": _pct(agreed_under_order, verified_with_pred),
     }
 
 
diff --git a/ami/main/tests.py b/ami/main/tests.py
@@ -4771,8 +4771,36 @@ def test_no_shared_ancestor_returns_none(self):
         rank = lca_rank_between(rootless, self.SPECIES_NOCTUA_PRONUBA)
         self.assertIsNone(rank)
 
+    def test_unknown_rank_excluded_from_lca(self):
+        """TaxonRank.UNKNOWN sorts after SPECIES in OrderedEnum definition order,
+        so without explicit exclusion `UNKNOWN >= ORDER` would be True and a
+        shared UNKNOWN ancestor would wrongly count as under-order agreement.
+        """
+        from ami.main.models_future.occurrence import lca_rank_between
+
+        # Both chains share a KINGDOM ancestor and an UNKNOWN ancestor; the LCA
+        # at a real taxonomic rank is KINGDOM, not UNKNOWN.
+        unknown_a = (
+            701,
+            "SPECIES",
+            [
+                {"id": 1, "rank": "KINGDOM"},
+                {"id": 999, "rank": "UNKNOWN"},
+            ],
+        )
+        unknown_b = (
+            702,
+            "SPECIES",
+            [
+                {"id": 1, "rank": "KINGDOM"},
+                {"id": 999, "rank": "UNKNOWN"},
+            ],
+        )
+        rank = lca_rank_between(unknown_a, unknown_b)
+        self.assertEqual(rank, TaxonRank.KINGDOM)
 
-class TestHumanModelAgreementForProject(APITestCase):
+
+class TestModelAgreementForProject(APITestCase):
     """Aggregation function over a filtered Occurrence queryset.
 
     Covers the four bucket transitions: unverified, verified+exact-agreed,
@@ -4806,18 +4834,18 @@ def _identify(self, occurrence: Occurrence, taxon: Taxon) -> Identification:
         return Identification.objects.create(user=self.user, occurrence=occurrence, taxon=taxon)
 
     def test_empty_project_returns_zeros_not_nans(self):
-        from ami.main.models_future.occurrence import human_model_agreement_for_project
+        from ami.main.models_future.occurrence import model_agreement_for_project
 
         empty_project = Project.objects.create(name="empty")
-        result = human_model_agreement_for_project(Occurrence.objects.filter(project=empty_project))
+        result = model_agreement_for_project(Occurrence.objects.filter(project=empty_project))
         self.assertEqual(result["total_occurrences"], 0)
         self.assertEqual(result["verified_count"], 0)
         self.assertEqual(result["verified_pct"], 0.0)
         self.assertEqual(result["agreed_exact_pct"], 0.0)
         self.assertEqual(result["agreed_under_order_pct"], 0.0)
 
     def test_buckets_canonical_cases(self):
-        from ami.main.models_future.occurrence import human_model_agreement_for_project
+        from ami.main.models_future.occurrence import model_agreement_for_project
 
         occurrences = list(Occurrence.objects.filter(project=self.project).order_by("pk"))
         self.assertEqual(len(occurrences), 4)
@@ -4829,7 +4857,7 @@ def test_buckets_canonical_cases(self):
         self._identify(occurrences[2], self.pieris_brassicae)
         # 3: unverified
 
-        result = human_model_agreement_for_project(Occurrence.objects.filter(project=self.project))
+        result = model_agreement_for_project(Occurrence.objects.filter(project=self.project))
         self.assertEqual(result["total_occurrences"], 4)
         self.assertEqual(result["verified_count"], 3)
         self.assertEqual(result["agreed_exact_count"], 1)
@@ -4922,9 +4950,9 @@ def test_registration_order_preserves_occurrence_retrieve(self):
         self.assertEqual(stats_response.status_code, 200, "stats URL must resolve")
         self.assertEqual(retrieve_response.status_code, 200, "occurrence retrieve must still work")
 
-    # ----- /occurrences/stats/human-model-agreement/ -----
+    # ----- /occurrences/stats/model-agreement/ -----
 
-    agreement_url = "/api/v2/occurrences/stats/human-model-agreement/"
+    agreement_url = "/api/v2/occurrences/stats/model-agreement/"
 
     def test_agreement_no_project_id_returns_400(self):
         response = self.client.get(self.agreement_url)
@@ -4965,9 +4993,42 @@ def test_agreement_happy_path(self):
         body = response.json()
         self.assertEqual(body["total_occurrences"], 4)
         self.assertEqual(body["verified_count"], 1)
+        self.assertEqual(body["verified_with_prediction_count"], 1)
+        self.assertEqual(body["no_prediction_count"], 0)
         self.assertEqual(body["agreed_exact_count"], 1)
         self.assertEqual(body["agreed_under_order_count"], 1)
 
+    def test_agreement_under_order_bucket(self):
+        """Disagreement at species but same genus → counted under-order, not exact.
+
+        Pick the machine prediction's sister species (same parent genus) for the
+        identification. LCA between the two species is GENUS, which is >= ORDER,
+        so the occurrence falls into the under-order bucket without contributing
+        to agreed_exact_count.
+        """
+        occurrence = Occurrence.objects.filter(project=self.project).order_by("pk").first()
+        machine_taxon = occurrence.detections.first().classifications.first().taxon
+        # Sister species: same parent (genus Vanessa), different SPECIES.
+        sister = (
+            Taxon.objects.filter(parent=machine_taxon.parent, rank=TaxonRank.SPECIES.name)
+            .exclude(pk=machine_taxon.pk)
+            .first()
+        )
+        self.assertIsNotNone(sister, "Test fixture must have a sister species under the same genus")
+        Taxon.objects.update_all_parents()
+        Identification.objects.create(user=self.alice, occurrence=occurrence, taxon=sister)
+
+        response = self.client.get(f"{self.agreement_url}?project_id={self.project.pk}")
+        self.assertEqual(response.status_code, 200)
+        body = response.json()
+        self.assertEqual(body["verified_count"], 1)
+        self.assertEqual(body["verified_with_prediction_count"], 1)
+        self.assertEqual(body["agreed_exact_count"], 0)
+        self.assertEqual(body["agreed_under_order_count"], 1)
+        # 0/1 exact, 1/1 under-order
+        self.assertEqual(body["agreed_exact_pct"], 0.0)
+        self.assertEqual(body["agreed_under_order_pct"], 1.0)
+
     def test_agreement_filter_passthrough(self):
         """`?deployment=` should narrow the set."""
         other_deployment = Deployment.objects.create(name="other", project=self.project)
diff --git a/docs/claude/reference/api-stats-pattern.md b/docs/claude/reference/api-stats-pattern.md
@@ -232,7 +232,7 @@ into pagination only if the kind genuinely needs it):
 
 - `GET /occurrences/stats/top-identifiers/` — done (this PR)
 - `GET /occurrences/stats/identifications-summary/` — total / distinct / verified counts
-- `GET /occurrences/stats/human-model-agreement/` — model agreement rate
+- `GET /occurrences/stats/model-agreement/` — model agreement rate
 - `GET /occurrences/stats/identifications-by-species/` — per-taxon ID counts
 - `GET /occurrences/stats/timeline/` — Plotly-shaped time series
 - `GET /deployments/stats/processed-images/` — processed images per station
diff --git a/ui/src/data-services/hooks/occurrences/stats/useModelAgreement.ts b/ui/src/data-services/hooks/occurrences/stats/useModelAgreement.ts