RolnickLab
diff --git a/‎ami/main/api/serializers.py‎
Lines changed: 20 additions & 0 deletions b/‎ami/main/api/serializers.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎ami/main/api/views.py‎
Lines changed: 123 additions & 0 deletions b/‎ami/main/api/views.py‎
Lines changed: 123 additions & 0 deletions
diff --git a/‎ami/main/migrations/0085_taxon_parents_json_gin_index.py‎
Lines changed: 31 additions & 0 deletions b/‎ami/main/migrations/0085_taxon_parents_json_gin_index.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎ami/main/models.py‎
Lines changed: 12 additions & 0 deletions b/‎ami/main/models.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎ami/main/tests.py‎
Lines changed: 129 additions & 0 deletions b/‎ami/main/tests.py‎
Lines changed: 129 additions & 0 deletions
@@ -588,13 +588,27 @@ def get_taxa(self, obj):
         return [{"id": taxon.id, "name": taxon.name} for taxon in obj.taxa.all()]
 
 
+def agreement_requested(request: Request | None) -> bool:
+    """Report whether the caller opted in to the heavier ``agreed_exact_count``.
+
+    The opt-in is the ``with_agreement`` query parameter. It counts as set when its
+    value, compared case-insensitively, is ``true``, ``1``, ``yes`` or ``on``.
+    A missing request or a missing parameter means the caller did not opt in.
+    """
+    if request is not None:
+        raw_flag = request.query_params.get("with_agreement", "")
+        return str(raw_flag).lower() in ("true", "1", "yes", "on")
+    return False
+
+
 class TaxonListSerializer(DefaultSerializer):
     # latest_detection = DetectionNestedSerializer(read_only=True)
     occurrences = serializers.SerializerMethodField()
     parents = TaxonParentSerializer(many=True, read_only=True, source="parents_json")
     parent_id = serializers.PrimaryKeyRelatedField(queryset=Taxon.objects.all(), source="parent")
     tags = serializers.SerializerMethodField()
 
+    def __init__(self, *args, **kwargs):
+        """Build the serializer, dropping ``agreed_exact_count`` unless it was requested."""
+        super().__init__(*args, **kwargs)
+        request = self.context.get("request")
+        if agreement_requested(request):
+            # Caller passed with_agreement=true: keep the gated field.
+            return
+        # Gated annotation was not requested, so leave it out of the response.
+        self.fields.pop("agreed_exact_count", None)
+
     def get_tags(self, obj):
         tag_list = getattr(obj, "prefetched_tags", [])
         return TagSerializer(tag_list, many=True, context=self.context).data
@@ -609,6 +623,9 @@ class Meta:
             "parents",
             "details",
             "occurrences_count",
+            "verified_count",
+            "agreed_with_prediction_count",
+            "agreed_exact_count",
             "occurrences",
             "tags",
             "last_detected",
@@ -886,6 +903,9 @@ class Meta:
             "parents",
             "details",
             "occurrences_count",
+            "verified_count",
+            "agreed_with_prediction_count",
+            "agreed_exact_count",
             "events_count",
             "occurrences",
             "gbif_taxon_key",
 
@@ -37,6 +37,8 @@
 from ami.utils.storages import ConnectionTestResult
 
 from ..models import (
+    BEST_IDENTIFICATION_ORDER,
+    BEST_MACHINE_PREDICTION_ORDER,
     NULL_DETECTIONS_FILTER,
     Classification,
     Deployment,
@@ -1400,6 +1402,19 @@ def filter_queryset(self, request, queryset, view):
         return queryset.distinct()
 
 
+class JSONBContains(models.Func):
+    """Boolean expression rendering Postgres JSONB containment, ``(lhs @> rhs)``.
+
+    Needed for correlated subqueries where the right-hand side is built from an
+    ``OuterRef`` — the literal ``parents_json__contains=[{"id": ...}]`` lookup
+    can't embed an ``OuterRef`` (it would try to JSON-serialize the expression).
+
+    Takes exactly two expressions: the JSONB value being searched, then the JSONB
+    value it must contain.
+    """
+
+    # ``@>`` is a binary operator. Any other argument count would render SQL such
+    # as ``a @> b @> c``, so reject it when the expression is constructed.
+    arity = 2
+    # The operands are joined by the operator and wrapped in parentheses.
+    arg_joiner = " @> "
+    template = "(%(expressions)s)"
+    output_field = models.BooleanField()
+
+
 class TaxonViewSet(DefaultViewSet, ProjectMixin):
     """
     API endpoint that allows taxa to be viewed or edited.
@@ -1428,6 +1443,7 @@ class TaxonViewSet(DefaultViewSet, ProjectMixin):
         "created_at",
         "updated_at",
         "occurrences_count",
+        "verified_count",
         "last_detected",
         "best_determination_score",
         "name",
@@ -1654,6 +1670,113 @@ def get_taxa_observed(
             # Efficient EXISTS check that uses the composite index
             qs = qs.filter(models.Exists(Occurrence.objects.filter(base_filter)))
 
+        qs = self.add_verification_data(qs, occurrence_filters, default_filters_q)
+
+        return qs
+
+    def _occurrences_under_taxon(self, occurrence_filters: models.Q, default_filters_q: models.Q) -> QuerySet:
+        """
+        Build the correlated Occurrence queryset for the outer Taxon row.
+
+        An occurrence matches when its determination is the outer Taxon
+        (``OuterRef("id")``) or any of its descendants. The queryset is also
+        restricted by ``occurrence_filters`` and ``default_filters_q``.
+
+        Mirrors the hierarchical match used by the occurrence-list ``taxon=<id>`` filter
+        (``CustomOccurrenceDeterminationFilter``), but the descendant test is built with
+        an ``OuterRef`` right-hand side so a Family/Order row aggregates all its
+        descendant species' occurrences.
+        """
+        # SQL shape: jsonb_build_array(jsonb_build_object('id', <outer taxon id>))
+        outer_taxon_entry = models.Func(
+            models.Value("id"),
+            models.OuterRef("id"),
+            function="jsonb_build_object",
+        )
+        outer_taxon_as_parents = models.Func(
+            outer_taxon_entry,
+            function="jsonb_build_array",
+            output_field=models.JSONField(),
+        )
+        # True when the determination's parents_json contains the outer taxon's id.
+        is_descendant = JSONBContains(models.F("determination__parents_json"), outer_taxon_as_parents)
+        is_exact_or_descendant = models.Q(determination_id=models.OuterRef("id")) | models.Q(_under_taxon=True)
+
+        scoped_occurrences = Occurrence.objects.filter(occurrence_filters).filter(default_filters_q)
+        return scoped_occurrences.alias(_under_taxon=is_descendant).filter(is_exact_or_descendant)
+
+    def _include_agreement(self) -> bool:
+        """Whether the heavier ``agreed_exact_count`` annotation should be computed.
+
+        Always true for the ``retrieve`` action. For every other action it is true
+        only when the ``with_agreement`` query parameter is ``true``, ``1``, ``yes``
+        or ``on`` (case-insensitive) — the same spellings ``agreement_requested`` in
+        the serializers module accepts before it exposes the field. Keeping the two
+        in step avoids computing the annotation for a response that then drops it
+        (for example ``with_agreement=no``).
+        """
+        if self.action == "retrieve":
+            return True
+        flag = self.request.query_params.get("with_agreement", "")
+        return str(flag).lower() in ("true", "1", "yes", "on")
+
+    def add_verification_data(
+        self, qs: QuerySet, occurrence_filters: models.Q, default_filters_q: models.Q
+    ) -> QuerySet:
+        """
+        Annotate per-taxon verification and human/model agreement counts, and apply the
+        ``verified=true|false`` filter on list responses.
+
+        All counts roll up descendant occurrences via ``_occurrences_under_taxon`` and
+        respect the project's default filters (same ``apply_defaults`` handling as
+        ``occurrences_count``).
+
+        Annotations added to ``qs``:
+
+        - ``verified_count``: occurrences under the taxon that have at least one
+          non-withdrawn identification.
+        - ``agreed_with_prediction_count``: occurrences under the taxon whose first
+          non-withdrawn identification (ordered by ``BEST_IDENTIFICATION_ORDER``) has
+          ``agreed_with_prediction`` set.
+        - ``agreed_exact_count``: added only when ``_include_agreement()`` is true;
+          verified occurrences whose determination equals the taxon of the first
+          classification ordered by ``BEST_MACHINE_PREDICTION_ORDER``.
+
+        ``occurrence_filters`` and ``default_filters_q`` are passed straight through
+        to ``_occurrences_under_taxon``. Returns the annotated queryset, narrowed by
+        the ``verified`` query parameter when it is present on a ``list`` request.
+        """
+        under_taxon = self._occurrences_under_taxon(occurrence_filters, default_filters_q)
+
+        # "Verified" means at least one identification that has not been withdrawn.
+        has_identification = models.Exists(
+            Identification.objects.filter(occurrence=models.OuterRef("pk"), withdrawn=False)
+        )
+        verified_occurrences = under_taxon.filter(has_identification)
+
+        def correlated_count(occurrence_qs: QuerySet) -> Coalesce:
+            # Group by project_id (constant within the subquery) to collapse the
+            # hierarchical match — determination_id varies across descendants so it
+            # can't be the grouping key.
+            # Coalesce turns "no matching rows" (NULL subquery result) into 0.
+            return Coalesce(
+                models.Subquery(
+                    occurrence_qs.values("project_id").annotate(c=models.Count("id")).values("c")[:1],
+                    output_field=models.IntegerField(),
+                ),
+                0,
+            )
+
+        # The chosen (best, non-withdrawn) identification's agreed_with_prediction FK is set.
+        best_identification_agreed_prediction = models.Subquery(
+            Identification.objects.filter(occurrence=models.OuterRef("pk"), withdrawn=False)
+            .order_by(*BEST_IDENTIFICATION_ORDER)
+            .values("agreed_with_prediction_id")[:1]
+        )
+        # NULL here covers both "no identification" and "identification without an agreed prediction".
+        agreed_with_prediction_occurrences = under_taxon.annotate(
+            _best_agreed_prediction=best_identification_agreed_prediction
+        ).filter(_best_agreed_prediction__isnull=False)
+
+        qs = qs.annotate(
+            verified_count=correlated_count(verified_occurrences),
+            agreed_with_prediction_count=correlated_count(agreed_with_prediction_occurrences),
+        )
+
+        # Gated: this annotation needs an extra per-occurrence Classification subquery.
+        if self._include_agreement():
+            # Verified occurrence where the user determination equals the top machine
+            # prediction's taxon for the same occurrence.
+            best_machine_taxon = models.Subquery(
+                Classification.objects.filter(detection__occurrence=models.OuterRef("pk"))
+                .order_by(*BEST_MACHINE_PREDICTION_ORDER)
+                .values("taxon_id")[:1]
+            )
+            agreed_exact_occurrences = verified_occurrences.annotate(_best_machine_taxon=best_machine_taxon).filter(
+                determination_id=models.F("_best_machine_taxon")
+            )
+            qs = qs.annotate(agreed_exact_count=correlated_count(agreed_exact_occurrences))
+
+        # verified=true|false filter (list only); the complement uses the same set, so
+        # verified=false is the strict complement of verified=true on the filtered taxa.
+        if self.action == "list" and "verified" in self.request.query_params:
+            verified = BooleanField(required=False).clean(self.request.query_params.get("verified"))
+            if verified:
+                qs = qs.filter(models.Exists(verified_occurrences))
+            else:
+                qs = qs.filter(~models.Exists(verified_occurrences))
+
         return qs
 
     def attach_tags_by_project(self, qs: QuerySet, project: Project) -> QuerySet:
 
@@ -0,0 +1,31 @@
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """
+    Add a GIN index on Taxon.parents_json.
+
+    The index supports the hierarchical (descendant) rollup of the per-taxon
+    verification / agreement counts added for issue #1316. Without it, Family- and
+    Order-rank rows on large projects fall back to a seq-scan on the parents_json
+    containment (`@>`) test and dominate query time.
+
+    The migration is non-atomic because CREATE INDEX CONCURRENTLY can't run inside
+    a transaction. IF NOT EXISTS keeps it safe to co-exist with the same index if
+    it lands separately via the #1307 follow-up.
+    """
+
+    # Forward and reverse statements, named so the operation below reads as a pair.
+    _CREATE_INDEX_SQL = (
+        "CREATE INDEX CONCURRENTLY IF NOT EXISTS main_taxon_parents_json_gin_idx "
+        "ON main_taxon USING gin (parents_json jsonb_path_ops);"
+    )
+    _DROP_INDEX_SQL = "DROP INDEX CONCURRENTLY IF EXISTS main_taxon_parents_json_gin_idx;"
+
+    dependencies = [("main", "0084_revoke_delete_job_from_roles")]
+
+    atomic = False
+
+    operations = [migrations.RunSQL(sql=_CREATE_INDEX_SQL, reverse_sql=_DROP_INDEX_SQL)]
@@ -3811,6 +3811,18 @@ def best_determination_score(self) -> float | None:
         # This is handled by an annotation if we are filtering by project, deployment or event
         return None
 
+    def verified_count(self) -> int | None:
+        """Placeholder: the value is handled by an annotation when filtering by project.
+
+        See ``TaxonViewSet.add_verification_data``.
+        """
+        return None
+
+    def agreed_with_prediction_count(self) -> int | None:
+        """Placeholder: the value is handled by an annotation when filtering by project.
+
+        See ``TaxonViewSet.add_verification_data``.
+        """
+        return None
+
+    def agreed_exact_count(self) -> int | None:
+        """Placeholder: handled by an annotation only when with_agreement is requested
+        or on the detail view.
+        """
+        return None
+
     def occurrence_images(self, limit: int | None = 10) -> list[str]:
         # This is handled by an annotation if we are filtering by project, deployment or event
         return []
 
@@ -4761,3 +4761,132 @@ def test_registration_order_preserves_occurrence_retrieve(self):
         retrieve_response = self.client.get(f"/api/v2/occurrences/{occurrence.pk}/?project_id={self.project.pk}")
         self.assertEqual(stats_response.status_code, 200, "stats URL must resolve")
         self.assertEqual(retrieve_response.status_code, 200, "occurrence retrieve must still work")
+
+
+class TestTaxaVerification(APITestCase):
+    """Per-taxon verification + human/model agreement annotations and the verified filter (#1316).
+
+    Fixture built in ``setUp``: three occurrences created with taxon cardui and one
+    with itea. Each of the three then receives one identification from the test user:
+    an explicit "agree with prediction" (``occ_pred``), a same-taxon identification
+    without the agreement link (``occ_exact``), and an override to atalanta
+    (``occ_disagree``). The itea occurrence gets no identification.
+    """
+
+    def setUp(self):
+        self.project, self.deployment = setup_test_project(reuse=False)
+        self.taxa_list = create_taxa(self.project)
+        self.order = Taxon.objects.get(name="Lepidoptera")
+        self.family = Taxon.objects.get(name="Nymphalidae")
+        self.genus = Taxon.objects.get(name="Vanessa")
+        self.cardui = Taxon.objects.get(name="Vanessa cardui")
+        self.atalanta = Taxon.objects.get(name="Vanessa atalanta")
+        self.itea = Taxon.objects.get(name="Vanessa itea")
+
+        create_captures(deployment=self.deployment, num_nights=1, images_per_night=3)
+        # 3 occurrences ML-determined to cardui, 1 to itea (left unverified)
+        create_occurrences(deployment=self.deployment, num=3, taxon=self.cardui, determination_score=0.9)
+        create_occurrences(deployment=self.deployment, num=1, taxon=self.itea, determination_score=0.9)
+
+        self.user = User.objects.create_user(email="verifier@insectai.org", is_staff=True, is_superuser=True)
+        self.client.force_authenticate(user=self.user)
+
+        # Ordered by pk so the three roles below are assigned deterministically.
+        cardui_occ = list(Occurrence.objects.filter(project=self.project, determination=self.cardui).order_by("pk"))
+        self.assertEqual(len(cardui_occ), 3)
+        self.occ_pred, self.occ_exact, self.occ_disagree = cardui_occ
+
+        # occ_pred: user agrees with the model prediction (cardui), agreed_with_prediction set
+        Identification.objects.create(
+            occurrence=self.occ_pred,
+            taxon=self.cardui,
+            user=self.user,
+            agreed_with_prediction=self.occ_pred.best_prediction,
+        )
+        # occ_exact: same taxon as the model, but not via the "agree" workflow
+        Identification.objects.create(occurrence=self.occ_exact, taxon=self.cardui, user=self.user)
+        # occ_disagree: user overrides to a different taxon (atalanta) than the model (cardui)
+        Identification.objects.create(occurrence=self.occ_disagree, taxon=self.atalanta, user=self.user)
+
+        self.itea_occ = Occurrence.objects.get(project=self.project, determination=self.itea)
+        # Large limit so every taxon fits on one page of the list response.
+        self.list_url = f"/api/v2/taxa/?project_id={self.project.pk}&limit=1000"
+
+    def _detail(self, taxon):
+        """GET the project-scoped detail endpoint for ``taxon`` and return the JSON body."""
+        res = self.client.get(f"/api/v2/taxa/{taxon.pk}/?project_id={self.project.pk}")
+        self.assertEqual(res.status_code, status.HTTP_200_OK)
+        return res.json()
+
+    def _list_by_name(self, url=None):
+        """GET the taxa list (``url`` or the default list URL) and index the rows by name."""
+        res = self.client.get(url or self.list_url)
+        self.assertEqual(res.status_code, status.HTTP_200_OK)
+        return {row["name"]: row for row in res.json()["results"]}
+
+    # --- verified_count (hierarchical rollup) ---
+
+    def test_verified_count_species(self):
+        # NOTE(review): expects occ_disagree to count under atalanta, i.e. the override
+        # identification re-determines the occurrence — confirm against Identification.save.
+        self.assertEqual(self._detail(self.cardui)["verified_count"], 2)
+        self.assertEqual(self._detail(self.atalanta)["verified_count"], 1)
+        self.assertEqual(self._detail(self.itea)["verified_count"], 0)
+
+    def test_verified_count_rolls_up_to_ancestors(self):
+        # Verifying species marks genus/family/order verified, occurrence-weighted by descendants.
+        for ancestor in (self.genus, self.family, self.order):
+            self.assertEqual(self._detail(ancestor)["verified_count"], 3, ancestor.name)
+
+    # --- agreed_with_prediction_count (chosen identification only) ---
+
+    def test_agreed_with_prediction_counts_only_chosen_identification(self):
+        self.assertEqual(self._detail(self.cardui)["agreed_with_prediction_count"], 1)
+        self.assertEqual(self._detail(self.atalanta)["agreed_with_prediction_count"], 0)
+        # Rolls up: only occ_pred contributes under the genus.
+        self.assertEqual(self._detail(self.genus)["agreed_with_prediction_count"], 1)
+
+    # --- agreed_exact_count (gated) ---
+
+    def test_agreed_exact_count_on_detail(self):
+        # occ_pred + occ_exact: user determination == top machine prediction (cardui).
+        self.assertEqual(self._detail(self.cardui)["agreed_exact_count"], 2)
+        # occ_disagree: user picked atalanta, model said cardui → not exact.
+        self.assertEqual(self._detail(self.atalanta)["agreed_exact_count"], 0)
+        self.assertEqual(self._detail(self.genus)["agreed_exact_count"], 2)
+
+    def test_agreed_exact_count_gated_on_list(self):
+        # Without with_agreement the gated field is absent from list rows.
+        rows = self._list_by_name()
+        self.assertIn("verified_count", rows["Vanessa cardui"])
+        self.assertIn("agreed_with_prediction_count", rows["Vanessa cardui"])
+        self.assertNotIn("agreed_exact_count", rows["Vanessa cardui"])
+
+        # With with_agreement=true the field is present and populated.
+        rows = self._list_by_name(self.list_url + "&with_agreement=true")
+        self.assertIn("agreed_exact_count", rows["Vanessa cardui"])
+        self.assertEqual(rows["Vanessa cardui"]["agreed_exact_count"], 2)
+
+    # --- list field values ---
+
+    def test_list_field_values(self):
+        rows = self._list_by_name()
+        # NOTE(review): 2 rather than 3 — presumably occ_disagree now belongs to atalanta; confirm.
+        self.assertEqual(rows["Vanessa cardui"]["occurrences_count"], 2)
+        self.assertEqual(rows["Vanessa cardui"]["verified_count"], 2)
+        self.assertEqual(rows["Vanessa cardui"]["agreed_with_prediction_count"], 1)
+        self.assertEqual(rows["Vanessa atalanta"]["verified_count"], 1)
+        self.assertEqual(rows["Vanessa itea"]["verified_count"], 0)
+
+    # --- verified=true|false filter ---
+
+    def test_verified_filter_true_false_complement(self):
+        all_names = set(self._list_by_name().keys())
+        verified = set(self._list_by_name(self.list_url + "&verified=true").keys())
+        unverified = set(self._list_by_name(self.list_url + "&verified=false").keys())
+        self.assertEqual(verified, {"Vanessa cardui", "Vanessa atalanta"})
+        self.assertEqual(unverified, {"Vanessa itea"})
+        # verified=false is the strict complement of verified=true on the filtered set.
+        self.assertEqual(verified | unverified, all_names)
+        self.assertEqual(verified & unverified, set())
+
+    def test_ordering_by_verified_count(self):
+        res = self.client.get(self.list_url + "&ordering=verified_count")
+        self.assertEqual(res.status_code, status.HTTP_200_OK)
+        counts = [row["verified_count"] for row in res.json()["results"]]
+        # Ascending ordering: the returned sequence must already be sorted.
+        self.assertEqual(counts, sorted(counts))
+
+    # --- apply_defaults handling ---
+
+    def test_verified_filter_respects_apply_defaults(self):
+        # Excluding atalanta through the project's default filters hides it from the list...
+        self.project.default_filters_exclude_taxa.add(self.atalanta)
+
+        verified_default = set(self._list_by_name(self.list_url + "&verified=true").keys())
+        self.assertEqual(verified_default, {"Vanessa cardui"})
+
+        # ...unless the request bypasses the defaults with apply_defaults=false.
+        verified_bypassed = set(self._list_by_name(self.list_url + "&verified=true&apply_defaults=false").keys())
+        self.assertEqual(verified_bypassed, {"Vanessa cardui", "Vanessa atalanta"})