RolnickLab · mihow · May 29, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/ami/base/metadata.py b/ami/base/metadata.py
@@ -0,0 +1,39 @@
+"""Metadata classes for DRF OPTIONS responses.
+
+DRF's default ``SimpleMetadata`` only emits serializer field schema under
+``actions.POST`` / ``actions.PUT`` — write operations. Read-only endpoints
+(stats actions, scalar aggregates) return just ``name`` + ``description`` on
+OPTIONS, so their ``help_text=`` annotations on response serializers are
+invisible to clients.
+
+``ResponseSchemaMetadata`` adds ``actions.GET`` carrying the response
+serializer's field info (``type``, ``label``, ``help_text``, etc.), so
+frontends can fetch one OPTIONS request per stats endpoint and render
+tooltips / labels from the field metadata without hardcoding copy.
+"""
+
+from __future__ import annotations
+
+import typing
+
+from rest_framework.metadata import SimpleMetadata
+
+
+class ResponseSchemaMetadata(SimpleMetadata):
+    """``SimpleMetadata`` that also publishes the GET response schema.
+
+    The response serializer's field info is exposed under ``actions.GET``.
+    If the view has no ``get_serializer`` or calling it raises, the OPTIONS
+    body is returned exactly as ``SimpleMetadata`` built it.
+    """
+
+    def determine_metadata(self, request, view) -> dict[str, typing.Any]:
+        body = super().determine_metadata(request, view)
+        if "GET" not in view.allowed_methods or not hasattr(view, "get_serializer"):
+            return body
+        try:
+            response_serializer = view.get_serializer()
+        except Exception:  # best-effort: any failure keeps the default body
+            return body
+        body.setdefault("actions", {}).setdefault("GET", self.get_serializer_info(response_serializer))
+        return body
diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py
@@ -1751,3 +1751,129 @@ class TopIdentifiersResponseSerializer(serializers.Serializer):
 
     project_id = serializers.IntegerField()
     top_identifiers = UserIdentificationCountSerializer(many=True)
+
+
+class ModelAgreementSerializer(serializers.Serializer):
+    """Verified / agreement rates over the filtered Occurrence set.
+
+    `agreed_exact_count` is a subset of `agreed_any_rank_count` by
+    construction — an exact match implies the LCA is the taxon itself.
+    `*_pct` percentages are 0.0..1.0 (not 0..100).
+
+    Denominator note: `agreed_*_pct` divide by `comparable_count` — verified
+    occurrences that have BOTH a machine prediction and a human taxon, NOT by
+    `verified_count`. Two kinds of verified occurrence are excluded because they
+    can't agree or disagree: those with no machine prediction (`no_prediction_count`)
+    and those whose human identification has no taxon, e.g. a comment-only
+    verification (`verified_without_taxon_count`). Both are surfaced so the
+    consumer can see why `comparable_count` differs from `verified_count`.
+
+    Optional rank threshold: when the caller passes
+    `?agreement_coarsest_rank=FAMILY`, the response also includes
+    `agreed_coarser_rank_*` counting only LCAs at that rank or deeper. The
+    threshold rank is echoed in `agreement_coarsest_rank`. When the param is
+    absent, the coarser-rank fields are null and `agreement_coarsest_rank`
+    is null.
+    """
+
+    project_id = serializers.IntegerField()
+    total_occurrences = serializers.IntegerField()
+    verified_count = serializers.IntegerField(help_text="Occurrences with at least one non-withdrawn identification.")
+    verified_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="verified_count / total_occurrences",
+    )
+    verified_with_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences that also have a machine prediction."
+    )
+    no_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences with no machine prediction (excluded from agreement denominator)."
+    )
+    verified_without_taxon_count = serializers.IntegerField(
+        help_text=(
+            "Verified occurrences that have a machine prediction but no human taxon "
+            "(e.g. comment-only identification). Excluded from the agreement denominator "
+            "since there is no human label to compare."
+        )
+    )
+    comparable_count = serializers.IntegerField(
+        help_text=(
+            "Verified occurrences with BOTH a machine prediction and a human taxon — the "
+            "denominator for all agreed_*_pct and the Wilson CIs. Equals "
+            "verified_with_prediction_count minus verified_without_taxon_count."
+        )
+    )
+    agreed_exact_count = serializers.IntegerField()
+    agreed_exact_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="agreed_exact_count / comparable_count",
+    )
+    agreed_exact_ci_low = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI lower bound for agreed_exact_pct. Null when comparable_count is 0.",
+    )
+    agreed_exact_ci_high = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI upper bound for agreed_exact_pct. Null when comparable_count is 0.",
+    )
+    agreed_any_rank_count = serializers.IntegerField(
+        help_text="Exact matches plus disagreements whose LCA is at any real rank (UNKNOWN excluded)."
+    )
+    agreed_any_rank_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="agreed_any_rank_count / comparable_count",
+    )
+    agreed_any_rank_ci_low = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI lower bound for agreed_any_rank_pct. Null when comparable_count is 0.",
+    )
+    agreed_any_rank_ci_high = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI upper bound for agreed_any_rank_pct. Null when comparable_count is 0.",
+    )
+    cohens_kappa = serializers.FloatField(
+        min_value=-1.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text=(
+            "Cohen's kappa (exact-taxon) — human↔model agreement beyond chance. "
+            "Range [-1, 1]; negative is worse than chance. Null when there are no "
+            "doubly-classified occurrences or expected agreement is 1.0."
+        ),
+    )
+    agreement_coarsest_rank = serializers.CharField(
+        allow_null=True,
+        required=False,
+        help_text="Threshold rank from ?agreement_coarsest_rank query param. Null when the param is absent.",
+    )
+    agreed_coarser_rank_count = serializers.IntegerField(
+        allow_null=True,
+        required=False,
+        help_text=(
+            "Exact matches plus disagreements whose LCA is at `agreement_coarsest_rank` or deeper. "
+            "Null when no threshold was supplied."
+        ),
+    )
+    agreed_coarser_rank_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="agreed_coarser_rank_count / comparable_count. Null when no threshold supplied.",
+    )
diff --git a/ami/main/api/views.py b/ami/main/api/views.py
@@ -24,14 +24,15 @@
 from rest_framework.views import APIView
 
 from ami.base.filters import NullsLastOrderingFilter, ThresholdFilter
+from ami.base.metadata import ResponseSchemaMetadata
 from ami.base.models import BaseQuerySet
 from ami.base.pagination import LimitOffsetPaginationWithPermissions
 from ami.base.permissions import IsActiveStaffOrReadOnly, IsProjectMemberOrReadOnly, ObjectPermission
 from ami.base.serializers import FilterParamsSerializer, SingleParamSerializer
 from ami.base.views import ProjectMixin
 from ami.main.api.schemas import limit_doc_param, project_id_doc_param
 from ami.main.api.serializers import TagSerializer
-from ami.main.models_future.occurrence import top_identifiers_for_project
+from ami.main.models_future.occurrence import model_agreement_for_project, top_identifiers_for_project
 from ami.utils.requests import get_default_classification_threshold
 from ami.utils.storages import ConnectionTestResult
 
@@ -55,6 +56,7 @@
     Tag,
     TaxaList,
     Taxon,
+    TaxonRank,
     User,
     update_detection_counts,
 )
@@ -71,6 +73,7 @@
     EventSerializer,
     EventTimelineSerializer,
     IdentificationSerializer,
+    ModelAgreementSerializer,
     OccurrenceListSerializer,
     OccurrenceSerializer,
     PageListSerializer,
@@ -1202,6 +1205,24 @@ def filter_queryset(self, request, queryset, view):
         return queryset
 
 
+OCCURRENCE_FILTER_BACKENDS = (  # shared by OccurrenceViewSet and OccurrenceStatsViewSet
+    CustomOccurrenceDeterminationFilter,
+    OccurrenceCollectionFilter,
+    OccurrenceAlgorithmFilter,
+    OccurrenceDateFilter,
+    OccurrenceVerified,
+    OccurrenceVerifiedByMeFilter,
+    OccurrenceTaxaListFilter,
+)
+
+OCCURRENCE_FILTERSET_FIELDS = (  # `filterset_fields` for the same two viewsets
+    "event",
+    "deployment",
+    "determination__rank",
+    "detections__source_image",
+)
+
+
 class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
     """
     API endpoint that allows occurrences to be viewed or edited.
@@ -1211,22 +1232,8 @@ class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
     queryset = Occurrence.objects.all()
 
     serializer_class = OccurrenceSerializer
-    # filter_backends = [CustomDeterminationFilter, DjangoFilterBackend, NullsLastOrderingFilter, SearchFilter]
-    filter_backends = DefaultViewSetMixin.filter_backends + [
-        CustomOccurrenceDeterminationFilter,
-        OccurrenceCollectionFilter,
-        OccurrenceAlgorithmFilter,
-        OccurrenceDateFilter,
-        OccurrenceVerified,
-        OccurrenceVerifiedByMeFilter,
-        OccurrenceTaxaListFilter,
-    ]
-    filterset_fields = [
-        "event",
-        "deployment",
-        "determination__rank",
-        "detections__source_image",
-    ]
+    filter_backends = DefaultViewSetMixin.filter_backends + list(OCCURRENCE_FILTER_BACKENDS)
+    filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
     ordering_fields = [
         "created_at",
         "updated_at",
@@ -1324,12 +1331,26 @@ class OccurrenceStatsViewSet(viewsets.GenericViewSet, ProjectMixin):
 
     permission_classes = [IsActiveStaffOrReadOnly]
     require_project = True
+    # OPTIONS on each action returns its response serializer field schema
+    # (type + help_text) under `actions.GET`. Frontends consume this to render
+    # tooltips and labels without hardcoding stat descriptions in the UI.
+    metadata_class = ResponseSchemaMetadata
+    # Filter machinery for actions that opt into `self.filter_queryset(...)`.
+    # `top_identifiers` doesn't call it, so its behavior is unchanged.
+    queryset = Occurrence.objects.none()
+    filter_backends = [DjangoFilterBackend, *OCCURRENCE_FILTER_BACKENDS]
+    filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
 
     @extend_schema(
         parameters=[project_id_doc_param, limit_doc_param],
         responses=TopIdentifiersResponseSerializer,
     )
-    @action(detail=False, methods=["get"], url_path="top-identifiers")
+    @action(
+        detail=False,
+        methods=["get"],
+        url_path="top-identifiers",
+        serializer_class=TopIdentifiersResponseSerializer,
+    )
     def top_identifiers(self, request):
         """Users ranked by distinct occurrences they identified.
 
@@ -1354,6 +1375,55 @@ def top_identifiers(self, request):
         )
         return Response(serializer.data)
 
+    @extend_schema(
+        parameters=[project_id_doc_param],
+        responses=ModelAgreementSerializer,
+    )
+    @action(
+        detail=False,
+        methods=["get"],
+        url_path="model-agreement",
+        serializer_class=ModelAgreementSerializer,
+    )
+    def model_agreement(self, request):
+        """Verified / human↔model agreement rates over the filtered occurrence set.
+
+        Accepts every query param the `/occurrences/` list endpoint accepts.
+        Reuses `apply_default_filters` so `apply_defaults=false` bypasses
+        project default taxa lists + score thresholds.
+
+        Optional ?agreement_coarsest_rank=<RANK> adds `agreed_coarser_rank_*`
+        counts — LCAs at the given rank or deeper. Valid values: any TaxonRank
+        name except UNKNOWN (case-insensitive); invalid or blank → 400.
+        """
+        project = self.get_active_project()
+        assert project is not None  # require_project=True guarantees this
+        if not Project.objects.visible_for_user(request.user).filter(pk=project.pk).exists():
+            raise NotFound("Project not found.")
+
+        # ChoiceField gives strict 400s for free: blank (?agreement_coarsest_rank=),
+        # unknown ranks, and UNKNOWN (not in the choice list) all fail at the boundary.
+        # NOTE(review): not declared in @extend_schema(parameters=...) — confirm it reaches OpenAPI.
+        # Build a plain dict (not the QueryDict) so a blank value is validated as a
+        # real "" — DRF treats blank fields in HTML/QueryDict input as absent, which
+        # would let ?agreement_coarsest_rank= silently no-op. Uppercase the raw value
+        # so the param stays case-insensitive.
+        valid_ranks = [r.name for r in TaxonRank if r != TaxonRank.UNKNOWN]
+        raw_rank = request.query_params.get("agreement_coarsest_rank")
+        rank_data = {} if raw_rank is None else {"agreement_coarsest_rank": raw_rank.upper()}
+        coarsest_rank_param = SingleParamSerializer[str].clean(
+            param_name="agreement_coarsest_rank",
+            field=serializers.ChoiceField(choices=valid_ranks, required=False, allow_blank=False),
+            data=rank_data,
+        )
+        coarsest_rank = TaxonRank[coarsest_rank_param] if coarsest_rank_param else None
+
+        base_qs = Occurrence.objects.filter(project=project).valid().apply_default_filters(project, request)
+        filtered_qs = self.filter_queryset(base_qs)
+        payload = model_agreement_for_project(filtered_qs, coarsest_rank=coarsest_rank)
+        payload["project_id"] = project.pk
+        return Response(ModelAgreementSerializer(payload, context={"request": request}).data)
+
 
 class TaxonTaxaListFilter(filters.BaseFilterBackend):
     """