RolnickLab · mihow · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py
@@ -1751,3 +1751,114 @@ class TopIdentifiersResponseSerializer(serializers.Serializer):
 
     project_id = serializers.IntegerField()
     top_identifiers = UserIdentificationCountSerializer(many=True)
+
+
+class ModelAgreementSerializer(serializers.Serializer):
+    """Response schema for verified / human↔model agreement rates over the filtered Occurrence set.
+
+    `agreed_exact_count` is a subset of `agreed_any_rank_count` by
+    construction — an exact match implies the LCA is the taxon itself.
+    `*_pct` values are fractions in 0.0..1.0 (not percentages in 0..100).
+
+    Denominator note: `agreed_*_pct` divide by `verified_with_prediction_count`
+    (verified occurrences that *also* have a machine prediction), NOT by
+    `verified_count`. A verified occurrence with no machine prediction can't
+    agree or disagree — including it in the denominator would drag the rate
+    down without representing actual model disagreement. `no_prediction_count`
+    is surfaced so the consumer can see how many such occurrences exist.
+
+    Optional rank threshold: when the caller passes `?agreement_coarsest_rank=FAMILY`, the
+    response also includes `agreed_coarser_rank_*`, counting only LCAs at that rank or deeper,
+    and echoes the threshold rank in `agreement_coarsest_rank`. When the param is absent,
+    `agreement_coarsest_rank` and the coarser-rank fields are null.
+
+    The `*_ci_low` / `*_ci_high` bounds and `cohens_kappa` can also be null; each field's help_text says when.
+    """
+
+    project_id = serializers.IntegerField()
+    total_occurrences = serializers.IntegerField()
+    verified_count = serializers.IntegerField(help_text="Occurrences with at least one non-withdrawn identification.")
+    verified_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="verified_count / total_occurrences",
+    )
+    verified_with_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences that also have a machine prediction (denominator for agreed_*_pct)."
+    )
+    no_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences with no machine prediction (excluded from agreement denominator)."
+    )
+    agreed_exact_count = serializers.IntegerField()
+    agreed_exact_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="agreed_exact_count / verified_with_prediction_count",
+    )
+    agreed_exact_ci_low = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI lower bound for agreed_exact_pct. Null when verified_with_prediction_count is 0.",
+    )
+    agreed_exact_ci_high = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI upper bound for agreed_exact_pct. Null when verified_with_prediction_count is 0.",
+    )
+    agreed_any_rank_count = serializers.IntegerField(
+        help_text="Exact matches plus disagreements whose LCA is at any real rank (UNKNOWN excluded)."
+    )
+    agreed_any_rank_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="agreed_any_rank_count / verified_with_prediction_count",
+    )
+    agreed_any_rank_ci_low = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI lower bound for agreed_any_rank_pct. Null when verified_with_prediction_count is 0.",
+    )
+    agreed_any_rank_ci_high = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI upper bound for agreed_any_rank_pct. Null when verified_with_prediction_count is 0.",
+    )
+    cohens_kappa = serializers.FloatField(
+        min_value=-1.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text=(
+            "Cohen's kappa (exact-taxon) — human↔model agreement beyond chance. "
+            "Range [-1, 1]; negative is worse than chance. Null when there are no "
+            "doubly-classified occurrences or expected agreement is 1.0."
+        ),
+    )
+    agreement_coarsest_rank = serializers.CharField(
+        allow_null=True,
+        required=False,
+        help_text="Threshold rank from ?agreement_coarsest_rank query param. Null when the param is absent.",
+    )
+    agreed_coarser_rank_count = serializers.IntegerField(
+        allow_null=True,
+        required=False,
+        help_text=(
+            "Exact matches plus disagreements whose LCA is at `agreement_coarsest_rank` or deeper. "
+            "Null when no threshold was supplied."
+        ),
+    )
+    agreed_coarser_rank_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="agreed_coarser_rank_count / verified_with_prediction_count. Null when no threshold supplied.",
+    )
diff --git a/ami/main/api/views.py b/ami/main/api/views.py
@@ -31,7 +31,7 @@
 from ami.base.views import ProjectMixin
 from ami.main.api.schemas import limit_doc_param, project_id_doc_param
 from ami.main.api.serializers import TagSerializer
-from ami.main.models_future.occurrence import top_identifiers_for_project
+from ami.main.models_future.occurrence import model_agreement_for_project, top_identifiers_for_project
 from ami.utils.requests import get_default_classification_threshold
 from ami.utils.storages import ConnectionTestResult
 
@@ -55,6 +55,7 @@
     Tag,
     TaxaList,
     Taxon,
+    TaxonRank,
     User,
     update_detection_counts,
 )
@@ -71,6 +72,7 @@
     EventSerializer,
     EventTimelineSerializer,
     IdentificationSerializer,
+    ModelAgreementSerializer,
     OccurrenceListSerializer,
     OccurrenceSerializer,
     PageListSerializer,
@@ -1202,6 +1204,24 @@ def filter_queryset(self, request, queryset, view):
         return queryset
 
 
+OCCURRENCE_FILTER_BACKENDS = (  # Custom backends shared by OccurrenceViewSet and OccurrenceStatsViewSet.
+    CustomOccurrenceDeterminationFilter,
+    OccurrenceCollectionFilter,
+    OccurrenceAlgorithmFilter,
+    OccurrenceDateFilter,
+    OccurrenceVerified,
+    OccurrenceVerifiedByMeFilter,
+    OccurrenceTaxaListFilter,
+)
+
+OCCURRENCE_FILTERSET_FIELDS = (  # `filterset_fields` shared by the same two viewsets (each copies it to a list).
+    "event",
+    "deployment",
+    "determination__rank",
+    "detections__source_image",
+)
+
+
 class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
     """
     API endpoint that allows occurrences to be viewed or edited.
@@ -1211,22 +1231,8 @@ class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
     queryset = Occurrence.objects.all()
 
     serializer_class = OccurrenceSerializer
-    # filter_backends = [CustomDeterminationFilter, DjangoFilterBackend, NullsLastOrderingFilter, SearchFilter]
-    filter_backends = DefaultViewSetMixin.filter_backends + [
-        CustomOccurrenceDeterminationFilter,
-        OccurrenceCollectionFilter,
-        OccurrenceAlgorithmFilter,
-        OccurrenceDateFilter,
-        OccurrenceVerified,
-        OccurrenceVerifiedByMeFilter,
-        OccurrenceTaxaListFilter,
-    ]
-    filterset_fields = [
-        "event",
-        "deployment",
-        "determination__rank",
-        "detections__source_image",
-    ]
+    filter_backends = DefaultViewSetMixin.filter_backends + list(OCCURRENCE_FILTER_BACKENDS)
+    filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
     ordering_fields = [
         "created_at",
         "updated_at",
@@ -1324,6 +1330,11 @@ class OccurrenceStatsViewSet(viewsets.GenericViewSet, ProjectMixin):
 
     permission_classes = [IsActiveStaffOrReadOnly]
     require_project = True
+    # Filter machinery for actions that opt into `self.filter_queryset(...)`.
+    # `top_identifiers` doesn't call it, so its behavior is unchanged.
+    queryset = Occurrence.objects.none()
+    filter_backends = [DjangoFilterBackend, *OCCURRENCE_FILTER_BACKENDS]
+    filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
 
     @extend_schema(
         parameters=[project_id_doc_param, limit_doc_param],
@@ -1354,6 +1365,48 @@ def top_identifiers(self, request):
         )
         return Response(serializer.data)
 
+    @extend_schema(
+        parameters=[project_id_doc_param],
+        responses=ModelAgreementSerializer,
+    )
+    @action(detail=False, methods=["get"], url_path="model-agreement")
+    def model_agreement(self, request):
+        """Verified / human↔model agreement rates over the filtered occurrence set.
+
+        Accepts every query param the `/occurrences/` list endpoint accepts.
+        Reuses `apply_default_filters` so `apply_defaults=false` bypasses
+        project default taxa lists + score thresholds.
+
+        Optional ?agreement_coarsest_rank=<RANK> adds `agreed_coarser_rank_*`
+        counts — LCAs at the given rank or deeper. Valid values: any
+        TaxonRank name except UNKNOWN (FAMILY, GENUS, etc.), matched
+        case-insensitively; invalid → 400.
+
+        Raises:
+            NotFound: the active project is not visible to the requesting user.
+            ValidationError: `agreement_coarsest_rank` is not a TaxonRank name, or is UNKNOWN.
+        """
+        project = self.get_active_project()
+        assert project is not None  # require_project=True guarantees this
+        if not Project.objects.visible_for_user(request.user).filter(pk=project.pk).exists():
+            raise NotFound("Project not found.")
+
+        # An absent or empty param means "no threshold"; rank names are upper-cased before lookup.
+        coarsest_rank_param = request.query_params.get("agreement_coarsest_rank")
+        coarsest_rank = None
+        if coarsest_rank_param:
+            try:
+                coarsest_rank = TaxonRank[coarsest_rank_param.upper()]
+            except KeyError:
+                valid = ", ".join(r.name for r in TaxonRank if r.name != "UNKNOWN")
+                # `from None`: the KeyError is an artifact of the enum lookup, not a cause worth chaining.
+                raise api_exceptions.ValidationError(
+                    {"agreement_coarsest_rank": f"Invalid rank '{coarsest_rank_param}'. Must be one of: {valid}."}
+                ) from None
+            if coarsest_rank == TaxonRank.UNKNOWN:
+                raise api_exceptions.ValidationError(
+                    {"agreement_coarsest_rank": "UNKNOWN is not a valid threshold rank."}
+                )
 ## Query parameters

 Use `SingleParamSerializer[T].clean(...)` from `ami/base/serializers.py`.
 It runs a DRF `serializers.IntegerField` / etc. through the standard
 validation pipeline and raises `ValidationError` → DRF returns 400 with
 the field-level error body the frontend expects.

 ```python
 limit = SingleParamSerializer[int].clean(
     param_name="limit",
     field=serializers.IntegerField(required=False, min_value=1, max_value=50, default=5),
     data=request.query_params,
 )
 ```
+
+        base_qs = Occurrence.objects.filter(project=project).valid().apply_default_filters(project, request)
+        filtered_qs = self.filter_queryset(base_qs)
+        payload = model_agreement_for_project(filtered_qs, coarsest_rank=coarsest_rank)
+        payload["project_id"] = project.pk
+        return Response(ModelAgreementSerializer(payload, context={"request": request}).data)
+
 
 class TaxonTaxaListFilter(filters.BaseFilterBackend):
     """