RolnickLab · coderabbitai · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/ami/base/serializers.py b/ami/base/serializers.py
@@ -165,6 +165,48 @@ def __init__(
         self.fields[param_name] = field
 
 
+class EnumChoiceField(serializers.ChoiceField):
+    """A ``ChoiceField`` backed by a Python ``enum.Enum`` class.
+
+    Accepts enum member *names* (matched case-insensitively by upper-casing
+    the input, so member names are expected to be upper-case) or a member of
+    ``enum_class`` itself, and returns the corresponding enum member from
+    ``to_internal_value``.  Use the ``exclude`` parameter to reject specific
+    members (e.g. sentinel values like ``UNKNOWN``) with a standard 400
+    invalid-choice error.
+
+    Args:
+        enum_class: Enum class whose member names become the field's choices.
+        exclude: Members, or member names, to leave out of the choices.
+        **kwargs: Passed through to ``ChoiceField``.  An explicit ``choices``
+            argument takes precedence over the generated list.
+
+    Example::
+
+        field = EnumChoiceField(TaxonRank, exclude=[TaxonRank.UNKNOWN], required=False, default=None)
+        rank = SingleParamSerializer[TaxonRank | None].clean("agreement_coarsest_rank", field, request.query_params)
+    """
+
+    def __init__(self, enum_class: type, exclude: list | None = None, **kwargs: typing.Any) -> None:
+        self._enum_class = enum_class
+        excluded = set(exclude or [])
+        # Check the member and its name so ``exclude`` may contain either form.
+        choices = [m.name for m in enum_class if m not in excluded and m.name not in excluded]
+        kwargs.setdefault("choices", choices)
+        super().__init__(**kwargs)
+
+    def to_internal_value(self, data: typing.Any) -> typing.Any:
+        """Return the enum member named by ``data``; raise ``invalid_choice`` otherwise."""
+        # Keep the parent's contract: a blank value passes through when ``allow_blank`` is set.
+        if data == "" and self.allow_blank:
+            return ""
+        # An enum member is resolved by name, so excluded members are still rejected below.
+        normalized = data.name if isinstance(data, self._enum_class) else str(data).upper()
+        if normalized not in self.choices:
+            self.fail("invalid_choice", input=data)
+        return self._enum_class[normalized]
+
+
 class FilterParamsSerializer(serializers.Serializer):
     """
     Serializer for validating query parameters in DRF views.

diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py
@@ -1751,3 +1751,114 @@ class TopIdentifiersResponseSerializer(serializers.Serializer):
 
     project_id = serializers.IntegerField()
     top_identifiers = UserIdentificationCountSerializer(many=True)
+
+
+class ModelAgreementSerializer(serializers.Serializer):
+    """Verified / agreement rates over the filtered Occurrence set.
+
+    `agreed_exact_count` is a subset of `agreed_any_rank_count` by
+    construction — an exact match implies the LCA is the taxon itself.
+    `*_pct` percentages are 0.0..1.0 (not 0..100).
+
+    Denominator note: `agreed_*_pct` divide by `verified_with_prediction_count`
+    (verified occurrences that *also* have a machine prediction), NOT by
+    `verified_count`. A verified occurrence with no machine prediction can't
+    agree or disagree — including it in the denominator would drag the rate
+    down without representing actual model disagreement. `no_prediction_count`
+    is surfaced so the consumer can see how many such occurrences exist.
+
+    Optional rank threshold: when the caller passes
+    `?agreement_coarsest_rank=FAMILY`, the response also includes
+    `agreed_coarser_rank_*` counting only LCAs at that rank or deeper. The
+    threshold rank is echoed in `agreement_coarsest_rank`. When the param is
+    absent, the coarser-rank fields are null and `agreement_coarsest_rank`
+    is null.
+    """
+
+    project_id = serializers.IntegerField(help_text="Project the agreement stats were computed for.")
+    total_occurrences = serializers.IntegerField(help_text="Number of occurrences in the filtered set.")
+    verified_count = serializers.IntegerField(help_text="Occurrences with at least one non-withdrawn identification.")
+    verified_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="verified_count / total_occurrences",
+    )
+    verified_with_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences that also have a machine prediction (denominator for agreed_*_pct)."
+    )
+    no_prediction_count = serializers.IntegerField(
+        help_text="Verified occurrences with no machine prediction (excluded from agreement denominator)."
+    )
+    agreed_exact_count = serializers.IntegerField(help_text="Occurrences whose human and model taxa match exactly.")
+    agreed_exact_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="agreed_exact_count / verified_with_prediction_count",
+    )
+    agreed_exact_ci_low = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI lower bound for agreed_exact_pct. Null when verified_with_prediction_count is 0.",
+    )
+    agreed_exact_ci_high = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI upper bound for agreed_exact_pct. Null when verified_with_prediction_count is 0.",
+    )
+    agreed_any_rank_count = serializers.IntegerField(
+        help_text="Exact matches plus disagreements whose LCA is at any real rank (UNKNOWN excluded)."
+    )
+    agreed_any_rank_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        help_text="agreed_any_rank_count / verified_with_prediction_count",
+    )
+    agreed_any_rank_ci_low = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI lower bound for agreed_any_rank_pct. Null when verified_with_prediction_count is 0.",
+    )
+    agreed_any_rank_ci_high = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="Wilson 95% CI upper bound for agreed_any_rank_pct. Null when verified_with_prediction_count is 0.",
+    )
+    cohens_kappa = serializers.FloatField(
+        min_value=-1.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text=(
+            "Cohen's kappa (exact-taxon) — human↔model agreement beyond chance. "
+            "Range [-1, 1]; negative is worse than chance. Null when there are no "
+            "doubly-classified occurrences or expected agreement is 1.0."
+        ),
+    )
+    agreement_coarsest_rank = serializers.CharField(
+        allow_null=True,
+        required=False,
+        help_text="Threshold rank from ?agreement_coarsest_rank query param. Null when the param is absent.",
+    )
+    agreed_coarser_rank_count = serializers.IntegerField(
+        allow_null=True,
+        required=False,
+        help_text=(
+            "Exact matches plus disagreements whose LCA is at `agreement_coarsest_rank` or deeper. "
+            "Null when no threshold was supplied."
+        ),
+    )
+    agreed_coarser_rank_pct = serializers.FloatField(
+        min_value=0.0,
+        max_value=1.0,
+        allow_null=True,
+        required=False,
+        help_text="agreed_coarser_rank_count / verified_with_prediction_count. Null when no threshold supplied.",
+    )
diff --git a/ami/main/api/views.py b/ami/main/api/views.py
@@ -27,11 +27,11 @@
 from ami.base.models import BaseQuerySet
 from ami.base.pagination import LimitOffsetPaginationWithPermissions
 from ami.base.permissions import IsActiveStaffOrReadOnly, IsProjectMemberOrReadOnly, ObjectPermission
-from ami.base.serializers import FilterParamsSerializer, SingleParamSerializer
+from ami.base.serializers import EnumChoiceField, FilterParamsSerializer, SingleParamSerializer
 from ami.base.views import ProjectMixin
 from ami.main.api.schemas import limit_doc_param, project_id_doc_param
 from ami.main.api.serializers import TagSerializer
-from ami.main.models_future.occurrence import top_identifiers_for_project
+from ami.main.models_future.occurrence import model_agreement_for_project, top_identifiers_for_project
 from ami.utils.requests import get_default_classification_threshold
 from ami.utils.storages import ConnectionTestResult
 
@@ -55,6 +55,7 @@
     Tag,
     TaxaList,
     Taxon,
+    TaxonRank,
     User,
     update_detection_counts,
 )
@@ -71,6 +72,7 @@
     EventSerializer,
     EventTimelineSerializer,
     IdentificationSerializer,
+    ModelAgreementSerializer,
     OccurrenceListSerializer,
     OccurrenceSerializer,
     PageListSerializer,
@@ -1202,6 +1204,24 @@ def filter_queryset(self, request, queryset, view):
         return queryset
 
 
+OCCURRENCE_FILTER_BACKENDS = (  # Occurrence filter backends shared by OccurrenceViewSet and OccurrenceStatsViewSet
+    CustomOccurrenceDeterminationFilter,
+    OccurrenceCollectionFilter,
+    OccurrenceAlgorithmFilter,
+    OccurrenceDateFilter,
+    OccurrenceVerified,
+    OccurrenceVerifiedByMeFilter,
+    OccurrenceTaxaListFilter,
+)
+
+OCCURRENCE_FILTERSET_FIELDS = (  # filterset_fields shared by OccurrenceViewSet and OccurrenceStatsViewSet
+    "event",
+    "deployment",
+    "determination__rank",
+    "detections__source_image",
+)
+
+
 class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
     """
     API endpoint that allows occurrences to be viewed or edited.
@@ -1211,22 +1231,8 @@ class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
     queryset = Occurrence.objects.all()
 
     serializer_class = OccurrenceSerializer
-    # filter_backends = [CustomDeterminationFilter, DjangoFilterBackend, NullsLastOrderingFilter, SearchFilter]
-    filter_backends = DefaultViewSetMixin.filter_backends + [
-        CustomOccurrenceDeterminationFilter,
-        OccurrenceCollectionFilter,
-        OccurrenceAlgorithmFilter,
-        OccurrenceDateFilter,
-        OccurrenceVerified,
-        OccurrenceVerifiedByMeFilter,
-        OccurrenceTaxaListFilter,
-    ]
-    filterset_fields = [
-        "event",
-        "deployment",
-        "determination__rank",
-        "detections__source_image",
-    ]
+    filter_backends = DefaultViewSetMixin.filter_backends + list(OCCURRENCE_FILTER_BACKENDS)
+    filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
     ordering_fields = [
         "created_at",
         "updated_at",
@@ -1324,6 +1330,11 @@ class OccurrenceStatsViewSet(viewsets.GenericViewSet, ProjectMixin):
 
     permission_classes = [IsActiveStaffOrReadOnly]
     require_project = True
+    # Filter machinery for actions that opt into `self.filter_queryset(...)`.
+    # `top_identifiers` doesn't call it, so its behavior is unchanged.
+    queryset = Occurrence.objects.none()
+    filter_backends = [DjangoFilterBackend, *OCCURRENCE_FILTER_BACKENDS]
+    filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
 
     @extend_schema(
         parameters=[project_id_doc_param, limit_doc_param],
@@ -1354,6 +1365,39 @@ def top_identifiers(self, request):
         )
         return Response(serializer.data)
 
+    @extend_schema(
+        parameters=[project_id_doc_param],
+        responses=ModelAgreementSerializer,
+    )
+    @action(detail=False, methods=["get"], url_path="model-agreement")
+    def model_agreement(self, request):
+        """Verified / human↔model agreement rates over the filtered occurrence set.
+
+        Accepts every query param the `/occurrences/` list endpoint accepts.
+        Reuses `apply_default_filters` so `apply_defaults=false` bypasses
+        project default taxa lists + score thresholds.
+
+        Optional ?agreement_coarsest_rank=<RANK> adds `agreed_coarser_rank_*`
+        counts — LCAs at the given rank or deeper. Valid values: any
+        TaxonRank name except UNKNOWN (FAMILY, GENUS, etc.); invalid → 400.
+        """
+        project = self.get_active_project()
+        if project is None or not Project.objects.visible_for_user(request.user).filter(pk=project.pk).exists():
+            raise NotFound("Project not found.")
+
+        coarsest_rank = SingleParamSerializer[TaxonRank | None].clean(
+            "agreement_coarsest_rank",
+            EnumChoiceField(TaxonRank, exclude=[TaxonRank.UNKNOWN], required=False, allow_null=True, default=None),
+            request.query_params,
+        )
+
+        base_qs = Occurrence.objects.filter(project=project).valid().apply_default_filters(project, request)
+        filtered_qs = self.filter_queryset(base_qs)
+        # NOTE(review): argument shape is assumed — confirm against model_agreement_for_project's signature.
+        payload = model_agreement_for_project(filtered_qs, coarsest_rank=coarsest_rank)
+        payload["project_id"] = project.pk
+        return Response(ModelAgreementSerializer(payload, context={"request": request}).data)
+
 
 class TaxonTaxaListFilter(filters.BaseFilterBackend):
     """