Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
165ae9b
feat(occurrence-stats): add lca_rank_between helper
mihow May 14, 2026
7b1660c
feat(occurrence-stats): aggregate human-model agreement over filtered…
mihow May 14, 2026
3110418
feat(occurrence-stats): wire human-model-agreement action
mihow May 14, 2026
ba9c901
test(occurrence-stats): HTTP coverage for human-model-agreement action
mihow May 14, 2026
5b1bde7
feat(ui): useHumanModelAgreement hook for occurrence stats
mihow May 14, 2026
e050a1f
docs(prompts): handoff for PR #1307 rework — rename + SQL push-down +…
mihow May 14, 2026
f49c9ca
refactor(occurrence-stats): rename to model-agreement + push aggregat…
mihow May 15, 2026
da2a232
docs(plan): add text lang to fenced block (markdownlint MD040)
mihow May 15, 2026
7ba8689
perf(occurrence-stats): scope agreement subqueries to verified set
mihow May 15, 2026
6ad1885
feat(occurrence-stats): drop ORDER threshold; add coarsest_rank query…
May 21, 2026
6f51da5
feat(ui): align model-agreement hook with BE rename + multi-value que…
May 21, 2026
7c144b0
chore(docs): drop NEXT_SESSION_PROMPT.md from PR
May 21, 2026
34aace5
chore(docs): drop session-scratchpad planning docs from PR
May 21, 2026
36cc677
test(occurrence-stats): make any-rank bucket test deterministic
mihow May 22, 2026
b74b3cd
chore(occurrence-stats): move FE hook to UI PR #1308
mihow May 22, 2026
2c65cce
feat(occurrence-stats): add Wilson CI + Cohen's kappa to model-agreement
mihow May 26, 2026
336c1fe
refactor(stats): move wilson_interval + cohens_kappa to ami/utils/stats
mihow May 26, 2026
3d522db
📝 CodeRabbit Chat: Implement requested code changes
coderabbitai[bot] May 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions ami/base/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,34 @@ def __init__(
self.fields[param_name] = field


class EnumChoiceField(serializers.ChoiceField):
    """A ``ChoiceField`` backed by a Python ``enum.Enum`` class.

    Accepts enum member *names* case-insensitively — regardless of how the
    names themselves are cased — and returns the corresponding enum member
    from ``to_internal_value``. Use the ``exclude`` parameter to reject
    specific members (e.g. sentinel values like ``UNKNOWN``) with a standard
    400 invalid-choice error.

    Example::

        field = EnumChoiceField(TaxonRank, exclude=[TaxonRank.UNKNOWN], required=False, default=None)
        rank = SingleParamSerializer[TaxonRank | None].clean("agreement_coarsest_rank", field, request.query_params)
    """

    def __init__(self, enum_class: type, exclude: list | None = None, **kwargs: typing.Any) -> None:
        """Build the field from ``enum_class``.

        Args:
            enum_class: The enum whose member names are the accepted inputs.
            exclude: Members of ``enum_class`` that must be rejected.
            **kwargs: Forwarded to ``ChoiceField``. An explicit ``choices``
                entry overrides the list derived from the enum.
        """
        self._enum_class = enum_class
        excluded = set(exclude or [])
        # Case-folded name -> canonical member name. Matching through this map
        # keeps the lookup case-insensitive even when member names are not
        # upper-case. If two names differ only by case, the later one wins.
        self._names_by_folded = {member.name.casefold(): member.name for member in enum_class}
        kwargs.setdefault("choices", [member.name for member in enum_class if member not in excluded])
        super().__init__(**kwargs)

    def to_internal_value(self, data: typing.Any) -> typing.Any:
        """Return the enum member named by ``data``.

        Fails with the field's ``invalid_choice`` error when ``data`` does not
        name an enum member or names one that is not among ``self.choices``.
        An empty string is returned unchanged when ``allow_blank`` is set,
        mirroring the parent ``ChoiceField``.
        """
        if data == "" and self.allow_blank:
            return ""
        name = self._names_by_folded.get(str(data).casefold())
        # Reject unknown names here rather than letting the enum lookup raise
        # KeyError, so bad input always surfaces as a validation error.
        if name is None or name not in self.choices:
            self.fail("invalid_choice", input=data)
        return self._enum_class[name]


class FilterParamsSerializer(serializers.Serializer):
"""
Serializer for validating query parameters in DRF views.
Expand Down
111 changes: 111 additions & 0 deletions ami/main/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1751,3 +1751,114 @@ class TopIdentifiersResponseSerializer(serializers.Serializer):

project_id = serializers.IntegerField()
top_identifiers = UserIdentificationCountSerializer(many=True)


# Keyword arguments shared by the model-agreement fields below: rates are
# fractions bounded to 0.0..1.0, and "optional" fields may be omitted or null.
_AGREEMENT_RATE_BOUNDS = {"min_value": 0.0, "max_value": 1.0}
_AGREEMENT_OPTIONAL = {"allow_null": True, "required": False}


class ModelAgreementSerializer(serializers.Serializer):
    """Response shape for verification and human/model agreement statistics.

    All `*_pct` values are fractions in 0.0..1.0, not percentages in 0..100.

    Relationship between the counts: every exact match is also counted in
    `agreed_any_rank_count`, because an exact match means the LCA is the
    taxon itself.

    Denominator: the `agreed_*_pct` rates are divided by
    `verified_with_prediction_count` (verified occurrences that also carry a
    machine prediction), not by `verified_count`. An occurrence without a
    machine prediction can neither agree nor disagree, so counting it would
    lower the rate without reflecting real model disagreement. Those
    occurrences are reported separately in `no_prediction_count`.

    Rank threshold: when the request carries
    `?agreement_coarsest_rank=FAMILY`, the `agreed_coarser_rank_*` fields
    count only LCAs at that rank or deeper and the rank is echoed back in
    `agreement_coarsest_rank`. Without the parameter, all three of those
    fields are null.
    """

    project_id = serializers.IntegerField()
    total_occurrences = serializers.IntegerField()

    # --- verification coverage ---
    verified_count = serializers.IntegerField(help_text="Occurrences with at least one non-withdrawn identification.")
    verified_pct = serializers.FloatField(help_text="verified_count / total_occurrences", **_AGREEMENT_RATE_BOUNDS)
    verified_with_prediction_count = serializers.IntegerField(
        help_text="Verified occurrences that also have a machine prediction (denominator for agreed_*_pct)."
    )
    no_prediction_count = serializers.IntegerField(
        help_text="Verified occurrences with no machine prediction (excluded from agreement denominator)."
    )

    # --- exact-taxon agreement ---
    agreed_exact_count = serializers.IntegerField()
    agreed_exact_pct = serializers.FloatField(
        help_text="agreed_exact_count / verified_with_prediction_count",
        **_AGREEMENT_RATE_BOUNDS,
    )
    agreed_exact_ci_low = serializers.FloatField(
        help_text="Wilson 95% CI lower bound for agreed_exact_pct. Null when verified_with_prediction_count is 0.",
        **_AGREEMENT_RATE_BOUNDS,
        **_AGREEMENT_OPTIONAL,
    )
    agreed_exact_ci_high = serializers.FloatField(
        help_text="Wilson 95% CI upper bound for agreed_exact_pct. Null when verified_with_prediction_count is 0.",
        **_AGREEMENT_RATE_BOUNDS,
        **_AGREEMENT_OPTIONAL,
    )

    # --- agreement at any real rank ---
    agreed_any_rank_count = serializers.IntegerField(
        help_text="Exact matches plus disagreements whose LCA is at any real rank (UNKNOWN excluded)."
    )
    agreed_any_rank_pct = serializers.FloatField(
        help_text="agreed_any_rank_count / verified_with_prediction_count",
        **_AGREEMENT_RATE_BOUNDS,
    )
    agreed_any_rank_ci_low = serializers.FloatField(
        help_text="Wilson 95% CI lower bound for agreed_any_rank_pct. Null when verified_with_prediction_count is 0.",
        **_AGREEMENT_RATE_BOUNDS,
        **_AGREEMENT_OPTIONAL,
    )
    agreed_any_rank_ci_high = serializers.FloatField(
        help_text="Wilson 95% CI upper bound for agreed_any_rank_pct. Null when verified_with_prediction_count is 0.",
        **_AGREEMENT_RATE_BOUNDS,
        **_AGREEMENT_OPTIONAL,
    )

    # --- chance-corrected agreement ---
    cohens_kappa = serializers.FloatField(
        min_value=-1.0,
        max_value=1.0,
        help_text=(
            "Cohen's kappa (exact-taxon) — human↔model agreement beyond chance. "
            "Range [-1, 1]; negative is worse than chance. Null when there are no "
            "doubly-classified occurrences or expected agreement is 1.0."
        ),
        **_AGREEMENT_OPTIONAL,
    )

    # --- optional rank-threshold agreement ---
    agreement_coarsest_rank = serializers.CharField(
        help_text="Threshold rank from ?agreement_coarsest_rank query param. Null when the param is absent.",
        **_AGREEMENT_OPTIONAL,
    )
    agreed_coarser_rank_count = serializers.IntegerField(
        help_text=(
            "Exact matches plus disagreements whose LCA is at `agreement_coarsest_rank` or deeper. "
            "Null when no threshold was supplied."
        ),
        **_AGREEMENT_OPTIONAL,
    )
    agreed_coarser_rank_pct = serializers.FloatField(
        help_text="agreed_coarser_rank_count / verified_with_prediction_count. Null when no threshold supplied.",
        **_AGREEMENT_RATE_BOUNDS,
        **_AGREEMENT_OPTIONAL,
    )
80 changes: 62 additions & 18 deletions ami/main/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
from ami.base.models import BaseQuerySet
from ami.base.pagination import LimitOffsetPaginationWithPermissions
from ami.base.permissions import IsActiveStaffOrReadOnly, IsProjectMemberOrReadOnly, ObjectPermission
from ami.base.serializers import FilterParamsSerializer, SingleParamSerializer
from ami.base.serializers import EnumChoiceField, FilterParamsSerializer, SingleParamSerializer
from ami.base.views import ProjectMixin
from ami.main.api.schemas import limit_doc_param, project_id_doc_param
from ami.main.api.serializers import TagSerializer
from ami.main.models_future.occurrence import top_identifiers_for_project
from ami.main.models_future.occurrence import model_agreement_for_project, top_identifiers_for_project
from ami.utils.requests import get_default_classification_threshold
from ami.utils.storages import ConnectionTestResult

Expand All @@ -55,6 +55,7 @@
Tag,
TaxaList,
Taxon,
TaxonRank,
User,
update_detection_counts,
)
Expand All @@ -71,6 +72,7 @@
EventSerializer,
EventTimelineSerializer,
IdentificationSerializer,
ModelAgreementSerializer,
OccurrenceListSerializer,
OccurrenceSerializer,
PageListSerializer,
Expand Down Expand Up @@ -1202,6 +1204,24 @@ def filter_queryset(self, request, queryset, view):
return queryset


# Occurrence-specific filter backends, declared once at module level so that
# both the occurrence list viewset and the occurrence stats viewset build
# their `filter_backends` from the same tuple.
OCCURRENCE_FILTER_BACKENDS = (
    CustomOccurrenceDeterminationFilter,
    OccurrenceCollectionFilter,
    OccurrenceAlgorithmFilter,
    OccurrenceDateFilter,
    OccurrenceVerified,
    OccurrenceVerifiedByMeFilter,
    OccurrenceTaxaListFilter,
)

# Field lookups copied into `filterset_fields` by the viewsets that share the
# occurrence filter backends above.
OCCURRENCE_FILTERSET_FIELDS = (
    "event",
    "deployment",
    "determination__rank",
    "detections__source_image",
)


class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
"""
API endpoint that allows occurrences to be viewed or edited.
Expand All @@ -1211,22 +1231,8 @@ class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
queryset = Occurrence.objects.all()

serializer_class = OccurrenceSerializer
# filter_backends = [CustomDeterminationFilter, DjangoFilterBackend, NullsLastOrderingFilter, SearchFilter]
filter_backends = DefaultViewSetMixin.filter_backends + [
CustomOccurrenceDeterminationFilter,
OccurrenceCollectionFilter,
OccurrenceAlgorithmFilter,
OccurrenceDateFilter,
OccurrenceVerified,
OccurrenceVerifiedByMeFilter,
OccurrenceTaxaListFilter,
]
filterset_fields = [
"event",
"deployment",
"determination__rank",
"detections__source_image",
]
filter_backends = DefaultViewSetMixin.filter_backends + list(OCCURRENCE_FILTER_BACKENDS)
filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
ordering_fields = [
"created_at",
"updated_at",
Expand Down Expand Up @@ -1324,6 +1330,11 @@ class OccurrenceStatsViewSet(viewsets.GenericViewSet, ProjectMixin):

permission_classes = [IsActiveStaffOrReadOnly]
require_project = True
# Filter machinery for actions that opt into `self.filter_queryset(...)`.
# `top_identifiers` doesn't call it, so its behavior is unchanged.
queryset = Occurrence.objects.none()
filter_backends = [DjangoFilterBackend, *OCCURRENCE_FILTER_BACKENDS]
filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)

@extend_schema(
parameters=[project_id_doc_param, limit_doc_param],
Expand Down Expand Up @@ -1354,6 +1365,39 @@ def top_identifiers(self, request):
)
return Response(serializer.data)

@extend_schema(
    parameters=[project_id_doc_param],
    responses=ModelAgreementSerializer,
)
@action(detail=False, methods=["get"], url_path="model-agreement")
def model_agreement(self, request):
    """Verified / human↔model agreement rates over the filtered occurrence set.

    Accepts every query param the `/occurrences/` list endpoint accepts.
    Reuses `apply_default_filters` so `apply_defaults=false` bypasses
    project default taxa lists + score thresholds.

    Optional ?agreement_coarsest_rank=<RANK> adds `agreed_coarser_rank_*`
    counts — LCAs at the given rank or deeper. Valid values: any
    TaxonRank name except UNKNOWN (FAMILY, GENUS, etc.); invalid → 400.

    Raises:
        NotFound: when the active project is not visible to the requesting user.
    """
    project = self.get_active_project()
    assert project is not None  # require_project=True guarantees this
    # Answer 404 rather than 403 for projects the user is not allowed to see.
    if not Project.objects.visible_for_user(request.user).filter(pk=project.pk).exists():
        raise NotFound("Project not found.")

    # Validate the optional threshold before running any queries so that a
    # bad value is rejected up front.
    coarsest_rank = SingleParamSerializer[TaxonRank | None].clean(
        "agreement_coarsest_rank",
        EnumChoiceField(TaxonRank, exclude=[TaxonRank.UNKNOWN], required=False, allow_null=True, default=None),
        request.query_params,
    )

    base_qs = Occurrence.objects.filter(project=project).valid().apply_default_filters(project, request)
    filtered_qs = self.filter_queryset(base_qs)

    # Previously `payload` was used without ever being assigned, so the action
    # raised NameError and neither `filtered_qs` nor `coarsest_rank` was used.
    # NOTE(review): call shape assumed (filtered queryset positional,
    # `coarsest_rank` keyword) — confirm against the definition of
    # `model_agreement_for_project` in ami/main/models_future/occurrence.py.
    payload = model_agreement_for_project(filtered_qs, coarsest_rank=coarsest_rank)
    payload["project_id"] = project.pk
    return Response(ModelAgreementSerializer(payload, context={"request": request}).data)


class TaxonTaxaListFilter(filters.BaseFilterBackend):
"""
Expand Down
Loading