Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
165ae9b
feat(occurrence-stats): add lca_rank_between helper
mihow May 14, 2026
7b1660c
feat(occurrence-stats): aggregate human-model agreement over filtered…
mihow May 14, 2026
3110418
feat(occurrence-stats): wire human-model-agreement action
mihow May 14, 2026
ba9c901
test(occurrence-stats): HTTP coverage for human-model-agreement action
mihow May 14, 2026
5b1bde7
feat(ui): useHumanModelAgreement hook for occurrence stats
mihow May 14, 2026
e050a1f
docs(prompts): handoff for PR #1307 rework — rename + SQL push-down +…
mihow May 14, 2026
f49c9ca
refactor(occurrence-stats): rename to model-agreement + push aggregat…
mihow May 15, 2026
da2a232
docs(plan): add text lang to fenced block (markdownlint MD040)
mihow May 15, 2026
7ba8689
perf(occurrence-stats): scope agreement subqueries to verified set
mihow May 15, 2026
6ad1885
feat(occurrence-stats): drop ORDER threshold; add coarsest_rank query…
May 21, 2026
6f51da5
feat(ui): align model-agreement hook with BE rename + multi-value que…
May 21, 2026
7c144b0
chore(docs): drop NEXT_SESSION_PROMPT.md from PR
May 21, 2026
34aace5
chore(docs): drop session-scratchpad planning docs from PR
May 21, 2026
36cc677
test(occurrence-stats): make any-rank bucket test deterministic
mihow May 22, 2026
b74b3cd
chore(occurrence-stats): move FE hook to UI PR #1308
mihow May 22, 2026
2c65cce
feat(occurrence-stats): add Wilson CI + Cohen's kappa to model-agreement
mihow May 26, 2026
336c1fe
refactor(stats): move wilson_interval + cohens_kappa to ami/utils/stats
mihow May 26, 2026
6748631
feat(stats): expose response schema via OPTIONS metadata
mihow May 28, 2026
8bea80f
fix(stats): exclude taxon-less verifications from agreement denominator
mihow May 28, 2026
75b21c3
fix(stats): validate agreement_coarsest_rank via ChoiceField
mihow May 28, 2026
b65100f
fix(stats): wilson_interval rejects successes outside [0, total]
mihow May 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions ami/base/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Metadata classes for DRF OPTIONS responses.

DRF's default ``SimpleMetadata`` only emits serializer field schema under
``actions.POST`` / ``actions.PUT`` — write operations. Read-only endpoints
(stats actions, scalar aggregates) return just ``name`` + ``description`` on
OPTIONS, so their ``help_text=`` annotations on response serializers are
invisible to clients.

``ResponseSchemaMetadata`` adds ``actions.GET`` carrying the response
serializer's field info (``type``, ``label``, ``help_text``, etc.), so
frontends can fetch one OPTIONS request per stats endpoint and render
tooltips / labels from the field metadata without hardcoding copy.
"""

from __future__ import annotations

import typing

from rest_framework.metadata import SimpleMetadata


class ResponseSchemaMetadata(SimpleMetadata):
    """``SimpleMetadata`` variant that also describes the GET response.

    On top of whatever ``SimpleMetadata`` produces, the OPTIONS payload gains
    an ``actions.GET`` entry holding the field schema of the serializer the
    view returns from ``get_serializer()``.

    The addition is best-effort: if the view does not allow GET, has no
    ``get_serializer`` attribute, or ``get_serializer()`` raises, the payload
    from ``SimpleMetadata`` is returned untouched.
    """

    def determine_metadata(self, request, view) -> dict[str, typing.Any]:
        """Return the OPTIONS payload for ``view``, adding ``actions.GET`` when possible.

        An ``actions.GET`` entry that is already present in the parent's
        payload is kept as-is (``setdefault`` never overwrites it).
        """
        info = super().determine_metadata(request, view)

        # Nothing to add for views that cannot answer GET or expose no serializer.
        if "GET" not in view.allowed_methods or not hasattr(view, "get_serializer"):
            return info

        try:
            response_serializer = view.get_serializer()
        except Exception:
            # Deliberate broad catch: a view that cannot build its serializer
            # must still answer OPTIONS with the default metadata.
            return info

        action_map = info.setdefault("actions", {})
        get_schema = self.get_serializer_info(response_serializer)
        action_map.setdefault("GET", get_schema)
        return info
126 changes: 126 additions & 0 deletions ami/main/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1751,3 +1751,129 @@ class TopIdentifiersResponseSerializer(serializers.Serializer):

project_id = serializers.IntegerField()
top_identifiers = UserIdentificationCountSerializer(many=True)


class ModelAgreementSerializer(serializers.Serializer):
    """Verified / agreement rates over the filtered Occurrence set.

    `agreed_exact_count` is a subset of `agreed_any_rank_count` by
    construction — an exact match implies the LCA is the taxon itself.
    `*_pct` percentages are 0.0..1.0 (not 0..100).

    Denominator note: `agreed_*_pct` divide by `comparable_count` — verified
    occurrences that have BOTH a machine prediction and a human taxon, NOT by
    `verified_count`. Two kinds of verified occurrence are excluded because they
    can't agree or disagree: those with no machine prediction (`no_prediction_count`)
    and those whose human identification has no taxon, e.g. a comment-only
    verification (`verified_without_taxon_count`). Both are surfaced so the
    consumer can see why `comparable_count` differs from `verified_count`.

    Optional rank threshold: when the caller passes
    `?agreement_coarsest_rank=FAMILY`, the response also includes
    `agreed_coarser_rank_*` counting only LCAs at that rank or deeper. The
    threshold rank is echoed in `agreement_coarsest_rank`. When the param is
    absent, the coarser-rank fields are null and `agreement_coarsest_rank`
    is null.

    The `help_text` strings below are client-facing: they are what a metadata
    class that serializes field info returns on OPTIONS, so every denominator
    named in them must match the denominator note above (`comparable_count`).
    """

    project_id = serializers.IntegerField()
    total_occurrences = serializers.IntegerField()
    verified_count = serializers.IntegerField(help_text="Occurrences with at least one non-withdrawn identification.")
    verified_pct = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        help_text="verified_count / total_occurrences",
    )
    verified_with_prediction_count = serializers.IntegerField(
        help_text="Verified occurrences that also have a machine prediction."
    )
    no_prediction_count = serializers.IntegerField(
        help_text="Verified occurrences with no machine prediction (excluded from agreement denominator)."
    )
    verified_without_taxon_count = serializers.IntegerField(
        help_text=(
            "Verified occurrences that have a machine prediction but no human taxon "
            "(e.g. comment-only identification). Excluded from the agreement denominator "
            "since there is no human label to compare."
        )
    )
    comparable_count = serializers.IntegerField(
        help_text=(
            "Verified occurrences with BOTH a machine prediction and a human taxon — the "
            "denominator for all agreed_*_pct and the Wilson CIs. Equals "
            "verified_with_prediction_count minus verified_without_taxon_count."
        )
    )
    agreed_exact_count = serializers.IntegerField()
    agreed_exact_pct = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        help_text="agreed_exact_count / comparable_count",
    )
    # The Wilson CIs share the agreement denominator, so they are undefined
    # (null) exactly when comparable_count is 0.
    agreed_exact_ci_low = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        allow_null=True,
        required=False,
        help_text="Wilson 95% CI lower bound for agreed_exact_pct. Null when comparable_count is 0.",
    )
    agreed_exact_ci_high = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        allow_null=True,
        required=False,
        help_text="Wilson 95% CI upper bound for agreed_exact_pct. Null when comparable_count is 0.",
    )
    agreed_any_rank_count = serializers.IntegerField(
        help_text="Exact matches plus disagreements whose LCA is at any real rank (UNKNOWN excluded)."
    )
    agreed_any_rank_pct = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        help_text="agreed_any_rank_count / comparable_count",
    )
    agreed_any_rank_ci_low = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        allow_null=True,
        required=False,
        help_text="Wilson 95% CI lower bound for agreed_any_rank_pct. Null when comparable_count is 0.",
    )
    agreed_any_rank_ci_high = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        allow_null=True,
        required=False,
        help_text="Wilson 95% CI upper bound for agreed_any_rank_pct. Null when comparable_count is 0.",
    )
    cohens_kappa = serializers.FloatField(
        min_value=-1.0,
        max_value=1.0,
        allow_null=True,
        required=False,
        help_text=(
            "Cohen's kappa (exact-taxon) — human↔model agreement beyond chance. "
            "Range [-1, 1]; negative is worse than chance. Null when there are no "
            "doubly-classified occurrences or expected agreement is 1.0."
        ),
    )
    agreement_coarsest_rank = serializers.CharField(
        allow_null=True,
        required=False,
        help_text="Threshold rank from ?agreement_coarsest_rank query param. Null when the param is absent.",
    )
    agreed_coarser_rank_count = serializers.IntegerField(
        allow_null=True,
        required=False,
        help_text=(
            "Exact matches plus disagreements whose LCA is at `agreement_coarsest_rank` or deeper. "
            "Null when no threshold was supplied."
        ),
    )
    agreed_coarser_rank_pct = serializers.FloatField(
        min_value=0.0,
        max_value=1.0,
        allow_null=True,
        required=False,
        help_text="agreed_coarser_rank_count / comparable_count. Null when no threshold supplied.",
    )
106 changes: 88 additions & 18 deletions ami/main/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@
from rest_framework.views import APIView

from ami.base.filters import NullsLastOrderingFilter, ThresholdFilter
from ami.base.metadata import ResponseSchemaMetadata
from ami.base.models import BaseQuerySet
from ami.base.pagination import LimitOffsetPaginationWithPermissions
from ami.base.permissions import IsActiveStaffOrReadOnly, IsProjectMemberOrReadOnly, ObjectPermission
from ami.base.serializers import FilterParamsSerializer, SingleParamSerializer
from ami.base.views import ProjectMixin
from ami.main.api.schemas import limit_doc_param, project_id_doc_param
from ami.main.api.serializers import TagSerializer
from ami.main.models_future.occurrence import top_identifiers_for_project
from ami.main.models_future.occurrence import model_agreement_for_project, top_identifiers_for_project
from ami.utils.requests import get_default_classification_threshold
from ami.utils.storages import ConnectionTestResult

Expand All @@ -55,6 +56,7 @@
Tag,
TaxaList,
Taxon,
TaxonRank,
User,
update_detection_counts,
)
Expand All @@ -71,6 +73,7 @@
EventSerializer,
EventTimelineSerializer,
IdentificationSerializer,
ModelAgreementSerializer,
OccurrenceListSerializer,
OccurrenceSerializer,
PageListSerializer,
Expand Down Expand Up @@ -1202,6 +1205,24 @@ def filter_queryset(self, request, queryset, view):
return queryset


# Occurrence-specific filter backends, defined once at module level so that
# `OccurrenceViewSet` and `OccurrenceStatsViewSet` apply the same set instead
# of each keeping its own copy. Stored as a tuple; each viewset converts or
# unpacks it into the list it assigns to `filter_backends`.
OCCURRENCE_FILTER_BACKENDS = (
    CustomOccurrenceDeterminationFilter,
    OccurrenceCollectionFilter,
    OccurrenceAlgorithmFilter,
    OccurrenceDateFilter,
    OccurrenceVerified,
    OccurrenceVerifiedByMeFilter,
    OccurrenceTaxaListFilter,
)

# Field names both viewsets assign (as a list) to `filterset_fields`, shared
# for the same reason as the backends above.
OCCURRENCE_FILTERSET_FIELDS = (
    "event",
    "deployment",
    "determination__rank",
    "detections__source_image",
)


class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
"""
API endpoint that allows occurrences to be viewed or edited.
Expand All @@ -1211,22 +1232,8 @@ class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
queryset = Occurrence.objects.all()

serializer_class = OccurrenceSerializer
# filter_backends = [CustomDeterminationFilter, DjangoFilterBackend, NullsLastOrderingFilter, SearchFilter]
filter_backends = DefaultViewSetMixin.filter_backends + [
CustomOccurrenceDeterminationFilter,
OccurrenceCollectionFilter,
OccurrenceAlgorithmFilter,
OccurrenceDateFilter,
OccurrenceVerified,
OccurrenceVerifiedByMeFilter,
OccurrenceTaxaListFilter,
]
filterset_fields = [
"event",
"deployment",
"determination__rank",
"detections__source_image",
]
filter_backends = DefaultViewSetMixin.filter_backends + list(OCCURRENCE_FILTER_BACKENDS)
filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)
ordering_fields = [
"created_at",
"updated_at",
Expand Down Expand Up @@ -1324,12 +1331,26 @@ class OccurrenceStatsViewSet(viewsets.GenericViewSet, ProjectMixin):

permission_classes = [IsActiveStaffOrReadOnly]
require_project = True
# OPTIONS on each action returns its response serializer field schema
# (type + help_text) under `actions.GET`. Frontends consume this to render
# tooltips and labels without hardcoding stat descriptions in the UI.
metadata_class = ResponseSchemaMetadata
# Filter machinery for actions that opt into `self.filter_queryset(...)`.
# `top_identifiers` doesn't call it, so its behavior is unchanged.
queryset = Occurrence.objects.none()
filter_backends = [DjangoFilterBackend, *OCCURRENCE_FILTER_BACKENDS]
filterset_fields = list(OCCURRENCE_FILTERSET_FIELDS)

Comment thread
mihow marked this conversation as resolved.
@extend_schema(
parameters=[project_id_doc_param, limit_doc_param],
responses=TopIdentifiersResponseSerializer,
)
@action(detail=False, methods=["get"], url_path="top-identifiers")
@action(
detail=False,
methods=["get"],
url_path="top-identifiers",
serializer_class=TopIdentifiersResponseSerializer,
)
def top_identifiers(self, request):
"""Users ranked by distinct occurrences they identified.

Expand All @@ -1354,6 +1375,55 @@ def top_identifiers(self, request):
)
return Response(serializer.data)

@extend_schema(
    parameters=[project_id_doc_param],
    responses=ModelAgreementSerializer,
)
@action(
    detail=False,
    methods=["get"],
    url_path="model-agreement",
    serializer_class=ModelAgreementSerializer,
)
def model_agreement(self, request):
    """Return verified / human↔model agreement rates for the active project.

    The occurrence set is the project's valid occurrences, passed through
    `apply_default_filters` (so `apply_defaults=false` bypasses project
    default taxa lists + score thresholds) and then through this viewset's
    `filter_queryset`, i.e. the filter backends and filterset fields it
    shares with the `/occurrences/` list endpoint.

    Optional ?agreement_coarsest_rank=<RANK> adds `agreed_coarser_rank_*`
    counts — LCAs at the given rank or deeper. Valid values: any
    TaxonRank name except UNKNOWN (FAMILY, GENUS, etc.), matched
    case-insensitively; invalid or blank → 400.

    Raises `NotFound` when the project is not visible to the requesting user.
    """
    project = self.get_active_project()
    assert project is not None  # require_project=True guarantees this
    visible_projects = Project.objects.visible_for_user(request.user)
    if not visible_projects.filter(pk=project.pk).exists():
        raise NotFound("Project not found.")

    # Validate the threshold with a ChoiceField so blank values
    # (?agreement_coarsest_rank=), unknown ranks, and UNKNOWN (left out of the
    # choices) are all rejected with a 400 at the boundary; drf-spectacular
    # also turns the choices into an OpenAPI enum.
    allowed_rank_names = [rank.name for rank in TaxonRank if rank != TaxonRank.UNKNOWN]
    requested_rank = request.query_params.get("agreement_coarsest_rank")
    # Hand the serializer a plain dict rather than the QueryDict: DRF treats
    # blank fields in HTML/QueryDict input as absent, which would let a blank
    # param silently no-op. Upper-casing keeps the param case-insensitive.
    if requested_rank is None:
        rank_payload = {}
    else:
        rank_payload = {"agreement_coarsest_rank": requested_rank.upper()}
    rank_field = serializers.ChoiceField(choices=allowed_rank_names, required=False, allow_blank=False)
    cleaned_rank_name = SingleParamSerializer[str].clean(
        param_name="agreement_coarsest_rank",
        field=rank_field,
        data=rank_payload,
    )
    if cleaned_rank_name:
        coarsest_rank = TaxonRank[cleaned_rank_name]
    else:
        coarsest_rank = None

    project_occurrences = Occurrence.objects.filter(project=project).valid()
    defaulted_occurrences = project_occurrences.apply_default_filters(project, request)
    occurrences = self.filter_queryset(defaulted_occurrences)

    stats = model_agreement_for_project(occurrences, coarsest_rank=coarsest_rank)
    stats["project_id"] = project.pk
    response_serializer = ModelAgreementSerializer(stats, context={"request": request})
    return Response(response_serializer.data)


class TaxonTaxaListFilter(filters.BaseFilterBackend):
"""
Expand Down
Loading
Loading