Skip to content

Commit ea9f45d

Browse files
mihow and claude committed
feat(taxa): per-taxon verification + agreement counts and verified filter
Adds to GET /api/v2/taxa/ (issue #1316):

- verified_count and agreed_with_prediction_count annotations (always on), rolled up over descendant occurrences via a hierarchical parents_json match.
- agreed_exact_count, gated behind with_agreement=true (and always on the detail view).
- verified=true|false filter (EXISTS / strict complement), project-scoped and respecting apply_default_filters.
- verified_count added to ordering_fields.

The hierarchical descendant match uses a Postgres jsonb @> containment built from an OuterRef (a literal __contains lookup can't embed an OuterRef). Migration 0085 adds the supporting GIN index on Taxon.parents_json (jsonb_path_ops).

Frontend: sortable "Verified" column + "Verification status" filter on the taxa list, and a Verification panel on the taxon detail page.

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent e721c8b commit ea9f45d

11 files changed

Lines changed: 533 additions & 0 deletions

File tree

ami/main/api/serializers.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,13 +588,27 @@ def get_taxa(self, obj):
588588
return [{"id": taxon.id, "name": taxon.name} for taxon in obj.taxa.all()]
589589

590590

591+
def agreement_requested(request: Request | None) -> bool:
    """
    Report whether the caller opted in to the heavier ``agreed_exact_count`` field.

    Returns ``True`` only when the ``with_agreement`` query parameter, compared
    case-insensitively, is one of ``true``, ``1``, ``yes`` or ``on``. A missing
    request or a missing parameter yields ``False``.
    """
    opt_in_tokens = ("true", "1", "yes", "on")
    # No request (e.g. serializer built without context) behaves like an absent parameter.
    raw = "" if request is None else request.query_params.get("with_agreement", "")
    return str(raw).lower() in opt_in_tokens
597+
598+
591599
class TaxonListSerializer(DefaultSerializer):
592600
# latest_detection = DetectionNestedSerializer(read_only=True)
593601
occurrences = serializers.SerializerMethodField()
594602
parents = TaxonParentSerializer(many=True, read_only=True, source="parents_json")
595603
parent_id = serializers.PrimaryKeyRelatedField(queryset=Taxon.objects.all(), source="parent")
596604
tags = serializers.SerializerMethodField()
597605

606+
def __init__(self, *args, **kwargs):
    """
    Build the serializer, then drop ``agreed_exact_count`` unless it was requested.

    The field is kept only when ``agreement_requested`` is true for the request
    found in the serializer context (``with_agreement=true``); a serializer
    created without a request in its context never exposes it.
    """
    super().__init__(*args, **kwargs)
    request = self.context.get("request")
    if agreement_requested(request):
        return
    # Gated annotation: remove the field so the key is absent from the output.
    self.fields.pop("agreed_exact_count", None)
611+
598612
def get_tags(self, obj):
599613
tag_list = getattr(obj, "prefetched_tags", [])
600614
return TagSerializer(tag_list, many=True, context=self.context).data
@@ -609,6 +623,9 @@ class Meta:
609623
"parents",
610624
"details",
611625
"occurrences_count",
626+
"verified_count",
627+
"agreed_with_prediction_count",
628+
"agreed_exact_count",
612629
"occurrences",
613630
"tags",
614631
"last_detected",
@@ -886,6 +903,9 @@ class Meta:
886903
"parents",
887904
"details",
888905
"occurrences_count",
906+
"verified_count",
907+
"agreed_with_prediction_count",
908+
"agreed_exact_count",
889909
"events_count",
890910
"occurrences",
891911
"gbif_taxon_key",

ami/main/api/views.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
from ami.utils.storages import ConnectionTestResult
3838

3939
from ..models import (
40+
BEST_IDENTIFICATION_ORDER,
41+
BEST_MACHINE_PREDICTION_ORDER,
4042
NULL_DETECTIONS_FILTER,
4143
Classification,
4244
Deployment,
@@ -1400,6 +1402,19 @@ def filter_queryset(self, request, queryset, view):
14001402
return queryset.distinct()
14011403

14021404

1405+
class JSONBContains(models.Func):
    """
    Postgres ``@>`` (jsonb containment) rendered as a boolean expression.

    Takes exactly two expressions and renders ``(<left> @> <right>)``.

    Needed for correlated subqueries where the right-hand side is built from an
    ``OuterRef`` — the literal ``parents_json__contains=[{"id": ...}]`` lookup
    can't embed an ``OuterRef`` (it would try to JSON-serialize the expression).
    """

    # ``@>`` is a binary operator. Without an arity check, a wrong number of
    # arguments would be joined into ``a @> b @> c`` and only fail in the database.
    arity = 2
    arg_joiner = " @> "
    template = "(%(expressions)s)"
    output_field = models.BooleanField()
1416+
1417+
14031418
class TaxonViewSet(DefaultViewSet, ProjectMixin):
14041419
"""
14051420
API endpoint that allows taxa to be viewed or edited.
@@ -1428,6 +1443,7 @@ class TaxonViewSet(DefaultViewSet, ProjectMixin):
14281443
"created_at",
14291444
"updated_at",
14301445
"occurrences_count",
1446+
"verified_count",
14311447
"last_detected",
14321448
"best_determination_score",
14331449
"name",
@@ -1654,6 +1670,113 @@ def get_taxa_observed(
16541670
# Efficient EXISTS check that uses the composite index
16551671
qs = qs.filter(models.Exists(Occurrence.objects.filter(base_filter)))
16561672

1673+
qs = self.add_verification_data(qs, occurrence_filters, default_filters_q)
1674+
1675+
return qs
1676+
1677+
def _occurrences_under_taxon(self, occurrence_filters: models.Q, default_filters_q: models.Q) -> QuerySet:
    """
    Build a correlated Occurrence queryset for the outer Taxon row and its descendants.

    An occurrence matches when its determination is the outer taxon
    (``OuterRef("id")``) or when the determination's ``parents_json`` contains
    ``[{"id": <outer taxon id>}]``. Both ``occurrence_filters`` and
    ``default_filters_q`` are applied first, so the result inherits whatever
    scoping those Q objects carry.

    The containment probe is assembled in SQL (``jsonb_build_array`` around
    ``jsonb_build_object``) because its right-hand side depends on the outer row.
    """
    # SQL: jsonb_build_array(jsonb_build_object('id', <outer taxon id>))
    ancestor_entry = models.Func(
        models.Value("id"),
        models.OuterRef("id"),
        function="jsonb_build_object",
    )
    ancestor_probe = models.Func(
        ancestor_entry,
        function="jsonb_build_array",
        output_field=models.JSONField(),
    )
    is_descendant = JSONBContains(models.F("determination__parents_json"), ancestor_probe)

    scoped = Occurrence.objects.filter(occurrence_filters).filter(default_filters_q)
    is_exact_taxon = models.Q(determination_id=models.OuterRef("id"))
    return scoped.alias(_under_taxon=is_descendant).filter(is_exact_taxon | models.Q(_under_taxon=True))
1706+
1707+
def _include_agreement(self) -> bool:
    """
    Decide whether the heavier ``agreed_exact_count`` annotation should be computed.

    Always ``True`` on the detail (``retrieve``) action. Otherwise ``True`` only
    when the ``with_agreement`` query parameter is, case-insensitively, one of
    ``true``, ``1``, ``yes`` or ``on``.

    The accepted tokens deliberately match the serializer-side gate
    (``agreement_requested``), which drops the field for any other value. A
    looser parse here would compute the annotation for values such as ``no`` or
    ``off`` only for the list serializer to discard the result.
    """
    if self.action == "retrieve":
        return True
    raw = self.request.query_params.get("with_agreement", "")
    return str(raw).lower() in ("true", "1", "yes", "on")
1712+
1713+
def add_verification_data(
    self, qs: QuerySet, occurrence_filters: models.Q, default_filters_q: models.Q
) -> QuerySet:
    """
    Annotate each taxon with verification / agreement counts and apply ``verified``.

    Annotations added to ``qs``:

    - ``verified_count``: occurrences under the taxon that have at least one
      non-withdrawn identification.
    - ``agreed_with_prediction_count``: occurrences under the taxon whose first
      non-withdrawn identification (ordered by ``BEST_IDENTIFICATION_ORDER``)
      has ``agreed_with_prediction`` set.
    - ``agreed_exact_count`` (only when ``_include_agreement()`` is true):
      verified occurrences whose determination equals the taxon of the first
      classification ordered by ``BEST_MACHINE_PREDICTION_ORDER``.

    "Under the taxon" is defined by ``_occurrences_under_taxon``, so every count
    includes descendant taxa and honours both filter arguments.

    On the ``list`` action, a ``verified`` query parameter additionally keeps
    only taxa that have (truthy) or lack (falsy) a verified occurrence.
    """

    def _active_identifications() -> QuerySet:
        # Non-withdrawn identifications of the occurrence row being evaluated.
        return Identification.objects.filter(occurrence=models.OuterRef("pk"), withdrawn=False)

    def correlated_count(occurrence_qs: QuerySet) -> Coalesce:
        # Collapse the hierarchical match into one COUNT row by grouping on
        # project_id; determination_id differs across descendants so it can't
        # be the grouping key. Assumes the occurrence filters pin a single
        # project, otherwise [:1] would pick one project's count. TODO confirm.
        per_project_total = occurrence_qs.values("project_id").annotate(c=models.Count("id")).values("c")
        return Coalesce(
            models.Subquery(per_project_total[:1], output_field=models.IntegerField()),
            0,
        )

    scoped_occurrences = self._occurrences_under_taxon(occurrence_filters, default_filters_q)
    verified_occurrences = scoped_occurrences.filter(models.Exists(_active_identifications()))

    # agreed_with_prediction FK of the chosen (best, non-withdrawn) identification.
    chosen_identification_prediction = models.Subquery(
        _active_identifications().order_by(*BEST_IDENTIFICATION_ORDER).values("agreed_with_prediction_id")[:1]
    )
    agreed_with_prediction_occurrences = scoped_occurrences.annotate(
        _best_agreed_prediction=chosen_identification_prediction
    ).filter(_best_agreed_prediction__isnull=False)

    qs = qs.annotate(
        verified_count=correlated_count(verified_occurrences),
        agreed_with_prediction_count=correlated_count(agreed_with_prediction_occurrences),
    )

    if self._include_agreement():
        # Taxon of the top machine prediction for the same occurrence; an
        # occurrence counts when its determination equals that taxon.
        top_machine_taxon = models.Subquery(
            Classification.objects.filter(detection__occurrence=models.OuterRef("pk"))
            .order_by(*BEST_MACHINE_PREDICTION_ORDER)
            .values("taxon_id")[:1]
        )
        agreed_exact_occurrences = verified_occurrences.annotate(_best_machine_taxon=top_machine_taxon).filter(
            determination_id=models.F("_best_machine_taxon")
        )
        qs = qs.annotate(agreed_exact_count=correlated_count(agreed_exact_occurrences))

    # verified=true|false (list only). Both branches test the same EXISTS, so
    # verified=false is the strict complement of verified=true.
    if self.action == "list" and "verified" in self.request.query_params:
        wants_verified = BooleanField(required=False).clean(self.request.query_params.get("verified"))
        has_verified = models.Exists(verified_occurrences)
        qs = qs.filter(has_verified if wants_verified else ~has_verified)

    return qs
16581781

16591782
def attach_tags_by_project(self, qs: QuerySet, project: Project) -> QuerySet:
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from django.db import migrations
2+
3+
4+
class Migration(migrations.Migration):
    """
    Create a GIN index (``jsonb_path_ops``) on ``main_taxon.parents_json``.

    The index backs the ``@>`` containment test used for the hierarchical
    (descendant) rollup of the per-taxon verification / agreement counts added
    for issue #1316. Per the original author's note, without it Family- and
    Order-rank rows on large projects fall back to a sequential scan on that
    test, which then dominates query time.

    ``IF NOT EXISTS`` lets this migration co-exist with the same index if it
    lands separately via the #1307 follow-up.

    NOTE(review): PostgreSQL can leave an INVALID index behind when a
    ``CREATE INDEX CONCURRENTLY`` fails part-way; ``IF NOT EXISTS`` would then
    skip it on a re-run. Confirm the index is valid after deploying.
    """

    dependencies = [("main", "0084_revoke_delete_job_from_roles")]

    # CREATE / DROP INDEX CONCURRENTLY cannot run inside a transaction block.
    atomic = False

    operations = [
        migrations.RunSQL(
            reverse_sql="DROP INDEX CONCURRENTLY IF EXISTS main_taxon_parents_json_gin_idx;",
            sql=(
                "CREATE INDEX CONCURRENTLY IF NOT EXISTS "
                "main_taxon_parents_json_gin_idx "
                "ON main_taxon USING gin (parents_json jsonb_path_ops);"
            ),
        ),
    ]

ami/main/models.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3811,6 +3811,18 @@ def best_determination_score(self) -> float | None:
38113811
# This is handled by an annotation if we are filtering by project, deployment or event
38123812
return None
38133813

3814+
def verified_count(self) -> int | None:
3815+
# Handled by an annotation when filtering by project (TaxonViewSet.add_verification_data)
3816+
return None
3817+
3818+
def agreed_with_prediction_count(self) -> int | None:
3819+
# Handled by an annotation when filtering by project (TaxonViewSet.add_verification_data)
3820+
return None
3821+
3822+
def agreed_exact_count(self) -> int | None:
3823+
# Handled by an annotation only when with_agreement is requested or on the detail view
3824+
return None
3825+
38143826
def occurrence_images(self, limit: int | None = 10) -> list[str]:
38153827
# This is handled by an annotation if we are filtering by project, deployment or event
38163828
return []

ami/main/tests.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4761,3 +4761,132 @@ def test_registration_order_preserves_occurrence_retrieve(self):
47614761
retrieve_response = self.client.get(f"/api/v2/occurrences/{occurrence.pk}/?project_id={self.project.pk}")
47624762
self.assertEqual(stats_response.status_code, 200, "stats URL must resolve")
47634763
self.assertEqual(retrieve_response.status_code, 200, "occurrence retrieve must still work")
4764+
4765+
4766+
class TestTaxaVerification(APITestCase):
    """
    Exercise the per-taxon verification and human/model agreement annotations on
    the taxa endpoints, plus the ``verified`` list filter (#1316).

    Fixture summary (built in ``setUp``): three occurrences start as
    "Vanessa cardui" and one as "Vanessa itea". The cardui occurrences each get
    one identification: an explicit agreement with the prediction, a plain
    cardui identification, and an override to "Vanessa atalanta". The itea
    occurrence is left without any identification.
    """

    def setUp(self):
        self.project, self.deployment = setup_test_project(reuse=False)
        self.taxa_list = create_taxa(self.project)

        def taxon_named(name):
            return Taxon.objects.get(name=name)

        self.order = taxon_named("Lepidoptera")
        self.family = taxon_named("Nymphalidae")
        self.genus = taxon_named("Vanessa")
        self.cardui = taxon_named("Vanessa cardui")
        self.atalanta = taxon_named("Vanessa atalanta")
        self.itea = taxon_named("Vanessa itea")

        create_captures(deployment=self.deployment, num_nights=1, images_per_night=3)
        # Machine determinations: 3 x cardui, 1 x itea (itea stays unverified).
        create_occurrences(deployment=self.deployment, num=3, taxon=self.cardui, determination_score=0.9)
        create_occurrences(deployment=self.deployment, num=1, taxon=self.itea, determination_score=0.9)

        self.user = User.objects.create_user(email="verifier@insectai.org", is_staff=True, is_superuser=True)
        self.client.force_authenticate(user=self.user)

        cardui_occurrences = list(
            Occurrence.objects.filter(project=self.project, determination=self.cardui).order_by("pk")
        )
        self.assertEqual(len(cardui_occurrences), 3)
        self.occ_pred, self.occ_exact, self.occ_disagree = cardui_occurrences

        # occ_pred: the user explicitly agrees with the model prediction (cardui).
        Identification.objects.create(
            occurrence=self.occ_pred,
            taxon=self.cardui,
            user=self.user,
            agreed_with_prediction=self.occ_pred.best_prediction,
        )
        # occ_exact: same taxon as the model, but not through the "agree" workflow.
        # occ_disagree: the user overrides the model (cardui) with atalanta.
        for occurrence, chosen_taxon in (
            (self.occ_exact, self.cardui),
            (self.occ_disagree, self.atalanta),
        ):
            Identification.objects.create(occurrence=occurrence, taxon=chosen_taxon, user=self.user)

        self.itea_occ = Occurrence.objects.get(project=self.project, determination=self.itea)
        self.list_url = f"/api/v2/taxa/?project_id={self.project.pk}&limit=1000"

    def _detail(self, taxon):
        """Fetch the project-scoped detail payload for ``taxon``, asserting HTTP 200."""
        url = f"/api/v2/taxa/{taxon.pk}/?project_id={self.project.pk}"
        response = self.client.get(url)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        return response.json()

    def _list_by_name(self, url=None):
        """Fetch a taxa list (default ``self.list_url``) and key its rows by taxon name."""
        response = self.client.get(url or self.list_url)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        rows_by_name = {}
        for row in response.json()["results"]:
            rows_by_name[row["name"]] = row
        return rows_by_name

    # --- verified_count (hierarchical rollup) ---

    def test_verified_count_species(self):
        expectations = ((self.cardui, 2), (self.atalanta, 1), (self.itea, 0))
        for taxon, expected in expectations:
            self.assertEqual(self._detail(taxon)["verified_count"], expected, taxon.name)

    def test_verified_count_rolls_up_to_ancestors(self):
        # Every verified species occurrence is counted once on each ancestor rank.
        for ancestor in (self.genus, self.family, self.order):
            payload = self._detail(ancestor)
            self.assertEqual(payload["verified_count"], 3, ancestor.name)

    # --- agreed_with_prediction_count (chosen identification only) ---

    def test_agreed_with_prediction_counts_only_chosen_identification(self):
        # The genus row rolls up: only occ_pred contributes beneath it.
        expectations = ((self.cardui, 1), (self.atalanta, 0), (self.genus, 1))
        for taxon, expected in expectations:
            self.assertEqual(self._detail(taxon)["agreed_with_prediction_count"], expected, taxon.name)

    # --- agreed_exact_count (gated) ---

    def test_agreed_exact_count_on_detail(self):
        # cardui: occ_pred + occ_exact have user determination == top machine taxon.
        # atalanta: user picked atalanta while the model said cardui, so not exact.
        expectations = ((self.cardui, 2), (self.atalanta, 0), (self.genus, 2))
        for taxon, expected in expectations:
            self.assertEqual(self._detail(taxon)["agreed_exact_count"], expected, taxon.name)

    def test_agreed_exact_count_gated_on_list(self):
        default_row = self._list_by_name()["Vanessa cardui"]
        self.assertIn("verified_count", default_row)
        self.assertIn("agreed_with_prediction_count", default_row)
        self.assertNotIn("agreed_exact_count", default_row)

        opted_in_row = self._list_by_name(self.list_url + "&with_agreement=true")["Vanessa cardui"]
        self.assertIn("agreed_exact_count", opted_in_row)
        self.assertEqual(opted_in_row["agreed_exact_count"], 2)

    # --- list field values ---

    def test_list_field_values(self):
        rows = self._list_by_name()
        cardui_row = rows["Vanessa cardui"]
        self.assertEqual(cardui_row["occurrences_count"], 2)
        self.assertEqual(cardui_row["verified_count"], 2)
        self.assertEqual(cardui_row["agreed_with_prediction_count"], 1)
        self.assertEqual(rows["Vanessa atalanta"]["verified_count"], 1)
        self.assertEqual(rows["Vanessa itea"]["verified_count"], 0)

    # --- verified=true|false filter ---

    def test_verified_filter_true_false_complement(self):
        everything = set(self._list_by_name())
        verified = set(self._list_by_name(self.list_url + "&verified=true"))
        unverified = set(self._list_by_name(self.list_url + "&verified=false"))

        self.assertEqual(verified, {"Vanessa cardui", "Vanessa atalanta"})
        self.assertEqual(unverified, {"Vanessa itea"})
        # The two filtered sets partition the unfiltered list.
        self.assertEqual(verified | unverified, everything)
        self.assertEqual(verified & unverified, set())

    def test_ordering_by_verified_count(self):
        response = self.client.get(self.list_url + "&ordering=verified_count")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        observed = [row["verified_count"] for row in response.json()["results"]]
        self.assertEqual(observed, sorted(observed))

    # --- apply_defaults handling ---

    def test_verified_filter_respects_apply_defaults(self):
        self.project.default_filters_exclude_taxa.add(self.atalanta)

        with_defaults = set(self._list_by_name(self.list_url + "&verified=true"))
        self.assertEqual(with_defaults, {"Vanessa cardui"})

        without_defaults = set(self._list_by_name(self.list_url + "&verified=true&apply_defaults=false"))
        self.assertEqual(without_defaults, {"Vanessa cardui", "Vanessa atalanta"})

0 commit comments

Comments
 (0)