@@ -1402,19 +1402,6 @@ def filter_queryset(self, request, queryset, view):
14021402 return queryset .distinct ()
14031403
14041404
class JSONBContains(models.Func):
    """Boolean expression rendering the Postgres ``@>`` (containment) operator.

    The argument expressions are joined with ``@>`` and the result is wrapped
    in parentheses, so ``JSONBContains(lhs, rhs)`` renders as ``(lhs @> rhs)``.

    This exists for correlated subqueries whose right-hand side is built from
    an ``OuterRef``: the literal ``parents_json__contains=[{"id": ...}]``
    lookup can't embed an ``OuterRef`` (it would try to JSON-serialize the
    expression).
    """

    # The expression evaluates to a boolean, so it can be filtered on directly.
    output_field = models.BooleanField()
    # Parenthesize the joined operands instead of emitting ``FUNCTION(...)``.
    template = "(%(expressions)s)"
    # Operands are separated by the containment operator rather than a comma.
    arg_joiner = " @> "
1416-
1417-
14181405class TaxonViewSet (DefaultViewSet , ProjectMixin ):
14191406 """
14201407 API endpoint that allows taxa to be viewed or edited.
@@ -1674,36 +1661,6 @@ def get_taxa_observed(
16741661
16751662 return qs
16761663
def _occurrences_under_taxon(self, occurrence_filters: models.Q, default_filters_q: models.Q) -> QuerySet:
    """
    Build a correlated Occurrence queryset for the outer Taxon and its descendants.

    An occurrence matches when its determination is the outer Taxon itself
    (``OuterRef("id")``) or when the determination's ``parents_json`` contains
    an entry ``{"id": <outer taxon id>}``. The queryset is first narrowed by
    ``occurrence_filters`` and then by ``default_filters_q``.

    Per the original author's note, this mirrors the hierarchical match used by
    the occurrence-list ``taxon=<id>`` filter (``CustomOccurrenceDeterminationFilter``),
    but builds the descendant test with an ``OuterRef`` right-hand side so a
    Family/Order row aggregates all its descendant species' occurrences.

    The result references ``OuterRef`` and is therefore only usable inside a
    subquery evaluated against a Taxon queryset.
    """
    # SQL side: jsonb_build_object('id', <outer taxon id>) -> {"id": <outer taxon id>}
    outer_taxon_entry = models.Func(
        models.Value("id"),
        models.OuterRef("id"),
        function="jsonb_build_object",
    )
    # SQL side: jsonb_build_array(<entry>) -> [{"id": <outer taxon id>}]
    outer_taxon_array = models.Func(
        outer_taxon_entry,
        function="jsonb_build_array",
        output_field=models.JSONField(),
    )
    # determination.parents_json @> [{"id": <outer taxon id>}]
    is_descendant = JSONBContains(
        models.F("determination__parents_json"),
        outer_taxon_array,
    )

    is_outer_taxon = models.Q(determination_id=models.OuterRef("id"))

    # The two filters are applied as separate ``.filter()`` calls, as before.
    scoped = Occurrence.objects.filter(occurrence_filters)
    scoped = scoped.filter(default_filters_q)
    # ``alias`` makes the containment test filterable without selecting it.
    scoped = scoped.alias(_under_taxon=is_descendant)
    return scoped.filter(is_outer_taxon | models.Q(_under_taxon=True))
1706-
17071664 def _include_agreement (self ) -> bool :
17081665 """Whether the heavier ``agreed_exact_count`` annotation should be computed."""
17091666 if self .action == "retrieve" :
@@ -1717,65 +1674,96 @@ def add_verification_data(
17171674 Annotate per-taxon verification and human/model agreement counts, and apply the
17181675 ``verified=true|false`` filter on list responses.
17191676
1720- All counts roll up descendant occurrences via ``_occurrences_under_taxon`` and
1721- respect the project's default filters (same ``apply_defaults`` handling as
1722- ``occurrences_count``).
1723- """
1724- under_taxon = self ._occurrences_under_taxon (occurrence_filters , default_filters_q )
1677+ Counts roll up descendant occurrences (verifying a species also counts toward its
1678+ genus/family rows) and respect the project's default filters (same
1679+ ``apply_defaults`` handling as ``occurrences_count``).
17251680
1726- has_identification = models .Exists (
1727- Identification .objects .filter (occurrence = models .OuterRef ("pk" ), withdrawn = False )
1728- )
1729- verified_occurrences = under_taxon .filter (has_identification )
1730-
1731- def correlated_count (occurrence_qs : QuerySet ) -> Coalesce :
1732- # Group by project_id (constant within the subquery) to collapse the
1733- # hierarchical match — determination_id varies across descendants so it
1734- # can't be the grouping key.
1735- return Coalesce (
1736- models .Subquery (
1737- occurrence_qs .values ("project_id" ).annotate (c = models .Count ("id" )).values ("c" )[:1 ],
1738- output_field = models .IntegerField (),
1739- ),
1740- 0 ,
1741- )
1681+ All three counts only concern *verified* occurrences (those with a non-withdrawn
1682+ Identification), which are sparse relative to all occurrences. So the hierarchical
1683+ rollup is computed in a single pass over that small set in Python and applied as
1684+ constant-time ``CASE`` annotations. A correlated ``parents_json`` subquery per
1685+ taxon does not scale: on large projects it forces a per-row scan that the GIN
1686+ index can't serve (the containment RHS is an ``OuterRef``), timing out the list.
1687+ """
1688+ include_agreement = self ._include_agreement ()
17421689
1743- # The chosen (best, non-withdrawn) identification's agreed_with_prediction FK is set .
1690+ # The chosen (best, non-withdrawn) identification's agreed_with_prediction FK.
17441691 best_identification_agreed_prediction = models .Subquery (
17451692 Identification .objects .filter (occurrence = models .OuterRef ("pk" ), withdrawn = False )
17461693 .order_by (* BEST_IDENTIFICATION_ORDER )
17471694 .values ("agreed_with_prediction_id" )[:1 ]
17481695 )
1749- agreed_with_prediction_occurrences = under_taxon .annotate (
1750- _best_agreed_prediction = best_identification_agreed_prediction
1751- ).filter (_best_agreed_prediction__isnull = False )
1752-
1753- qs = qs .annotate (
1754- verified_count = correlated_count (verified_occurrences ),
1755- agreed_with_prediction_count = correlated_count (agreed_with_prediction_occurrences ),
1696+ verified_occurrences = (
1697+ Occurrence .objects .filter (occurrence_filters )
1698+ .filter (default_filters_q )
1699+ .filter (models .Exists (Identification .objects .filter (occurrence = models .OuterRef ("pk" ), withdrawn = False )))
1700+ .annotate (_agreed_prediction_id = best_identification_agreed_prediction )
17561701 )
1757-
1758- if self ._include_agreement ():
1759- # Verified occurrence where the user determination equals the top machine
1760- # prediction's taxon for the same occurrence.
1761- best_machine_taxon = models .Subquery (
1762- Classification .objects .filter (detection__occurrence = models .OuterRef ("pk" ))
1763- .order_by (* BEST_MACHINE_PREDICTION_ORDER )
1764- .values ("taxon_id" )[:1 ]
1702+ value_fields = ["determination_id" , "determination__parents_json" , "_agreed_prediction_id" ]
1703+ if include_agreement :
1704+ # Top machine prediction's taxon for the same occurrence.
1705+ verified_occurrences = verified_occurrences .annotate (
1706+ _best_machine_taxon_id = models .Subquery (
1707+ Classification .objects .filter (detection__occurrence = models .OuterRef ("pk" ))
1708+ .order_by (* BEST_MACHINE_PREDICTION_ORDER )
1709+ .values ("taxon_id" )[:1 ]
1710+ )
17651711 )
1766- agreed_exact_occurrences = verified_occurrences .annotate (_best_machine_taxon = best_machine_taxon ).filter (
1767- determination_id = models .F ("_best_machine_taxon" )
1712+ value_fields .append ("_best_machine_taxon_id" )
1713+
1714+ verified_counts : dict [int , int ] = {}
1715+ agreed_with_prediction_counts : dict [int , int ] = {}
1716+ agreed_exact_counts : dict [int , int ] = {}
1717+ for row in verified_occurrences .values (* value_fields ):
1718+ determination_id = row ["determination_id" ]
1719+ # The taxon itself plus every ancestor — i.e. every row this occurrence rolls up to.
1720+ taxon_ids : set [int ] = set ()
1721+ if determination_id is not None :
1722+ taxon_ids .add (determination_id )
1723+ for parent in row ["determination__parents_json" ] or []:
1724+ # parents_json round-trips through the pydantic schema field, so elements
1725+ # may be dicts or ``TaxonParent`` objects depending on the query path.
1726+ parent_id = parent .get ("id" ) if isinstance (parent , dict ) else getattr (parent , "id" , None )
1727+ if parent_id is not None :
1728+ taxon_ids .add (int (parent_id ))
1729+
1730+ for taxon_id in taxon_ids :
1731+ verified_counts [taxon_id ] = verified_counts .get (taxon_id , 0 ) + 1
1732+ if row ["_agreed_prediction_id" ] is not None :
1733+ for taxon_id in taxon_ids :
1734+ agreed_with_prediction_counts [taxon_id ] = agreed_with_prediction_counts .get (taxon_id , 0 ) + 1
1735+ if (
1736+ include_agreement
1737+ and determination_id is not None
1738+ and determination_id == row ["_best_machine_taxon_id" ]
1739+ ):
1740+ for taxon_id in taxon_ids :
1741+ agreed_exact_counts [taxon_id ] = agreed_exact_counts .get (taxon_id , 0 ) + 1
1742+
1743+ def count_annotation (counts : dict [int , int ]) -> models .expressions .Combinable :
1744+ if not counts :
1745+ return models .Value (0 , output_field = models .IntegerField ())
1746+ return models .Case (
1747+ * (models .When (id = taxon_id , then = models .Value (count )) for taxon_id , count in counts .items ()),
1748+ default = models .Value (0 ),
1749+ output_field = models .IntegerField (),
17681750 )
1769- qs = qs .annotate (agreed_exact_count = correlated_count (agreed_exact_occurrences ))
17701751
1771- # verified=true|false filter (list only); the complement uses the same set, so
1772- # verified=false is the strict complement of verified=true on the filtered taxa.
1752+ qs = qs .annotate (
1753+ verified_count = count_annotation (verified_counts ),
1754+ agreed_with_prediction_count = count_annotation (agreed_with_prediction_counts ),
1755+ )
1756+ if include_agreement :
1757+ qs = qs .annotate (agreed_exact_count = count_annotation (agreed_exact_counts ))
1758+
1759+ # verified=true|false filter (list only); verified=false is the strict complement.
17731760 if self .action == "list" and "verified" in self .request .query_params :
17741761 verified = BooleanField (required = False ).clean (self .request .query_params .get ("verified" ))
1762+ verified_taxon_ids = list (verified_counts .keys ())
17751763 if verified :
1776- qs = qs .filter (models . Exists ( verified_occurrences ) )
1764+ qs = qs .filter (id__in = verified_taxon_ids )
17771765 else :
1778- qs = qs .filter ( ~ models . Exists ( verified_occurrences ) )
1766+ qs = qs .exclude ( id__in = verified_taxon_ids )
17791767
17801768 return qs
17811769
0 commit comments