@@ -2430,31 +2430,22 @@ def get_best_detection(self) -> Detection | None:
24302430
def get_best_predictions(self, filters: dict | None = None) -> models.QuerySet[Classification]:
    """
    Retrieve all classifications for this occurrence, newest first.

    This data is for the list of predictions in the Identification tab of the Occurrence Detail view
    in the UI. See the OccurrenceSerializer for the serializer method.

    If this is needed for a list view (multiple occurrences) it should be overridden
    in the viewset to use the pre-fetched classifications instead of hitting the database
    for each occurrence (n+1 query problem).

    In the past, this was a more complex query that returned a single result
    for each algorithm, but now it returns all classifications for the occurrence.

    Args:
        filters: Optional extra lookups, passed as keyword arguments to
            ``Classification.objects.filter()``. ``None`` means no extra lookups.

    Returns:
        A queryset of the classifications whose detection belongs to this
        occurrence, ordered by ``-created_at``.
    """
    # Default to None rather than a shared ``{}`` so one dict object is never
    # reused between calls.
    extra_filters = filters or {}

    return Classification.objects.filter(
        detection__occurrence=self,
        **extra_filters,
    ).order_by("-created_at")
@@ -2473,6 +2464,22 @@ def get_best_prediction(self, filters: dict = {}) -> Classification | None:
24732464 # Get all classifications for this occurrence to choose from
24742465 all_classifications = Classification .objects .filter (detection__occurrence = self , ** filters )
24752466
2467+ # Prioritize derived classifications (e.g. clustering) regardless of score
2468+ derived_classification_task_types = (
2469+ "clustering" ,
2470+ "tracking" ,
2471+ )
2472+ derived_classification = (
2473+ all_classifications .filter (
2474+ algorithm__task_type__in = derived_classification_task_types ,
2475+ terminal = True ,
2476+ )
2477+ .order_by ("-created_at" )
2478+ .first ()
2479+ )
2480+ if derived_classification :
2481+ return derived_classification
2482+
24762483 # First try to get a terminal classification
24772484 terminal_classification = all_classifications .filter (terminal = True ).order_by ("-score" , "-created_at" ).first ()
24782485 if terminal_classification :
@@ -2481,6 +2488,9 @@ def get_best_prediction(self, filters: dict = {}) -> Classification | None:
24812488 # If no terminal classification exists, fall back to non-terminal
24822489 return all_classifications .filter (terminal = False ).order_by ("-score" ).first ()
24832490
def get_best_ood_prediction(self) -> Classification | None:
    """
    Return the best prediction among classifications that have an OOD score.

    Delegates to get_best_prediction(), restricted to classifications whose
    ``ood_score`` is not null.
    """
    has_ood_score = {"ood_score__isnull": False}
    return self.get_best_prediction(filters=has_ood_score)
2493+
24842494 def get_best_identification (self ) -> Identification | None :
24852495 """
24862496 The most recent human identification is used as the best identification.
@@ -2489,17 +2499,17 @@ def get_best_identification(self) -> Identification | None:
24892499 """
24902500 return Identification .objects .filter (occurrence = self , withdrawn = False ).order_by ("-created_at" ).first ()
24912501
def get_determination_score(self, prediction: Classification | None = None) -> float | None:
    """
    Return the score of the machine prediction used for this occurrence.

    Always return a score from an algorithm, even if a human has identified the occurrence.

    Args:
        prediction: An already-fetched classification to read the score from.
            When omitted (or falsy), get_best_prediction() is called instead.

    Returns:
        The prediction's ``score``, or None when no prediction is available.
    """
    chosen = prediction or self.get_best_prediction()
    return chosen.score if chosen else None
25012511
2502- def get_determination_ood_score (self ) -> float | None :
2512+ def get_determination_ood_score (self , prediction : Classification | None = None ) -> float | None :
25032513 """
25042514 Calculate the OOD score for the whole occurrence.
25052515 Uses the average OOD score of all detections belonging to this occurrence.
@@ -2508,16 +2518,16 @@ def get_determination_ood_score(self) -> float | None:
25082518 """
25092519 # Get the best prediction that has an OOD score
25102520 # this should be the last classification before the clustering algorithm
2511- # @TODO copy the OOD score from the best classification to the clustering classification during clustering
2512- best_prediction = self .get_best_prediction (filters = {"ood_score__isnull" : False })
2521+ best_prediction = prediction or self .get_best_ood_prediction ()
25132522 if not best_prediction :
25142523 return None
2515- mean_ood_score = Classification .objects .filter (
2516- detection__occurrence = self ,
2517- ood_score__isnull = False ,
2518- algorithm = best_prediction .algorithm ,
2519- ).aggregate (models .Avg ("ood_score" ),)["ood_score__avg" ]
2520- return mean_ood_score
2524+ else :
2525+ mean_ood_score = Classification .objects .filter (
2526+ detection__occurrence = self ,
2527+ ood_score__isnull = False ,
2528+ algorithm = best_prediction .algorithm ,
2529+ ).aggregate (models .Avg ("ood_score" ),)["ood_score__avg" ]
2530+ return mean_ood_score
25212531
25222532 def context_url (self ):
25232533 detection = self .best_detection
@@ -2540,16 +2550,6 @@ def save(self, update_determination=True, *args, **kwargs):
25402550 save = True ,
25412551 )
25422552
2543- if self .determination and not self .determination_score :
2544- # This may happen for legacy occurrences that were created
2545- # before the determination_score field was added
2546- # @TODO remove
2547- self .determination_score = self .get_determination_score ()
2548- if not self .determination_score :
2549- logger .warning (f"Could not determine score for { self } " )
2550- else :
2551- self .save (update_determination = False )
2552-
25532553 class Meta :
25542554 ordering = ["-determination_score" ]
25552555
@@ -2584,23 +2584,24 @@ def update_occurrence_determination(
25842584
25852585 # Collect all necessary values first
25862586 best_identification = occurrence .get_best_identification ()
2587- best_prediction = occurrence .get_best_prediction () if not best_identification else None
2587+ best_prediction = occurrence .get_best_prediction ()
2588+ best_ood_prediction = occurrence .get_best_ood_prediction ()
25882589
25892590 # Best detection is used as the representative image for the occurrence in either case
25902591 best_detection = occurrence .get_best_detection ()
25912592
2592- # Determine values for all attributes
2593+ # Update the determination (Taxon) first
25932594 new_determination = None
2594- new_determination_score = None
2595- new_determination_ood_score = occurrence .get_determination_ood_score ()
25962595
25972596 # Identifications take precedence over machine predictions
25982597 if best_identification :
25992598 new_determination = best_identification .taxon
2600- new_determination_score = best_identification .score
26012599 elif best_prediction :
26022600 new_determination = best_prediction .taxon
2603- new_determination_score = best_prediction .score
2601+
2602+ # Update scores, which may or may not come from the same source as the determination
2603+ new_determination_score = occurrence .get_determination_score (prediction = best_prediction )
2604+ new_determination_ood_score = occurrence .get_determination_ood_score (prediction = best_ood_prediction )
26042605
26052606 # Prepare fields that need to be updated (using a dictionary for bulk update)
26062607 update_fields = {}
@@ -2862,6 +2863,7 @@ class Config:
28622863 # so we can sort by rank. The DRF serializer will convert it to a string.
28632864 # just for the API responses.
28642865 use_enum_values = False
2866+ frozen = True # Allow hashing for use in a set
28652867
28662868
28672869@final
@@ -3099,6 +3101,60 @@ def save(self, update_calculated_fields=True, *args, **kwargs):
30993101 self .update_calculated_fields (save = True )
31003102
31013103
def find_common_ancestor_taxon(
    taxa: list["Taxon"],
    ignore_missing_parents: bool = True,
) -> typing.Optional["Taxon"]:
    """
    Find the most specific taxon shared by the lineages of every taxon in a list.

    Each taxon's lineage is its ``parents_json`` entries plus the taxon itself,
    so the result may be one of the input taxa.

    Args:
        taxa (list[Taxon]): A list of Taxon objects.
        ignore_missing_parents (bool): If True, taxa with an empty ``parents_json``
            are left out of the comparison and a warning is logged. Defaults to True.

    Returns:
        Taxon | None: The common ancestor taxon, or None if ``taxa`` is empty,
        no taxa remain after filtering, or the lineages share no entry.
    """
    if not taxa:
        return None

    # Filter taxa based on whether they have parents
    valid_taxa = taxa
    if ignore_missing_parents:
        valid_taxa = [t for t in taxa if t.parents_json]
        rootless_count = len(taxa) - len(valid_taxa)
        if rootless_count:
            logger.warning(f"Ignoring {rootless_count} rootless taxa")

    if not valid_taxa:
        logger.error("No taxa with parents found")
        return None

    # Build one lineage set per taxon. The entries must be hashable to live in a set.
    ancestor_sets = []
    for taxon in valid_taxa:
        # ``or ()`` keeps a missing parents_json from raising when
        # ignore_missing_parents is False; the lineage is then just the taxon itself.
        ancestors = set(taxon.parents_json or ())
        # Include the taxon itself
        ancestors.add(TaxonParent(id=taxon.pk, name=taxon.name, rank=TaxonRank(taxon.rank)))
        ancestor_sets.append(ancestors)

    # Only entries present in every lineage are common ancestors
    common_ancestors = set.intersection(*ancestor_sets)

    if not common_ancestors:
        logger.info("No common ancestor found")
        return None

    # Most specific common ancestor = latest position in TaxonRank definition order.
    # Build the ordering once instead of once per candidate.
    rank_order = list(TaxonRank)
    best_ancestor = max(common_ancestors, key=lambda a: rank_order.index(a.rank))

    logger.info(f"Common ancestor: {best_ancestor.name} ({best_ancestor.rank})")

    # Return the actual Taxon object
    from .models import Taxon

    return Taxon.objects.get(id=best_ancestor.id)
3156+
3157+
31023158@final
31033159class TaxaList (BaseModel ):
31043160 """A checklist of taxa"""
0 commit comments