Skip to content

Commit 50d618e

Browse files
mihowclaude
andcommitted
feat(perms,tasks): regroup_sessions_deployment perm + idempotent regroup task
Addresses takeaway-review feedback on PR #1292: - Add `regroup_sessions_deployment` custom Project permission so non-superusers with the ProjectManager role can hit `POST /deployments/<pk>/regroup-sessions/` (mirrors `sync_deployment`). Without this, `BaseModel.check_custom_permission` computes the codename `regroup_sessions_deployment` and the perm doesn't exist, so the action 403s for everyone except superusers. - Wrap `ami.tasks.regroup_events` in a per-deployment cache lock. Concurrent enqueues (double-clicks, sync_captures auto-regroup racing with manual trigger) collapse to a single run instead of overlapping on the same rows. Lower-level than the view so all callers (Deployment.save, admin bulk action, new API action) are protected. - Defensive clamp on `session_time_gap_seconds` — fall back to the historical 120-minute default if the value is 0 / negative, with a warning log. Stops pathological "every gap = new event" behavior when admins enter bad values. - Migration 0085 adds the new perm to `Project.Meta.permissions` and grants it to existing `ProjectManager` role groups via a data migration (mirrors how 0084 revoked `delete_job`). - Tests: - test_invalid_session_time_gap_falls_back_to_default exercises 0 / -1 / -7200 - test_regroup_events_task_is_idempotent_under_concurrent_calls pre-takes the cache lock, asserts a second call skips group_images_into_events Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 55cef8b commit 50d618e

5 files changed

Lines changed: 228 additions & 9 deletions

File tree

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
"""
2+
Add the ``regroup_sessions_deployment`` custom permission to ``Project`` and
3+
grant it to existing ``ProjectManager`` role groups.
4+
5+
The new ``POST /api/v2/deployments/<pk>/regroup-sessions/`` action runs through
6+
``BaseModel.check_custom_permission``, which builds the codename as
7+
``{action}_{model_name}`` — for the ``regroup_sessions`` action on a
8+
``Deployment`` viewset that resolves project permission via the parent project,
9+
the perm needed is ``regroup_sessions_deployment``. Mirrors how
10+
``sync_deployment`` is granted in ``ami.users.roles.ProjectManager``.
11+
"""
12+
13+
from django.db import migrations
14+
from django.db.models import Q
15+
16+
17+
def grant_regroup_sessions_to_project_managers(apps, schema_editor):
18+
Group = apps.get_model("auth", "Group")
19+
Permission = apps.get_model("auth", "Permission")
20+
ContentType = apps.get_model("contenttypes", "ContentType")
21+
22+
try:
23+
project_ct = ContentType.objects.get(app_label="main", model="project")
24+
except ContentType.DoesNotExist:
25+
return
26+
27+
perm, _ = Permission.objects.get_or_create(
28+
codename="regroup_sessions_deployment",
29+
content_type=project_ct,
30+
defaults={"name": "Can regroup deployment captures into sessions"},
31+
)
32+
33+
role_groups = Group.objects.filter(Q(name__endswith="_ProjectManager"))
34+
for group in role_groups:
35+
group.permissions.add(perm)
36+
37+
38+
def revoke_regroup_sessions_from_project_managers(apps, schema_editor):
39+
Group = apps.get_model("auth", "Group")
40+
Permission = apps.get_model("auth", "Permission")
41+
ContentType = apps.get_model("contenttypes", "ContentType")
42+
GroupObjectPermission = apps.get_model("guardian", "GroupObjectPermission")
43+
44+
try:
45+
project_ct = ContentType.objects.get(app_label="main", model="project")
46+
except ContentType.DoesNotExist:
47+
return
48+
try:
49+
perm = Permission.objects.get(codename="regroup_sessions_deployment", content_type=project_ct)
50+
except Permission.DoesNotExist:
51+
return
52+
53+
role_groups = Group.objects.filter(Q(name__endswith="_ProjectManager"))
54+
for group in role_groups:
55+
group.permissions.remove(perm)
56+
57+
GroupObjectPermission.objects.filter(
58+
permission=perm,
59+
content_type=project_ct,
60+
group__in=role_groups,
61+
).delete()
62+
63+
64+
class Migration(migrations.Migration):
65+
dependencies = [
66+
("main", "0084_revoke_delete_job_from_roles"),
67+
("guardian", "0002_generic_permissions_index"),
68+
]
69+
70+
operations = [
71+
migrations.AlterModelOptions(
72+
name="project",
73+
options={
74+
"ordering": ["-priority", "created_at"],
75+
"permissions": [
76+
("create_identification", "Can create identifications"),
77+
("update_identification", "Can update identifications"),
78+
("delete_identification", "Can delete identifications"),
79+
("create_job", "Can create a job"),
80+
("update_job", "Can update a job"),
81+
("run_ml_job", "Can run/retry/cancel ML jobs"),
82+
("run_populate_captures_collection_job", "Can run/retry/cancel Populate Collection jobs"),
83+
("run_data_storage_sync_job", "Can run/retry/cancel Data Storage Sync jobs"),
84+
("run_data_export_job", "Can run/retry/cancel Data Export jobs"),
85+
("run_single_image_ml_job", "Can process a single capture"),
86+
("run_post_processing_job", "Can run/retry/cancel Post-Processing jobs"),
87+
("delete_job", "Can delete a job"),
88+
("create_deployment", "Can create a deployment"),
89+
("delete_deployment", "Can delete a deployment"),
90+
("update_deployment", "Can update a deployment"),
91+
("sync_deployment", "Can sync images to a deployment"),
92+
("regroup_sessions_deployment", "Can regroup deployment captures into sessions"),
93+
("create_sourceimagecollection", "Can create a collection"),
94+
("update_sourceimagecollection", "Can update a collection"),
95+
("delete_sourceimagecollection", "Can delete a collection"),
96+
("populate_sourceimagecollection", "Can populate a collection"),
97+
("create_sourceimage", "Can create a source image"),
98+
("update_sourceimage", "Can update a source image"),
99+
("delete_sourceimage", "Can delete a source image"),
100+
("star_sourceimage", "Can star a source image"),
101+
("create_sourceimageupload", "Can create a source image upload"),
102+
("update_sourceimageupload", "Can update a source image upload"),
103+
("delete_sourceimageupload", "Can delete a source image upload"),
104+
("create_s3storagesource", "Can create storage"),
105+
("delete_s3storagesource", "Can delete storage"),
106+
("update_s3storagesource", "Can update storage"),
107+
("test_s3storagesource", "Can test storage connection"),
108+
("create_site", "Can create a site"),
109+
("delete_site", "Can delete a site"),
110+
("update_site", "Can update a site"),
111+
("create_device", "Can create a device"),
112+
("delete_device", "Can delete a device"),
113+
("update_device", "Can update a device"),
114+
("view_userprojectmembership", "Can view project members"),
115+
("create_userprojectmembership", "Can add a user to the project"),
116+
("update_userprojectmembership", "Can update a user's project membership and role in the project"),
117+
("delete_userprojectmembership", "Can remove a user from the project"),
118+
("create_dataexport", "Can create a data export"),
119+
("update_dataexport", "Can update a data export"),
120+
("delete_dataexport", "Can delete a data export"),
121+
("create_projectpipelineconfig", "Can register pipelines for the project"),
122+
("update_projectpipelineconfig", "Can update pipeline configurations"),
123+
("delete_projectpipelineconfig", "Can remove pipelines from the project"),
124+
("create_taxalist", "Can create a taxa list"),
125+
("update_taxalist", "Can update a taxa list"),
126+
("delete_taxalist", "Can delete a taxa list"),
127+
("view_private_data", "Can view private data"),
128+
],
129+
},
130+
),
131+
migrations.RunPython(
132+
grant_regroup_sessions_to_project_managers,
133+
revoke_regroup_sessions_from_project_managers,
134+
),
135+
]

ami/main/models.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,7 @@ class Permissions:
419419
DELETE_DEPLOYMENT = "delete_deployment"
420420
UPDATE_DEPLOYMENT = "update_deployment"
421421
SYNC_DEPLOYMENT = "sync_deployment"
422+
REGROUP_SESSIONS_DEPLOYMENT = "regroup_sessions_deployment"
422423

423424
# Collection permissions
424425
CREATE_COLLECTION = "create_sourceimagecollection"
@@ -499,6 +500,7 @@ class Meta:
499500
("delete_deployment", "Can delete a deployment"),
500501
("update_deployment", "Can update a deployment"),
501502
("sync_deployment", "Can sync images to a deployment"),
503+
("regroup_sessions_deployment", "Can regroup deployment captures into sessions"),
502504
# Collection permissions
503505
("create_sourceimagecollection", "Can create a collection"),
504506
("update_sourceimagecollection", "Can update a collection"),
@@ -1399,10 +1401,19 @@ def group_images_into_events(
13991401
max_event_duration: datetime.timedelta | None = DEFAULT_MAX_EVENT_DURATION,
14001402
) -> list[Event]:
14011403
if max_time_gap is None:
1404+
default_gap = datetime.timedelta(minutes=120)
14021405
if deployment.project_id:
1403-
max_time_gap = datetime.timedelta(seconds=deployment.project.session_time_gap_seconds)
1406+
gap_seconds = deployment.project.session_time_gap_seconds
1407+
if gap_seconds is None or gap_seconds <= 0:
1408+
logger.warning(
1409+
f"Project {deployment.project_id} has invalid session_time_gap_seconds "
1410+
f"({gap_seconds!r}); falling back to default {default_gap}"
1411+
)
1412+
max_time_gap = default_gap
1413+
else:
1414+
max_time_gap = datetime.timedelta(seconds=gap_seconds)
14041415
else:
1405-
max_time_gap = datetime.timedelta(minutes=120)
1416+
max_time_gap = default_gap
14061417
# Log a warning if multiple SourceImages have the same timestamp
14071418
dupes = (
14081419
SourceImage.objects.filter(deployment=deployment)

ami/main/tests.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,60 @@ def test_session_time_gap_seconds_is_used_when_no_explicit_gap(self):
525525
count_4h = Event.objects.filter(deployment=self.deployment).count()
526526
assert count_4h == 1, f"expected 1 Event at project setting 14400s, got {count_4h}"
527527

528+
def test_invalid_session_time_gap_falls_back_to_default(self):
529+
"""
530+
Non-positive (0 or negative) ``session_time_gap_seconds`` would
531+
otherwise split every timestamp into its own Event. Guard by falling
532+
back to the historical 120-minute default and logging a warning.
533+
"""
534+
self._create_burst(datetime.datetime(2023, 8, 5, 22, 0), n=6, interval_minutes=5)
535+
self._create_burst(datetime.datetime(2023, 8, 6, 1, 0), n=6, interval_minutes=5)
536+
537+
for bad_value in (0, -1, -7200):
538+
Event.objects.filter(deployment=self.deployment).delete()
539+
self.project.session_time_gap_seconds = bad_value
540+
self.project.save()
541+
self.deployment.refresh_from_db()
542+
group_images_into_events(deployment=self.deployment)
543+
count = Event.objects.filter(deployment=self.deployment).count()
544+
# Default 120-min gap on this cross-midnight pattern → 2 Events,
545+
# NOT 12 (which is what bad_value=0 would produce without the guard).
546+
assert count == 2, f"expected 2 Events at gap={bad_value} (default fallback), got {count}"
547+
548+
def test_regroup_events_task_is_idempotent_under_concurrent_calls(self):
549+
"""
550+
``ami.tasks.regroup_events`` uses a per-deployment cache lock so
551+
concurrent enqueues collapse to a single run. The second call should
552+
skip without touching the DB.
553+
"""
554+
from unittest.mock import patch
555+
556+
from django.core.cache import cache
557+
558+
from ami.tasks import regroup_events
559+
560+
# Make sure no stale lock from a prior test leaks in.
561+
cache.delete(f"regroup_events:lock:deployment:{self.deployment.pk}")
562+
563+
self._create_burst(datetime.datetime(2023, 8, 5, 22, 0), n=6, interval_minutes=5)
564+
565+
with patch("ami.main.models.group_images_into_events") as mock_group:
566+
# Pre-take the lock to simulate an in-flight run.
567+
cache.add(f"regroup_events:lock:deployment:{self.deployment.pk}", 1, timeout=60)
568+
try:
569+
regroup_events(self.deployment.pk)
570+
finally:
571+
cache.delete(f"regroup_events:lock:deployment:{self.deployment.pk}")
572+
assert mock_group.call_count == 0, "Locked run should skip group_images_into_events"
573+
574+
# Without the pre-taken lock, the next call should run normally and release the lock.
575+
with patch("ami.main.models.group_images_into_events", return_value=[]) as mock_group:
576+
regroup_events(self.deployment.pk)
577+
assert mock_group.call_count == 1, "Unlocked run should invoke group_images_into_events"
578+
assert (
579+
cache.get(f"regroup_events:lock:deployment:{self.deployment.pk}") is None
580+
), "Lock should be released after the task body completes"
581+
528582
def test_pruning_empty_events(self):
529583
from ami.main.models import delete_empty_events
530584

ami/tasks.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22

33
from django.apps import apps
4+
from django.core.cache import cache
45
from django.db import models
56

67
from config import celery_app
@@ -13,6 +14,11 @@
1314
default_time_limit = two_days + one_hour
1415
default_soft_time_limit = two_days
1516

17+
# Lock TTL for regroup_events. Matches the task's soft_time_limit so a stuck
18+
# task cannot wedge the lock indefinitely; under normal load regroup completes
19+
# in seconds-to-minutes, well below this ceiling.
20+
REGROUP_EVENTS_LOCK_TTL = one_hour
21+
1622

1723
# @TODO use shared_task decorator instead of celery_app?
1824
@celery_app.task(soft_time_limit=two_days, time_limit=two_days + one_hour)
@@ -91,14 +97,26 @@ def populate_collection(collection_id: int) -> None:
9197
def regroup_events(deployment_id: int) -> None:
9298
from ami.main.models import Deployment, group_images_into_events
9399

94-
try:
95-
deployment = Deployment.objects.get(id=deployment_id)
96-
except Deployment.DoesNotExist:
97-
logger.error(f"Deployment with id {deployment_id} not found")
100+
# Per-deployment idempotency lock. Concurrent enqueues (e.g. user double-
101+
# clicks the regroup button, sync_captures auto-regroup races with manual
102+
# admin trigger) collapse to a single run. The cache.add() is atomic
103+
# SETNX-style: only the first caller takes the lock; later callers exit
104+
# early without touching the DB.
105+
lock_key = f"regroup_events:lock:deployment:{deployment_id}"
106+
if not cache.add(lock_key, 1, timeout=REGROUP_EVENTS_LOCK_TTL):
107+
logger.info(f"regroup_events skipped for deployment {deployment_id}: another run is in progress.")
98108
return
99-
logger.info(f"Grouping captures for {deployment}")
100-
events = group_images_into_events(deployment)
101-
logger.info(f"{deployment} now has {len(events)} events")
109+
try:
110+
try:
111+
deployment = Deployment.objects.get(id=deployment_id)
112+
except Deployment.DoesNotExist:
113+
logger.error(f"Deployment with id {deployment_id} not found")
114+
return
115+
logger.info(f"Grouping captures for {deployment}")
116+
events = group_images_into_events(deployment)
117+
logger.info(f"{deployment} now has {len(events)} events")
118+
finally:
119+
cache.delete(lock_key)
102120

103121

104122
@celery_app.task(soft_time_limit=one_hour, time_limit=one_hour + 60)

ami/users/roles.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ class ProjectManager(Role):
181181
Project.Permissions.UPDATE_DEPLOYMENT,
182182
Project.Permissions.DELETE_DEPLOYMENT,
183183
Project.Permissions.SYNC_DEPLOYMENT,
184+
Project.Permissions.REGROUP_SESSIONS_DEPLOYMENT,
184185
Project.Permissions.CREATE_SITE,
185186
Project.Permissions.UPDATE_SITE,
186187
Project.Permissions.DELETE_SITE,

0 commit comments

Comments
 (0)