Skip to content

Commit 25f4eb7

Browse files
committed
sources/ldap: add forward deletion option
1 parent c6333f9 commit 25f4eb7

File tree

11 files changed

+376
-6
lines changed

11 files changed

+376
-6
lines changed

authentik/sources/ldap/api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ class Meta:
111111
"sync_parent_group",
112112
"connectivity",
113113
"lookup_groups_from_user",
114+
"delete_not_found_objects",
114115
]
115116
extra_kwargs = {"bind_password": {"write_only": True}}
116117

@@ -147,6 +148,7 @@ class LDAPSourceViewSet(UsedByMixin, ModelViewSet):
147148
"user_property_mappings",
148149
"group_property_mappings",
149150
"lookup_groups_from_user",
151+
"delete_not_found_objects",
150152
]
151153
search_fields = ["name", "slug"]
152154
ordering = ["name"]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Generated by Django 5.1.9 on 2025-05-27 13:24
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    # Schema changes for the LDAP "delete not found objects" option: one
    # boolean flag on the source and one nullable UUID helper column on each
    # of the two LDAP source connection models.

    dependencies = [
        ("authentik_sources_ldap", "0008_groupldapsourceconnection_userldapsourceconnection"),
    ]

    operations = [
        # Nullable UUID helper column on group connections (see help_text).
        migrations.AddField(
            model_name="groupldapsourceconnection",
            name="validated_by",
            field=models.UUIDField(
                blank=True, help_text="Helper field for batch deletions", null=True
            ),
        ),
        # Opt-in flag on the source; defaults to False so existing sources
        # keep their current behaviour after the migration.
        migrations.AddField(
            model_name="ldapsource",
            name="delete_not_found_objects",
            field=models.BooleanField(
                blank=True,
                default=False,
                help_text="Delete authentik users and groups which were previously supplied by this source, but are now missing from it.",
            ),
        ),
        # Same nullable UUID helper column, on user connections.
        migrations.AddField(
            model_name="userldapsourceconnection",
            name="validated_by",
            field=models.UUIDField(
                blank=True, help_text="Helper field for batch deletions", null=True
            ),
        ),
    ]

authentik/sources/ldap/models.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,15 @@ class LDAPSource(Source):
137137
),
138138
)
139139

140+
delete_not_found_objects = models.BooleanField(
141+
default=False,
142+
blank=True,
143+
help_text=_(
144+
"Delete authentik users and groups which were previously supplied by this source, "
145+
"but are now missing from it."
146+
),
147+
)
148+
140149
@property
141150
def component(self) -> str:
142151
return "ak-source-ldap-form"
@@ -321,6 +330,10 @@ class Meta:
321330

322331

323332
class UserLDAPSourceConnection(UserSourceConnection):
333+
validated_by = models.UUIDField(
334+
null=True, blank=True, help_text=_("Helper field for batch deletions")
335+
)
336+
324337
@property
325338
def serializer(self) -> type[Serializer]:
326339
from authentik.sources.ldap.api import (
@@ -335,6 +348,10 @@ class Meta:
335348

336349

337350
class GroupLDAPSourceConnection(GroupSourceConnection):
351+
validated_by = models.UUIDField(
352+
null=True, blank=True, help_text=_("Helper field for batch deletions")
353+
)
354+
338355
@property
339356
def serializer(self) -> type[Serializer]:
340357
from authentik.sources.ldap.api import (

authentik/sources/ldap/sync/base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from authentik.core.sources.mapper import SourceMapper
1010
from authentik.lib.config import CONFIG
1111
from authentik.lib.sync.mapper import PropertyMappingManager
12-
from authentik.sources.ldap.models import LDAPSource
12+
from authentik.sources.ldap.models import LDAPSource, flatten
1313

1414

1515
class BaseLDAPSynchronizer:
@@ -77,6 +77,12 @@ def get_objects(self, **kwargs) -> Generator:
7777
"""Get objects from LDAP, implemented in subclass"""
7878
raise NotImplementedError()
7979

80+
def get_identifier(self, object):
    """Return the flattened uniqueness identifier of an LDAP search entry.

    The value is read from the entry's ``attributes`` mapping under the
    source's ``object_uniqueness_field``. ``None`` is returned when the
    entry has no attributes or the field is missing or empty.
    """
    uniqueness_field = self._source.object_uniqueness_field
    raw_value = object.get("attributes", {}).get(uniqueness_field)
    if not raw_value:
        return None
    return flatten(raw_value)
85+
8086
def search_paginator( # noqa: PLR0913
8187
self,
8288
search_base,
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from collections.abc import Generator
2+
from itertools import batched
3+
from uuid import uuid4
4+
5+
from ldap3 import SUBTREE
6+
7+
from authentik.core.models import Group
8+
from authentik.sources.ldap.models import GroupLDAPSourceConnection
9+
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
10+
from authentik.sources.ldap.sync.forward_delete_users import DELETE_CHUNK_SIZE, UPDATE_CHUNK_SIZE
11+
12+
13+
class GroupLDAPForwardDeletion(BaseLDAPSynchronizer):
    """Delete LDAP Groups from authentik

    Groups that have a connection to this source but are no longer returned
    by the source's LDAP group search are selected by `get_objects` and
    removed by `sync`.
    """

    @staticmethod
    def name() -> str:
        """Name of this synchronizer"""
        return "group_deletions"

    def get_objects(self, **kwargs) -> Generator:
        """Return batches of group primary keys that should be deleted.

        Every group connection of this source whose identifier is still
        returned by the LDAP search is stamped with a UUID generated for this
        run. Connections of this source that do not carry the stamp afterwards
        are returned, as tuples of at most DELETE_CHUNK_SIZE group primary keys.

        Extra keyword arguments are forwarded to the LDAP paged search.
        Returns an empty iterator when group sync or deletion of not-found
        objects is disabled on the source.
        """
        if not self._source.sync_groups or not self._source.delete_not_found_objects:
            # NOTE(review): this message is also emitted when only
            # `delete_not_found_objects` is turned off.
            self.message("Group syncing is disabled for this Source")
            return iter(())

        uuid = uuid4()
        groups = self._source.connection().extend.standard.paged_search(
            search_base=self.base_dn_groups,
            search_filter=self._source.group_object_filter,
            search_scope=SUBTREE,
            attributes=[self._source.object_uniqueness_field],
            generator=True,
            **kwargs,
        )
        for batch in batched(groups, UPDATE_CHUNK_SIZE, strict=False):
            identifiers = [
                identifier for group in batch if (identifier := self.get_identifier(group))
            ]
            # Only stamp connections that belong to this source. Without the
            # source filter, a concurrent sync of another source sharing the
            # same identifiers could overwrite our stamp with its own UUID,
            # and the exclude() below would then select groups that still
            # exist in LDAP.
            GroupLDAPSourceConnection.objects.filter(
                source=self._source, identifier__in=identifiers
            ).update(validated_by=uuid)

        return batched(
            GroupLDAPSourceConnection.objects.filter(source=self._source)
            .exclude(validated_by=uuid)
            .values_list("group", flat=True)
            .iterator(chunk_size=DELETE_CHUNK_SIZE),
            DELETE_CHUNK_SIZE,
            strict=False,
        )

    def sync(self, group_pks: tuple) -> int:
        """Delete authentik groups

        Returns the number of deleted `Group` rows, or -1 when group sync or
        deletion of not-found objects is disabled on the source.
        """
        if not self._source.sync_groups or not self._source.delete_not_found_objects:
            self.message("Group syncing is disabled for this Source")
            return -1
        self._logger.debug("Deleting groups", group_pks=group_pks)
        # delete() returns (total, {model_label: count}); only the Group count
        # is reported, not rows removed through cascades.
        _, deleted_per_type = Group.objects.filter(pk__in=group_pks).delete()
        return deleted_per_type.get(Group._meta.label, 0)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from collections.abc import Generator
2+
from itertools import batched
3+
from uuid import uuid4
4+
5+
from ldap3 import SUBTREE
6+
7+
from authentik.core.models import User
8+
from authentik.sources.ldap.models import UserLDAPSourceConnection
9+
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
10+
11+
# Number of LDAP search results whose connections are marked as validated
# with a single UPDATE query.
UPDATE_CHUNK_SIZE = 10_000
# Number of primary keys per deletion batch produced by get_objects().
DELETE_CHUNK_SIZE = 50
13+
14+
15+
class UserLDAPForwardDeletion(BaseLDAPSynchronizer):
    """Delete LDAP Users from authentik

    Users that have a connection to this source but are no longer returned
    by the source's LDAP user search are selected by `get_objects` and
    removed by `sync`.
    """

    @staticmethod
    def name() -> str:
        """Name of this synchronizer"""
        return "user_deletions"

    def get_objects(self, **kwargs) -> Generator:
        """Return batches of user primary keys that should be deleted.

        Every user connection of this source whose identifier is still
        returned by the LDAP search is stamped with a UUID generated for this
        run. Connections of this source that do not carry the stamp afterwards
        are returned, as tuples of at most DELETE_CHUNK_SIZE user primary keys.

        Extra keyword arguments are forwarded to the LDAP paged search.
        Returns an empty iterator when user sync or deletion of not-found
        objects is disabled on the source.
        """
        if not self._source.sync_users or not self._source.delete_not_found_objects:
            # NOTE(review): this message is also emitted when only
            # `delete_not_found_objects` is turned off.
            self.message("User syncing is disabled for this Source")
            return iter(())

        uuid = uuid4()
        users = self._source.connection().extend.standard.paged_search(
            search_base=self.base_dn_users,
            search_filter=self._source.user_object_filter,
            search_scope=SUBTREE,
            attributes=[self._source.object_uniqueness_field],
            generator=True,
            **kwargs,
        )
        for batch in batched(users, UPDATE_CHUNK_SIZE, strict=False):
            identifiers = [
                identifier for user in batch if (identifier := self.get_identifier(user))
            ]
            # Only stamp connections that belong to this source. Without the
            # source filter, a concurrent sync of another source sharing the
            # same identifiers could overwrite our stamp with its own UUID,
            # and the exclude() below would then select users that still
            # exist in LDAP.
            UserLDAPSourceConnection.objects.filter(
                source=self._source, identifier__in=identifiers
            ).update(validated_by=uuid)

        return batched(
            UserLDAPSourceConnection.objects.filter(source=self._source)
            .exclude(validated_by=uuid)
            .values_list("user", flat=True)
            .iterator(chunk_size=DELETE_CHUNK_SIZE),
            DELETE_CHUNK_SIZE,
            strict=False,
        )

    def sync(self, user_pks: tuple) -> int:
        """Delete authentik users

        Returns the number of deleted `User` rows, or -1 when user sync or
        deletion of not-found objects is disabled on the source.
        """
        if not self._source.sync_users or not self._source.delete_not_found_objects:
            self.message("User syncing is disabled for this Source")
            return -1
        self._logger.debug("Deleting users", user_pks=user_pks)
        # delete() returns (total, {model_label: count}); only the User count
        # is reported, not rows removed through cascades.
        _, deleted_per_type = User.objects.filter(pk__in=user_pks).delete()
        return deleted_per_type.get(User._meta.label, 0)

authentik/sources/ldap/tasks.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from authentik.root.celery import CELERY_APP
1818
from authentik.sources.ldap.models import LDAPSource
1919
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
20+
from authentik.sources.ldap.sync.forward_delete_groups import GroupLDAPForwardDeletion
21+
from authentik.sources.ldap.sync.forward_delete_users import UserLDAPForwardDeletion
2022
from authentik.sources.ldap.sync.groups import GroupLDAPSynchronizer
2123
from authentik.sources.ldap.sync.membership import MembershipLDAPSynchronizer
2224
from authentik.sources.ldap.sync.users import UserLDAPSynchronizer
@@ -52,11 +54,11 @@ def ldap_connectivity_check(pk: str | None = None):
5254

5355

5456
@CELERY_APP.task(
55-
# We take the configured hours timeout time by 2.5 as we run user and
56-
# group in parallel and then membership, so 2x is to cover the serial tasks,
57+
# We multiply the configured timeout (in hours) by 3.5 as we run user and
58+
# group in parallel and then membership, then deletions, so 3x is to cover the serial tasks,
5759
# and 0.5x on top of that to give some more leeway
58-
soft_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 2.5,
59-
task_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 2.5,
60+
soft_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3.5,
61+
task_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3.5,
6062
)
6163
def ldap_sync_single(source_pk: str):
6264
"""Sync a single source"""
@@ -79,6 +81,25 @@ def ldap_sync_single(source_pk: str):
7981
group(
8082
ldap_sync_paginator(source, MembershipLDAPSynchronizer),
8183
),
84+
# Finally, deletions. What we'd really like to do here is something like
85+
# ```
86+
# user_identifiers = <ldap query>
87+
# User.objects.exclude(
88+
# usersourceconnection__identifier__in=user_uniqueness_identifiers,
89+
# ).delete()
90+
# ```
91+
# This runs into performance issues in large installations. So instead we spread the
92+
# work out into three steps:
93+
# 1. Get every object from the LDAP source.
94+
# 2. Mark every object as "safe" in the database. This is quick, but any error could
95+
# mean deleting users which should not be deleted, so we do it immediately, in
96+
# large chunks, and only queue the deletion step afterwards.
97+
# 3. Delete every unmarked item. This is slow, so we spread it over many tasks in
98+
# small chunks.
99+
group(
100+
ldap_sync_paginator(source, UserLDAPForwardDeletion)
101+
+ ldap_sync_paginator(source, GroupLDAPForwardDeletion),
102+
),
82103
)
83104
task()
84105

0 commit comments

Comments
 (0)