Skip to content

Commit c4bb190

Browse files
authored
sources/ldap: add forward deletion option (#14718)
* sources/ldap: add forward deletion option * remove unnecessary `blank=True` * clarify `validated_by` `help_text` * add indices to `validated_by` * factor out `get_identifier` everywhere and `get_attributes` I don't know what that additional `in` check is for, but I'm not about to find out. * add tests for known good user and group * fixup! add tests for known good user and group * fixup! add tests for known good user and group
1 parent 10f4fae commit c4bb190

File tree

15 files changed

+490
-17
lines changed

15 files changed

+490
-17
lines changed

authentik/sources/ldap/api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ class Meta:
111111
"sync_parent_group",
112112
"connectivity",
113113
"lookup_groups_from_user",
114+
"delete_not_found_objects",
114115
]
115116
extra_kwargs = {"bind_password": {"write_only": True}}
116117

@@ -147,6 +148,7 @@ class LDAPSourceViewSet(UsedByMixin, ModelViewSet):
147148
"user_property_mappings",
148149
"group_property_mappings",
149150
"lookup_groups_from_user",
151+
"delete_not_found_objects",
150152
]
151153
search_fields = ["name", "slug"]
152154
ordering = ["name"]
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Generated by Django 5.1.9 on 2025-05-28 08:15
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("authentik_core", "0048_delete_oldauthenticatedsession_content_type"),
10+
("authentik_sources_ldap", "0008_groupldapsourceconnection_userldapsourceconnection"),
11+
]
12+
13+
operations = [
14+
migrations.AddField(
15+
model_name="groupldapsourceconnection",
16+
name="validated_by",
17+
field=models.UUIDField(
18+
blank=True,
19+
help_text="Unique ID used while checking if this object still exists in the directory.",
20+
null=True,
21+
),
22+
),
23+
migrations.AddField(
24+
model_name="ldapsource",
25+
name="delete_not_found_objects",
26+
field=models.BooleanField(
27+
default=False,
28+
help_text="Delete authentik users and groups which were previously supplied by this source, but are now missing from it.",
29+
),
30+
),
31+
migrations.AddField(
32+
model_name="userldapsourceconnection",
33+
name="validated_by",
34+
field=models.UUIDField(
35+
blank=True,
36+
help_text="Unique ID used while checking if this object still exists in the directory.",
37+
null=True,
38+
),
39+
),
40+
migrations.AddIndex(
41+
model_name="groupldapsourceconnection",
42+
index=models.Index(fields=["validated_by"], name="authentik_s_validat_b70447_idx"),
43+
),
44+
migrations.AddIndex(
45+
model_name="userldapsourceconnection",
46+
index=models.Index(fields=["validated_by"], name="authentik_s_validat_ff2ebc_idx"),
47+
),
48+
]

authentik/sources/ldap/models.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,14 @@ class LDAPSource(Source):
137137
),
138138
)
139139

140+
delete_not_found_objects = models.BooleanField(
141+
default=False,
142+
help_text=_(
143+
"Delete authentik users and groups which were previously supplied by this source, "
144+
"but are now missing from it."
145+
),
146+
)
147+
140148
@property
141149
def component(self) -> str:
142150
return "ak-source-ldap-form"
@@ -321,6 +329,12 @@ class Meta:
321329

322330

323331
class UserLDAPSourceConnection(UserSourceConnection):
332+
validated_by = models.UUIDField(
333+
null=True,
334+
blank=True,
335+
help_text=_("Unique ID used while checking if this object still exists in the directory."),
336+
)
337+
324338
@property
325339
def serializer(self) -> type[Serializer]:
326340
from authentik.sources.ldap.api import (
@@ -332,9 +346,18 @@ def serializer(self) -> type[Serializer]:
332346
class Meta:
333347
verbose_name = _("User LDAP Source Connection")
334348
verbose_name_plural = _("User LDAP Source Connections")
349+
indexes = [
350+
models.Index(fields=["validated_by"]),
351+
]
335352

336353

337354
class GroupLDAPSourceConnection(GroupSourceConnection):
355+
validated_by = models.UUIDField(
356+
null=True,
357+
blank=True,
358+
help_text=_("Unique ID used while checking if this object still exists in the directory."),
359+
)
360+
338361
@property
339362
def serializer(self) -> type[Serializer]:
340363
from authentik.sources.ldap.api import (
@@ -346,3 +369,6 @@ def serializer(self) -> type[Serializer]:
346369
class Meta:
347370
verbose_name = _("Group LDAP Source Connection")
348371
verbose_name_plural = _("Group LDAP Source Connections")
372+
indexes = [
373+
models.Index(fields=["validated_by"]),
374+
]

authentik/sources/ldap/sync/base.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from authentik.core.sources.mapper import SourceMapper
1010
from authentik.lib.config import CONFIG
1111
from authentik.lib.sync.mapper import PropertyMappingManager
12-
from authentik.sources.ldap.models import LDAPSource
12+
from authentik.sources.ldap.models import LDAPSource, flatten
1313

1414

1515
class BaseLDAPSynchronizer:
@@ -77,6 +77,16 @@ def get_objects(self, **kwargs) -> Generator:
7777
"""Get objects from LDAP, implemented in subclass"""
7878
raise NotImplementedError()
7979

80+
def get_attributes(self, object):
81+
if "attributes" not in object:
82+
return
83+
return object.get("attributes", {})
84+
85+
def get_identifier(self, attributes: dict):
86+
if not attributes.get(self._source.object_uniqueness_field):
87+
return
88+
return flatten(attributes[self._source.object_uniqueness_field])
89+
8090
def search_paginator( # noqa: PLR0913
8191
self,
8292
search_base,
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from collections.abc import Generator
2+
from itertools import batched
3+
from uuid import uuid4
4+
5+
from ldap3 import SUBTREE
6+
7+
from authentik.core.models import Group
8+
from authentik.sources.ldap.models import GroupLDAPSourceConnection
9+
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
10+
from authentik.sources.ldap.sync.forward_delete_users import DELETE_CHUNK_SIZE, UPDATE_CHUNK_SIZE
11+
12+
13+
class GroupLDAPForwardDeletion(BaseLDAPSynchronizer):
14+
"""Delete LDAP Groups from authentik"""
15+
16+
@staticmethod
17+
def name() -> str:
18+
return "group_deletions"
19+
20+
def get_objects(self, **kwargs) -> Generator:
21+
if not self._source.sync_groups or not self._source.delete_not_found_objects:
22+
self.message("Group syncing is disabled for this Source")
23+
return iter(())
24+
25+
uuid = uuid4()
26+
groups = self._source.connection().extend.standard.paged_search(
27+
search_base=self.base_dn_groups,
28+
search_filter=self._source.group_object_filter,
29+
search_scope=SUBTREE,
30+
attributes=[self._source.object_uniqueness_field],
31+
generator=True,
32+
**kwargs,
33+
)
34+
for batch in batched(groups, UPDATE_CHUNK_SIZE, strict=False):
35+
identifiers = []
36+
for group in batch:
37+
if not (attributes := self.get_attributes(group)):
38+
continue
39+
if identifier := self.get_identifier(attributes):
40+
identifiers.append(identifier)
41+
GroupLDAPSourceConnection.objects.filter(identifier__in=identifiers).update(
42+
validated_by=uuid
43+
)
44+
45+
return batched(
46+
GroupLDAPSourceConnection.objects.filter(source=self._source)
47+
.exclude(validated_by=uuid)
48+
.values_list("group", flat=True)
49+
.iterator(chunk_size=DELETE_CHUNK_SIZE),
50+
DELETE_CHUNK_SIZE,
51+
strict=False,
52+
)
53+
54+
def sync(self, group_pks: tuple) -> int:
55+
"""Delete authentik groups"""
56+
if not self._source.sync_groups or not self._source.delete_not_found_objects:
57+
self.message("Group syncing is disabled for this Source")
58+
return -1
59+
self._logger.debug("Deleting groups", group_pks=group_pks)
60+
_, deleted_per_type = Group.objects.filter(pk__in=group_pks).delete()
61+
return deleted_per_type.get(Group._meta.label, 0)
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from collections.abc import Generator
2+
from itertools import batched
3+
from uuid import uuid4
4+
5+
from ldap3 import SUBTREE
6+
7+
from authentik.core.models import User
8+
from authentik.sources.ldap.models import UserLDAPSourceConnection
9+
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
10+
11+
UPDATE_CHUNK_SIZE = 10_000
12+
DELETE_CHUNK_SIZE = 50
13+
14+
15+
class UserLDAPForwardDeletion(BaseLDAPSynchronizer):
16+
"""Delete LDAP Users from authentik"""
17+
18+
@staticmethod
19+
def name() -> str:
20+
return "user_deletions"
21+
22+
def get_objects(self, **kwargs) -> Generator:
23+
if not self._source.sync_users or not self._source.delete_not_found_objects:
24+
self.message("User syncing is disabled for this Source")
25+
return iter(())
26+
27+
uuid = uuid4()
28+
users = self._source.connection().extend.standard.paged_search(
29+
search_base=self.base_dn_users,
30+
search_filter=self._source.user_object_filter,
31+
search_scope=SUBTREE,
32+
attributes=[self._source.object_uniqueness_field],
33+
generator=True,
34+
**kwargs,
35+
)
36+
for batch in batched(users, UPDATE_CHUNK_SIZE, strict=False):
37+
identifiers = []
38+
for user in batch:
39+
if not (attributes := self.get_attributes(user)):
40+
continue
41+
if identifier := self.get_identifier(attributes):
42+
identifiers.append(identifier)
43+
UserLDAPSourceConnection.objects.filter(identifier__in=identifiers).update(
44+
validated_by=uuid
45+
)
46+
47+
return batched(
48+
UserLDAPSourceConnection.objects.filter(source=self._source)
49+
.exclude(validated_by=uuid)
50+
.values_list("user", flat=True)
51+
.iterator(chunk_size=DELETE_CHUNK_SIZE),
52+
DELETE_CHUNK_SIZE,
53+
strict=False,
54+
)
55+
56+
def sync(self, user_pks: tuple) -> int:
57+
"""Delete authentik users"""
58+
if not self._source.sync_users or not self._source.delete_not_found_objects:
59+
self.message("User syncing is disabled for this Source")
60+
return -1
61+
self._logger.debug("Deleting users", user_pks=user_pks)
62+
_, deleted_per_type = User.objects.filter(pk__in=user_pks).delete()
63+
return deleted_per_type.get(User._meta.label, 0)

authentik/sources/ldap/sync/groups.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,18 +58,16 @@ def sync(self, page_data: list) -> int:
5858
return -1
5959
group_count = 0
6060
for group in page_data:
61-
if "attributes" not in group:
61+
if (attributes := self.get_attributes(group)) is None:
6262
continue
63-
attributes = group.get("attributes", {})
6463
group_dn = flatten(flatten(group.get("entryDN", group.get("dn"))))
65-
if not attributes.get(self._source.object_uniqueness_field):
64+
if not (uniq := self.get_identifier(attributes)):
6665
self.message(
6766
f"Uniqueness field not found/not set in attributes: '{group_dn}'",
6867
attributes=attributes.keys(),
6968
dn=group_dn,
7069
)
7170
continue
72-
uniq = flatten(attributes[self._source.object_uniqueness_field])
7371
try:
7472
defaults = {
7573
k: flatten(v)

authentik/sources/ldap/sync/membership.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ def sync(self, page_data: list) -> int:
6363
group_member_dn = group_member.get("dn", {})
6464
members.append(group_member_dn)
6565
else:
66-
if "attributes" not in group:
66+
if (attributes := self.get_attributes(group)) is None:
6767
continue
68-
members = group.get("attributes", {}).get(self._source.group_membership_field, [])
68+
members = attributes.get(self._source.group_membership_field, [])
6969

7070
ak_group = self.get_group(group)
7171
if not ak_group:

authentik/sources/ldap/sync/users.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,16 @@ def sync(self, page_data: list) -> int:
6060
return -1
6161
user_count = 0
6262
for user in page_data:
63-
if "attributes" not in user:
63+
if (attributes := self.get_attributes(user)) is None:
6464
continue
65-
attributes = user.get("attributes", {})
6665
user_dn = flatten(user.get("entryDN", user.get("dn")))
67-
if not attributes.get(self._source.object_uniqueness_field):
66+
if not (uniq := self.get_identifier(attributes)):
6867
self.message(
6968
f"Uniqueness field not found/not set in attributes: '{user_dn}'",
7069
attributes=attributes.keys(),
7170
dn=user_dn,
7271
)
7372
continue
74-
uniq = flatten(attributes[self._source.object_uniqueness_field])
7573
try:
7674
defaults = {
7775
k: flatten(v)

authentik/sources/ldap/tasks.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from authentik.root.celery import CELERY_APP
1818
from authentik.sources.ldap.models import LDAPSource
1919
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
20+
from authentik.sources.ldap.sync.forward_delete_groups import GroupLDAPForwardDeletion
21+
from authentik.sources.ldap.sync.forward_delete_users import UserLDAPForwardDeletion
2022
from authentik.sources.ldap.sync.groups import GroupLDAPSynchronizer
2123
from authentik.sources.ldap.sync.membership import MembershipLDAPSynchronizer
2224
from authentik.sources.ldap.sync.users import UserLDAPSynchronizer
@@ -52,11 +54,11 @@ def ldap_connectivity_check(pk: str | None = None):
5254

5355

5456
@CELERY_APP.task(
55-
# We take the configured hours timeout time by 2.5 as we run user and
56-
# group in parallel and then membership, so 2x is to cover the serial tasks,
57+
# We take the configured hours timeout time by 3.5 as we run user and
58+
# group in parallel and then membership, then deletions, so 3x is to cover the serial tasks,
5759
# and 0.5x on top of that to give some more leeway
58-
soft_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 2.5,
59-
task_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 2.5,
60+
soft_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3.5,
61+
task_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3.5,
6062
)
6163
def ldap_sync_single(source_pk: str):
6264
"""Sync a single source"""
@@ -79,6 +81,25 @@ def ldap_sync_single(source_pk: str):
7981
group(
8082
ldap_sync_paginator(source, MembershipLDAPSynchronizer),
8183
),
84+
# Finally, deletions. What we'd really like to do here is something like
85+
# ```
86+
# user_identifiers = <ldap query>
87+
# User.objects.exclude(
88+
# usersourceconnection__identifier__in=user_uniqueness_identifiers,
89+
# ).delete()
90+
# ```
91+
# This runs into performance issues in large installations. So instead we spread the
92+
# work out into three steps:
93+
# 1. Get every object from the LDAP source.
94+
# 2. Mark every object as "safe" in the database. This is quick, but any error could
95+
# mean deleting users which should not be deleted, so we do it immediately, in
96+
# large chunks, and only queue the deletion step afterwards.
97+
# 3. Delete every unmarked item. This is slow, so we spread it over many tasks in
98+
# small chunks.
99+
group(
100+
ldap_sync_paginator(source, UserLDAPForwardDeletion)
101+
+ ldap_sync_paginator(source, GroupLDAPForwardDeletion),
102+
),
82103
)
83104
task()
84105

0 commit comments

Comments
 (0)