Skip to content

sources/ldap: add forward deletion option #14718

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions authentik/sources/ldap/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class Meta:
"sync_parent_group",
"connectivity",
"lookup_groups_from_user",
"delete_not_found_objects",
]
extra_kwargs = {"bind_password": {"write_only": True}}

Expand Down Expand Up @@ -147,6 +148,7 @@ class LDAPSourceViewSet(UsedByMixin, ModelViewSet):
"user_property_mappings",
"group_property_mappings",
"lookup_groups_from_user",
"delete_not_found_objects",
]
search_fields = ["name", "slug"]
ordering = ["name"]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Generated by Django 5.1.9 on 2025-05-27 13:24

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the LDAP forward-deletion flag and the ``validated_by`` helper columns.

    Adds ``delete_not_found_objects`` to ``ldapsource`` and a nullable
    ``validated_by`` UUID column to both LDAP source connection models.
    """

    dependencies = [
        ("authentik_sources_ldap", "0008_groupldapsourceconnection_userldapsourceconnection"),
    ]

    operations = [
        # Nullable marker column on group connections.
        migrations.AddField(
            model_name="groupldapsourceconnection",
            name="validated_by",
            field=models.UUIDField(
                null=True,
                blank=True,
                help_text="Helper field for batch deletions",
            ),
        ),
        # Opt-in switch on the source itself; defaults to off.
        migrations.AddField(
            model_name="ldapsource",
            name="delete_not_found_objects",
            field=models.BooleanField(
                default=False,
                blank=True,
                help_text=(
                    "Delete authentik users and groups which were previously supplied by this "
                    "source, but are now missing from it."
                ),
            ),
        ),
        # Nullable marker column on user connections.
        migrations.AddField(
            model_name="userldapsourceconnection",
            name="validated_by",
            field=models.UUIDField(
                null=True,
                blank=True,
                help_text="Helper field for batch deletions",
            ),
        ),
    ]
17 changes: 17 additions & 0 deletions authentik/sources/ldap/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,15 @@ class LDAPSource(Source):
),
)

delete_not_found_objects = models.BooleanField(
default=False,
blank=True,
help_text=_(
"Delete authentik users and groups which were previously supplied by this source, "
"but are now missing from it."
),
)

@property
def component(self) -> str:
    """Return the component identifier string for this source type.

    NOTE(review): presumably the name of the UI form element used to edit an
    LDAP source -- confirm against the frontend.
    """
    return "ak-source-ldap-form"
Expand Down Expand Up @@ -321,6 +330,10 @@ class Meta:


class UserLDAPSourceConnection(UserSourceConnection):
validated_by = models.UUIDField(
null=True, blank=True, help_text=_("Helper field for batch deletions")
)

@property
def serializer(self) -> type[Serializer]:
from authentik.sources.ldap.api import (
Expand All @@ -335,6 +348,10 @@ class Meta:


class GroupLDAPSourceConnection(GroupSourceConnection):
validated_by = models.UUIDField(
null=True, blank=True, help_text=_("Helper field for batch deletions")
)

@property
def serializer(self) -> type[Serializer]:
from authentik.sources.ldap.api import (
Expand Down
8 changes: 7 additions & 1 deletion authentik/sources/ldap/sync/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from authentik.core.sources.mapper import SourceMapper
from authentik.lib.config import CONFIG
from authentik.lib.sync.mapper import PropertyMappingManager
from authentik.sources.ldap.models import LDAPSource
from authentik.sources.ldap.models import LDAPSource, flatten


class BaseLDAPSynchronizer:
Expand Down Expand Up @@ -77,6 +77,12 @@ def get_objects(self, **kwargs) -> Generator:
"""Get objects from LDAP, implemented in subclass"""
raise NotImplementedError()

def get_identifier(self, object):
    """Return the flattened uniqueness value of an LDAP search entry.

    The value is read from the entry's ``"attributes"`` mapping under the
    source's ``object_uniqueness_field``. ``None`` is returned when the entry
    has no attributes, or when the field is missing or empty.
    """
    uniqueness_field = self._source.object_uniqueness_field
    raw_value = object.get("attributes", {}).get(uniqueness_field)
    if raw_value:
        return flatten(raw_value)
    return None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should also be used in other places, like authentik.sources.ldap.sync.users.UserLDAPSynchronizer.sync

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, maybe? I did my best, see if you like it.

I'm not sure what that additional `in` check is for (untested code introduced in deb91bd -- should it be something like an `isinstance(object, dict)` check instead?), but I'm not about to find out, so I did my best to keep the exact same logic for the already existing code.


def search_paginator( # noqa: PLR0913
self,
search_base,
Expand Down
59 changes: 59 additions & 0 deletions authentik/sources/ldap/sync/forward_delete_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from collections.abc import Generator
from itertools import batched
from uuid import uuid4

from ldap3 import SUBTREE

from authentik.core.models import Group
from authentik.sources.ldap.models import GroupLDAPSourceConnection
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
from authentik.sources.ldap.sync.forward_delete_users import DELETE_CHUNK_SIZE, UPDATE_CHUNK_SIZE


class GroupLDAPForwardDeletion(BaseLDAPSynchronizer):
    """Delete authentik groups that are no longer present in the LDAP source.

    ``get_objects`` stamps every connection of this source whose identifier is
    still returned by LDAP with a fresh UUID, then hands out the primary keys
    of the groups whose connection did not receive that stamp. ``sync`` deletes
    one such chunk of groups.
    """

    @staticmethod
    def name() -> str:
        """Return the identifier string of this synchronizer."""
        return "group_deletions"

    def get_objects(self, **kwargs) -> Generator:
        """Mark groups still found in LDAP and return chunks of group PKs to delete.

        Args:
            **kwargs: Passed through unchanged to the LDAP ``paged_search`` call.

        Returns:
            An iterator of tuples, each holding at most ``DELETE_CHUNK_SIZE``
            group primary keys whose connection to this source was not marked
            during this run. An empty iterator is returned when group syncing
            or ``delete_not_found_objects`` is disabled on the source.
        """
        if not self._source.sync_groups or not self._source.delete_not_found_objects:
            self.message("Group syncing is disabled for this Source")
            return iter(())

        # A fresh marker per run: anything not stamped with it is considered missing.
        uuid = uuid4()
        groups = self._source.connection().extend.standard.paged_search(
            search_base=self.base_dn_groups,
            search_filter=self._source.group_object_filter,
            search_scope=SUBTREE,
            attributes=[self._source.object_uniqueness_field],
            generator=True,
            **kwargs,
        )
        for batch in batched(groups, UPDATE_CHUNK_SIZE, strict=False):
            identifiers = [
                identifier for group in batch if (identifier := self.get_identifier(group))
            ]
            # Restrict the stamp to this source's own connections. Without the
            # source filter, a connection of another LDAP source sharing the same
            # identifier would have its marker overwritten, and a concurrent sync
            # of that source could then treat it as missing and delete its group.
            GroupLDAPSourceConnection.objects.filter(
                source=self._source, identifier__in=identifiers
            ).update(validated_by=uuid)

        # Lazily stream the PKs of groups whose connection was not stamped in this run.
        return batched(
            GroupLDAPSourceConnection.objects.filter(source=self._source)
            .exclude(validated_by=uuid)
            .values_list("group", flat=True)
            .iterator(chunk_size=DELETE_CHUNK_SIZE),
            DELETE_CHUNK_SIZE,
            strict=False,
        )

    def sync(self, group_pks: tuple) -> int:
        """Delete the authentik groups with the given primary keys.

        Args:
            group_pks: Primary keys of the groups to delete (one chunk from
                ``get_objects``).

        Returns:
            The number of ``Group`` rows deleted, or ``-1`` when group syncing
            or ``delete_not_found_objects`` is disabled on the source.
        """
        if not self._source.sync_groups or not self._source.delete_not_found_objects:
            self.message("Group syncing is disabled for this Source")
            return -1
        self._logger.debug("Deleting groups", group_pks=group_pks)
        # delete() also reports cascaded rows; only count the groups themselves.
        _, deleted_per_type = Group.objects.filter(pk__in=group_pks).delete()
        return deleted_per_type.get(Group._meta.label, 0)
61 changes: 61 additions & 0 deletions authentik/sources/ldap/sync/forward_delete_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from collections.abc import Generator
from itertools import batched
from uuid import uuid4

from ldap3 import SUBTREE

from authentik.core.models import User
from authentik.sources.ldap.models import UserLDAPSourceConnection
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer

# Number of LDAP search results whose identifiers are marked as validated in a
# single UPDATE query.
UPDATE_CHUNK_SIZE = 10_000
# Number of primary keys handed to a single deletion ``sync`` call; also used as
# the chunk size when streaming those primary keys from the database.
DELETE_CHUNK_SIZE = 50


class UserLDAPForwardDeletion(BaseLDAPSynchronizer):
    """Delete authentik users that are no longer present in the LDAP source.

    ``get_objects`` stamps every connection of this source whose identifier is
    still returned by LDAP with a fresh UUID, then hands out the primary keys
    of the users whose connection did not receive that stamp. ``sync`` deletes
    one such chunk of users.
    """

    @staticmethod
    def name() -> str:
        """Return the identifier string of this synchronizer."""
        return "user_deletions"

    def get_objects(self, **kwargs) -> Generator:
        """Mark users still found in LDAP and return chunks of user PKs to delete.

        Args:
            **kwargs: Passed through unchanged to the LDAP ``paged_search`` call.

        Returns:
            An iterator of tuples, each holding at most ``DELETE_CHUNK_SIZE``
            user primary keys whose connection to this source was not marked
            during this run. An empty iterator is returned when user syncing
            or ``delete_not_found_objects`` is disabled on the source.
        """
        if not self._source.sync_users or not self._source.delete_not_found_objects:
            self.message("User syncing is disabled for this Source")
            return iter(())

        # A fresh marker per run: anything not stamped with it is considered missing.
        uuid = uuid4()
        users = self._source.connection().extend.standard.paged_search(
            search_base=self.base_dn_users,
            search_filter=self._source.user_object_filter,
            search_scope=SUBTREE,
            attributes=[self._source.object_uniqueness_field],
            generator=True,
            **kwargs,
        )
        for batch in batched(users, UPDATE_CHUNK_SIZE, strict=False):
            identifiers = [
                identifier for user in batch if (identifier := self.get_identifier(user))
            ]
            # Restrict the stamp to this source's own connections. Without the
            # source filter, a connection of another LDAP source sharing the same
            # identifier would have its marker overwritten, and a concurrent sync
            # of that source could then treat it as missing and delete its user.
            UserLDAPSourceConnection.objects.filter(
                source=self._source, identifier__in=identifiers
            ).update(validated_by=uuid)

        # Lazily stream the PKs of users whose connection was not stamped in this run.
        return batched(
            UserLDAPSourceConnection.objects.filter(source=self._source)
            .exclude(validated_by=uuid)
            .values_list("user", flat=True)
            .iterator(chunk_size=DELETE_CHUNK_SIZE),
            DELETE_CHUNK_SIZE,
            strict=False,
        )

    def sync(self, user_pks: tuple) -> int:
        """Delete the authentik users with the given primary keys.

        Args:
            user_pks: Primary keys of the users to delete (one chunk from
                ``get_objects``).

        Returns:
            The number of ``User`` rows deleted, or ``-1`` when user syncing
            or ``delete_not_found_objects`` is disabled on the source.
        """
        if not self._source.sync_users or not self._source.delete_not_found_objects:
            self.message("User syncing is disabled for this Source")
            return -1
        self._logger.debug("Deleting users", user_pks=user_pks)
        # delete() also reports cascaded rows; only count the users themselves.
        _, deleted_per_type = User.objects.filter(pk__in=user_pks).delete()
        return deleted_per_type.get(User._meta.label, 0)
29 changes: 25 additions & 4 deletions authentik/sources/ldap/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from authentik.root.celery import CELERY_APP
from authentik.sources.ldap.models import LDAPSource
from authentik.sources.ldap.sync.base import BaseLDAPSynchronizer
from authentik.sources.ldap.sync.forward_delete_groups import GroupLDAPForwardDeletion
from authentik.sources.ldap.sync.forward_delete_users import UserLDAPForwardDeletion
from authentik.sources.ldap.sync.groups import GroupLDAPSynchronizer
from authentik.sources.ldap.sync.membership import MembershipLDAPSynchronizer
from authentik.sources.ldap.sync.users import UserLDAPSynchronizer
Expand Down Expand Up @@ -52,11 +54,11 @@ def ldap_connectivity_check(pk: str | None = None):


@CELERY_APP.task(
# We take the configured hours timeout time by 2.5 as we run user and
# group in parallel and then membership, so 2x is to cover the serial tasks,
# We multiply the configured timeout (in hours) by 3.5: user and group sync run in
# parallel, followed by membership and then deletions, so 3x covers the serial stages,
# and the extra 0.5x on top of that gives some more leeway
soft_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 2.5,
task_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 2.5,
soft_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3.5,
task_time_limit=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3.5,
)
def ldap_sync_single(source_pk: str):
"""Sync a single source"""
Expand All @@ -79,6 +81,25 @@ def ldap_sync_single(source_pk: str):
group(
ldap_sync_paginator(source, MembershipLDAPSynchronizer),
),
# Finally, deletions. What we'd really like to do here is something like
# ```
# user_identifiers = <ldap query>
# User.objects.exclude(
# usersourceconnection__identifier__in=user_uniqueness_identifiers,
# ).delete()
# ```
# This runs into performance issues in large installations. So instead we spread the
# work out into three steps:
# 1. Get every object from the LDAP source.
# 2. Mark every object as "safe" in the database. This is quick, but any error could
# mean deleting users which should not be deleted, so we do it immediately, in
# large chunks, and only queue the deletion step afterwards.
# 3. Delete every unmarked item. This is slow, so we spread it over many tasks in
# small chunks.
group(
ldap_sync_paginator(source, UserLDAPForwardDeletion)
+ ldap_sync_paginator(source, GroupLDAPForwardDeletion),
),
)
task()

Expand Down
Loading
Loading