From 7d79580fde5b14ae0618c9731a6e6e19fb09bd2b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 15:09:28 +0200 Subject: [PATCH 01/16] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 3623042..9a7026f 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 362304227f1852a900661621cab8792d98ed15b2 +Subproject commit 9a7026f95dbfa46b32f26e7d33a49115c5c8fe7f From 3c29e89d6d21b956724ae098031ab3317e745c99 Mon Sep 17 00:00:00 2001 From: Lennart Schmidt <150007074+LennartSchmidtKern@users.noreply.github.com> Date: Thu, 26 Jun 2025 15:10:24 +0200 Subject: [PATCH 02/16] Adding groups for access management (#83) * filter * engineer access * fixes * model * model * chore: update submodules --------- Co-authored-by: andhreljaKern --- app.py | 2 ++ neural_search/util.py | 60 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index a072b6b..3bbf2dd 100644 --- a/app.py +++ b/app.py @@ -66,6 +66,7 @@ class MostSimilarByEmbeddingRequest(BaseModel): att_filter: Optional[List[Dict[str, Any]]] = None threshold: Optional[Union[float, int]] = None question: Optional[str] = None + user_id: Optional[str] = None @app.post("/most_similar_by_embedding") @@ -99,6 +100,7 @@ def most_similar_by_embedding( request.att_filter, request.threshold, include_scores, + request.user_id ) if request.question: diff --git a/neural_search/util.py b/neural_search/util.py index f58b3da..e3c62dd 100644 --- a/neural_search/util.py +++ b/neural_search/util.py @@ -10,10 +10,14 @@ embedding, record_label_association, record, + project, + user, ) -from submodules.model.enums import EmbeddingPlatform, LabelSource +from submodules.model.cognition_objects import group_member +from submodules.model.enums import EmbeddingPlatform, LabelSource, UserRoles from .similarity_threshold import SimilarityThreshold, NO_THRESHOLD_INDICATOR +import traceback port = int(os.environ["QDRANT_PORT"]) qdrant_client = QdrantClient(host="qdrant", port=port, timeout=60) @@ -48,9 +52,25 @@ def most_similar_by_embedding( att_filter: Optional[List[Dict[str, Any]]] = None, threshold: Optional[float] = None, include_scores: bool = False, + user_id: Optional[str] = None, ) -> List[str]: if not is_filter_valid_for_embedding(project_id, embedding_id, att_filter): return [] + if project.check_access_management_active(project_id): + if not user_id: + return [] + requesting_user = user.get(user_id) + if not requesting_user: + return [] + if requesting_user.role != UserRoles.ENGINEER.value: + check_access = True + group_members = group_member.get_by_user_id(user_id) + group_ids = [group_member.group_id for group_member in group_members] + else: + check_access = False + else: + check_access = False + tmp_limit = limit has_sub_key = embedding.has_sub_key(project_id, embedding_id) if has_sub_key: @@ -66,14 +86,20 @@ def most_similar_by_embedding( elif similarity_threshold == NO_THRESHOLD_INDICATOR: similarity_threshold = None try: + _filter = __build_filter(att_filter) + if check_access: + _filter = __add_access_management_filter(_filter, group_ids, user_id) + search_result = qdrant_client.search( collection_name=embedding_id, query_vector=query_vector, - query_filter=__build_filter(att_filter), + query_filter=_filter, limit=tmp_limit, score_threshold=similarity_threshold, ) - except Exception: + except Exception as e: + print(f"Error during search in Qdrant: {e}", flush=True) + print(traceback.format_exc(), flush=True) return [] if include_scores: @@ -125,6 +151,34 @@ def __build_filter(att_filter: List[Dict[str, Any]]) -> models.Filter: return models.Filter(must=must) +def __add_access_management_filter( + base_filter: models.Filter, group_ids, user_id +) -> models.Filter: + access_management_filter = models.Filter( + should=[ + models.FieldCondition( + key="__ACCESS_GROUPS", + match=models.MatchAny( + any=group_ids, + ), + ), + models.FieldCondition( + key="__ACCESS_USERS", + match=models.MatchValue( + value=user_id, + ), + ), + ] + ) + if base_filter is None: + return access_management_filter + else: + return models.Filter( + must=base_filter, + should=access_management_filter, + ) + + def __build_filter_item(filter_item: Dict[str, Any]) -> models.FieldCondition: if isinstance(filter_item["value"], list): if filter_item.get("type") == "between": From 122faeca4c8ed1967d8e0919f1162082869a212b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 11:02:12 +0200 Subject: [PATCH 03/16] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 9a7026f..d57104e 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 9a7026f95dbfa46b32f26e7d33a49115c5c8fe7f +Subproject commit d57104ed703ead2ba4ca493a2ce68430ef26e34e From 93a80afa69800b099de7475f4eb41ff674231916 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 15:44:42 +0200 Subject: [PATCH 04/16] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index d57104e..42b09fd 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit d57104ed703ead2ba4ca493a2ce68430ef26e34e +Subproject commit 42b09fd25be9119c93d4e87866227089d0e0fc32 From 88fe23b328eb75affe63540f19ca7ed5021043dd Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 15:45:26 +0200 Subject: [PATCH 05/16] perf: add REFINERY_ATTRIBUTE_ACCESS constants --- app.py | 2 +- neural_search/util.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index 3bbf2dd..8a52461 100644 --- a/app.py +++ b/app.py @@ -100,7 +100,7 @@ def most_similar_by_embedding( request.att_filter, request.threshold, include_scores, - request.user_id + request.user_id, ) if request.question: diff --git a/neural_search/util.py b/neural_search/util.py index e3c62dd..388732c 100644 --- a/neural_search/util.py +++ b/neural_search/util.py @@ -14,6 +14,10 @@ user, ) from submodules.model.cognition_objects import group_member +from submodules.model.integration_objects.helper import ( + REFINERY_ATTRIBUTE_ACCESS_GROUPS, + REFINERY_ATTRIBUTE_ACCESS_USERS, +) from submodules.model.enums import EmbeddingPlatform, LabelSource, UserRoles from .similarity_threshold import SimilarityThreshold, NO_THRESHOLD_INDICATOR @@ -157,13 +161,13 @@ def __add_access_management_filter( access_management_filter = models.Filter( should=[ models.FieldCondition( - key="__ACCESS_GROUPS", + key=REFINERY_ATTRIBUTE_ACCESS_GROUPS, match=models.MatchAny( any=group_ids, ), ), models.FieldCondition( - key="__ACCESS_USERS", + key=REFINERY_ATTRIBUTE_ACCESS_USERS, match=models.MatchValue( value=user_id, ), From 67463a50287a0f3934f87f3f8d7f4ef21bcb99d6 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 16:54:19 +0200 Subject: [PATCH 06/16] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 42b09fd..e0d1fb4 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 42b09fd25be9119c93d4e87866227089d0e0fc32 +Subproject commit e0d1fb41787569faad82f4dcfdf4da17a4eddbf0 From e5d7e982b9d2cb3f8ffd4df2b66a75d6161bafd0 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 16:58:04 +0200 Subject: [PATCH 07/16] chore: add todo comment --- neural_search/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_search/util.py b/neural_search/util.py index 388732c..a526a11 100644 --- a/neural_search/util.py +++ b/neural_search/util.py @@ -419,6 +419,7 @@ def update_attribute_payloads( for point_id, payload in zip(ids_for_storage, payloads) ] + # TODO: handle errors in batch update qdrant_client.batch_update_points( collection_name=embedding_id, update_operations=update_operations, From 7481ebd799d3cd2bea84c5894f9480e08fc93741 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 30 Jun 2025 14:55:25 +0200 Subject: [PATCH 08/16] perf: error tracing --- app.py | 5 +++++ neural_search/util.py | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index 8a52461..0e36df3 100644 --- a/app.py +++ b/app.py @@ -9,6 +9,8 @@ ) from submodules.model import session +import traceback + app = FastAPI() @@ -17,6 +19,9 @@ async def handle_db_session(request: Request, call_next): session_token = general.get_ctx_token() try: response = await call_next(request) + except: + traceback.print_exc() + response = None finally: general.remove_and_refresh_session(session_token) diff --git a/neural_search/util.py b/neural_search/util.py index a526a11..388732c 100644 --- a/neural_search/util.py +++ b/neural_search/util.py @@ -419,7 +419,6 @@ def update_attribute_payloads( for point_id, payload in zip(ids_for_storage, payloads) ] - # TODO: handle errors in batch update qdrant_client.batch_update_points( collection_name=embedding_id, update_operations=update_operations, From f9efe2401a1b1d772b8153e9039044b5050dda9b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 2 Jul 2025 23:30:39 +0200 Subject: [PATCH 09/16] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index e0d1fb4..c9ea2e2 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit e0d1fb41787569faad82f4dcfdf4da17a4eddbf0 +Subproject commit c9ea2e29fdff2b4b66a603d37653d49997e4f00e From 61d3446b399c5124ddfd9b38eb8c33bac0f6b6d4 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Thu, 3 Jul 2025 11:56:58 +0200 Subject: [PATCH 10/16] group id str --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index e0d1fb4..e38a007 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit e0d1fb41787569faad82f4dcfdf4da17a4eddbf0 +Subproject commit e38a00732c88cfaeb7f2aa30ccb0d69849d38427 From 5b64c2c326418eee32dbbba24fd4a63f3eed95b3 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Thu, 3 Jul 2025 11:57:18 +0200 Subject: [PATCH 11/16] group id str --- neural_search/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_search/util.py b/neural_search/util.py index 388732c..552ff4c 100644 --- a/neural_search/util.py +++ b/neural_search/util.py @@ -69,7 +69,7 @@ def most_similar_by_embedding( if requesting_user.role != UserRoles.ENGINEER.value: check_access = True group_members = group_member.get_by_user_id(user_id) - group_ids = [group_member.group_id for group_member in group_members] + group_ids = [str(group_member.group_id) for group_member in group_members] else: check_access = False else: From 2111e2147b6bff42f552eb31e802a4d064748d81 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 3 Jul 2025 16:16:10 +0200 Subject: [PATCH 12/16] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index e38a007..06ffea3 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit e38a00732c88cfaeb7f2aa30ccb0d69849d38427 +Subproject commit 06ffea3b55e8b16249075d35132897b698f484a1 From b03f31a187cbf71b8cea1a030c68a96983287ea0 Mon Sep 17 00:00:00 2001 From: JWittmeyer Date: Thu, 3 Jul 2025 16:18:33 +0200 Subject: [PATCH 13/16] Fix filter with filter --- neural_search/util.py | 90 ++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 48 deletions(-) diff --git a/neural_search/util.py b/neural_search/util.py index 552ff4c..093a763 100644 --- a/neural_search/util.py +++ b/neural_search/util.py @@ -148,67 +148,61 @@ def __is_label_filter(key: str) -> bool: return parts[0] == LABELS_QDRANT -def __build_filter(att_filter: List[Dict[str, Any]]) -> models.Filter: - if att_filter is None or len(att_filter) == 0: +def __build_filter(att_filter: List[Dict[str, Any]]) -> Optional[models.Filter]: + if not att_filter: return None - must = [__build_filter_item(filter_item) for filter_item in att_filter] + must = [__build_filter_item(item) for item in att_filter] return models.Filter(must=must) def __add_access_management_filter( - base_filter: models.Filter, group_ids, user_id + base_filter: Optional[models.Filter], group_ids: List[str], user_id: str ) -> models.Filter: - access_management_filter = models.Filter( - should=[ - models.FieldCondition( - key=REFINERY_ATTRIBUTE_ACCESS_GROUPS, - match=models.MatchAny( - any=group_ids, - ), - ), - models.FieldCondition( - key=REFINERY_ATTRIBUTE_ACCESS_USERS, - match=models.MatchValue( - value=user_id, - ), - ), - ] - ) + access_conditions = [ + models.FieldCondition( + key=REFINERY_ATTRIBUTE_ACCESS_GROUPS, + match=models.MatchAny(any=group_ids), + ), + models.FieldCondition( + key=REFINERY_ATTRIBUTE_ACCESS_USERS, + match=models.MatchValue(value=user_id), + ), + ] + if base_filter is None: - return access_management_filter - else: - return models.Filter( - must=base_filter, - should=access_management_filter, - ) + return models.Filter(should=access_conditions) + + return models.Filter( + must=base_filter.must or [], + should=access_conditions, + ) def __build_filter_item(filter_item: Dict[str, Any]) -> models.FieldCondition: - if isinstance(filter_item["value"], list): - if filter_item.get("type") == "between": - return models.FieldCondition( - key=filter_item["key"], - range=models.Range( - gte=filter_item["value"][0], - lte=filter_item["value"][1], - ), - ) - else: - should = [ - models.FieldCondition( - key=filter_item["key"], match=models.MatchValue(value=value) - ) - for value in filter_item["value"] - ] - return models.Filter(should=should) - else: + key = filter_item["key"] + value = filter_item["value"] + typ = filter_item.get("type") + + # BETWEEN + if isinstance(value, list) and typ == "between": return models.FieldCondition( - key=filter_item["key"], - match=models.MatchValue( - value=filter_item["value"], - ), + key=key, + range=models.Range(gte=value[0], lte=value[1]), ) + # IN (...) + if isinstance(value, list): + return models.FieldCondition( + key=key, + match=models.MatchAny(any=value), + ) + + # = single value + return models.FieldCondition( + key=key, + match=models.MatchValue(value=value), + ) + def recreate_collection(project_id: str, embedding_id: str) -> int: embedding_item = embedding.get(project_id, embedding_id) From 990a794b2d77abac0ca363479ddcac813f9d2eaf Mon Sep 17 00:00:00 2001 From: JWittmeyer Date: Thu, 3 Jul 2025 16:22:49 +0200 Subject: [PATCH 14/16] PR coment --- app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 0e36df3..cb3d2f2 100644 --- a/app.py +++ b/app.py @@ -19,8 +19,8 @@ async def handle_db_session(request: Request, call_next): session_token = general.get_ctx_token() try: response = await call_next(request) - except: - traceback.print_exc() + except Exception: + print(traceback.format_exc(), flush=True) response = None finally: general.remove_and_refresh_session(session_token) From a740dfa6e42f2dbf2b03ea3b79f9baccd7283ebd Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 4 Jul 2025 09:03:52 +0200 Subject: [PATCH 15/16] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 06ffea3..9c2f2a1 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 06ffea3b55e8b16249075d35132897b698f484a1 +Subproject commit 9c2f2a1bc386567fb9123d46a74f203fcb0b4ced From 3127757415e2d93c9224cac674a959ed66ac807e Mon Sep 17 00:00:00 2001 From: JWittmeyer Date: Fri, 4 Jul 2025 12:03:37 +0200 Subject: [PATCH 16/16] Submodules update --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 9c2f2a1..19c0a4d 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 9c2f2a1bc386567fb9123d46a74f203fcb0b4ced +Subproject commit 19c0a4d25233fa0a7d5c4ee5377954c0594d2750