Commit 256f6e9 (merge of parents a9638f7 and b7d09b4)

2 files changed: 95 additions, 35 deletions
backend/src/QA_integration_new.py

Lines changed: 2 additions & 2 deletions

@@ -322,7 +322,7 @@ def QA_RAG(graph, model, question, document_names,session_id, mode):
     if mode == "graph":
         graph_chain, qa_llm,model_version = create_graph_chain(model,graph)
         graph_response = get_graph_response(graph_chain,question)
-        ai_response = AIMessage(content=graph_response["response"])
+        ai_response = AIMessage(content=graph_response["response"]) if graph_response["response"] else AIMessage(content="Something went wrong")
         messages.append(ai_response)
         summarize_and_log(history, messages, qa_llm)
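The guard above keeps an empty graph answer from being appended to the chat history as an empty AIMessage. A minimal standalone sketch of the pattern, assuming graph_response is a dict whose "response" value may be an empty string or None (the AIMessage import path is the usual langchain-core location and may differ by version):

    from langchain_core.messages import AIMessage

    graph_response = {"response": ""}  # e.g. the graph chain returned nothing useful

    # The conditional expression falls back to a placeholder message
    # whenever the response is falsy (empty string or None).
    ai_response = (
        AIMessage(content=graph_response["response"])
        if graph_response["response"]
        else AIMessage(content="Something went wrong")
    )
    print(ai_response.content)  # -> Something went wrong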

@@ -342,7 +342,7 @@ def QA_RAG(graph, model, question, document_names,session_id, mode):
     elif mode == "vector":
         retrieval_query = VECTOR_SEARCH_QUERY
     else:
-        retrieval_query = VECTOR_GRAPH_SEARCH_QUERY
+        retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT)
 
     llm, doc_retriever, model_version = setup_chat(model, graph, session_id, document_names,retrieval_query)
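This hunk fills the new {no_of_entites} placeholder at call time (the identifier's spelling follows the source). A quick sketch of the str.format() mechanics it relies on, with the Cypher abbreviated to a stub; the full template lives in backend/src/shared/constants.py below:

    VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 25

    # Literal Cypher braces must be doubled ({{ and }}) so that str.format()
    # leaves them intact and substitutes only the named placeholder.
    TEMPLATE = """
    CALL {{ WITH chunks
    ORDER BY numChunks DESC LIMIT {no_of_entites}
    }}
    """

    query = TEMPLATE.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT)
    print(query)
    # CALL { WITH chunks
    # ORDER BY numChunks DESC LIMIT 25
    # }
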
backend/src/shared/constants.py

Lines changed: 93 additions & 33 deletions

@@ -111,38 +111,102 @@
 # """
 
 
+# VECTOR_GRAPH_SEARCH_QUERY = """
+# WITH node as chunk, score
+# // find the document of the chunk
+# MATCH (chunk)-[:PART_OF]->(d:Document)
+# // fetch entities
+# CALL { WITH chunk
+# // entities connected to the chunk
+# // todo only return entities that are actually in the chunk, remember we connect all extracted entities to all chunks
+# MATCH (chunk)-[:HAS_ENTITY]->(e)
+
+# // depending on match to query embedding either 1 or 2 step expansion
+# WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95
+# THEN
+# collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,1}(:!Chunk&!Document) RETURN path }
+# ELSE
+# collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk&!Document) RETURN path }
+# END as paths
+
+# RETURN collect{ unwind paths as p unwind relationships(p) as r return distinct r} as rels,
+# collect{ unwind paths as p unwind nodes(p) as n return distinct n} as nodes
+# }
+# // aggregate chunk-details and de-duplicate nodes and relationships
+# WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels,
+
+# // TODO sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes?
+# apoc.coll.toSet(apoc.coll.flatten(collect(
+# [r in rels |[startNode(r),endNode(r)]]),true)) as nodes
+
+# // generate metadata and text components for chunks, nodes and relationships
+# WITH d, avg_score,
+# [c IN chunks | c.chunk.text] AS texts,
+# [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails,
+# apoc.coll.sort([n in nodes |
+
+# coalesce(apoc.coll.removeAll(labels(n),['__Entity__'])[0],"") +":"+
+# n.id + (case when n.description is not null then " ("+ n.description+")" else "" end)]) as nodeTexts,
+# apoc.coll.sort([r in rels
+# // optional filter if we limit the node-set
+# // WHERE startNode(r) in nodes AND endNode(r) in nodes
+# |
+# coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+
+# startNode(r).id +
+# " " + type(r) + " " +
+# coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" +
+# endNode(r).id
+# ]) as relTexts
+
+# // combine texts into response-text
+# WITH d, avg_score,chunkdetails,
+# "Text Content:\n" +
+# apoc.text.join(texts,"\n----\n") +
+# "\n----\nEntities:\n"+
+# apoc.text.join(nodeTexts,"\n") +
+# "\n----\nRelationships:\n"+
+# apoc.text.join(relTexts,"\n")
+
+# as text
+# RETURN text, avg_score as score, {length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata
+# """
+
+VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 25
+
 VECTOR_GRAPH_SEARCH_QUERY = """
 WITH node as chunk, score
 // find the document of the chunk
 MATCH (chunk)-[:PART_OF]->(d:Document)
+
+// aggregate chunk-details
+WITH d, collect(DISTINCT {{chunk: chunk, score: score}}) AS chunks, avg(score) as avg_score
 // fetch entities
-CALL { WITH chunk
+CALL {{ WITH chunks
+UNWIND chunks as chunkScore
+WITH chunkScore.chunk as chunk
 // entities connected to the chunk
 // todo only return entities that are actually in the chunk, remember we connect all extracted entities to all chunks
-MATCH (chunk)-[:HAS_ENTITY]->(e)
-
+// todo sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes?
+OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e)
+WITH e, count(*) as numChunks
+ORDER BY numChunks DESC LIMIT {no_of_entites}
 // depending on match to query embedding either 1 or 2 step expansion
 WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95
 THEN
-collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,1}(:!Chunk&!Document) RETURN path }
+collect {{ OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,1}}(:!Chunk&!Document) RETURN path }}
 ELSE
-collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk&!Document) RETURN path }
-END as paths
-
-RETURN collect{ unwind paths as p unwind relationships(p) as r return distinct r} as rels,
-collect{ unwind paths as p unwind nodes(p) as n return distinct n} as nodes
-}
-// aggregate chunk-details and de-duplicate nodes and relationships
-WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels,
-
-// TODO sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes?
-apoc.coll.toSet(apoc.coll.flatten(collect(
-[r in rels |[startNode(r),endNode(r)]]),true)) as nodes
+collect {{ OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,2}}(:!Chunk&!Document) RETURN path }}
+END as paths, e
+WITH apoc.coll.toSet(apoc.coll.flatten(collect(distinct paths))) as paths, collect(distinct e) as entities
+// de-duplicate nodes and relationships across chunks
+RETURN collect{{ unwind paths as p unwind relationships(p) as r return distinct r}} as rels,
+collect{{ unwind paths as p unwind nodes(p) as n return distinct n}} as nodes, entities
+}}
 
 // generate metadata and text components for chunks, nodes and relationships
 WITH d, avg_score,
 [c IN chunks | c.chunk.text] AS texts,
-[c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails,
+[c IN chunks | {{id: c.chunk.id, score: c.score}}] AS chunkdetails,
 apoc.coll.sort([n in nodes |
 
 coalesce(apoc.coll.removeAll(labels(n),['__Entity__'])[0],"") +":"+
@@ -154,24 +218,20 @@
 coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+
 startNode(r).id +
 " " + type(r) + " " +
-coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" +
-endNode(r).id
+coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id
 ]) as relTexts
-
+, entities
 // combine texts into response-text
-WITH d, avg_score,chunkdetails,
-"Text Content:\n" +
-apoc.text.join(texts,"\n----\n") +
-"\n----\nEntities:\n"+
-apoc.text.join(nodeTexts,"\n") +
-"\n----\nRelationships:\n"+
-apoc.text.join(relTexts,"\n")
-
-as text
-RETURN text, avg_score as score, {length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata
-"""
-
-
 
+WITH d, avg_score,chunkdetails,
+"Text Content:\\n" +
+apoc.text.join(texts,"\\n----\\n") +
+"\\n----\\nEntities:\\n"+
+apoc.text.join(nodeTexts,"\\n") +
+"\\n----\\nRelationships:\\n" +
+apoc.text.join(relTexts,"\\n")
 
+as text,entities
 
+RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata
+"""
