|
196 | 196 | },
197 | 197 | {
198 | 198 | "cell_type": "code",
199 |     | - "execution_count": 5,
    | 199 | + "execution_count": null,
200 | 200 | "metadata": {},
201 | 201 | "outputs": [],
202 | 202 | "source": [
203 | 203 | "def pretty_response(response):\n",
204 | 204 | "    if len(response[\"hits\"][\"hits\"]) == 0:\n",
205 | 205 | "        print(\"Your search returned no results.\")\n",
206 | 206 | "    else:\n",
207 |     | - "        for hit in response[\"hits\"][\"hits\"]:\n",
    | 207 | + "        for idx, hit in enumerate(response[\"hits\"][\"hits\"], start=1):\n",
208 | 208 | "            id = hit[\"_id\"]\n",
209 | 209 | "            publication_date = hit[\"_source\"][\"publish_date\"]\n",
210 |     | - "            rank = hit[\"_rank\"]\n",
    | 210 | + "            score = hit[\"_score\"]\n",
211 | 211 | "            title = hit[\"_source\"][\"title\"]\n",
212 | 212 | "            summary = hit[\"_source\"][\"summary\"]\n",
213 |     | - "            pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {rank}\"\n",
    | 213 | + "            pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {idx}\\nScore: {score}\"\n",
214 | 214 | "            print(pretty_output)"
215 | 215 | ]
216 | 216 | },
|
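For a quick sanity check of the updated helper, here is a minimal hedged example. The `fake_response` dict is hand-built to mimic the hit shape `pretty_response()` reads (`_id`, `_score`, and `_source` with `publish_date`, `title`, `summary`); every value in it is made up for illustration, not output from this notebook.

```python
# Hand-built stand-in for an Elasticsearch search response, shaped like the
# hits that pretty_response() reads. All values are made-up examples.
fake_response = {
    "hits": {
        "hits": [
            {
                "_id": "1",
                "_score": 0.0328,
                "_source": {
                    "publish_date": "2023-01-01",
                    "title": "Example Python Book",
                    "summary": "A made-up summary about Python programming.",
                },
            }
        ]
    }
}

pretty_response(fake_response)
# Prints the ID, publication date, title, summary, Rank: 1, and Score: 0.0328
```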
231 | 231 | "\n",
232 | 232 | "We then use [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html) to balance the scores to provide a final list of documents, ranked in order of relevance. RRF is a ranking algorithm for combining results from different information retrieval strategies.\n",
233 | 233 | "\n",
234 |     | - "Note that _score is null, and we instead use _rank to show our top-ranked documents."
    | 234 | + "Note: With the retriever API, _score contains the document’s relevance score, and the rank is simply the position in the results (first result is rank 1, etc.)."
235 | 235 | ]
236 | 236 | },
237 | 237 | {
238 | 238 | "cell_type": "code",
239 |     | - "execution_count": 6,
    | 239 | + "execution_count": null,
240 | 240 | "metadata": {},
241 | 241 | "outputs": [
242 | 242 | {
|
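For intuition about what RRF computes here, this is a minimal sketch of the fusion formula, assuming the standard definition score(d) = Σ 1/(k + rank(d)) over the result lists, with the commonly cited default rank constant k = 60. The two ranked lists below are invented examples, not output from this notebook.

```python
from collections import defaultdict

def rrf_fuse(ranked_lists, k=60):
    """Fuse several ranked lists of doc IDs with Reciprocal Rank Fusion."""
    scores = defaultdict(float)
    for ranked in ranked_lists:
        for rank, doc_id in enumerate(ranked, start=1):
            # Each list contributes 1 / (k + rank) for every doc it contains.
            scores[doc_id] += 1.0 / (k + rank)
    # Highest fused score first
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)

lexical = ["doc_a", "doc_b", "doc_c"]   # e.g. top hits from the match query
semantic = ["doc_b", "doc_d", "doc_a"]  # e.g. top hits from the kNN query
print(rrf_fuse([lexical, semantic]))
# doc_a and doc_b appear in both lists, so they outrank doc_c and doc_d
```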
280 | 280 | "response = client.search(\n",
281 | 281 | "    index=\"book_index\",\n",
282 | 282 | "    size=5,\n",
283 |     | - "    query={\"match\": {\"summary\": \"python programming\"}},\n",
284 |     | - "    knn={\n",
285 |     | - "        \"field\": \"title_vector\",\n",
286 |     | - "        \"query_vector\": model.encode(\n",
287 |     | - "            \"python programming\"\n",
288 |     | - "        ).tolist(),  # generate embedding for query so it can be compared to `title_vector`\n",
289 |     | - "        \"k\": 5,\n",
290 |     | - "        \"num_candidates\": 10,\n",
    | 283 | + "    retriever={\n",
    | 284 | + "        \"rrf\": {\n",
    | 285 | + "            \"retrievers\": [\n",
    | 286 | + "                {\"standard\": {\"query\": {\"match\": {\"summary\": \"python programming\"}}}},\n",
    | 287 | + "                {\n",
    | 288 | + "                    \"knn\": {\n",
    | 289 | + "                        \"field\": \"title_vector\",\n",
    | 290 | + "                        \"query_vector\": model.encode(\"python programming\").tolist(),\n",
    | 291 | + "                        \"k\": 5,\n",
    | 292 | + "                        \"num_candidates\": 10,\n",
    | 293 | + "                    }\n",
    | 294 | + "                },\n",
    | 295 | + "            ]\n",
    | 296 | + "        }\n",
291 | 297 | "    },\n",
292 |     | - "    rank={\"rrf\": {}},\n",
293 | 298 | ")\n",
294 |     | - "\n",
295 | 299 | "pretty_response(response)"
296 | 300 | ]
297 | 301 | }
|
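If you want to experiment beyond the defaults, the rrf retriever also accepts tuning parameters. The sketch below reuses `client` and `model` from the earlier notebook cells; `rank_constant` and `rank_window_size` are options documented for Elasticsearch's rrf retriever, and the values shown are illustrative assumptions, not ones used by this notebook.

```python
# Hedged variant of the notebook's search: the same rrf retriever, with the
# optional tuning knobs spelled out. Values below are illustrative only.
response = client.search(
    index="book_index",
    size=5,
    retriever={
        "rrf": {
            "retrievers": [
                {"standard": {"query": {"match": {"summary": "python programming"}}}},
                {
                    "knn": {
                        "field": "title_vector",
                        "query_vector": model.encode("python programming").tolist(),
                        "k": 5,
                        "num_candidates": 10,
                    }
                },
            ],
            "rank_constant": 60,  # k in 1 / (k + rank); larger flattens score differences
            "rank_window_size": 50,  # how many top docs from each retriever are fused
        }
    },
)
pretty_response(response)
```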
|