Skip to content

Commit 3be51b3

Browse files
More notebooks added to nbtest (#172)
1 parent 5369ab6 commit 3be51b3

14 files changed

+923
-307
lines changed

Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@
22

33
test: nbtest notebooks
44

5-
notebooks: search document-chunking
5+
notebooks: search document-chunking model-upgrades langchain
66

77
search:
88
$(MAKE) -C notebooks/search
99

1010
document-chunking:
1111
$(MAKE) -C notebooks/document-chunking
1212

13+
model-upgrades:
14+
$(MAKE) -C notebooks/model-upgrades
15+
16+
langchain:
17+
$(MAKE) -C notebooks/langchain
18+
1319
install: pre-commit nbtest
1420

1521
pre-commit:
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "385c47c3-27e8-4b51-b8b7-26c97b9a3ad3",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"from elasticsearch import Elasticsearch\n",
11+
"from getpass import getpass\n",
12+
"\n",
13+
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
14+
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
15+
"\n",
16+
"client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n",
17+
"\n",
18+
"# delete the notebook's index\n",
19+
"client.indices.delete(index=\"blogs\", ignore_unavailable=True)\n",
20+
"\n",
21+
"# delete the pipeline\n",
22+
"try:\n",
23+
" client.ingest.delete_pipeline(id=\"vectorize_blogs\")\n",
24+
"except Exception:  # best-effort cleanup: API errors are ignored, Ctrl-C still propagates\n",
25+
" pass\n",
26+
"\n",
27+
"# delete the model\n",
28+
"try:\n",
29+
" client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n",
30+
"except Exception:  # best-effort cleanup: API errors are ignored, Ctrl-C still propagates\n",
31+
" pass"
32+
]
33+
}
34+
],
35+
"metadata": {
36+
"kernelspec": {
37+
"display_name": "Python 3 (ipykernel)",
38+
"language": "python",
39+
"name": "python3"
40+
},
41+
"language_info": {
42+
"codemirror_mode": {
43+
"name": "ipython",
44+
"version": 3
45+
},
46+
"file_extension": ".py",
47+
"mimetype": "text/x-python",
48+
"name": "python",
49+
"nbconvert_exporter": "python",
50+
"pygments_lexer": "ipython3",
51+
"version": "3.11.6"
52+
}
53+
},
54+
"nbformat": 4,
55+
"nbformat_minor": 5
56+
}

notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb

Lines changed: 82 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
{
44
"cell_type": "markdown",
55
"metadata": {
6-
"collapsed": false
6+
"collapsed": false,
7+
"jupyter": {
8+
"outputs_hidden": false
9+
}
710
},
811
"source": [
912
"# NLP text search using hugging face transformer model\n",
@@ -44,14 +47,20 @@
4447
},
4548
"outputs": [],
4649
"source": [
47-
"# install packages\n",
48-
"!python3 -m pip install -qU sentence-transformers eland elasticsearch transformers\n",
49-
"\n",
50+
"!python3 -m pip install -qU sentence-transformers eland elasticsearch transformers"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": null,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
5059
"# import modules\n",
51-
"import pandas as pd, json\n",
5260
"from elasticsearch import Elasticsearch\n",
5361
"from getpass import getpass\n",
54-
"from urllib.request import urlopen"
62+
"from urllib.request import urlopen\n",
63+
"import json"
5564
]
5665
},
5766
{
@@ -93,8 +102,15 @@
93102
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
94103
"\n",
95104
"# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
96-
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
97-
"\n",
105+
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")"
106+
]
107+
},
108+
{
109+
"cell_type": "code",
110+
"execution_count": null,
111+
"metadata": {},
112+
"outputs": [],
113+
"source": [
98114
"!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start"
99115
]
100116
},
@@ -304,7 +320,7 @@
304320
},
305321
{
306322
"cell_type": "code",
307-
"execution_count": 106,
323+
"execution_count": 22,
308324
"metadata": {
309325
"colab": {
310326
"base_uri": "https://localhost:8080/",
@@ -315,125 +331,40 @@
315331
},
316332
"outputs": [
317333
{
318-
"data": {
319-
"text/html": [
320-
"<div>\n",
321-
"<style scoped>\n",
322-
" .dataframe tbody tr th:only-of-type {\n",
323-
" vertical-align: middle;\n",
324-
" }\n",
325-
"\n",
326-
" .dataframe tbody tr th {\n",
327-
" vertical-align: top;\n",
328-
" }\n",
329-
"\n",
330-
" .dataframe thead th {\n",
331-
" text-align: right;\n",
332-
" }\n",
333-
"</style>\n",
334-
"<table border=\"1\" class=\"dataframe\">\n",
335-
" <thead>\n",
336-
" <tr style=\"text-align: right;\">\n",
337-
" <th></th>\n",
338-
" <th>_id</th>\n",
339-
" <th>_score</th>\n",
340-
" <th>fields.title</th>\n",
341-
" </tr>\n",
342-
" </thead>\n",
343-
" <tbody>\n",
344-
" <tr>\n",
345-
" <th>0</th>\n",
346-
" <td>TxUU-YkBAHcz2kFqAun2</td>\n",
347-
" <td>0.591786</td>\n",
348-
" <td>[Brewing in Beats: Track network connections]</td>\n",
349-
" </tr>\n",
350-
" <tr>\n",
351-
" <th>1</th>\n",
352-
" <td>SxUU-YkBAHcz2kFqAun2</td>\n",
353-
" <td>0.401099</td>\n",
354-
" <td>[Machine Learning for Nginx Logs - Identifying...</td>\n",
355-
" </tr>\n",
356-
" <tr>\n",
357-
" <th>2</th>\n",
358-
" <td>UxUU-YkBAHcz2kFqAun2</td>\n",
359-
" <td>0.390279</td>\n",
360-
" <td>[Data Visualization For Machine Learning]</td>\n",
361-
" </tr>\n",
362-
" <tr>\n",
363-
" <th>3</th>\n",
364-
" <td>TBUU-YkBAHcz2kFqAun2</td>\n",
365-
" <td>0.368995</td>\n",
366-
" <td>[Logstash Lines: Introduce integration plugins]</td>\n",
367-
" </tr>\n",
368-
" <tr>\n",
369-
" <th>4</th>\n",
370-
" <td>UhUU-YkBAHcz2kFqAun2</td>\n",
371-
" <td>0.368995</td>\n",
372-
" <td>[Logstash Lines: Introduce integration plugins]</td>\n",
373-
" </tr>\n",
374-
" <tr>\n",
375-
" <th>5</th>\n",
376-
" <td>URUU-YkBAHcz2kFqAun2</td>\n",
377-
" <td>0.356903</td>\n",
378-
" <td>[Keeping up with Kibana: This week in Kibana f...</td>\n",
379-
" </tr>\n",
380-
" <tr>\n",
381-
" <th>6</th>\n",
382-
" <td>UBUU-YkBAHcz2kFqAun2</td>\n",
383-
" <td>0.341939</td>\n",
384-
" <td>[Kibana 4 Video Tutorials, Part 3]</td>\n",
385-
" </tr>\n",
386-
" <tr>\n",
387-
" <th>7</th>\n",
388-
" <td>VBUU-YkBAHcz2kFqAun2</td>\n",
389-
" <td>0.337294</td>\n",
390-
" <td>[Introducing approximate nearest neighbor sear...</td>\n",
391-
" </tr>\n",
392-
" <tr>\n",
393-
" <th>8</th>\n",
394-
" <td>ThUU-YkBAHcz2kFqAun2</td>\n",
395-
" <td>0.336460</td>\n",
396-
" <td>[Where in the World is Elastic? - QCon Beijing...</td>\n",
397-
" </tr>\n",
398-
" <tr>\n",
399-
" <th>9</th>\n",
400-
" <td>TRUU-YkBAHcz2kFqAun2</td>\n",
401-
" <td>0.320756</td>\n",
402-
" <td>[EQL for the masses]</td>\n",
403-
" </tr>\n",
404-
" </tbody>\n",
405-
"</table>\n",
406-
"</div>"
407-
],
408-
"text/plain": [
409-
" _id _score \\\n",
410-
"0 TxUU-YkBAHcz2kFqAun2 0.591786 \n",
411-
"1 SxUU-YkBAHcz2kFqAun2 0.401099 \n",
412-
"2 UxUU-YkBAHcz2kFqAun2 0.390279 \n",
413-
"3 TBUU-YkBAHcz2kFqAun2 0.368995 \n",
414-
"4 UhUU-YkBAHcz2kFqAun2 0.368995 \n",
415-
"5 URUU-YkBAHcz2kFqAun2 0.356903 \n",
416-
"6 UBUU-YkBAHcz2kFqAun2 0.341939 \n",
417-
"7 VBUU-YkBAHcz2kFqAun2 0.337294 \n",
418-
"8 ThUU-YkBAHcz2kFqAun2 0.336460 \n",
419-
"9 TRUU-YkBAHcz2kFqAun2 0.320756 \n",
420-
"\n",
421-
" fields.title \n",
422-
"0 [Brewing in Beats: Track network connections] \n",
423-
"1 [Machine Learning for Nginx Logs - Identifying... \n",
424-
"2 [Data Visualization For Machine Learning] \n",
425-
"3 [Logstash Lines: Introduce integration plugins] \n",
426-
"4 [Logstash Lines: Introduce integration plugins] \n",
427-
"5 [Keeping up with Kibana: This week in Kibana f... \n",
428-
"6 [Kibana 4 Video Tutorials, Part 3] \n",
429-
"7 [Introducing approximate nearest neighbor sear... \n",
430-
"8 [Where in the World is Elastic? - QCon Beijing... \n",
431-
"9 [EQL for the masses] "
432-
]
433-
},
434-
"execution_count": 106,
435-
"metadata": {},
436-
"output_type": "execute_result"
334+
"name": "stdout",
335+
"output_type": "stream",
336+
"text": [
337+
"['Brewing in Beats: Track network connections']\n",
338+
"Score: 0.5917864\n",
339+
"\n",
340+
"['Machine Learning for Nginx Logs - Identifying Operational Issues with Your Website']\n",
341+
"Score: 0.40109876\n",
342+
"\n",
343+
"['Data Visualization For Machine Learning']\n",
344+
"Score: 0.39027885\n",
345+
"\n",
346+
"['Logstash Lines: Introduce integration plugins']\n",
347+
"Score: 0.36899462\n",
348+
"\n",
349+
"['Keeping up with Kibana: This week in Kibana for November 29th, 2019']\n",
350+
"Score: 0.35690257\n",
351+
"\n",
352+
"['How to implement similarity image search | Elastic.co | Elastic Blog']\n",
353+
"Score: 0.34473613\n",
354+
"\n",
355+
"['Kibana 4 Video Tutorials, Part 3']\n",
356+
"Score: 0.34193927\n",
357+
"\n",
358+
"['Introducing approximate nearest neighbor search in Elasticsearch 8.0 | Elastic Blog']\n",
359+
"Score: 0.3372936\n",
360+
"\n",
361+
"['Where in the World is Elastic? - QCon Beijing, Devoxx France, Percona Live & AWS Summit Chicago']\n",
362+
"Score: 0.33645985\n",
363+
"\n",
364+
"['EQL for the masses']\n",
365+
"Score: 0.3207562\n",
366+
"\n"
367+
]
437368
}
438369
],
439370
"source": [
@@ -458,26 +389,41 @@
458389
" knn=query,\n",
459390
" source=False)\n",
460391
"\n",
461-
"\n",
462-
"results = pd.json_normalize(json.loads(json.dumps(response.body['hits']['hits'])))\n",
463-
"\n",
464-
"# shows the result\n",
465-
"results[['_id', '_score', 'fields.title']]\n"
392+
"def show_results(results):\n",
393+
" for result in results:\n",
394+
" print(f'{result[\"fields\"][\"title\"]}\\nScore: {result[\"_score\"]}\\n')\n",
395+
" \n",
396+
"show_results(response.body['hits']['hits'])"
466397
]
398+
},
399+
{
400+
"cell_type": "code",
401+
"execution_count": null,
402+
"metadata": {},
403+
"outputs": [],
404+
"source": []
467405
}
468406
],
469407
"metadata": {
470408
"colab": {
471409
"provenance": []
472410
},
473411
"kernelspec": {
474-
"display_name": "Python 3.11.3 64-bit",
412+
"display_name": "Python 3 (ipykernel)",
475413
"language": "python",
476414
"name": "python3"
477415
},
478416
"language_info": {
417+
"codemirror_mode": {
418+
"name": "ipython",
419+
"version": 3
420+
},
421+
"file_extension": ".py",
422+
"mimetype": "text/x-python",
479423
"name": "python",
480-
"version": "3.9.6"
424+
"nbconvert_exporter": "python",
425+
"pygments_lexer": "ipython3",
426+
"version": "3.11.6"
481427
},
482428
"vscode": {
483429
"interpreter": {
@@ -486,5 +432,5 @@
486432
}
487433
},
488434
"nbformat": 4,
489-
"nbformat_minor": 0
435+
"nbformat_minor": 4
490436
}

notebooks/langchain/Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Path to the nbtest runner, relative to this directory (notebooks/langchain).
NBTEST = ../../bin/nbtest
2+
# Notebooks that the `all` target passes to $(NBTEST), one invocation each.
NOTEBOOKS = \
3+
langchain-using-own-model.ipynb \
4+
langchain-vector-store-using-elser.ipynb
5+
6+
# The notebook names are used as targets and declared phony so their recipe
# runs on every invocation (presumably the .ipynb files exist on disk, in
# which case make would otherwise treat them as up to date).
.PHONY: all $(NOTEBOOKS)
7+
8+
# Default target: run every listed notebook.
all: $(NOTEBOOKS)
9+
10+
# Run nbtest on one notebook ($@ is the notebook file name).
# The leading '-' makes make ignore a non-zero exit status from nbtest, so the
# remaining notebooks still run; as a consequence this Makefile reports
# success even when a notebook fails.
$(NOTEBOOKS):
11+
-$(NBTEST) $@

0 commit comments

Comments
 (0)