Commit 6bc02a4

full switch to not using datasets
1 parent: d7dfd40

3 files changed: +30 lines, -22 lines
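
In short, this commit drops the Hugging Face `datasets` dependency everywhere and fetches the philosopher-quotes data as a plain JSON file over HTTP with `requests` instead. A minimal before/after sketch of that data-loading change, assembled from the hunks below (the dataset name and URL are taken verbatim from the diff):

# Before: pull the dataset through the Hugging Face `datasets` package
from datasets import load_dataset

philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]

# After: fetch the same quotes as a static JSON file with `requests`
import requests

philo_dataset = requests.get(
    "https://raw.githubusercontent.com/"
    "datastaxdevs/mini-demo-astradb-langchain/"
    "refs/heads/main/data/philosopher-quotes.json"
).json()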

AstraDB_langchain_quickstart_1.ipynb

Lines changed: 18 additions & 16 deletions
@@ -36,9 +36,7 @@
 "!pip install --quiet \\\n",
 "    \"langchain>=0.3,<0.4\" \\\n",
 "    \"langchain-astradb>=0.6,<0.7\" \\\n",
-"    \"langchain-openai>=0.3,<0.4\" \\\n",
-"    \"datasets>=3.5,<4.0\" \\\n",
-"    \"numpy<2.0\"  # this fixes a python 3.12 issue"
+"    \"langchain-openai>=0.3,<0.4\""
 ]
 },
 {
@@ -65,15 +63,14 @@
 "outputs": [],
 "source": [
 "import os\n",
+"import requests\n",
 "from getpass import getpass\n",
 "\n",
 "from astrapy.info import VectorServiceOptions\n",
 "from langchain_astradb import AstraDBVectorStore\n",
 "\n",
 "from langchain_core.documents import Document\n",
-"from langchain_openai import OpenAIEmbeddings\n",
-"\n",
-"from datasets import load_dataset"
+"from langchain_openai import OpenAIEmbeddings"
 ]
 },
 {
@@ -96,7 +93,9 @@
 "os.environ[\"ASTRA_DB_API_ENDPOINT\"] = input(\"ASTRA_DB_API_ENDPOINT =\")\n",
 "os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"] = getpass(\"ASTRA_DB_APPLICATION_TOKEN =\")\n",
 "\n",
-"os.environ[\"ASTRA_DB_KEYSPACE\"] = input(\"ASTRA_DB_KEYSPACE (optional) =\")\n",
+"if _keyspace := input(\"ASTRA_DB_KEYSPACE (optional) =\"):\n",
+"    os.environ[\"ASTRA_DB_KEYSPACE\"] = _keyspace\n",
+"\n",
 "os.environ[\"ASTRA_DB_API_KEY_NAME\"] = input(\"ASTRA_DB_API_KEY_NAME (required for 'vectorize') =\")"
 ]
 },
@@ -159,7 +158,7 @@
 "source": [
 "ASTRA_DB_APPLICATION_TOKEN = os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"]\n",
 "ASTRA_DB_API_ENDPOINT = os.environ[\"ASTRA_DB_API_ENDPOINT\"]\n",
-"ASTRA_DB_KEYSPACE = os.environ.get(\"ASTRA_DB_KEYSPACE\") or None\n",
+"ASTRA_DB_KEYSPACE = os.environ.get(\"ASTRA_DB_KEYSPACE\")\n",
 "ASTRA_DB_API_KEY_NAME = os.environ.get(\"ASTRA_DB_API_KEY_NAME\") or None\n",
 "\n",
 "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\") or None"
@@ -262,7 +261,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"philo_dataset = load_dataset(\"datastax/philosopher-quotes\")[\"train\"]\n",
+"philo_dataset = requests.get(\n",
+"    \"https://raw.githubusercontent.com/\"\n",
+"    \"datastaxdevs/mini-demo-astradb-langchain/\"\n",
+"    \"refs/heads/main/data/philosopher-quotes.json\"\n",
+").json()\n",
 "\n",
 "print(\"An example entry:\")\n",
 "print(philo_dataset[16])"
@@ -288,14 +291,13 @@
 "documents_to_insert = []\n",
 "\n",
 "for entry_idx, entry in enumerate(philo_dataset):\n",
-"    metadata = {\"author\": entry[\"author\"]}\n",
-"    if entry[\"tags\"]:\n",
-"        # Add metadata tags to the metadata dictionary\n",
-"        for tag in entry[\"tags\"].split(\";\"):\n",
-"            metadata[tag] = \"y\"\n",
+"    metadata = {\n",
+"        \"author\": entry[\"author\"],\n",
+"        **entry[\"metadata\"],\n",
+"    }\n",
 "    # Construct the Document, with the quote and metadata tags\n",
 "    new_document = Document(\n",
-"        id=f\"{entry['author'][:4]}_{entry_idx:03}\",\n",
+"        id=entry[\"_id\"],\n",
 "        page_content=entry[\"quote\"],\n",
 "        metadata=metadata,\n",
 "    )\n",
@@ -632,7 +634,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.12.8"
+"version": "3.12.0"
 }
 },
 "nbformat": 4,

integrate_explicit_embeddings.py

Lines changed: 6 additions & 3 deletions
@@ -11,8 +11,8 @@
 
 
 # Import dependencies
-import json
 import os
+import requests
 from getpass import getpass
 
 from astrapy.info import VectorServiceOptions
@@ -21,7 +21,6 @@
 from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings
 
-from datasets import load_dataset
 from dotenv import load_dotenv
 
 
@@ -48,7 +47,11 @@
 
 
 # Load data
-philo_dataset = json.load(open("data/philosopher-quotes.json"))
+philo_dataset = requests.get(
+    "https://raw.githubusercontent.com/"
+    "datastaxdevs/mini-demo-astradb-langchain/"
+    "refs/heads/main/data/philosopher-quotes.json"
+).json()
 
 print("An example entry:")
 print(philo_dataset[16])

integrate_vectorize.py

Lines changed: 6 additions & 3 deletions
@@ -11,8 +11,8 @@
 
 
 # Import dependencies
-import json
 import os
+import requests
 from getpass import getpass
 
 from astrapy.info import VectorServiceOptions
@@ -21,7 +21,6 @@
 from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings
 
-from datasets import load_dataset
 from dotenv import load_dotenv
 
 
@@ -52,7 +51,11 @@
 
 
 # Load data
-philo_dataset = json.load(open("data/philosopher-quotes.json"))
+philo_dataset = requests.get(
+    "https://raw.githubusercontent.com/"
+    "datastaxdevs/mini-demo-astradb-langchain/"
+    "refs/heads/main/data/philosopher-quotes.json"
+).json()
 
 print("An example entry:")
 print(philo_dataset[16])
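
Both scripts and the notebook call `requests.get(...).json()` directly, so an HTTP failure (for example a 404 from raw.githubusercontent.com) would surface as a JSON decoding error. A small optional hardening sketch, not part of this commit, using only the standard `requests` API:

import requests

DATA_URL = (
    "https://raw.githubusercontent.com/"
    "datastaxdevs/mini-demo-astradb-langchain/"
    "refs/heads/main/data/philosopher-quotes.json"
)

# Fail fast on network or HTTP errors instead of failing on JSON parsing.
response = requests.get(DATA_URL, timeout=30)
response.raise_for_status()
philo_dataset = response.json()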
