36 | 36 |   "!pip install --quiet \\\n",
37 | 37 |   " \"langchain>=0.3,<0.4\" \\\n",
38 | 38 |   " \"langchain-astradb>=0.6,<0.7\" \\\n",
39 |    | - " \"langchain-openai>=0.3,<0.4\" \\\n",
40 |    | - " \"datasets>=3.5,<4.0\" \\\n",
41 |    | - " \"numpy<2.0\" # this fixes a python 3.12 issue"
   | 39 | + " \"langchain-openai>=0.3,<0.4\""
42 | 40 |   ]
43 | 41 |   },
44 | 42 |   {

65 | 63 |   "outputs": [],
66 | 64 |   "source": [
67 | 65 |   "import os\n",
   | 66 | + "import requests\n",
68 | 67 |   "from getpass import getpass\n",
69 | 68 |   "\n",
70 | 69 |   "from astrapy.info import VectorServiceOptions\n",
71 | 70 |   "from langchain_astradb import AstraDBVectorStore\n",
72 | 71 |   "\n",
73 | 72 |   "from langchain_core.documents import Document\n",
74 |    | - "from langchain_openai import OpenAIEmbeddings\n",
75 |    | - "\n",
76 |    | - "from datasets import load_dataset"
   | 73 | + "from langchain_openai import OpenAIEmbeddings"
77 | 74 |   ]
78 | 75 |   },
79 | 76 |   {

 96 |  93 |   "os.environ[\"ASTRA_DB_API_ENDPOINT\"] = input(\"ASTRA_DB_API_ENDPOINT =\")\n",
 97 |  94 |   "os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"] = getpass(\"ASTRA_DB_APPLICATION_TOKEN =\")\n",
 98 |  95 |   "\n",
 99 |     | - "os.environ[\"ASTRA_DB_KEYSPACE\"] = input(\"ASTRA_DB_KEYSPACE (optional) =\")\n",
    |  96 | + "if _keyspace := input(\"ASTRA_DB_KEYSPACE (optional) =\"):\n",
    |  97 | + "    os.environ[\"ASTRA_DB_KEYSPACE\"] = _keyspace\n",
    |  98 | + "\n",
100 |  99 |   "os.environ[\"ASTRA_DB_API_KEY_NAME\"] = input(\"ASTRA_DB_API_KEY_NAME (required for 'vectorize') =\")"
101 | 100 |   ]
102 | 101 |   },

159 | 158 |   "source": [
160 | 159 |   "ASTRA_DB_APPLICATION_TOKEN = os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"]\n",
161 | 160 |   "ASTRA_DB_API_ENDPOINT = os.environ[\"ASTRA_DB_API_ENDPOINT\"]\n",
162 |     | - "ASTRA_DB_KEYSPACE = os.environ.get(\"ASTRA_DB_KEYSPACE\") or None\n",
    | 161 | + "ASTRA_DB_KEYSPACE = os.environ.get(\"ASTRA_DB_KEYSPACE\")\n",
163 | 162 |   "ASTRA_DB_API_KEY_NAME = os.environ.get(\"ASTRA_DB_API_KEY_NAME\") or None\n",
164 | 163 |   "\n",
165 | 164 |   "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\") or None"

262 | 261 |   "metadata": {},
263 | 262 |   "outputs": [],
264 | 263 |   "source": [
265 |     | - "philo_dataset = load_dataset(\"datastax/philosopher-quotes\")[\"train\"]\n",
    | 264 | + "philo_dataset = requests.get(\n",
    | 265 | + "    \"https://raw.githubusercontent.com/\"\n",
    | 266 | + "    \"datastaxdevs/mini-demo-astradb-langchain/\"\n",
    | 267 | + "    \"refs/heads/main/data/philosopher-quotes.json\"\n",
    | 268 | + ").json()\n",
266 | 269 |   "\n",
267 | 270 |   "print(\"An example entry:\")\n",
268 | 271 |   "print(philo_dataset[16])"

288 | 291 |   "documents_to_insert = []\n",
289 | 292 |   "\n",
290 | 293 |   "for entry_idx, entry in enumerate(philo_dataset):\n",
291 |     | - "    metadata = {\"author\": entry[\"author\"]}\n",
292 |     | - "    if entry[\"tags\"]:\n",
293 |     | - "        # Add metadata tags to the metadata dictionary\n",
294 |     | - "        for tag in entry[\"tags\"].split(\";\"):\n",
295 |     | - "            metadata[tag] = \"y\"\n",
    | 294 | + "    metadata = {\n",
    | 295 | + "        \"author\": entry[\"author\"],\n",
    | 296 | + "        **entry[\"metadata\"],\n",
    | 297 | + "    }\n",
296 | 298 |   "    # Construct the Document, with the quote and metadata tags\n",
297 | 299 |   "    new_document = Document(\n",
298 |     | - "        id=f\"{entry['author'][:4]}_{entry_idx:03}\",\n",
    | 300 | + "        id=entry[\"_id\"],\n",
299 | 301 |   "        page_content=entry[\"quote\"],\n",
300 | 302 |   "        metadata=metadata,\n",
301 | 303 |   "    )\n",

632 | 634 |   "name": "python",
633 | 635 |   "nbconvert_exporter": "python",
634 | 636 |   "pygments_lexer": "ipython3",
635 |     | - "version": "3.12.8"
    | 637 | + "version": "3.12.0"
636 | 638 |   }
637 | 639 |   },
638 | 640 |   "nbformat": 4,