Skip to content

Commit 3bb4aaf

Browse files
committed
Python scripts no longer use the `datasets` package (quotes are loaded from a local JSON file instead)
1 parent ee88fa6 commit 3bb4aaf

File tree

2 files changed

+16
-18
lines changed

2 files changed

+16
-18
lines changed

integrate_explicit_embeddings.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
pip install \
55
"langchain>=0.3,<0.4" \
66
"langchain-astradb>=0.6,<0.7" \
7-
"langchain-openai>=0.3,<0.4" \
8-
"datasets>=3.5,<4.0"
7+
"langchain-openai>=0.3,<0.4"
98
109
Requires a `.env` file with environment variables, see `template.env`.
1110
"""
1211

1312

1413
# Import dependencies
14+
import json
1515
import os
1616
from getpass import getpass
1717

@@ -48,7 +48,7 @@
4848

4949

5050
# Load data
51-
philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]
51+
philo_dataset = json.load(open("data/philosopher-quotes.json"))
5252

5353
print("An example entry:")
5454
print(philo_dataset[16])
@@ -58,14 +58,13 @@
5858
documents_to_insert = []
5959

6060
for entry_idx, entry in enumerate(philo_dataset):
61-
metadata = {"author": entry["author"]}
62-
if entry["tags"]:
63-
# Add metadata tags to the metadata dictionary
64-
for tag in entry["tags"].split(";"):
65-
metadata[tag] = "y"
61+
metadata = {
62+
"author": entry["author"],
63+
**entry["metadata"],
64+
}
6665
# Construct the Document, with the quote and metadata tags
6766
new_document = Document(
68-
id=f"{entry['author'][:4]}_{entry_idx:03}",
67+
id=entry["_id"],
6968
page_content=entry["quote"],
7069
metadata=metadata,
7170
)

integrate_vectorize.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
pip install \
55
"langchain>=0.3,<0.4" \
66
"langchain-astradb>=0.6,<0.7" \
7-
"langchain-openai>=0.3,<0.4" \
8-
"datasets>=3.5,<4.0"
7+
"langchain-openai>=0.3,<0.4"
98
109
Requires a `.env` file with environment variables, see `template.env`.
1110
"""
1211

1312

1413
# Import dependencies
14+
import json
1515
import os
1616
from getpass import getpass
1717

@@ -52,7 +52,7 @@
5252

5353

5454
# Load data
55-
philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]
55+
philo_dataset = json.load(open("data/philosopher-quotes.json"))
5656

5757
print("An example entry:")
5858
print(philo_dataset[16])
@@ -62,14 +62,13 @@
6262
documents_to_insert = []
6363

6464
for entry_idx, entry in enumerate(philo_dataset):
65-
metadata = {"author": entry["author"]}
66-
if entry["tags"]:
67-
# Add metadata tags to the metadata dictionary
68-
for tag in entry["tags"].split(";"):
69-
metadata[tag] = "y"
65+
metadata = {
66+
"author": entry["author"],
67+
**entry["metadata"],
68+
}
7069
# Construct the Document, with the quote and metadata tags
7170
new_document = Document(
72-
id=f"{entry['author'][:4]}_{entry_idx:03}",
71+
id=entry["_id"],
7372
page_content=entry["quote"],
7473
metadata=metadata,
7574
)

0 commit comments

Comments
 (0)