2 files changed: +16 -18 lines changed
Original file line number | Diff line number | Diff line change
4
4
pip install \
5
5
"langchain>=0.3,<0.4" \
6
6
"langchain-astradb>=0.6,<0.7" \
7
- "langchain-openai>=0.3,<0.4" \
8
- "datasets>=3.5,<4.0"
7
+ "langchain-openai>=0.3,<0.4"
9
8
10
9
Requires a `.env` file with environment variables, see `template.env`.
11
10
"""
12
11
13
12
14
13
# Import dependencies
14
+ import json
15
15
import os
16
16
from getpass import getpass
17
17
48
48
49
49
50
50
# Load data
51
- philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]
51
+ philo_dataset = json.load(open("data/philosopher-quotes.json"))
52
52
53
53
print ("An example entry:" )
54
54
print (philo_dataset [16 ])
58
58
documents_to_insert = []
59
59
60
60
for entry_idx , entry in enumerate (philo_dataset ):
61
- metadata = {"author" : entry ["author" ]}
62
- if entry ["tags" ]:
63
- # Add metadata tags to the metadata dictionary
64
- for tag in entry ["tags" ].split (";" ):
65
- metadata [tag ] = "y"
61
+ metadata = {
62
+ "author" : entry ["author" ],
63
+ ** entry ["metadata" ],
64
+ }
66
65
# Construct the Document, with the quote and metadata tags
67
66
new_document = Document (
68
- id=f"{entry['author'][:4]}_{entry_idx:03}",
67
+ id = entry ["_id" ] ,
69
68
page_content = entry ["quote" ],
70
69
metadata = metadata ,
71
70
)
Original file line number Diff line number Diff line change 4
4
pip install \
5
5
"langchain>=0.3,<0.4" \
6
6
"langchain-astradb>=0.6,<0.7" \
7
- "langchain-openai>=0.3,<0.4" \
8
- "datasets>=3.5,<4.0"
7
+ "langchain-openai>=0.3,<0.4"
9
8
10
9
Requires a `.env` file with environment variables, see `template.env`.
11
10
"""
12
11
13
12
14
13
# Import dependencies
14
+ import json
15
15
import os
16
16
from getpass import getpass
17
17
52
52
53
53
54
54
# Load data
55
- philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]
55
+ philo_dataset = json.load(open("data/philosopher-quotes.json"))
56
56
57
57
print ("An example entry:" )
58
58
print (philo_dataset [16 ])
62
62
documents_to_insert = []
63
63
64
64
for entry_idx , entry in enumerate (philo_dataset ):
65
- metadata = {"author" : entry ["author" ]}
66
- if entry ["tags" ]:
67
- # Add metadata tags to the metadata dictionary
68
- for tag in entry ["tags" ].split (";" ):
69
- metadata [tag ] = "y"
65
+ metadata = {
66
+ "author" : entry ["author" ],
67
+ ** entry ["metadata" ],
68
+ }
70
69
# Construct the Document, with the quote and metadata tags
71
70
new_document = Document (
72
- id=f"{entry['author'][:4]}_{entry_idx:03}",
71
+ id = entry ["_id" ] ,
73
72
page_content = entry ["quote" ],
74
73
metadata = metadata ,
75
74
)
You can’t perform that action at this time.
0 commit comments