-
Notifications
You must be signed in to change notification settings - Fork 4k
Add embeddings and search file support #35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,14 +3,17 @@ | |
import signal | ||
import sys | ||
import warnings | ||
from functools import partial | ||
|
||
import openai | ||
from openai.validators import ( | ||
apply_necessary_remediation, | ||
apply_optional_remediation, | ||
apply_validators, | ||
get_search_validators, | ||
get_validators, | ||
read_any_format, | ||
write_out_file, | ||
write_out_search_file, | ||
) | ||
|
||
|
||
|
@@ -224,6 +227,41 @@ def list(cls, args): | |
|
||
|
||
class Search: | ||
@classmethod
def prepare_data(cls, args):
    """Analyze a local data file for search/classifications and write it out.

    Reads three attributes from ``args``:

    * ``args.file``    -- path of the file handed to ``read_any_format``.
    * ``args.quiet``   -- forwarded to ``apply_validators`` as the
      auto-accept flag.
    * ``args.purpose`` -- ``"classifications"`` requires both ``text`` and
      ``labels``; any other value requires only ``text``.

    ``metadata`` is always treated as an optional field; when the loaded
    frame has no such column one is added and filled with ``None``.
    """
    sys.stdout.write("Analyzing...\n")
    path, accept_all, purpose = args.file, args.quiet, args.purpose

    extra_fields = ["metadata"]
    needed_fields = ["text", "labels"] if purpose == "classifications" else ["text"]

    df, remediation = read_any_format(path, fields=needed_fields + extra_fields)

    # Guarantee the optional column exists before validation and writing.
    if "metadata" not in df:
        df["metadata"] = None

    # Called with no dataframe and the result is discarded, so only the
    # helper's side effects matter here.
    # NOTE(review): presumably this surfaces read errors -- confirm in validators.
    apply_necessary_remediation(None, remediation)

    checks = get_search_validators(needed_fields, extra_fields)

    # Pre-bind the search-specific keyword arguments for the writer callback.
    writer = partial(
        write_out_search_file,
        purpose=purpose,
        fields=needed_fields + extra_fields,
    )

    apply_validators(df, path, remediation, checks, accept_all, writer)
|
||
@classmethod | ||
def create_alpha(cls, args): | ||
resp = openai.Search.create_alpha( | ||
|
@@ -436,49 +474,14 @@ def prepare_data(cls, args): | |
|
||
validators = get_validators() | ||
|
||
optional_remediations = [] | ||
if remediation is not None: | ||
optional_remediations.append(remediation) | ||
for validator in validators: | ||
remediation = validator(df) | ||
if remediation is not None: | ||
optional_remediations.append(remediation) | ||
df = apply_necessary_remediation(df, remediation) | ||
|
||
any_optional_or_necessary_remediations = any( | ||
[ | ||
remediation | ||
for remediation in optional_remediations | ||
if remediation.optional_msg is not None | ||
or remediation.necessary_msg is not None | ||
] | ||
apply_validators( | ||
df, | ||
fname, | ||
remediation, | ||
validators, | ||
auto_accept, | ||
write_out_file_func=write_out_file, | ||
) | ||
any_necessary_applied = any( | ||
[ | ||
remediation | ||
for remediation in optional_remediations | ||
if remediation.necessary_msg is not None | ||
] | ||
) | ||
any_optional_applied = False | ||
|
||
if any_optional_or_necessary_remediations: | ||
sys.stdout.write( | ||
"\n\nBased on the analysis we will perform the following actions:\n" | ||
) | ||
for remediation in optional_remediations: | ||
df, optional_applied = apply_optional_remediation( | ||
df, remediation, auto_accept | ||
) | ||
any_optional_applied = any_optional_applied or optional_applied | ||
else: | ||
sys.stdout.write("\n\nNo remediations found.\n") | ||
|
||
any_optional_or_necessary_applied = ( | ||
any_optional_applied or any_necessary_applied | ||
) | ||
|
||
write_out_file(df, fname, any_optional_or_necessary_applied, auto_accept) | ||
|
||
|
||
def tools_register(parser): | ||
|
@@ -508,6 +511,29 @@ def help(args): | |
) | ||
sub.set_defaults(func=FineTune.prepare_data) | ||
|
||
# Register the `search.prepare_data` sub-command and its required input file.
sub = subparsers.add_parser("search.prepare_data")
sub.add_argument(
    "-f",
    "--file",
    required=True,
    # Search/classifications data consists of text records (optionally with
    # labels and metadata), not prompt-completion pairs. The trailing space
    # keeps the two adjacent literals from running together in --help output.
    help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing text examples to be analyzed. "
    "This should be the local file path.",
)
sub.add_argument( | ||
"-p", | ||
"--purpose", | ||
help="Why are you uploading this file? (see https://beta.openai.com/docs/api-reference/ for purposes)", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems unintuitive to me that the |
||
required=True, | ||
) | ||
sub.add_argument( | ||
"-q", | ||
"--quiet", | ||
required=False, | ||
action="store_true", | ||
help="Auto accepts all suggestions, without asking for user input. To be used within scripts.", | ||
) | ||
sub.set_defaults(func=Search.prepare_data) | ||
|
||
|
||
def api_register(parser): | ||
# Engine management | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copy-paste of `prompt-completion` from fine-tuning?