@@ -29,7 +29,7 @@
 
 
 from common import get_answer_with_settings_with_weaviate_filter
-from config import WeaviateSettings
+from config import LlmSettings, WeaviateSettings
 
 SYSTEM_PROMPT_DEFAULT = """You are a specialized support ticket assistant. Format your responses following these rules:
 1. Answer the provided question only using the provided context.
@@ -44,10 +44,6 @@
 
 def setup(
     relevant_docs: int,
-    llm_server_url: str,
-    model_id: str,
-    max_tokens: int,
-    model_temperature: float,
     sql_search_db_and_model_path: str,
     max_context_length: int,
     sql_ticket_source: str,
@@ -74,13 +70,13 @@ def setup(
         embedding=embeddings,
     )
 
-    if not llm_server_url.endswith("/v1"):
-        llm_server_url = llm_server_url + "/v1"
+    llm_settings = LlmSettings()
+
     logger.info(
-        f"Creating an OpenAI client to the hosted model at URL: {llm_server_url}"
+        f"Creating an OpenAI client to the hosted model at URL: {llm_settings.llm_server_url}"
     )
     try:
-        client = OpenAI(base_url=llm_server_url, api_key="na")
+        client = OpenAI(base_url=llm_settings.llm_server_url, api_key="na")
     except Exception as e:
         logger.error(f"Error creating client: {e}")
         sys.exit(1)
@@ -89,12 +85,12 @@ def setup(
         get_answer_with_settings_with_weaviate_filter,
         vectorstore=vectorstore,
         client=client,
-        model_id=model_id,
-        max_tokens=max_tokens,
-        model_temperature=model_temperature,
+        model_id=llm_settings.model_id,
+        max_tokens=llm_settings.max_tokens,
+        model_temperature=llm_settings.model_temperature,
         system_prompt=SYSTEM_PROMPT_DEFAULT,
         relevant_docs=relevant_docs,
-        llm_server_url=llm_server_url,
+        llm_server_url=llm_settings.llm_server_url,
         sql_search_db_and_model_path=sql_search_db_and_model_path,
         alpha=weaviate_settings.weaviate_hybrid_search_alpha,
         max_context_length=max_context_length,
@@ -113,24 +109,11 @@ def read_item(question: Union[str, None] = None):
     MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
     MOSAICML_MODEL_ID = "mosaicml/mpt-7b-chat"
     RELEVANT_DOCS_DEFAULT = 2
-    MAX_TOKENS_DEFAULT = 256
-    MODEL_TEMPERATURE_DEFAULT = 0.01
     SQL_SEARCH_DB_AND_MODEL_PATH_DEFAULT = "/app/db/"
-    WEAVIATE_HYBRID_ALPHA_DEFAULT = 0.5
     MODEL_MAX_CONTEXT_LEN = 8192
 
     relevant_docs = int(os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT))
 
-    # llm_server_url = os.getenv("MODEL_LLM_SERVER_URL", "http://localhost:11434/v1")
-    # model_id = os.getenv("MODEL_ID", "llama2")
-
-    llm_server_url = os.getenv("MODEL_LLM_SERVER_URL", "http://localhost:9000/v1")
-    # model_id = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
-    model_id = os.getenv("MODEL_ID", "rubra-ai/Phi-3-mini-128k-instruct")
-
-    max_tokens = int(os.getenv("MAX_TOKENS", MAX_TOKENS_DEFAULT))
-    model_temperature = float(os.getenv("MODEL_TEMPERATURE", MODEL_TEMPERATURE_DEFAULT))
-
     sql_search_db_and_model_path = os.getenv(
         "SQL_SEARCH_DB_AND_MODEL_PATH", SQL_SEARCH_DB_AND_MODEL_PATH_DEFAULT
     )
@@ -143,10 +126,6 @@ def read_item(question: Union[str, None] = None):
 
     app = setup(
        relevant_docs,
-        llm_server_url,
-        model_id,
-        max_tokens,
-        model_temperature,
        sql_search_db_and_model_path,
        max_context_length,
        sql_ticket_source,
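
The diff imports LlmSettings from config.py but does not include its definition. Below is a minimal sketch of what such a class could look like, assuming config.py uses pydantic-settings in the way the existing WeaviateSettings usage suggests. The field names and defaults are inferred from the os.getenv() calls and constants this diff removes; the "/v1" validator is an assumption standing in for the URL normalization deleted from setup().

# Hypothetical sketch only -- config.py is not part of this diff.
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings


class LlmSettings(BaseSettings):
    # Defaults mirror the os.getenv() fallbacks removed from __main__.
    llm_server_url: str = Field(
        default="http://localhost:9000/v1",
        validation_alias="MODEL_LLM_SERVER_URL",
    )
    model_id: str = Field(
        default="rubra-ai/Phi-3-mini-128k-instruct",
        validation_alias="MODEL_ID",
    )
    max_tokens: int = Field(default=256, validation_alias="MAX_TOKENS")
    model_temperature: float = Field(
        default=0.01, validation_alias="MODEL_TEMPERATURE"
    )

    @field_validator("llm_server_url")
    @classmethod
    def _ensure_v1_suffix(cls, value: str) -> str:
        # setup() previously appended "/v1" inline; assuming that logic now
        # lives with the setting itself, it runs once at construction time.
        return value if value.endswith("/v1") else value + "/v1"

With a class along these lines, each value can still be overridden per environment (for example, exporting MODEL_ID or MAX_TOKENS before starting the service), while the defaults live in one place instead of being scattered across the __main__ block and setup() signature.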