Skip to content

Commit 3c6f7b4

Browse files
committed
Use llm settings
1 parent 89dadd7 commit 3c6f7b4

File tree

2 files changed

+16
-30
lines changed

2 files changed

+16
-30
lines changed

dockers/llm.rag.service/config.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,13 @@ class LlmSettings(BaseSettings):
8989
alias="MODEL_TEMPERATURE",
9090
)
9191

92+
@field_validator("llm_server_url")
93+
@classmethod
94+
def ensure_v1_endpoint(cls, v: str) -> str:
95+
if not v.endswith("/v1"):
96+
v = v + "/v1"
97+
return v
98+
9299
@field_validator("max_tokens", mode="before")
93100
@classmethod
94101
def validate_max_tokens(cls, v):

dockers/llm.rag.service/serverragllm_csv_to_weaviate_local.py

Lines changed: 9 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929

3030

3131
from common import get_answer_with_settings_with_weaviate_filter
32-
from config import WeaviateSettings
32+
from config import LlmSettings, WeaviateSettings
3333

3434
SYSTEM_PROMPT_DEFAULT = """You are a specialized support ticket assistant. Format your responses following these rules:
3535
1. Answer the provided question only using the provided context.
@@ -44,10 +44,6 @@
4444

4545
def setup(
4646
relevant_docs: int,
47-
llm_server_url: str,
48-
model_id: str,
49-
max_tokens: int,
50-
model_temperature: float,
5147
sql_search_db_and_model_path: str,
5248
max_context_length: int,
5349
sql_ticket_source: str,
@@ -74,13 +70,13 @@ def setup(
7470
embedding=embeddings,
7571
)
7672

77-
if not llm_server_url.endswith("/v1"):
78-
llm_server_url = llm_server_url + "/v1"
73+
llm_settings = LlmSettings()
74+
7975
logger.info(
80-
f"Creating an OpenAI client to the hosted model at URL: {llm_server_url}"
76+
f"Creating an OpenAI client to the hosted model at URL: {llm_settings.llm_server_url}"
8177
)
8278
try:
83-
client = OpenAI(base_url=llm_server_url, api_key="na")
79+
client = OpenAI(base_url=llm_settings.llm_server_url, api_key="na")
8480
except Exception as e:
8581
logger.error(f"Error creating client: {e}")
8682
sys.exit(1)
@@ -89,12 +85,12 @@ def setup(
8985
get_answer_with_settings_with_weaviate_filter,
9086
vectorstore=vectorstore,
9187
client=client,
92-
model_id=model_id,
93-
max_tokens=max_tokens,
94-
model_temperature=model_temperature,
88+
model_id=llm_settings.model_id,
89+
max_tokens=llm_settings.max_tokens,
90+
model_temperature=llm_settings.model_temperature,
9591
system_prompt=SYSTEM_PROMPT_DEFAULT,
9692
relevant_docs=relevant_docs,
97-
llm_server_url=llm_server_url,
93+
llm_server_url=llm_settings.llm_server_url,
9894
sql_search_db_and_model_path=sql_search_db_and_model_path,
9995
alpha=weaviate_settings.weaviate_hybrid_search_alpha,
10096
max_context_length=max_context_length,
@@ -113,24 +109,11 @@ def read_item(question: Union[str, None] = None):
113109
MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
114110
MOSAICML_MODEL_ID = "mosaicml/mpt-7b-chat"
115111
RELEVANT_DOCS_DEFAULT = 2
116-
MAX_TOKENS_DEFAULT = 256
117-
MODEL_TEMPERATURE_DEFAULT = 0.01
118112
SQL_SEARCH_DB_AND_MODEL_PATH_DEFAULT = "/app/db/"
119-
WEAVIATE_HYBRID_ALPHA_DEFAULT = 0.5
120113
MODEL_MAX_CONTEXT_LEN = 8192
121114

122115
relevant_docs = int(os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT))
123116

124-
# llm_server_url = os.getenv("MODEL_LLM_SERVER_URL", "http://localhost:11434/v1")
125-
# model_id = os.getenv("MODEL_ID", "llama2")
126-
127-
llm_server_url = os.getenv("MODEL_LLM_SERVER_URL", "http://localhost:9000/v1")
128-
# model_id = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
129-
model_id = os.getenv("MODEL_ID", "rubra-ai/Phi-3-mini-128k-instruct")
130-
131-
max_tokens = int(os.getenv("MAX_TOKENS", MAX_TOKENS_DEFAULT))
132-
model_temperature = float(os.getenv("MODEL_TEMPERATURE", MODEL_TEMPERATURE_DEFAULT))
133-
134117
sql_search_db_and_model_path = os.getenv(
135118
"SQL_SEARCH_DB_AND_MODEL_PATH", SQL_SEARCH_DB_AND_MODEL_PATH_DEFAULT
136119
)
@@ -143,10 +126,6 @@ def read_item(question: Union[str, None] = None):
143126

144127
app = setup(
145128
relevant_docs,
146-
llm_server_url,
147-
model_id,
148-
max_tokens,
149-
model_temperature,
150129
sql_search_db_and_model_path,
151130
max_context_length,
152131
sql_ticket_source,

0 commit comments

Comments (0)