Skip to content

Commit 7f840b7

Browse files
feat: automatically fetch the price and metadata from openrouter instead of waiting for litellm
Signed-off-by: thiswillbeyourgithub <[email protected]>
1 parent 2b29a9d commit 7f840b7

File tree

2 files changed

+93
-29
lines changed

2 files changed

+93
-29
lines changed

wdoc/utils/misc.py

Lines changed: 91 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import hashlib
6+
import requests
67
import re
78
from copy import copy
89
import platform
@@ -510,6 +511,70 @@ def model_name_matcher(model: str) -> str:
510511
return out
511512

512513

514+
@memoize
@optional_typecheck
def get_openrouter_metadata() -> dict:
    """Fetch model metadata from openrouter, because litellm always takes
    too much time to add new models.

    Returns
    -------
    dict
        Maps "openrouter/<model_id>" to that model's metadata dict
        (context_length, pricing, supported_parameters, ...). Pricing
        values are converted from str to float. Models whose id carries a
        variant suffix (e.g. ":free") are additionally registered under
        their base name, without overwriting an existing entry.

    Raises
    ------
    requests.HTTPError
        If the openrouter API answers with an error status.
    """
    url = "https://openrouter.ai/api/v1/models"
    # Fail fast instead of hanging forever on a stuck connection, and
    # surface HTTP errors instead of feeding an error page to .json().
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    rep = response.json()
    # put it in a suitable format
    data = {}
    for info in rep["data"]:
        modelid = "openrouter/" + info["id"]
        assert modelid not in data, modelid
        del info["id"]
        pricing = info["pricing"]  # fix: pricing values are str originally
        for k, v in pricing.items():
            pricing[k] = float(v)
        data[modelid] = info

        # For models that for example end with ":free", make them appear
        # under their base name too.
        # BUGFIX: the previous expression `modelid[::-1].split(":")[0][::-1]`
        # kept the segment AFTER the last colon (e.g. just "free"), not the
        # base model name; rsplit keeps the prefix as the comment intends.
        while ":" in modelid:
            modelid = modelid.rsplit(":", 1)[0]
            if modelid not in data:
                data[modelid] = info

    # Example of one entry of the returned dict:
    # {'name': 'Microsoft: Phi 4 Reasoning Plus (free)',
    #  'created': 1746130961,
    #  'description': REMOVED,
    #  'context_length': 32768,
    #  'architecture': {'modality': 'text->text',
    #                   'input_modalities': ['text'],
    #                   'output_modalities': ['text'],
    #                   'tokenizer': 'Other',
    #                   'instruct_type': None},
    #  'pricing': {'prompt': 0.0,
    #              'completion': 0.0,
    #              'request': 0.0,
    #              'image': 0.0,
    #              'web_search': 0.0,
    #              'internal_reasoning': 0.0},
    #  'top_provider': {'context_length': 32768,
    #                   'max_completion_tokens': None,
    #                   'is_moderated': False},
    #  'per_request_limits': None,
    #  'supported_parameters': ['max_tokens', 'temperature', 'top_p',
    #                           'reasoning', 'include_reasoning', 'stop',
    #                           'frequency_penalty', 'presence_penalty',
    #                           'seed', 'top_k', 'min_p',
    #                           'repetition_penalty', 'logprobs',
    #                           'logit_bias', 'top_logprobs']}
    return data
576+
577+
513578
@memoize
514579
@optional_typecheck
515580
def get_model_price(model: str) -> List[float]:
@@ -518,6 +583,20 @@ def get_model_price(model: str) -> List[float]:
518583
), f"Unexpected value for WDOC_ALLOW_NO_PRICE: {env.WDOC_ALLOW_NO_PRICE}"
519584
if model.startswith("ollama/"):
520585
return [0.0, 0.0]
586+
elif model.startswith(f"openrouter/"):
587+
metadata = get_openrouter_metadata()
588+
assert model in metadata, f"Missing {model} from openrouter"
589+
pricing = metadata[model]["pricing"]
590+
if "request" in pricing:
591+
logger.error(
592+
f"Found non 0 request for {model}, this is not supported by wdoc so the price will not be accurate"
593+
)
594+
if "internal_reasoning" in pricing:
595+
logger.error(
596+
f"Found non 0 internal_reasoning cost for {model}, this is not supported by wdoc so the price will not be accurate."
597+
)
598+
return [pricing["prompt"], pricing["completion"]]
599+
521600
if model in litellm.model_cost:
522601
return [
523602
litellm.model_cost[model]["input_cost_per_token"],
@@ -586,13 +665,12 @@ def __hash__(self):
586665
@optional_typecheck
587666
def get_model_max_tokens(modelname: ModelName) -> int:
588667
if modelname.backend == "openrouter":
589-
# openrouter and litellm can have outdated or plain wrong parameters
590-
submodel = ModelName(modelname.original.replace(f"openrouter/", ""))
591-
try:
592-
sub_price = get_model_max_tokens(submodel)
593-
return sub_price
594-
except Exception:
595-
pass
668+
openrouter_data = get_openrouter_metadata()
669+
assert (
670+
modelname.original in openrouter_data
671+
), f"Missing model {modelname.original} from openrouter metadata"
672+
return openrouter_data[modelname.original]["context_length"]
673+
596674
if modelname.original in litellm.model_cost:
597675
return litellm.model_cost[modelname.original]["max_tokens"]
598676
elif (trial := modelname.model) in litellm.model_cost:
@@ -1132,11 +1210,12 @@ def get_supported_model_params(modelname: ModelName) -> list:
11321210
if modelname.backend == "testing":
11331211
return []
11341212
if modelname.backend == "openrouter":
1135-
# openrouter and litellm can have outdated or plain wrong parameters
1136-
submodel = ModelName(modelname.original.replace(f"openrouter/", ""))
1137-
sub_params = get_supported_model_params(submodel)
1138-
if sub_params:
1139-
return sub_params
1213+
metadata = get_openrouter_metadata()
1214+
assert (
1215+
modelname.original in metadata
1216+
), f"Missing {modelname.original} from openrouter"
1217+
return metadata[modelname.original]["supported_parameters"]
1218+
11401219
for test in [
11411220
modelname.original,
11421221
modelname.model,

wdoc/wdoc.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,29 +1373,14 @@ def query_task(self, query: Optional[str] = None) -> dict:
13731373

13741374
# answer 0 or 1 if the document is related
13751375
if not hasattr(self, "eval_llm"):
1376-
if self.query_eval_model.backend == "openrouter":
1377-
try:
1378-
self.eval_llm_params = get_supported_model_params(
1379-
self.query_eval_model
1380-
)
1381-
except Exception as err:
1382-
bypassmodel = self.query_eval_model.original.replace(
1383-
f"openrouter/", ""
1384-
)
1385-
logger.warning(
1386-
f"Failed to get query_eval_model parameters information for model '{self.query_eval_model}'. We will try to bypass openrouter to get them by using '{bypassmodel}'. Error was '{err}'"
1387-
)
1388-
self.eval_llm_params = get_supported_model_params(bypassmodel)
1389-
else:
1390-
self.eval_llm_params = get_supported_model_params(self.query_eval_model)
1376+
self.eval_llm_params = get_supported_model_params(self.query_eval_model)
13911377
eval_args = copy.deepcopy(self.query_eval_model_kwargs)
13921378
if "n" in self.eval_llm_params:
13931379
eval_args["n"] = self.query_eval_check_number
1394-
elif self.query_eval_check_number > 1:
1380+
if self.query_eval_check_number > 1:
13951381
logger.warning(
13961382
f"Model {self.query_eval_model.original} does not support parameter 'n' so will be called multiple times instead. This might cost more."
13971383
)
1398-
assert self.query_eval_model.backend != "openai"
13991384
self.eval_llm = load_llm(
14001385
modelname=self.query_eval_model,
14011386
llm_cache=False, # disables caching because another caching is used on top

0 commit comments

Comments
 (0)