Add env for OTLP service name #285


Merged · 6 commits · Jun 17, 2024
6 changes: 6 additions & 0 deletions README.md
@@ -252,6 +252,12 @@ Options:

[env: OTLP_ENDPOINT=]

--otlp-service-name <OTLP_SERVICE_NAME>
The service name for OpenTelemetry.

[env: OTLP_SERVICE_NAME=]
[default: text-embeddings-inference.server]

--cors-allow-origin <CORS_ALLOW_ORIGIN>
[env: CORS_ALLOW_ORIGIN=]
```
3 changes: 2 additions & 1 deletion backends/python/server/text_embeddings_server/cli.py
@@ -23,6 +23,7 @@ def serve(
logger_level: str = "INFO",
json_output: bool = False,
otlp_endpoint: Optional[str] = None,
otlp_service_name: str = "text-embeddings-inference.server",
):
# Remove default handler
logger.remove()
@@ -42,7 +43,7 @@

# Setup OpenTelemetry distributed tracing
if otlp_endpoint is not None:
setup_tracing(otlp_endpoint=otlp_endpoint)
setup_tracing(otlp_endpoint=otlp_endpoint, otlp_service_name=otlp_service_name)

# Downgrade enum into str for easier management later on
dtype = None if dtype is None else dtype.value
@@ -54,10 +54,8 @@ def _start_span(self, handler_call_details, context, set_status_on_exception=False):
)


def setup_tracing(otlp_endpoint: str):
resource = Resource.create(
attributes={"service.name": f"text-embeddings-inference.server"}
)
def setup_tracing(otlp_endpoint: str, otlp_service_name: str):
resource = Resource.create(attributes={"service.name": otlp_service_name})
span_exporter = OTLPSpanExporter(endpoint=otlp_endpoint, insecure=True)
span_processor = BatchSpanProcessor(span_exporter)

10 changes: 8 additions & 2 deletions backends/python/src/lib.rs
@@ -22,6 +22,7 @@ impl PythonBackend {
model_type: ModelType,
uds_path: String,
otlp_endpoint: Option<String>,
otlp_service_name: String,
) -> Result<Self, BackendError> {
match model_type {
ModelType::Classifier => {
@@ -37,8 +38,13 @@
}
};

let backend_process =
management::BackendProcess::new(model_path, dtype, &uds_path, otlp_endpoint)?;
let backend_process = management::BackendProcess::new(
model_path,
dtype,
&uds_path,
otlp_endpoint,
otlp_service_name,
)?;
let tokio_runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
20 changes: 12 additions & 8 deletions backends/python/src/management.rs
@@ -21,6 +21,7 @@ impl BackendProcess {
dtype: String,
uds_path: &str,
otlp_endpoint: Option<String>,
otlp_service_name: String,
) -> Result<Self, BackendError> {
// Get UDS path
let uds = Path::new(uds_path);
@@ -33,21 +34,24 @@
// Process args
let mut python_server_args = vec![
model_path,
"--dtype".to_string(),
"--dtype".to_owned(),
dtype,
"--uds-path".to_string(),
uds_path.to_string(),
"--logger-level".to_string(),
"INFO".to_string(),
"--json-output".to_string(),
"--uds-path".to_owned(),
uds_path.to_owned(),
"--logger-level".to_owned(),
"INFO".to_owned(),
"--json-output".to_owned(),
];

// OpenTelemetry
if let Some(otlp_endpoint) = otlp_endpoint {
python_server_args.push("--otlp-endpoint".to_string());
python_server_args.push("--otlp-endpoint".to_owned());
python_server_args.push(otlp_endpoint);
}

python_server_args.push("--otlp-service-name".to_owned());
python_server_args.push(otlp_service_name);

// Copy current process env
let envs: Vec<(OsString, OsString)> = env::vars_os().collect();

@@ -64,7 +68,7 @@ impl BackendProcess {
Err(err) => {
if err.kind() == io::ErrorKind::NotFound {
return Err(BackendError::Start(
"python-text-embeddings-server not found in PATH".to_string(),
"python-text-embeddings-server not found in PATH".to_owned(),
));
}
return Err(BackendError::Start(err.to_string()));
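Worth noting in this hunk: `--otlp-endpoint` is only appended when a collector endpoint is configured, while `--otlp-service-name` is pushed unconditionally, because the router always supplies a value (it has a default). A minimal, self-contained sketch of that argument assembly — `build_args` and the literals are illustrative stand-ins, not the crate's actual API:

```rust
use std::process::Command;

// Sketch of how management.rs assembles the Python server's CLI arguments.
fn build_args(otlp_endpoint: Option<String>, otlp_service_name: String) -> Vec<String> {
    let mut args = vec![
        "--logger-level".to_owned(),
        "INFO".to_owned(),
        "--json-output".to_owned(),
    ];

    // The endpoint is optional: the flag is only passed when tracing is configured.
    if let Some(endpoint) = otlp_endpoint {
        args.push("--otlp-endpoint".to_owned());
        args.push(endpoint);
    }

    // The service name always has a value (a default is set upstream),
    // so the flag is passed unconditionally.
    args.push("--otlp-service-name".to_owned());
    args.push(otlp_service_name);

    args
}

fn main() {
    let args = build_args(None, "text-embeddings-inference.server".to_owned());
    // Roughly how the args are handed to the spawned Python server process.
    let mut cmd = Command::new("python-text-embeddings-server");
    cmd.args(&args);
    println!("{args:?}");
}
```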
4 changes: 4 additions & 0 deletions backends/src/lib.rs
@@ -38,6 +38,7 @@ impl Backend {
model_type: ModelType,
uds_path: String,
otlp_endpoint: Option<String>,
otlp_service_name: String,
) -> Result<Self, BackendError> {
let (backend_sender, backend_receiver) = mpsc::unbounded_channel();

@@ -47,6 +48,7 @@
model_type.clone(),
uds_path,
otlp_endpoint,
otlp_service_name,
)?;
let padded_model = backend.is_padded();
let max_batch_size = backend.max_batch_size();
@@ -135,6 +137,7 @@ fn init_backend(
model_type: ModelType,
uds_path: String,
otlp_endpoint: Option<String>,
otlp_service_name: String,
) -> Result<Box<dyn CoreBackend + Send>, BackendError> {
if cfg!(feature = "candle") {
#[cfg(feature = "candle")]
@@ -154,6 +157,7 @@
model_type,
uds_path,
otlp_endpoint,
otlp_service_name,
)
})
.join()
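The `candle` path of `init_backend` builds the backend on a dedicated thread and `join`s it, so every owned `String` argument is moved into the closure — which is also why `run` in `router/src/lib.rs` below clones `otlp_service_name` before the call. A hedged sketch of that move-into-thread pattern (the body is a stand-in, not the real backend construction):

```rust
use std::thread;

// Hypothetical stand-in for backend construction: owned Strings are moved
// into the spawned thread, so the caller must clone anything it still needs.
fn init_backend(uds_path: String, otlp_service_name: String) -> String {
    thread::spawn(move || {
        // ... construct the backend here ...
        format!("backend on {uds_path} as {otlp_service_name}")
    })
    .join()
    .expect("backend thread panicked")
}

fn main() {
    let service_name = "text-embeddings-inference.server".to_owned();
    // Clone before the call, mirroring `otlp_service_name.clone()` in the router.
    let info = init_backend("/tmp/tei-server".to_owned(), service_name.clone());
    println!("{info}; caller still owns {service_name}");
}
```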
6 changes: 6 additions & 0 deletions docs/source/en/cli_arguments.md
@@ -153,6 +153,12 @@ Options:

[env: OTLP_ENDPOINT=]

--otlp-service-name <OTLP_SERVICE_NAME>
The service name for OpenTelemetry.

[env: OTLP_SERVICE_NAME=]
[default: text-embeddings-inference.server]

--cors-allow-origin <CORS_ALLOW_ORIGIN>
[env: CORS_ALLOW_ORIGIN=]
```
2 changes: 2 additions & 0 deletions router/src/lib.rs
@@ -60,6 +60,7 @@ pub async fn run(
payload_limit: usize,
api_key: Option<String>,
otlp_endpoint: Option<String>,
otlp_service_name: String,
cors_allow_origin: Option<Vec<String>>,
) -> Result<()> {
let model_id_path = Path::new(&model_id);
@@ -198,6 +199,7 @@
backend_model_type,
uds_path.unwrap_or("/tmp/text-embeddings-inference-server".to_string()),
otlp_endpoint.clone(),
otlp_service_name.clone(),
)
.context("Could not create backend")?;
backend
8 changes: 6 additions & 2 deletions router/src/logging.rs
@@ -10,7 +10,11 @@ use tracing_subscriber::{EnvFilter, Layer};
/// Init logging using env variables LOG_LEVEL and LOG_FORMAT:
/// - otlp_endpoint is an optional URL to an Open Telemetry collector
/// - LOG_LEVEL may be TRACE, DEBUG, INFO, WARN or ERROR (default to INFO)
pub fn init_logging(otlp_endpoint: Option<&String>, json_output: bool) -> bool {
pub fn init_logging(
otlp_endpoint: Option<&String>,
otlp_service_name: String,
json_output: bool,
) -> bool {
let mut layers = Vec::new();

// STDOUT/STDERR layer
@@ -40,7 +44,7 @@ pub fn init_logging(otlp_endpoint: Option<&String>, json_output: bool) -> bool {
trace::config()
.with_resource(Resource::new(vec![KeyValue::new(
"service.name",
"text-embeddings-inference.router",
otlp_service_name,
)]))
.with_sampler(Sampler::AlwaysOn),
)
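The router side mirrors the Python change: the OTLP tracer's `Resource` now carries the user-supplied name under the standard `service.name` attribute, which trace backends use to group spans by service. A small sketch of just the resource construction, assuming a 0.19-era `opentelemetry` crate (module paths shift between versions):

```rust
use opentelemetry::sdk::Resource;
use opentelemetry::KeyValue;

// Build the resource attached to every span the tracer emits.
fn tracing_resource(otlp_service_name: String) -> Resource {
    // `service.name` is the standard OpenTelemetry attribute that collectors
    // (Jaeger, Tempo, ...) use to label and group traces.
    Resource::new(vec![KeyValue::new("service.name", otlp_service_name)])
}

fn main() {
    let resource = tracing_resource("text-embeddings-inference.router".to_owned());
    println!("{resource:?}");
}
```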
13 changes: 11 additions & 2 deletions router/src/main.rs
@@ -123,6 +123,11 @@ struct Args {
#[clap(long, env)]
otlp_endpoint: Option<String>,

/// The service name for OpenTelemetry.
/// e.g. `text-embeddings-inference.server`
#[clap(default_value = "text-embeddings-inference.server", long, env)]
otlp_service_name: String,

/// Unused for gRPC servers
#[clap(long, env)]
cors_allow_origin: Option<Vec<String>>,
@@ -134,8 +139,11 @@
let args: Args = Args::parse();

// Initialize logging and telemetry
let global_tracer =
text_embeddings_router::init_logging(args.otlp_endpoint.as_ref(), args.json_output);
let global_tracer = text_embeddings_router::init_logging(
args.otlp_endpoint.as_ref(),
args.otlp_service_name.clone(),
args.json_output,
);

tracing::info!("{args:?}");

@@ -158,6 +166,7 @@
args.payload_limit,
args.api_key,
args.otlp_endpoint,
args.otlp_service_name,
args.cors_allow_origin,
)
.await?;
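Because the argument is declared with `long, env` and a `default_value`, clap resolves it in the usual order: an explicit `--otlp-service-name` flag wins, then the `OTLP_SERVICE_NAME` environment variable, then the default. A standalone sketch of just this flag (assumes clap with the `derive` and `env` cargo features enabled):

```rust
use clap::Parser;

// Minimal sketch of the new CLI argument from main.rs.
#[derive(Parser, Debug)]
struct Args {
    /// Service name reported in OpenTelemetry traces.
    #[clap(default_value = "text-embeddings-inference.server", long, env)]
    otlp_service_name: String,
}

fn main() {
    // Precedence: `--otlp-service-name` on the command line, then the
    // OTLP_SERVICE_NAME environment variable, then the default above.
    let args = Args::parse();
    println!("service.name = {}", args.otlp_service_name);
}
```

Running this with `OTLP_SERVICE_NAME=my-embedder` set and no flag prints `my-embedder`, which is what lets deployments rename the service without changing the launch command.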
1 change: 1 addition & 0 deletions router/tests/common.rs
@@ -64,6 +64,7 @@ pub async fn start_server(model_id: String, revision: Option<String>, dtype: DType,
2_000_000,
None,
None,
"text-embeddings-inference.server".to_owned(),
None,
)
});