fix: Update model in tests for IBM Watsonx and default models (#2515)

sjrl · web-flow · commit 9426ea2e131f · 2025-11-12T11:02:15.000+01:00
* Update model

* Update default model

* Change default model

* Update model defaults

* Fix test
diff --git a/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/document_embedder.py b/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/document_embedder.py
@@ -29,7 +29,7 @@ class WatsonxDocumentEmbedder:
     ]
 
     document_embedder = WatsonxDocumentEmbedder(
-        model="ibm/slate-30m-english-rtrvr",
+        model="ibm/slate-30m-english-rtrvr-v2",
         api_key=Secret.from_env_var("WATSONX_API_KEY"),
         api_base_url="https://us-south.ml.cloud.ibm.com",
         project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
@@ -45,7 +45,7 @@ class WatsonxDocumentEmbedder:
     def __init__(
         self,
         *,
-        model: str = "ibm/slate-30m-english-rtrvr",
+        model: str = "ibm/slate-30m-english-rtrvr-v2",
         api_key: Secret = Secret.from_env_var("WATSONX_API_KEY"),  # noqa: B008
         api_base_url: str = "https://us-south.ml.cloud.ibm.com",
         project_id: Secret = Secret.from_env_var("WATSONX_PROJECT_ID"),  # noqa: B008
@@ -64,7 +64,7 @@ def __init__(
 
         :param model:
             The name of the model to use for calculating embeddings.
-            Default is "ibm/slate-30m-english-rtrvr".
+            Default is "ibm/slate-30m-english-rtrvr-v2".
         :param api_key:
             The WATSONX API key. Can be set via environment variable WATSONX_API_KEY.
         :param api_base_url:
diff --git a/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/text_embedder.py b/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/text_embedder.py
@@ -25,7 +25,7 @@ class WatsonxTextEmbedder:
     text_to_embed = "I love pizza!"
 
     text_embedder = WatsonxTextEmbedder(
-        model="ibm/slate-30m-english-rtrvr",
+        model="ibm/slate-30m-english-rtrvr-v2",
         api_key=Secret.from_env_var("WATSONX_API_KEY"),
         api_base_url="https://us-south.ml.cloud.ibm.com",
         project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
@@ -34,15 +34,15 @@ class WatsonxTextEmbedder:
     print(text_embedder.run(text_to_embed))
 
     # {'embedding': [0.017020374536514282, -0.023255806416273117, ...],
-    #  'meta': {'model': 'ibm/slate-30m-english-rtrvr',
+    #  'meta': {'model': 'ibm/slate-30m-english-rtrvr-v2',
     #           'truncated_input_tokens': 3}}
     ```
     """
 
     def __init__(
         self,
         *,
-        model: str = "ibm/slate-30m-english-rtrvr",
+        model: str = "ibm/slate-30m-english-rtrvr-v2",
         api_key: Secret = Secret.from_env_var("WATSONX_API_KEY"),  # noqa: B008
         api_base_url: str = "https://us-south.ml.cloud.ibm.com",
         project_id: Secret = Secret.from_env_var("WATSONX_PROJECT_ID"),  # noqa: B008
@@ -57,7 +57,7 @@ def __init__(
 
         :param model:
             The name of the IBM watsonx model to use for calculating embeddings.
-            Default is "ibm/slate-30m-english-rtrvr".
+            Default is "ibm/slate-30m-english-rtrvr-v2".
         :param api_key:
             The WATSONX API key. Can be set via environment variable WATSONX_API_KEY.
         :param api_base_url:
diff --git a/integrations/watsonx/src/haystack_integrations/components/generators/watsonx/chat/chat_generator.py b/integrations/watsonx/src/haystack_integrations/components/generators/watsonx/chat/chat_generator.py
@@ -39,14 +39,8 @@ class WatsonxChatGenerator:
     models. It supports the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage) format for both input
     and output, including multimodal inputs with text and images.
 
-    The generator works with IBM's foundation models including:
-    - granite-13b-chat-v2
-    - llama-2-70b-chat
-    - llama-3-70b-instruct
-    - llama-3-2-11b-vision-instruct (multimodal)
-    - llama-3-2-90b-vision-instruct (multimodal)
-    - pixtral-12b (multimodal)
-    - Other watsonx.ai chat models
+    The generator works with IBM's foundation models that are listed
+    [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models.html?context=wx&audience=wdp).
 
     You can customize the generation behavior by passing parameters to the watsonx.ai API through the
     `generation_kwargs` argument. These parameters are passed directly to the watsonx.ai inference endpoint.
@@ -98,7 +92,7 @@ def __init__(
         self,
         *,
         api_key: Secret = Secret.from_env_var("WATSONX_API_KEY"),  # noqa: B008
-        model: str = "ibm/granite-3-2b-instruct",
+        model: str = "ibm/granite-3-3-8b-instruct",
         project_id: Secret = Secret.from_env_var("WATSONX_PROJECT_ID"),  # noqa: B008
         api_base_url: str = "https://us-south.ml.cloud.ibm.com",
         generation_kwargs: dict[str, Any] | None = None,
diff --git a/integrations/watsonx/src/haystack_integrations/components/generators/watsonx/generator.py b/integrations/watsonx/src/haystack_integrations/components/generators/watsonx/generator.py
@@ -21,11 +21,8 @@ class WatsonxGenerator(WatsonxChatGenerator):
     This component extends WatsonxChatGenerator to provide the standard Generator interface that works with prompt
     strings instead of ChatMessage objects.
 
-    The generator works with IBM's foundation models including:
-    - granite-13b-chat-v2
-    - llama-2-70b-chat
-    - llama-3-70b-instruct
-    - Other watsonx.ai chat models
+    The generator works with IBM's foundation models that are listed
+    [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models.html?context=wx&audience=wdp).
 
     You can customize the generation behavior by passing parameters to the watsonx.ai API through the
     `generation_kwargs` argument. These parameters are passed directly to the watsonx.ai inference endpoint.
@@ -74,7 +71,7 @@ def __init__(
         self,
         *,
         api_key: Secret = Secret.from_env_var("WATSONX_API_KEY"),  # noqa: B008
-        model: str = "ibm/granite-3-2b-instruct",
+        model: str = "ibm/granite-3-3-8b-instruct",
         project_id: Secret = Secret.from_env_var("WATSONX_PROJECT_ID"),  # noqa: B008
         api_base_url: str = "https://us-south.ml.cloud.ibm.com",
         system_prompt: str | None = None,
diff --git a/integrations/watsonx/tests/test_chat_generator.py b/integrations/watsonx/tests/test_chat_generator.py
@@ -100,31 +100,30 @@ async def __anext__(self):
 
     def test_init_default(self, mock_watsonx):
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct", project_id=Secret.from_token("fake-project-id")
+            model="ibm/granite-3-3-8b-instruct", project_id=Secret.from_token("fake-project-id")
         )
 
         _, kwargs = mock_watsonx["model"].call_args
-        assert kwargs["model_id"] == "ibm/granite-3-2b-instruct"
+        assert kwargs["model_id"] == "ibm/granite-3-3-8b-instruct"
         assert kwargs["project_id"] == "fake-project-id"
         assert kwargs["verify"] is None
 
-        assert generator.model == "ibm/granite-3-2b-instruct"
+        assert generator.model == "ibm/granite-3-3-8b-instruct"
         assert isinstance(generator.project_id, Secret)
         assert generator.project_id.resolve_value() == "fake-project-id"
         assert generator.api_base_url == "https://us-south.ml.cloud.ibm.com"
 
     def test_init_with_all_params(self, mock_watsonx):
         generator = WatsonxChatGenerator(
             api_key=Secret.from_token("test-api-key"),
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
             api_base_url="https://custom-url.com",
             generation_kwargs={"max_tokens": 100, "temperature": 0.7, "top_p": 0.9},
             verify=False,
         )
 
         _, kwargs = mock_watsonx["model"].call_args
-        assert kwargs["model_id"] == "ibm/granite-3-2b-instruct"
+        assert kwargs["model_id"] == "ibm/granite-3-3-8b-instruct"
         assert kwargs["project_id"] == "test-project"
         assert kwargs["verify"] is False
 
@@ -135,11 +134,10 @@ def test_init_fails_without_project(self, mock_watsonx):
         os.environ.pop("WATSONX_PROJECT_ID", None)
 
         with pytest.raises(ValueError, match="None of the following authentication environment variables are set"):
-            WatsonxChatGenerator(api_key=Secret.from_token("test-api-key"), model="ibm/granite-3-2b-instruct")
+            WatsonxChatGenerator(api_key=Secret.from_token("test-api-key"))
 
     def test_to_dict(self, mock_watsonx):
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
             generation_kwargs={"max_tokens": 100},
         )
@@ -150,7 +148,7 @@ def test_to_dict(self, mock_watsonx):
             "type": "haystack_integrations.components.generators.watsonx.chat.chat_generator.WatsonxChatGenerator",
             "init_parameters": {
                 "api_key": {"env_vars": ["WATSONX_API_KEY"], "strict": True, "type": "env_var"},
-                "model": "ibm/granite-3-2b-instruct",
+                "model": "ibm/granite-3-3-8b-instruct",
                 "project_id": {"env_vars": ["WATSONX_PROJECT_ID"], "strict": True, "type": "env_var"},
                 "api_base_url": "https://us-south.ml.cloud.ibm.com",
                 "generation_kwargs": {"max_tokens": 100},
@@ -164,7 +162,6 @@ def test_to_dict(self, mock_watsonx):
 
     def test_to_dict_with_params(self, mock_watsonx):
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
             generation_kwargs={"max_tokens": 100},
             streaming_callback=print_streaming_chunk,
@@ -176,7 +173,7 @@ def test_to_dict_with_params(self, mock_watsonx):
             "type": "haystack_integrations.components.generators.watsonx.chat.chat_generator.WatsonxChatGenerator",
             "init_parameters": {
                 "api_key": {"env_vars": ["WATSONX_API_KEY"], "strict": True, "type": "env_var"},
-                "model": "ibm/granite-3-2b-instruct",
+                "model": "ibm/granite-3-3-8b-instruct",
                 "project_id": {"env_vars": ["WATSONX_PROJECT_ID"], "strict": True, "type": "env_var"},
                 "api_base_url": "https://us-south.ml.cloud.ibm.com",
                 "generation_kwargs": {"max_tokens": 100},
@@ -194,14 +191,14 @@ def test_from_dict(self, mock_watsonx):
             "type": "haystack_integrations.components.generators.watsonx.chat.chat_generator.WatsonxChatGenerator",
             "init_parameters": {
                 "api_key": {"env_vars": ["WATSONX_API_KEY"], "strict": True, "type": "env_var"},
-                "model": "ibm/granite-3-2b-instruct",
+                "model": "ibm/granite-3-3-8b-instruct",
                 "project_id": {"env_vars": ["WATSONX_PROJECT_ID"], "strict": True, "type": "env_var"},
                 "generation_kwargs": {"max_tokens": 100},
             },
         }
 
         generator = WatsonxChatGenerator.from_dict(data)
-        assert generator.model == "ibm/granite-3-2b-instruct"
+        assert generator.model == "ibm/granite-3-3-8b-instruct"
         assert isinstance(generator.project_id, Secret)
         assert generator.project_id.resolve_value() == "fake-project-id"
         assert generator.generation_kwargs == {"max_tokens": 100}
@@ -212,7 +209,7 @@ def test_from_dict_with_callback(self, mock_watsonx):
             "type": "haystack_integrations.components.generators.watsonx.chat.chat_generator.WatsonxChatGenerator",
             "init_parameters": {
                 "api_key": {"env_vars": ["WATSONX_API_KEY"], "strict": True, "type": "env_var"},
-                "model": "ibm/granite-3-2b-instruct",
+                "model": "ibm/granite-3-3-8b-instruct",
                 "project_id": {"env_vars": ["WATSONX_PROJECT_ID"], "strict": True, "type": "env_var"},
                 "streaming_callback": callback_str,
             },
@@ -224,7 +221,6 @@ def test_from_dict_with_callback(self, mock_watsonx):
     def test_run_single_message(self, mock_watsonx):
         generator = WatsonxChatGenerator(
             api_key=Secret.from_token("test-api-key"),
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
         )
 
@@ -242,7 +238,6 @@ def test_run_single_message(self, mock_watsonx):
     def test_run_with_generation_params(self, mock_watsonx):
         generator = WatsonxChatGenerator(
             api_key=Secret.from_token("test-api-key"),
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
             generation_kwargs={"max_tokens": 100, "temperature": 0.7, "top_p": 0.9},
         )
@@ -287,7 +282,6 @@ def test_run_with_streaming(self, mock_watsonx):
     def test_run_with_empty_messages(self, mock_watsonx):
         generator = WatsonxChatGenerator(
             api_key=Secret.from_token("test-api-key"),
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
         )
 
@@ -296,7 +290,6 @@ def test_run_with_empty_messages(self, mock_watsonx):
 
     def test_skips_tool_messages(self, mock_watsonx):
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
         )
 
@@ -313,7 +306,6 @@ def custom_callback(chunk: StreamingChunk):
             pass
 
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
             streaming_callback=custom_callback,
         )
@@ -327,7 +319,6 @@ def run_callback(chunk: StreamingChunk):
             pass
 
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
             streaming_callback=init_callback,
         )
@@ -343,7 +334,6 @@ def run_callback(chunk: StreamingChunk):
     async def test_run_async_single_message(self, mock_watsonx):
         generator = WatsonxChatGenerator(
             api_key=Secret.from_token("test-api-key"),
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
         )
 
@@ -358,7 +348,6 @@ async def test_run_async_single_message(self, mock_watsonx):
     async def test_run_async_streaming(self, mock_watsonx):
         generator = WatsonxChatGenerator(
             api_key=Secret.from_token("test-api-key"),
-            model="ibm/granite-3-2b-instruct",
             project_id=Secret.from_token("test-project"),
         )
         received_chunks = []
@@ -551,7 +540,7 @@ class TestWatsonxChatGeneratorIntegration:
     )
     def test_live_run(self):
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct",
+            model="ibm/granite-3-3-8b-instruct",
             project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
             generation_kwargs={"max_tokens": 50, "temperature": 0.7, "top_p": 0.9},
         )
@@ -572,7 +561,7 @@ def test_live_run(self):
     )
     def test_live_run_streaming(self):
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct", project_id=Secret.from_env_var("WATSONX_PROJECT_ID")
+            model="ibm/granite-3-3-8b-instruct", project_id=Secret.from_env_var("WATSONX_PROJECT_ID")
         )
         collected_chunks = []
 
@@ -597,7 +586,7 @@ def callback(chunk: StreamingChunk):
     )
     async def test_live_run_async(self):
         generator = WatsonxChatGenerator(
-            model="ibm/granite-3-2b-instruct", project_id=Secret.from_env_var("WATSONX_PROJECT_ID")
+            model="ibm/granite-3-3-8b-instruct", project_id=Secret.from_env_var("WATSONX_PROJECT_ID")
         )
         messages = [ChatMessage.from_user("What's the capital of Germany? Answer concisely.")]
         results = await generator.run_async(messages=messages)
diff --git a/integrations/watsonx/tests/test_document_embedder.py b/integrations/watsonx/tests/test_document_embedder.py
@@ -44,7 +44,7 @@ def test_init_default(self, mock_watsonx):
             api_key="fake-api-key", url="https://us-south.ml.cloud.ibm.com"
         )
         mock_watsonx["embeddings"].assert_called_once_with(
-            model_id="ibm/slate-30m-english-rtrvr",
+            model_id="ibm/slate-30m-english-rtrvr-v2",
             credentials=mock_watsonx["creds_instance"],
             project_id="fake-project-id",
             params=None,
@@ -53,7 +53,7 @@ def test_init_default(self, mock_watsonx):
             max_retries=None,
         )
 
-        assert embedder.model == "ibm/slate-30m-english-rtrvr"
+        assert embedder.model == "ibm/slate-30m-english-rtrvr-v2"
         assert embedder.prefix == ""
         assert embedder.suffix == ""
         assert embedder.batch_size == 1000
@@ -64,7 +64,6 @@ def test_init_default(self, mock_watsonx):
     def test_init_with_parameters(self, mock_watsonx):
         embedder = WatsonxDocumentEmbedder(
             api_key=Secret.from_token("fake-api-key"),
-            model="ibm/slate-125m-english-rtrvr",
             api_base_url="https://custom-url.ibm.com",
             project_id=Secret.from_token("custom-project-id"),
             truncate_input_tokens=128,
@@ -78,7 +77,7 @@ def test_init_with_parameters(self, mock_watsonx):
 
         mock_watsonx["credentials"].assert_called_once_with(api_key="fake-api-key", url="https://custom-url.ibm.com")
         mock_watsonx["embeddings"].assert_called_once_with(
-            model_id="ibm/slate-125m-english-rtrvr",
+            model_id="ibm/slate-30m-english-rtrvr-v2",
             credentials=mock_watsonx["creds_instance"],
             project_id="custom-project-id",
             params={"truncate_input_tokens": 128},
@@ -110,7 +109,7 @@ def test_to_dict(self, mock_watsonx):
             "type": "haystack_integrations.components.embedders.watsonx.document_embedder.WatsonxDocumentEmbedder",
             "init_parameters": {
                 "api_key": {"env_vars": ["WATSONX_API_KEY"], "strict": True, "type": "env_var"},
-                "model": "ibm/slate-30m-english-rtrvr",
+                "model": "ibm/slate-30m-english-rtrvr-v2",
                 "api_base_url": "https://us-south.ml.cloud.ibm.com",
                 "project_id": {"env_vars": ["WATSONX_PROJECT_ID"], "strict": True, "type": "env_var"},
                 "truncate_input_tokens": None,
@@ -173,7 +172,7 @@ def test_run_empty_documents(self, mock_watsonx):
         result = embedder.run(documents=[])
         assert result == {
             "documents": [],
-            "meta": {"model": "ibm/slate-30m-english-rtrvr", "truncate_input_tokens": None, "batch_size": 1000},
+            "meta": {"model": "ibm/slate-30m-english-rtrvr-v2", "truncate_input_tokens": None, "batch_size": 1000},
         }
 
 
@@ -196,7 +195,6 @@ def test_documents(self):
     def test_run(self, test_documents):
         """Test real API call with documents"""
         embedder = WatsonxDocumentEmbedder(
-            model="ibm/slate-30m-english-rtrvr",
             api_key=Secret.from_env_var("WATSONX_API_KEY"),
             project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
             truncate_input_tokens=128,
@@ -209,7 +207,7 @@ def test_run(self, test_documents):
             assert len(doc.embedding) > 0
             assert all(isinstance(x, float) for x in doc.embedding)
 
-        assert result["meta"]["model"] == "ibm/slate-30m-english-rtrvr"
+        assert result["meta"]["model"] == "ibm/slate-30m-english-rtrvr-v2"
 
     @pytest.mark.skipif(
         not os.environ.get("WATSONX_API_KEY") or not os.environ.get("WATSONX_PROJECT_ID"),
@@ -218,7 +216,6 @@ def test_run(self, test_documents):
     def test_batch_processing(self, test_documents):
         """Test that batch processing works"""
         embedder = WatsonxDocumentEmbedder(
-            model="ibm/slate-30m-english-rtrvr",
             api_key=Secret.from_env_var("WATSONX_API_KEY"),
             project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
             batch_size=2,
@@ -239,7 +236,6 @@ def test_text_truncation(self):
         long_document = Document(content=long_content)
 
         embedder = WatsonxDocumentEmbedder(
-            model="ibm/slate-30m-english-rtrvr",
             api_key=Secret.from_env_var("WATSONX_API_KEY"),
             project_id=Secret.from_env_var("WATSONX_PROJECT_ID"),
             truncate_input_tokens=4,
diff --git a/integrations/watsonx/tests/test_generator.py b/integrations/watsonx/tests/test_generator.py
diff --git a/integrations/watsonx/tests/test_text_embedder.py b/integrations/watsonx/tests/test_text_embedder.py