
Commit 67cd510

[NeuralChat] Enhance Retrieval UTs (intel#1189)
1 parent 362b7af commit 67cd510

19 files changed (+2445, −176 lines)
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+The Intel Xeon Platinum 8480+ is part of Intel's Xeon Scalable processors, which are designed for enterprise-grade servers and data centers.
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+question,correct_answer
+Who is the CEO of Intel?,Patrick P. Gelsinger
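
The two-line CSV above pairs an evaluation question with its ground-truth answer. As a minimal sketch of parsing such a file with Python's standard csv module (the file name is illustrative; how the new tests actually consume it is not shown in this diff):

import csv

# Read the question/ground-truth pairs (file name is hypothetical).
with open("ground_truth.csv", newline="") as f:
    for row in csv.DictReader(f):
        print(row["question"], "->", row["correct_answer"])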
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html lang="zh">
+
+<head>
+</head>
+
+<body>
+Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
+</body>
+
+</html>
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
Binary file not shown.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+The Intel Xeon Platinum 8480+ is part of Intel's Xeon Scalable processors, which are designed for enterprise-grade servers and data centers. Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
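
The fixture files above (plain text, CSV, HTML, and one binary document) give the retrieval unit tests a small, self-contained corpus whose facts are easy to verify. As a minimal sketch of how the retrieval plugin consumes such a corpus, following the build_chatbot/PipelineConfig pattern in the tests below (the fixture path and query are illustrative, and importing plugins from the neural_chat package mirrors the test files):

from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig, plugins

# Index the fixture documents with the retrieval plugin (path is illustrative).
plugins.retrieval.enable = True
plugins.retrieval.args["input_path"] = "./assets/docs/"

config = PipelineConfig(model_name_or_path="facebook/opt-125m", plugins=plugins)
chatbot = build_chatbot(config)

# The fixtures state the core count, so a grounded answer should mention 56 cores.
response = chatbot.predict(query="How many CPU cores does the Intel Xeon Platinum 8480+ have?")
print(response)
plugins.retrieval.enable = False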

intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_build_api.py

Lines changed: 0 additions & 69 deletions
@@ -98,75 +98,6 @@ def test_build_chatbot_with_safety_checker_plugin(self):
         self.assertTrue(response, "Your query contains sensitive words, please try another query.")
         plugins.safety_checker.enable = False
 
-    def test_build_chatbot_with_retrieval_plugin(self):
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-
-        # test intel_extension_for_transformers.langchain.embeddings.HuggingFaceEmbeddings
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        plugins.retrieval.args["embedding_model"] = "thenlper/gte-base"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-
-        # test intel_extension_for_transformers.langchain.embeddings.HuggingFaceInstructEmbeddings
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        plugins.retrieval.args["embedding_model"] = "hkunlp/instructor-large"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_build_chatbot_with_retrieval_plugin_bge_int8(self):
-        if self.device != "cpu":
-            self.skipTest("Only support Intel/bge-base-en-v1.5-sts-int8-static run on Intel CPU")
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        # Intel/bge-base-en-v1.5-sts-int8-static is private now, so we need to load it from local.
-        plugins.retrieval.args["embedding_model"] = \
-            "/tf_dataset2/inc-ut/bge-base-en-v1.5-sts-int8-static"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_build_chatbot_with_retrieval_plugin_using_local_file(self):
-
-        def _run_retrieval(local_dir):
-            plugins.tts.enable = False
-            plugins.retrieval.enable = True
-            plugins.retrieval.args["input_path"] = "../../../README.md"
-            plugins.retrieval.args["embedding_model"] = local_dir
-            pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                             plugins=plugins)
-            chatbot = build_chatbot(pipeline_config)
-            self.assertIsNotNone(chatbot)
-            response = chatbot.predict(query="What is Intel extension for transformers?")
-            self.assertIsNotNone(response)
-            plugins.retrieval.enable = False
-
-        # test local file
-        _run_retrieval(local_dir="/tf_dataset2/inc-ut/gte-base")
-        _run_retrieval(local_dir="/tf_dataset2/inc-ut/instructor-large")
-        _run_retrieval(local_dir="/tf_dataset2/inc-ut/bge-base-en-v1.5")
-
     def test_text_chat_stream_return_stats_with_v1_format(self):
         config = PipelineConfig(model_name_or_path="facebook/opt-125m")
         chatbot = build_chatbot(config)

intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_normal.py

Lines changed: 1 addition & 27 deletions
@@ -24,30 +24,13 @@
 from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
 from intel_extension_for_transformers.neural_chat.utils.common import get_device_type
 
-gaudi2_content = """
-Habana Gaudi2 and 4th Gen Intel Xeon Scalable processors deliver leading performance and optimal cost savings for AI training.
-Today, MLCommons published results of its industry AI performance benchmark, MLPerf Training 3.0, in which both the Habana® Gaudi®2 deep learning accelerator and the 4th Gen Intel® Xeon® Scalable processor delivered impressive training results.
-The latest MLPerf Training 3.0 results underscore the performance of Intel's products on an array of deep learning models. The maturity of Gaudi2-based software and systems for training was demonstrated at scale on the large language model, GPT-3. Gaudi2 is one of only two semiconductor solutions to submit performance results to the benchmark for LLM training of GPT-3.
-Gaudi2 also provides substantially competitive cost advantages to customers, both in server and system costs. The accelerator’s MLPerf-validated performance on GPT-3, computer vision and natural language models, plus upcoming software advances make Gaudi2 an extremely compelling price/performance alternative to Nvidia's H100.
-On the CPU front, the deep learning training performance of 4th Gen Xeon processors with Intel AI engines demonstrated that customers can build with Xeon-based servers a single universal AI system for data pre-processing, model training and deployment to deliver the right combination of AI performance, efficiency, accuracy and scalability.
-Gaudi2 delivered impressive time-to-train on GPT-31: 311 minutes on 384 accelerators.
-Near-linear 95% scaling from 256 to 384 accelerators on GPT-3 model.
-Excellent training results on computer vision — ResNet-50 8 accelerators and Unet3D 8 accelerators — and natural language processing models — BERT 8 and 64 accelerators.
-Performance increases of 10% and 4%, respectively, for BERT and ResNet models as compared to the November submission, evidence of growing Gaudi2 software maturity.
-Gaudi2 results were submitted “out of the box,” meaning customers can achieve comparable performance results when implementing Gaudi2 on premise or in the cloud.
-"""
-
 class TestBuildChatbotNormalCases(unittest.TestCase):
     @classmethod
     def setUpClass(self):
-        if not os.path.exists("./gaudi2.txt"):
-            with open("./gaudi2.txt", "w") as file:
-                file.write(gaudi2_content)
+        pass
 
     @classmethod
     def tearDownClass(self) -> None:
-        if os.path.exists("./gaudi2.txt"):
-            os.remove("./gaudi2.txt")
         if os.path.exists("./app.log"):
             os.remove("./app.log")
         if os.path.exists("./output"):
@@ -114,15 +97,6 @@ def test_enable_plugin_asr(self):
         result = build_chatbot(config)
         self.assertIsNotNone(result)
 
-    @unittest.skipIf(get_device_type() != 'cpu', "Only run this test on CPU")
-    def test_enable_plugin_retrieval(self):
-        # Test enabling Retrieval plugin
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m")
-        config.plugins = {"retrieval": {"enable": True, "args":
-            {"input_path": "./gaudi2.txt", "persist_directory": "./output"}}}
-        result = build_chatbot(config)
-        self.assertIsNotNone(result)
-
     @unittest.skipIf(get_device_type() != 'cpu', "Only run this test on CPU")
     def test_enable_plugin_cache(self):
         # Test enabling Cache plugin

intel_extension_for_transformers/neural_chat/tests/ci/api/test_inference.py

Lines changed: 0 additions & 80 deletions
@@ -35,8 +35,6 @@ def setUp(self):
     def tearDown(self) -> None:
         if os.path.exists("output"):
             shutil.rmtree("output")
-        if os.path.exists("check_append"):
-            shutil.rmtree("check_append")
         for filename in os.listdir("."):
             if filename.endswith(".wav"):
                 os.remove(filename)
@@ -49,84 +47,6 @@ def test_text_chat(self):
         print(response)
         self.assertIsNotNone(response)
 
-    def test_retrieval(self):
-        plugins.retrieval.enable = True
-        input_path="/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/"
-        if os.path.exists(input_path):
-            plugins.retrieval.args["input_path"] = input_path
-        else:
-            plugins.retrieval.args["input_path"] = "../assets/docs/"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_retrieval_with_qdrant(self):
-        plugins.retrieval.enable = True
-        input_path="/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/"
-        if os.path.exists(input_path):
-            plugins.retrieval.args["input_path"] = input_path
-        else:
-            plugins.retrieval.args["input_path"] = "../assets/docs/"
-        plugins.retrieval.args["vector_database"] = "Qdrant"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_retrieval_append(self):
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["input_path"] = "../assets/docs/"
-        plugins.retrieval.args["persist_directory"] = "./check_append"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-
-        plugins.retrieval.args["append"] = False
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["persist_directory"] = "./output"
-        plugins.retrieval.enable = False
-
-    def test_retrieval_append_with_qdrant(self):
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["input_path"] = "../assets/docs/"
-        plugins.retrieval.args["persist_directory"] = "./check_append"
-        plugins.retrieval.args["vector_database"] = "Qdrant"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-
-        plugins.retrieval.args["append"] = False
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["persist_directory"] = "./output"
-        plugins.retrieval.enable = False
-
     @unittest.skipIf(get_device_type() != 'cpu', "Only run this test on CPU")
     def test_voice_chat(self):
         plugins.tts.enable = True
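
The removed cases covered default retrieval, Qdrant as the vector database, and append-mode persistence. As a hedged sketch of how the Qdrant append scenario might look as a standalone test, using only the API calls that appear in this diff (the class name and fixture path are hypothetical):

import unittest

from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig, plugins


class TestRetrievalQdrantAppend(unittest.TestCase):  # hypothetical class name
    def test_append_then_restore_defaults(self):
        plugins.retrieval.enable = True
        plugins.retrieval.args["input_path"] = "../assets/docs/"  # hypothetical fixture path
        plugins.retrieval.args["vector_database"] = "Qdrant"
        plugins.retrieval.args["append"] = True
        plugins.retrieval.args["persist_directory"] = "./check_append"
        config = PipelineConfig(model_name_or_path="facebook/opt-125m", plugins=plugins)
        chatbot = build_chatbot(config)
        self.assertIsNotNone(chatbot.predict("Tell me about Intel Xeon Scalable Processors."))
        # Restore the defaults the original tests reset, so later tests are unaffected.
        plugins.retrieval.args["persist_directory"] = "./output"
        plugins.retrieval.enable = False


if __name__ == "__main__":
    unittest.main()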
