
Commit 67cd510

[NeuralChat] Enhance Retrieval UTs (intel#1189)
1 parent 362b7af commit 67cd510

19 files changed (+2445, −176 lines)
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+The Intel Xeon Platinum 8480+ is part of Intel's Xeon Scalable processors, which are designed for enterprise-grade servers and data centers.
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+question,correct_answer
+Who is the CEO of Intel?,Patrick P. Gelsinger
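
The two-line CSV above pairs an evaluation question with its ground-truth answer. As a minimal sketch of parsing such a file with Python's standard csv module (the file name is illustrative; how the new tests actually consume it is not shown in this diff):

import csv

# Read the question/ground-truth pairs (file name is hypothetical).
with open("ground_truth.csv", newline="") as f:
    for row in csv.DictReader(f):
        print(row["question"], "->", row["correct_answer"])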
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html lang="zh">
+
+<head>
+</head>
+
+<body>
+Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
+</body>
+
+</html>
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
Binary file not shown.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+The Intel Xeon Platinum 8480+ is part of Intel's Xeon Scalable processors, which are designed for enterprise-grade servers and data centers. Intel Xeon Platinum 8480+ Processor has total 56 CPU Cores.
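
The fixture files above (plain text, CSV, HTML, and one binary document) give the retrieval unit tests a small, self-contained corpus whose facts are easy to verify. As a minimal sketch of how the retrieval plugin consumes such a corpus, following the build_chatbot/PipelineConfig pattern in the tests below (the fixture path and query are illustrative, and importing plugins from the neural_chat package mirrors the test files):

from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig, plugins

# Index the fixture documents with the retrieval plugin (path is illustrative).
plugins.retrieval.enable = True
plugins.retrieval.args["input_path"] = "./assets/docs/"

config = PipelineConfig(model_name_or_path="facebook/opt-125m", plugins=plugins)
chatbot = build_chatbot(config)

# The fixtures state the core count, so a grounded answer should mention 56 cores.
response = chatbot.predict(query="How many CPU cores does the Intel Xeon Platinum 8480+ have?")
print(response)
plugins.retrieval.enable = False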

intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_build_api.py

Lines changed: 0 additions & 69 deletions
@@ -98,75 +98,6 @@ def test_build_chatbot_with_safety_checker_plugin(self):
         self.assertTrue(response, "Your query contains sensitive words, please try another query.")
         plugins.safety_checker.enable = False
 
-    def test_build_chatbot_with_retrieval_plugin(self):
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-
-        # test intel_extension_for_transformers.langchain.embeddings.HuggingFaceEmbeddings
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        plugins.retrieval.args["embedding_model"] = "thenlper/gte-base"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-
-        # test intel_extension_for_transformers.langchain.embeddings.HuggingFaceInstructEmbeddings
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        plugins.retrieval.args["embedding_model"] = "hkunlp/instructor-large"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_build_chatbot_with_retrieval_plugin_bge_int8(self):
-        if self.device != "cpu":
-            self.skipTest("Only support Intel/bge-base-en-v1.5-sts-int8-static run on Intel CPU")
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../../README.md"
-        # Intel/bge-base-en-v1.5-sts-int8-static is private now, so we need to load it from local.
-        plugins.retrieval.args["embedding_model"] = \
-            "/tf_dataset2/inc-ut/bge-base-en-v1.5-sts-int8-static"
-        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                         plugins=plugins)
-        chatbot = build_chatbot(pipeline_config)
-        self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What is Intel extension for transformers?")
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_build_chatbot_with_retrieval_plugin_using_local_file(self):
-
-        def _run_retrieval(local_dir):
-            plugins.tts.enable = False
-            plugins.retrieval.enable = True
-            plugins.retrieval.args["input_path"] = "../../../README.md"
-            plugins.retrieval.args["embedding_model"] = local_dir
-            pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                             plugins=plugins)
-            chatbot = build_chatbot(pipeline_config)
-            self.assertIsNotNone(chatbot)
-            response = chatbot.predict(query="What is Intel extension for transformers?")
-            self.assertIsNotNone(response)
-            plugins.retrieval.enable = False
-
-        # test local file
-        _run_retrieval(local_dir="/tf_dataset2/inc-ut/gte-base")
-        _run_retrieval(local_dir="/tf_dataset2/inc-ut/instructor-large")
-        _run_retrieval(local_dir="/tf_dataset2/inc-ut/bge-base-en-v1.5")
-
     def test_text_chat_stream_return_stats_with_v1_format(self):
         config = PipelineConfig(model_name_or_path="facebook/opt-125m")
         chatbot = build_chatbot(config)

intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_normal.py

Lines changed: 1 addition & 27 deletions
@@ -24,30 +24,13 @@
 from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
 from intel_extension_for_transformers.neural_chat.utils.common import get_device_type
 
-gaudi2_content = """
-Habana Gaudi2 and 4th Gen Intel Xeon Scalable processors deliver leading performance and optimal cost savings for AI training.
-Today, MLCommons published results of its industry AI performance benchmark, MLPerf Training 3.0, in which both the Habana® Gaudi®2 deep learning accelerator and the 4th Gen Intel® Xeon® Scalable processor delivered impressive training results.
-The latest MLPerf Training 3.0 results underscore the performance of Intel's products on an array of deep learning models. The maturity of Gaudi2-based software and systems for training was demonstrated at scale on the large language model, GPT-3. Gaudi2 is one of only two semiconductor solutions to submit performance results to the benchmark for LLM training of GPT-3.
-Gaudi2 also provides substantially competitive cost advantages to customers, both in server and system costs. The accelerator’s MLPerf-validated performance on GPT-3, computer vision and natural language models, plus upcoming software advances make Gaudi2 an extremely compelling price/performance alternative to Nvidia's H100.
-On the CPU front, the deep learning training performance of 4th Gen Xeon processors with Intel AI engines demonstrated that customers can build with Xeon-based servers a single universal AI system for data pre-processing, model training and deployment to deliver the right combination of AI performance, efficiency, accuracy and scalability.
-Gaudi2 delivered impressive time-to-train on GPT-31: 311 minutes on 384 accelerators.
-Near-linear 95% scaling from 256 to 384 accelerators on GPT-3 model.
-Excellent training results on computer vision — ResNet-50 8 accelerators and Unet3D 8 accelerators — and natural language processing models — BERT 8 and 64 accelerators.
-Performance increases of 10% and 4%, respectively, for BERT and ResNet models as compared to the November submission, evidence of growing Gaudi2 software maturity.
-Gaudi2 results were submitted “out of the box,” meaning customers can achieve comparable performance results when implementing Gaudi2 on premise or in the cloud.
-"""
-
 class TestBuildChatbotNormalCases(unittest.TestCase):
     @classmethod
     def setUpClass(self):
-        if not os.path.exists("./gaudi2.txt"):
-            with open("./gaudi2.txt", "w") as file:
-                file.write(gaudi2_content)
+        pass
 
     @classmethod
     def tearDownClass(self) -> None:
-        if os.path.exists("./gaudi2.txt"):
-            os.remove("./gaudi2.txt")
         if os.path.exists("./app.log"):
             os.remove("./app.log")
         if os.path.exists("./output"):
@@ -114,15 +97,6 @@ def test_enable_plugin_asr(self):
         result = build_chatbot(config)
         self.assertIsNotNone(result)
 
-    @unittest.skipIf(get_device_type() != 'cpu', "Only run this test on CPU")
-    def test_enable_plugin_retrieval(self):
-        # Test enabling Retrieval plugin
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m")
-        config.plugins = {"retrieval": {"enable": True, "args":
-            {"input_path": "./gaudi2.txt", "persist_directory": "./output"}}}
-        result = build_chatbot(config)
-        self.assertIsNotNone(result)
-
     @unittest.skipIf(get_device_type() != 'cpu', "Only run this test on CPU")
     def test_enable_plugin_cache(self):
         # Test enabling Cache plugin

intel_extension_for_transformers/neural_chat/tests/ci/api/test_inference.py

Lines changed: 0 additions & 80 deletions
@@ -35,8 +35,6 @@ def setUp(self):
     def tearDown(self) -> None:
         if os.path.exists("output"):
             shutil.rmtree("output")
-        if os.path.exists("check_append"):
-            shutil.rmtree("check_append")
         for filename in os.listdir("."):
             if filename.endswith(".wav"):
                 os.remove(filename)
@@ -49,84 +47,6 @@ def test_text_chat(self):
         print(response)
         self.assertIsNotNone(response)
 
-    def test_retrieval(self):
-        plugins.retrieval.enable = True
-        input_path="/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/"
-        if os.path.exists(input_path):
-            plugins.retrieval.args["input_path"] = input_path
-        else:
-            plugins.retrieval.args["input_path"] = "../assets/docs/"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_retrieval_with_qdrant(self):
-        plugins.retrieval.enable = True
-        input_path="/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/"
-        if os.path.exists(input_path):
-            plugins.retrieval.args["input_path"] = input_path
-        else:
-            plugins.retrieval.args["input_path"] = "../assets/docs/"
-        plugins.retrieval.args["vector_database"] = "Qdrant"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.enable = False
-
-    def test_retrieval_append(self):
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["input_path"] = "../assets/docs/"
-        plugins.retrieval.args["persist_directory"] = "./check_append"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-
-        plugins.retrieval.args["append"] = False
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["persist_directory"] = "./output"
-        plugins.retrieval.enable = False
-
-    def test_retrieval_append_with_qdrant(self):
-        plugins.retrieval.enable = True
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["input_path"] = "../assets/docs/"
-        plugins.retrieval.args["persist_directory"] = "./check_append"
-        plugins.retrieval.args["vector_database"] = "Qdrant"
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-
-        plugins.retrieval.args["append"] = False
-        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
-                                plugins=plugins)
-        chatbot = build_chatbot(config)
-        response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
-        print(response)
-        self.assertIsNotNone(response)
-        plugins.retrieval.args["append"] = True
-        plugins.retrieval.args["persist_directory"] = "./output"
-        plugins.retrieval.enable = False
-
     @unittest.skipIf(get_device_type() != 'cpu', "Only run this test on CPU")
     def test_voice_chat(self):
         plugins.tts.enable = True
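
The removed cases covered default retrieval, Qdrant as the vector database, and append-mode persistence. As a hedged sketch of how the Qdrant append scenario might look as a standalone test, using only the API calls that appear in this diff (the class name and fixture path are hypothetical):

import unittest

from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig, plugins


class TestRetrievalQdrantAppend(unittest.TestCase):  # hypothetical class name
    def test_append_then_restore_defaults(self):
        plugins.retrieval.enable = True
        plugins.retrieval.args["input_path"] = "../assets/docs/"  # hypothetical fixture path
        plugins.retrieval.args["vector_database"] = "Qdrant"
        plugins.retrieval.args["append"] = True
        plugins.retrieval.args["persist_directory"] = "./check_append"
        config = PipelineConfig(model_name_or_path="facebook/opt-125m", plugins=plugins)
        chatbot = build_chatbot(config)
        self.assertIsNotNone(chatbot.predict("Tell me about Intel Xeon Scalable Processors."))
        # Restore the defaults the original tests reset, so later tests are unaffected.
        plugins.retrieval.args["persist_directory"] = "./output"
        plugins.retrieval.enable = False


if __name__ == "__main__":
    unittest.main()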
