Commit 8147541

add more ns examples (intel#1346)

1 parent 3cbec94 commit 8147541

7 files changed: +49 -7 lines changed

docs/weightonlyquant.md

Lines changed: 2 additions & 0 deletions
````diff
@@ -136,6 +136,7 @@ python -m pip install torch==2.1.0a0 -f https://developer.intel.com/ipex-whl-st
 
 source /opt/intel/oneapi/setvars.sh
 
+# Build IPEX from Source Code
 git clone https://github.com/intel/intel-extension-for-pytorch.git ipex-gpu
 cd ipex-gpu
 git checkout -b dev/QLLM origin/dev/QLLM
@@ -144,6 +145,7 @@ export USE_AOT_DEVLIST='pvc,ats-m150'
 export BUILD_WITH_CPU=OFF
 
 pip install -r requirements.txt
+
 python setup.py install
 ```
 
````
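The added comment marks where the from-source IPEX build begins. A quick follow-up check that the build is usable could look like this (a minimal sketch, assuming the steps above completed and the oneAPI environment is still sourced):

```python
# Minimal post-build sanity check; assumes the from-source IPEX build above
# succeeded and `source /opt/intel/oneapi/setvars.sh` is in effect.
import torch
import intel_extension_for_pytorch as ipex  # importing registers the XPU backend

print(ipex.__version__)          # version string of the freshly built package
print(torch.xpu.is_available())  # True when an Intel GPU (e.g. PVC) is visible
```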

examples/.config/neural_speed_deploy.json

Lines changed: 42 additions & 0 deletions
```diff
@@ -22,6 +22,48 @@
       "params": {
         "model_name": "/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1",
         "model_format": "runtime",
+        "tasks": "piqa"
+      }
+    },
+    "launcher":{}
+  },
+  "neural_chat_v3-3_autoround_neural_speed": {
+    "working_dir": "huggingface/neural_speed",
+    "data_dir": "",
+    "hf_model_name": "Intel/neural-chat-7b-v3-3",
+    "benchmark": {
+      "cmd": "python run_accuracy.py",
+      "params": {
+        "model_name": "/tf_dataset2/models/auto_round/neuralchat_v3-3",
+        "model_format": "runtime",
+        "tasks": "lambada_openai"
+      }
+    },
+    "launcher":{}
+  },
+  "mistral_7b_neural_speed": {
+    "working_dir": "huggingface/neural_speed",
+    "data_dir": "",
+    "hf_model_name": "mistralai/Mistral-7B-v0.1",
+    "benchmark": {
+      "cmd": "python run_accuracy.py",
+      "params": {
+        "model_name": "/tf_dataset2/models/pytorch/Mistral-7B-v0.1",
+        "model_format": "runtime",
+        "tasks": "piqa"
+      }
+    },
+    "launcher":{}
+  },
+  "qwen_neural_speed": {
+    "working_dir": "huggingface/neural_speed",
+    "data_dir": "",
+    "hf_model_name": "Qwen/Qwen-7B-Chat",
+    "benchmark": {
+      "cmd": "python run_accuracy.py",
+      "params": {
+        "model_name": "/tf_dataset2/models/nlp_toolkit/Qwen-7B-Chat",
+        "model_format": "runtime",
         "tasks": "lambada_openai"
       }
     },
```
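Each new entry follows the same shape: a scenario name mapping to a `working_dir`, an `hf_model_name`, and a `benchmark` block whose `cmd` and `params` describe the accuracy run. A hypothetical launcher could consume an entry roughly like this (a sketch only: the top-level schema is inferred from the diff, and the `--flag=value` form assumes `run_accuracy.py`'s argparse options):

```python
# Hypothetical consumer of one neural_speed_deploy.json entry; the real CI
# launcher is not part of this commit, so this is illustrative only.
import json
import shlex
import subprocess

with open("examples/.config/neural_speed_deploy.json") as f:
    config = json.load(f)

entry = config["qwen_neural_speed"]  # one of the entries added here
bench = entry["benchmark"]
cmd = shlex.split(bench["cmd"]) + [f"--{k}={v}" for k, v in bench["params"].items()]
# -> python run_accuracy.py --model_name=... --model_format=runtime --tasks=lambada_openai
subprocess.run(cmd, cwd=f"examples/{entry['working_dir']}", check=True)
```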

examples/huggingface/neural_speed/requirements.txt

Lines changed: 2 additions & 0 deletions
```diff
@@ -7,3 +7,5 @@ gguf
 torch==2.2.0+cpu
 transformers
 intel_extension_for_pytorch==2.2.0
+tiktoken
+transformers_stream_generator
```
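The two new packages back the Qwen example: Qwen-7B-Chat's remote code imports `tiktoken` (tokenizer) and `transformers_stream_generator` (streaming generation) at load time, so a missing package only fails once `from_pretrained(..., trust_remote_code=True)` runs. An illustrative pre-flight check:

```python
# Illustrative pre-flight check for the Qwen example's extra dependencies.
import importlib.util

for dep in ("tiktoken", "transformers_stream_generator"):
    if importlib.util.find_spec(dep) is None:
        raise ImportError(f"'{dep}' is needed to load Qwen/Qwen-7B-Chat "
                          f"with trust_remote_code=True; try: pip install {dep}")
```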

examples/huggingface/neural_speed/run_accuracy.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -24,7 +24,7 @@
 parser.add_argument('--batch_size', type=int, default=1)
 args = parser.parse_args()
 print(args)
-model_args=f'pretrained="{args.model_name}",dtype=float32'
+model_args=f'pretrained="{args.model_name}",dtype=float32,trust_remote_code=True'
 if args.use_gptq:
     model_args += ",use_gptq=True"
 if args.model_format == "runtime":
```
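`trust_remote_code=True` has to travel inside the comma-separated `model_args` string because the evaluation harness, not this script, instantiates the model. Harnesses in the lm-eval style split that string into loader keyword arguments roughly like this (a simplified sketch; real parsers also coerce types such as `"True"` to `True`):

```python
# Simplified sketch of how an lm-eval-style harness turns a "k=v,k=v"
# model_args string into keyword arguments for the model loader.
def parse_model_args(model_args: str) -> dict:
    kwargs = {}
    for pair in model_args.split(","):
        key, _, value = pair.partition("=")
        kwargs[key.strip()] = value.strip().strip('"')
    return kwargs

args = parse_model_args('pretrained="facebook/opt-125m",dtype=float32,trust_remote_code=True')
print(args)  # {'pretrained': 'facebook/opt-125m', 'dtype': 'float32', 'trust_remote_code': 'True'}
```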

examples/huggingface/neural_speed/run_autoround_qdq.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -24,7 +24,7 @@
 
 results = evaluate(
     model="hf-causal",
-    model_args=f'pretrained="{args.model_name}",dtype=float32',
+    model_args=f'pretrained="{args.model_name}",dtype=float32,trust_remote_code=True',
     tasks=[f"{args.tasks}"]
 )
 
```

examples/huggingface/neural_speed/run_inference.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -39,7 +39,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     streamer = TextStreamer(tokenizer)
     inputs = tokenizer(prompt, return_tensors="pt").input_ids
 
-    model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=woq_config)
+    model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=woq_config, trust_remote_code=True)
 
     outputs = model.generate(inputs, streamer=streamer, ctx_size=args.n_ctx, max_new_tokens=args.max_new_tokens)
 
```
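With this change the runtime path can load models that ship their own modeling code, such as Qwen. A minimal standalone sketch of the flow (model name and `WeightOnlyQuantConfig` fields are placeholder assumptions, not values from this commit):

```python
# End-to-end sketch of the run_inference.py flow; the model and quantization
# settings below are placeholders, not taken from this commit.
from transformers import AutoTokenizer, TextStreamer
from intel_extension_for_transformers.transformers import (
    AutoModelForCausalLM,
    WeightOnlyQuantConfig,
)

model_name = "Qwen/Qwen-7B-Chat"  # any model whose repo ships remote code
woq_config = WeightOnlyQuantConfig(compute_dtype="int8", weight_dtype="int4")

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
streamer = TextStreamer(tokenizer)
inputs = tokenizer("Once upon a time", return_tensors="pt").input_ids

# trust_remote_code=True lets transformers execute the repo's custom
# modeling/tokenization code, which Qwen requires.
model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=woq_config, trust_remote_code=True
)
outputs = model.generate(inputs, streamer=streamer, max_new_tokens=32)
```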

intel_extension_for_transformers/neural_chat/README.md

Lines changed: 0 additions & 4 deletions
```diff
@@ -61,10 +61,6 @@ pip install -r requirements.txt
 >**Note**: Suggest using fastapi==0.103.2
 
 
->**Note**: Suggest using fastapi==0.103.2
-
-
-
 # Getting Started
 
 ## OpenAI-Compatible RESTful APIs
```
