Skip to content

Commit 6ec6296

Browse files
llama: add more context to structured fuzzer (#12464)
Signed-off-by: David Korczynski <[email protected]>
1 parent 30ce0a7 commit 6ec6296

File tree

6 files changed

+184
-32
lines changed

6 files changed

+184
-32
lines changed

projects/llamacpp/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ xxd -i models/ggml-vocab-baichuan.gguf > model_header_baichuan.h
4444
xxd -i models/ggml-vocab-deepseek-coder.gguf > model_header_deepseek_coder.h
4545
xxd -i models/ggml-vocab-falcon.gguf > model_header_falcon.h
4646

47-
OBJ_FILES="ggml/src/llamafile/sgemm.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/llama.o src/llama-vocab.o src/llama-grammar.o src/llama-sampling.o src/unicode.o src/unicode-data.o common/common.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/grammar-parser.o common/build-info.o common/json-schema-to-grammar.o"
47+
OBJ_FILES="ggml/src/llamafile/sgemm.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/llama.o src/llama-vocab.o src/llama-grammar.o src/llama-sampling.o src/unicode.o src/unicode-data.o common/common.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o"
4848
FLAGS="-std=c++11 -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -I./ -DNDEBUG -DGGML_USE_LLAMAFILE"
4949

5050
$CXX $LIB_FUZZING_ENGINE $CXXFLAGS ${FLAGS} ${OBJ_FILES} fuzzers/fuzz_json_to_grammar.cpp -o $OUT/fuzz_json_to_grammar

projects/llamacpp/fuzzers/fuzz_grammar.cpp

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,15 @@ See the License for the specific language governing permissions and
1010
limitations under the License.
1111
*/
1212

13-
#include "grammar-parser.h"
13+
#include <string>
1414
#include "llama.h"
15+
#include "llama-grammar.h"
1516

1617
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
1718
std::string payload(reinterpret_cast<const char *>(data), size);
18-
auto parsed_grammar = grammar_parser::parse(payload.c_str());
19-
if (parsed_grammar.rules.empty()) {
20-
return 0;
21-
}
2219

23-
if (parsed_grammar.symbol_ids.find("root") !=
24-
parsed_grammar.symbol_ids.end()) {
25-
std::vector<const llama_grammar_element *> grammar_rules(
26-
parsed_grammar.c_rules());
27-
auto grammar = llama_grammar_init(grammar_rules.data(), grammar_rules.size(),
28-
parsed_grammar.symbol_ids.at("root"));
29-
if (grammar != nullptr) {
30-
llama_grammar_free(grammar);
31-
}
32-
}
20+
llama_grammar_parser parsed_grammar;
21+
parsed_grammar.parse(payload.c_str());
22+
3323
return 0;
3424
}

projects/llamacpp/fuzzers/fuzz_inference.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
6868
llama_context_params_from_gpt_params(params);
6969
llama_context *ctx = llama_new_context_with_model(model, ctx_params);
7070
if (ctx != NULL) {
71+
/*
7172
std::vector<llama_token> tokens_list;
7273
tokens_list = ::llama_tokenize(ctx, params.prompt, true);
7374
@@ -126,9 +127,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
126127
}
127128
llama_batch_free(batch);
128129
}
130+
*/
131+
llama_free(ctx);
129132
}
130133

131-
llama_free(ctx);
132134
llama_free_model(model);
133135
}
134136
}

projects/llamacpp/fuzzers/fuzz_json_to_grammar.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ See the License for the specific language governing permissions and
1010
limitations under the License.
1111
*/
1212

13-
#include "grammar-parser.h"
13+
#include "llama.h"
14+
#include "llama-grammar.h"
1415
#include "json-schema-to-grammar.h"
1516

1617
using json = nlohmann::json;

projects/llamacpp/fuzzers/fuzz_structured.cpp

Lines changed: 173 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ limitations under the License.
1212

1313
#include "llama.h"
1414
#include <iostream>
15+
#include <string>
16+
#include <vector>
17+
1518
#include <setjmp.h>
1619
#include <unistd.h>
1720

@@ -20,21 +23,165 @@ limitations under the License.
2023

2124
jmp_buf fuzzing_jmp_buf;
2225

23-
#define NUM_OVERRIDES 40
26+
#define NUM_OVERRIDES 75
2427
struct llama_model_kv_override fuzz_kv_overrides[NUM_OVERRIDES + 1];
2528

2629
llama_model_kv_override_type arrayed_enums[4] = {
2730
LLAMA_KV_OVERRIDE_TYPE_INT, LLAMA_KV_OVERRIDE_TYPE_FLOAT,
2831
LLAMA_KV_OVERRIDE_TYPE_BOOL, LLAMA_KV_OVERRIDE_TYPE_STR};
2932

33+
std::vector<std::string> possible_keys = {
34+
"general.type",
35+
"general.quantization_version",
36+
"general.alignment",
37+
"general.name",
38+
"general.author",
39+
"general.version",
40+
"general.url",
41+
"general.description",
42+
"general.license",
43+
"general.source.url",
44+
"general.source.huggingface.repository",
45+
"split.no",
46+
"split.count",
47+
"split.tensors.count",
48+
"tokenizer.ggml.model",
49+
"tokenizer.ggml.pre",
50+
"tokenizer.ggml.tokens",
51+
"tokenizer.ggml.token_type",
52+
"tokenizer.ggml.token_type_count",
53+
"tokenizer.ggml.scores",
54+
"tokenizer.ggml.merges",
55+
"tokenizer.ggml.bos_token_id",
56+
"tokenizer.ggml.eos_token_id",
57+
"tokenizer.ggml.unknown_token_id",
58+
"tokenizer.ggml.seperator_token_id",
59+
"tokenizer.ggml.padding_token_id",
60+
"tokenizer.ggml.cls_token_id",
61+
"tokenizer.ggml.mask_token_id",
62+
"tokenizer.ggml.add_bos_token",
63+
"tokenizer.ggml.add_eos_token",
64+
"tokenizer.ggml.add_space_prefix",
65+
"tokenizer.ggml.remove_extra_whitespaces",
66+
"tokenizer.ggml.precompiled_charsmap",
67+
"tokenizer.huggingface.json",
68+
"tokenizer.rwkv.world",
69+
"tokenizer.ggml.prefix_token_id",
70+
"tokenizer.ggml.suffix_token_id",
71+
"tokenizer.ggml.middle_token_id",
72+
"tokenizer.ggml.eot_token_id",
73+
"tokenizer.ggml.eom_token_id",
74+
"adapter.type",
75+
"adapter.lora.alpha",
76+
77+
};
78+
79+
std::vector<std::string> possible_architectures = {
80+
"llama", "falcon", "grok", "gpt2", "gptj", "gptneox",
81+
"mpt", "baichuan", "starcoder", "refact", "bert", "nomic-bert",
82+
"jina-bert-v2", "bloom", "stablelm", "qwen", "qwen2",
83+
};
84+
85+
std::vector<std::string> possible_prefix_keys = {
86+
".vocab_size",
87+
".context_length",
88+
".embedding_length",
89+
".block_count",
90+
".leading_dense_block_count",
91+
".feed_forward_length",
92+
".expert_feed_forward_length",
93+
".expert_shared_feed_forward_length",
94+
".use_parallel_residual",
95+
".tensor_data_layout",
96+
".expert_count",
97+
".expert_used_count",
98+
".expert_shared_count",
99+
".expert_weights_scale",
100+
".pooling_type",
101+
".logit_scale",
102+
".decoder_start_token_id",
103+
".attn_logit_softcapping",
104+
".final_logit_softcapping",
105+
".rescale_every_n_layers",
106+
".time_mix_extra_dim",
107+
".time_decay_extra_dim",
108+
".attention.head_count",
109+
".attention.head_count_kv",
110+
".attention.max_alibi_bias",
111+
".attention.clamp_kqv",
112+
".attention.key_length",
113+
".attention.value_length",
114+
".attention.layer_norm_epsilon",
115+
".attention.layer_norm_rms_epsilon",
116+
".attention.causal",
117+
".attention.q_lora_rank",
118+
".attention.kv_lora_rank",
119+
".attention.relative_buckets_count",
120+
".attention.sliding_window",
121+
".rope.dimension_count",
122+
".rope.freq_base",
123+
".rope.scale_linear",
124+
".rope.scaling.type",
125+
".rope.scaling.factor",
126+
".rope.scaling.attn_factor",
127+
".rope.scaling.original_context_length",
128+
".rope.scaling.finetuned",
129+
".rope.scaling.yarn_log_multiplier",
130+
".ssm.conv_kernel",
131+
".ssm.inner_size",
132+
".ssm.state_size",
133+
".ssm.time_step_rank",
134+
".ssm.dt_b_c_rms",
135+
".wkv.head_size",
136+
};
137+
30138
extern "C" void __wrap_abort(void) { longjmp(fuzzing_jmp_buf, 1); }
31139

32140
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
141+
if (size < 256) {
142+
return 0;
143+
}
33144
llama_backend_init();
34145
FuzzedDataProvider fdp(data, size);
35146

147+
auto params = llama_model_params{};
148+
memset(&params, 0x0, sizeof(struct llama_model_params));
149+
params.use_mmap = false;
150+
params.progress_callback = [](float progress, void *ctx) {
151+
(void)ctx;
152+
return progress > 0.50;
153+
};
154+
155+
int overwrite_idx = 0;
156+
157+
// set the architecture
158+
std::string arch_key = "general.architecture";
159+
uint8_t arch_index =
160+
fdp.ConsumeIntegralInRange<uint8_t>(0, possible_architectures.size() - 1);
161+
162+
std::string arch_val = std::string(possible_architectures[arch_index]);
163+
fuzz_kv_overrides[overwrite_idx].tag = LLAMA_KV_OVERRIDE_TYPE_STR;
164+
strcpy(fuzz_kv_overrides[overwrite_idx].key, arch_key.c_str());
165+
strcpy(fuzz_kv_overrides[overwrite_idx].val_str, arch_val.c_str());
166+
overwrite_idx++;
167+
168+
for (int i = 0; i < possible_prefix_keys.size(); i++) {
169+
std::string key;
170+
std::string val;
171+
172+
// Get the key
173+
key = arch_val + possible_prefix_keys[i];
174+
val = fdp.ConsumeRandomLengthString(32);
175+
176+
// Copy the data into the overrides array
177+
fuzz_kv_overrides[overwrite_idx].tag = fdp.PickValueInArray(arrayed_enums);
178+
strcpy(fuzz_kv_overrides[overwrite_idx].key, key.c_str());
179+
strcpy(fuzz_kv_overrides[overwrite_idx].val_str, val.c_str());
180+
overwrite_idx++;
181+
}
182+
36183
// Create the model
37-
std::string model_payload = fdp.ConsumeRemainingBytesAsString();
184+
std::string model_payload = fdp.ConsumeRandomLengthString();
38185
if (model_payload.size() < 10) {
39186
return 0;
40187
}
@@ -53,25 +200,33 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
53200
fwrite(model_payload.data(), model_payload.size(), 1, fp);
54201
fclose(fp);
55202

56-
auto params = llama_model_params{};
57-
memset(&params, 0x0, sizeof(struct llama_model_params));
58-
params.use_mmap = false;
59-
params.progress_callback = [](float progress, void *ctx) {
60-
(void)ctx;
61-
return progress > 0.50;
62-
};
63-
64203
// Override an arbitrary set of arguments
65-
for (int i = 0; i < NUM_OVERRIDES; i++) {
66-
std::string key = fdp.ConsumeRandomLengthString(64);
67-
std::string val = fdp.ConsumeRandomLengthString(64);
204+
for (int i = overwrite_idx; i < NUM_OVERRIDES; i++) {
205+
std::string key;
206+
std::string val;
207+
208+
// Get the key
209+
if (fdp.ConsumeProbability<float>() > 0.90) {
210+
key = fdp.ConsumeRandomLengthString(20);
211+
} else {
212+
int i = fdp.ConsumeIntegralInRange<int>(0, possible_keys.size() - 1);
213+
key = possible_keys[i];
214+
}
215+
val = fdp.ConsumeRandomLengthString(30);
68216

69217
// Copy the data into the overrides array
70218
fuzz_kv_overrides[i].tag = fdp.PickValueInArray(arrayed_enums);
71219
strcpy(fuzz_kv_overrides[i].key, key.c_str());
72220
strcpy(fuzz_kv_overrides[i].val_str, val.c_str());
73221
}
74222

223+
// For debugging
224+
// std::cout << "--- overwrote ---\n";
225+
// for (int m = 0; m < NUM_OVERRIDES-1; m++) {
226+
// std::cout << "=== " << fuzz_kv_overrides[m].key << "\n";
227+
//}
228+
// std::cout << "#############\n";
229+
75230
params.kv_overrides =
76231
(const struct llama_model_kv_override *)fuzz_kv_overrides;
77232

@@ -83,6 +238,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
83238
}
84239
llama_backend_free();
85240

241+
// close any open descriptors.
242+
for (int i = 3; i < 1024; i++) {
243+
close(i);
244+
}
245+
86246
unlink(filename);
87247
return 0;
88248
}

projects/llamacpp/fuzzers/fuzz_tokenizer.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ See the License for the specific language governing permissions and
1010
limitations under the License.
1111
*/
1212

13-
//#include "grammar-parser.h"
1413
#include <unistd.h>
1514

1615
#include "common.h"

0 commit comments

Comments (0)