From 60ff5f05afa1284a9992e0a5211539e8ed1f9d85 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Mon, 23 Sep 2024 14:01:48 -0700 Subject: [PATCH 01/26] Add initial script for model validation tool --- test/python/model_validation/input.json | 3 + .../model_validation/validation_tool.py | 184 ++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 test/python/model_validation/input.json create mode 100644 test/python/model_validation/validation_tool.py diff --git a/test/python/model_validation/input.json b/test/python/model_validation/input.json new file mode 100644 index 0000000000..5c7a5f131d --- /dev/null +++ b/test/python/model_validation/input.json @@ -0,0 +1,3 @@ +{ + "models": "Qwen/Qwen2-0.5B" +} \ No newline at end of file diff --git a/test/python/model_validation/validation_tool.py b/test/python/model_validation/validation_tool.py new file mode 100644 index 0000000000..7df315318f --- /dev/null +++ b/test/python/model_validation/validation_tool.py @@ -0,0 +1,184 @@ +import onnxruntime_genai as og +import argparse +import time +from ...src.python.py.models.builder import create_model +from datasets import load_dataset +import torch +import json +import tqdm + +def calculate_perplexity(args, output_dir): + print("We are now calculating perplexity") + + # actual model object + model = og.Model(f'{output_dir}') + + # tokenizer should already be created + tokenizer = og.Tokenizer(model) + print(tokenizer) + + test = load_dataset("wikitext", "wikitext-2-raw-v1", split="test") + + print(test) + # Calculate the perplexity, understand the output of the models + # Understand the output from a model and logits. + print("Test is now loaded") + + max_length = model.config.n_positions + stride = 512 + seq_len = encodings.input_ids.size(1) + + nlls = [] + prev_end_loc = 0 + for begin_loc in tqdm(range(0, seq_len, stride)): + end_loc = min(begin_loc + max_length, seq_len) + trg_len = end_loc - prev_end_loc # may be different from stride on last loop + input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device) + target_ids = input_ids.clone() + target_ids[:, :-trg_len] = -100 + + with torch.no_grad(): + outputs = model(input_ids, labels=target_ids) + + # loss is calculated using CrossEntropyLoss which averages over valid labels + # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels + # to the left by 1. 
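+ # Worked example of the sliding window, assuming seq_len=1200, max_length=1024, stride=512
+ # (illustrative numbers only):
+ #   window 1: begin_loc=0,   end_loc=1024, trg_len=1024 -> every label in the window is scored
+ #   window 2: begin_loc=512, end_loc=1200, trg_len=176  -> only the 176 previously unseen tokens are scored
+ # so each token contributes to the averaged negative log-likelihood exactly once.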
+ neg_log_likelihood = outputs.loss + + nlls.append(neg_log_likelihood) + + prev_end_loc = end_loc + if end_loc == seq_len: + break + + ppl = torch.exp(torch.stack(nlls).mean()) + return + + + +def validate_model(args, output_dir): + if args.verbose: print("Loading model...") + if args.timings: + started_timestamp = 0 + first_token_timestamp = 0 + + model = og.Model(f'{output_dir}') + if args.verbose: print("Model loaded") + tokenizer = og.Tokenizer(model) + tokenizer_stream = tokenizer.create_stream() + if args.verbose: print("Tokenizer created") + if args.verbose: print() + search_options = {name:getattr(args, name) for name in ['do_sample', 'max_length', 'min_length', 'top_p', 'top_k', 'temperature', 'repetition_penalty'] if name in args} + + # Set the max length to something sensible by default, unless it is specified by the user, + # since otherwise it will be set to the entire context length + + search_options['max_length'] = 512 + + chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' + + + inputs = ["Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", + "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight"] + + for input in inputs: + + complete_text = '' + + if args.timings: started_timestamp = time.time() + + # If there is a chat template, use it + prompt = f'{chat_template.format(input=input)}' + + + # Tokenizer has interesting behavior, it creates multiple inputs + input_tokens = tokenizer.encode(prompt) + + + params = og.GeneratorParams(model) + params.set_search_options(**search_options) + params.input_ids = input_tokens + + generator = og.Generator(model, params) + if args.verbose: print("Generator created") + + if args.verbose: print("Running generation loop ...") + if args.timings: + first = True + new_tokens = [] + + print() + print("Output: ", end='', flush=True) + + try: + while not generator.is_done(): + generator.compute_logits() + generator.generate_next_token() + if args.timings: + if first: + first_token_timestamp = time.time() + first = False + + new_token = generator.get_next_tokens()[0] + + value_to_save = tokenizer_stream.decode(new_token) + + complete_text += value_to_save + + print(tokenizer_stream.decode(new_token), end='', flush=True) + + + if args.timings: new_tokens.append(new_token) + except KeyboardInterrupt: + print(" --control+c pressed, aborting generation--") + print() + print() + + + + with open('output.txt', 'a') as file: + file.write(complete_text) + + + # Delete the generator to free the captured graph for the next generator, if graph capture is enabled + del generator + + if args.timings: + prompt_time = first_token_timestamp - started_timestamp + run_time = time.time() - first_token_timestamp + print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(prompt_time):.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps") + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai") + + parser.add_argument('-j', '--json', type=str, required=True, help='Path to the JSON file containing the arguments') + + # parser.add_argument('-m', '--model', type=str, required=True, help='Onnx model folder path (must contain config.json and model.onnx)') + 
parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print verbose output and timing information. Defaults to false') + parser.add_argument('-g', '--timings', action='store_true', default=False, help='Print timing information for each generation step. Defaults to false') + + args = parser.parse_args() + + with open(args.json, 'r') as file: + data = json.load(file) + args.model = data['models'] + + ''' + 1. Download the model + 2. Convert to onnx format + 3. Create model and tokenizer from the model + ''' + model = create_model(args.model, '', './output', 'int4', 'cpu', "./cache") + + ''' + 4. Iterate through input texts. In each iteration, embed the input, create generator, generate output text. + 5. Log the input text and output text for manual evaluation. + ''' + validate_model(args, 'output') + + ''' + 6. Automatically calculate the perplexity metrics if the model has the corresponding dataset *. + ''' + calculate_perplexity(args, 'output') \ No newline at end of file From 4d4884120fe7aa88f1d80f5b378032aec47196b0 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Tue, 24 Sep 2024 13:24:09 -0700 Subject: [PATCH 02/26] Removing calculate_perplexity logic --- .../model_validation/validation_tool.py | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/test/python/model_validation/validation_tool.py b/test/python/model_validation/validation_tool.py index 7df315318f..ea0edc9859 100644 --- a/test/python/model_validation/validation_tool.py +++ b/test/python/model_validation/validation_tool.py @@ -7,52 +7,52 @@ import json import tqdm -def calculate_perplexity(args, output_dir): - print("We are now calculating perplexity") +# def calculate_perplexity(args, output_dir): +# print("We are now calculating perplexity") - # actual model object - model = og.Model(f'{output_dir}') +# # actual model object +# model = og.Model(f'{output_dir}') - # tokenizer should already be created - tokenizer = og.Tokenizer(model) - print(tokenizer) +# # tokenizer should already be created +# tokenizer = og.Tokenizer(model) +# print(tokenizer) - test = load_dataset("wikitext", "wikitext-2-raw-v1", split="test") +# test = load_dataset("wikitext", "wikitext-2-raw-v1", split="test") - print(test) - # Calculate the perplexity, understand the output of the models - # Understand the output from a model and logits. - print("Test is now loaded") +# print(test) +# # Calculate the perplexity, understand the output of the models +# # Understand the output from a model and logits. 
+# print("Test is now loaded") - max_length = model.config.n_positions - stride = 512 - seq_len = encodings.input_ids.size(1) +# max_length = model.config.n_positions +# stride = 512 +# seq_len = encodings.input_ids.size(1) - nlls = [] - prev_end_loc = 0 - for begin_loc in tqdm(range(0, seq_len, stride)): - end_loc = min(begin_loc + max_length, seq_len) - trg_len = end_loc - prev_end_loc # may be different from stride on last loop - input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device) - target_ids = input_ids.clone() - target_ids[:, :-trg_len] = -100 +# nlls = [] +# prev_end_loc = 0 +# for begin_loc in tqdm(range(0, seq_len, stride)): +# end_loc = min(begin_loc + max_length, seq_len) +# trg_len = end_loc - prev_end_loc # may be different from stride on last loop +# input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device) +# target_ids = input_ids.clone() +# target_ids[:, :-trg_len] = -100 - with torch.no_grad(): - outputs = model(input_ids, labels=target_ids) +# with torch.no_grad(): +# outputs = model(input_ids, labels=target_ids) - # loss is calculated using CrossEntropyLoss which averages over valid labels - # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels - # to the left by 1. - neg_log_likelihood = outputs.loss +# # loss is calculated using CrossEntropyLoss which averages over valid labels +# # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels +# # to the left by 1. +# neg_log_likelihood = outputs.loss - nlls.append(neg_log_likelihood) +# nlls.append(neg_log_likelihood) - prev_end_loc = end_loc - if end_loc == seq_len: - break +# prev_end_loc = end_loc +# if end_loc == seq_len: +# break - ppl = torch.exp(torch.stack(nlls).mean()) - return +# ppl = torch.exp(torch.stack(nlls).mean()) +# return @@ -181,4 +181,4 @@ def validate_model(args, output_dir): ''' 6. Automatically calculate the perplexity metrics if the model has the corresponding dataset *. 
''' - calculate_perplexity(args, 'output') \ No newline at end of file + # calculate_perplexity(args, 'output') \ No newline at end of file From 2df8e015219335395a03a4dcfea2a2bd63225ac8 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Wed, 25 Sep 2024 14:08:41 -0700 Subject: [PATCH 03/26] Update validation script and config files based on feedback --- test/python/model_validation/input.json | 3 - .../model_validation/validation_config.json | 19 +++ .../model_validation/validation_tool.py | 114 +++++------------- 3 files changed, 51 insertions(+), 85 deletions(-) delete mode 100644 test/python/model_validation/input.json create mode 100644 tools/python/model_validation/validation_config.json rename {test => tools}/python/model_validation/validation_tool.py (52%) diff --git a/test/python/model_validation/input.json b/test/python/model_validation/input.json deleted file mode 100644 index 5c7a5f131d..0000000000 --- a/test/python/model_validation/input.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "models": "Qwen/Qwen2-0.5B" -} \ No newline at end of file diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json new file mode 100644 index 0000000000..d5ea044444 --- /dev/null +++ b/tools/python/model_validation/validation_config.json @@ -0,0 +1,19 @@ +{ + "models": [ + "Qwen/Qwen2-0.5B", + "mistralai/Mistral-7B-Instruct-v0.2" + ], + "inputs": [ + "Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", + "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight" + ], + "max_lengths": [], + "min_lengths": [], + "do_sample": [], + "top_p": [], + "top_k": [], + "temperature": [], + "reptition_penalty": [], + "verbose": [], + "timings": [] +} \ No newline at end of file diff --git a/test/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py similarity index 52% rename from test/python/model_validation/validation_tool.py rename to tools/python/model_validation/validation_tool.py index ea0edc9859..38a69acc6b 100644 --- a/test/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -1,68 +1,17 @@ import onnxruntime_genai as og import argparse import time -from ...src.python.py.models.builder import create_model -from datasets import load_dataset -import torch +from onnxruntime_genai.models.builder import create_model import json -import tqdm - -# def calculate_perplexity(args, output_dir): -# print("We are now calculating perplexity") - -# # actual model object -# model = og.Model(f'{output_dir}') +import os -# # tokenizer should already be created -# tokenizer = og.Tokenizer(model) -# print(tokenizer) - -# test = load_dataset("wikitext", "wikitext-2-raw-v1", split="test") - -# print(test) -# # Calculate the perplexity, understand the output of the models -# # Understand the output from a model and logits. 
-# print("Test is now loaded") - -# max_length = model.config.n_positions -# stride = 512 -# seq_len = encodings.input_ids.size(1) - -# nlls = [] -# prev_end_loc = 0 -# for begin_loc in tqdm(range(0, seq_len, stride)): -# end_loc = min(begin_loc + max_length, seq_len) -# trg_len = end_loc - prev_end_loc # may be different from stride on last loop -# input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device) -# target_ids = input_ids.clone() -# target_ids[:, :-trg_len] = -100 - -# with torch.no_grad(): -# outputs = model(input_ids, labels=target_ids) - -# # loss is calculated using CrossEntropyLoss which averages over valid labels -# # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels -# # to the left by 1. -# neg_log_likelihood = outputs.loss - -# nlls.append(neg_log_likelihood) - -# prev_end_loc = end_loc -# if end_loc == seq_len: -# break - -# ppl = torch.exp(torch.stack(nlls).mean()) -# return - - - -def validate_model(args, output_dir): +def validate_model(args, model_directory, inputs): if args.verbose: print("Loading model...") if args.timings: started_timestamp = 0 first_token_timestamp = 0 - model = og.Model(f'{output_dir}') + model = og.Model(f'{model_directory}') if args.verbose: print("Model loaded") tokenizer = og.Tokenizer(model) tokenizer_stream = tokenizer.create_stream() @@ -76,10 +25,6 @@ def validate_model(args, output_dir): search_options['max_length'] = 512 chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - - - inputs = ["Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", - "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight"] for input in inputs: @@ -149,36 +94,41 @@ def validate_model(args, output_dir): print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(prompt_time):.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps") +def folder_exists(folder_path): + return os.path.isdir(folder_path) + +def create_folder(folder_path): + os.mkdir(folder_path) + if __name__ == "__main__": parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai") parser.add_argument('-j', '--json', type=str, required=True, help='Path to the JSON file containing the arguments') - # parser.add_argument('-m', '--model', type=str, required=True, help='Onnx model folder path (must contain config.json and model.onnx)') - parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print verbose output and timing information. Defaults to false') - parser.add_argument('-g', '--timings', action='store_true', default=False, help='Print timing information for each generation step. Defaults to false') - args = parser.parse_args() with open(args.json, 'r') as file: data = json.load(file) - args.model = data['models'] - - ''' - 1. Download the model - 2. Convert to onnx format - 3. Create model and tokenizer from the model - ''' - model = create_model(args.model, '', './output', 'int4', 'cpu', "./cache") - - ''' - 4. Iterate through input texts. In each iteration, embed the input, create generator, generate output text. - 5. Log the input text and output text for manual evaluation. - ''' - validate_model(args, 'output') - - ''' - 6. 
Automatically calculate the perplexity metrics if the model has the corresponding dataset *. - ''' - # calculate_perplexity(args, 'output') \ No newline at end of file + models = data['models'] + inputs = data['inputs'] + + # Before model creation, do a check and see if the folder existsS + model_output_dir = "../../../models_outputs" + model_cache_dir = "../../../cache_models" + + if not folder_exists(model_output_dir): + create_folder(model_output_dir) + + if not folder_exists(model_cache_dir): + create_folder(model_cache_dir) + + for model in models: + # Need to give the entire length + onnx_model = create_model(model, '', model_output_dir, 'int4', 'cpu', model_cache_dir) + # Add checks after model creation + # validate_model(args, './models_output', inputs) + #Table values + #columns, model name, validation complete (y/n), third - exception / failure msgs + + # Print the table out once loop is completed \ No newline at end of file From 648145239405923251e4c73f95c65ce8371a8a60 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Thu, 26 Sep 2024 17:25:42 -0700 Subject: [PATCH 04/26] created a json object, loop, and updated the validate_model method --- .../model_validation/validation_config.json | 18 ++--- .../model_validation/validation_tool.py | 76 +++++++++++-------- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json index d5ea044444..6dabb5a89d 100644 --- a/tools/python/model_validation/validation_config.json +++ b/tools/python/model_validation/validation_config.json @@ -7,13 +7,13 @@ "Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight" ], - "max_lengths": [], - "min_lengths": [], - "do_sample": [], - "top_p": [], - "top_k": [], - "temperature": [], - "reptition_penalty": [], - "verbose": [], - "timings": [] + "max_length": 512, + "min_length": 0, + "do_sample": false, + "top_p": 0.0, + "top_k": 0, + "temperature": 1.0, + "reptition_penalty": 1.0, + "verbose": false, + "timings": false } \ No newline at end of file diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py index 38a69acc6b..203f4790fb 100644 --- a/tools/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -5,32 +5,44 @@ import json import os -def validate_model(args, model_directory, inputs): - if args.verbose: print("Loading model...") - if args.timings: + +class validationConfigObject: + def __init__(self, models, inputs, max_length, min_length, do_sample, top_p, top_k, temperature, reptition_penalty, verbose, timings): + self.models = models + self.inputs = inputs + self.max_length = max_length + self.min_length = min_length + self.do_sample = do_sample + self.top_p = top_p + self.top_k = top_k + self.temperature = temperature + self.reptition_penalty = reptition_penalty + self.verbose = verbose + self.timings = timings + +# Return true or false +def validate_model(args, model_directory, validationConfigObject): + if validationConfigObject.verbose: print("Loading model...") + if validationConfigObject.timings: started_timestamp = 0 first_token_timestamp = 0 model = og.Model(f'{model_directory}') - if args.verbose: print("Model loaded") + + if 
validationConfigObject.verbose: print("Model loaded") tokenizer = og.Tokenizer(model) tokenizer_stream = tokenizer.create_stream() - if args.verbose: print("Tokenizer created") - if args.verbose: print() - search_options = {name:getattr(args, name) for name in ['do_sample', 'max_length', 'min_length', 'top_p', 'top_k', 'temperature', 'repetition_penalty'] if name in args} - - # Set the max length to something sensible by default, unless it is specified by the user, - # since otherwise it will be set to the entire context length - - search_options['max_length'] = 512 + if validationConfigObject.verbose: print("Tokenizer created") + if validationConfigObject.verbose: print() + search_options = {name: getattr(validationConfigObject, name, None) for name in ['do_sample', 'max_length', 'min_length', 'top_p', 'top_k', 'temperature']} chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - for input in inputs: + for input in validationConfigObject.inputs: complete_text = '' - if args.timings: started_timestamp = time.time() + if validationConfigObject.timings: started_timestamp = time.time() # If there is a chat template, use it prompt = f'{chat_template.format(input=input)}' @@ -45,10 +57,10 @@ def validate_model(args, model_directory, inputs): params.input_ids = input_tokens generator = og.Generator(model, params) - if args.verbose: print("Generator created") + if validationConfigObject.verbose: print("Generator created") - if args.verbose: print("Running generation loop ...") - if args.timings: + if validationConfigObject.verbose: print("Running generation loop ...") + if validationConfigObject.timings: first = True new_tokens = [] @@ -59,7 +71,7 @@ def validate_model(args, model_directory, inputs): while not generator.is_done(): generator.compute_logits() generator.generate_next_token() - if args.timings: + if validationConfigObject.timings: if first: first_token_timestamp = time.time() first = False @@ -71,15 +83,13 @@ def validate_model(args, model_directory, inputs): complete_text += value_to_save print(tokenizer_stream.decode(new_token), end='', flush=True) - - if args.timings: new_tokens.append(new_token) + if validationConfigObject.timings: new_tokens.append(new_token) except KeyboardInterrupt: print(" --control+c pressed, aborting generation--") print() print() - with open('output.txt', 'a') as file: file.write(complete_text) @@ -88,7 +98,7 @@ def validate_model(args, model_directory, inputs): # Delete the generator to free the captured graph for the next generator, if graph capture is enabled del generator - if args.timings: + if validationConfigObject.timings: prompt_time = first_token_timestamp - started_timestamp run_time = time.time() - first_token_timestamp print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(prompt_time):.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps") @@ -100,7 +110,6 @@ def folder_exists(folder_path): def create_folder(folder_path): os.mkdir(folder_path) - if __name__ == "__main__": parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai") @@ -110,11 +119,15 @@ def create_folder(folder_path): with open(args.json, 'r') as file: data = json.load(file) - models = data['models'] - inputs = data['inputs'] - # Before model creation, do a check and see if the folder existsS - model_output_dir = "../../../models_outputs" + validationConfigObject = 
validationConfigObject(**data) + + + # Create a json object that holds all this information and + # then pass that in + + # Check and see if the folder exists, if not create the folder + model_output_dir = "../../../models_outputs/" model_cache_dir = "../../../cache_models" if not folder_exists(model_output_dir): @@ -123,11 +136,10 @@ def create_folder(folder_path): if not folder_exists(model_cache_dir): create_folder(model_cache_dir) - for model in models: - # Need to give the entire length - onnx_model = create_model(model, '', model_output_dir, 'int4', 'cpu', model_cache_dir) - # Add checks after model creation - # validate_model(args, './models_output', inputs) + for model in validationConfigObject.models: + # Wrap in a try catch + create_model(model, '', model_output_dir+f'/{model}', 'int4', 'cpu', model_cache_dir+f'/{model}') + validate_model(args, model_output_dir, validationConfigObject) #Table values #columns, model name, validation complete (y/n), third - exception / failure msgs From 15eb7d85e64ce94de53326824aa3bcfba8006e97 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Mon, 30 Sep 2024 11:36:19 -0700 Subject: [PATCH 05/26] removed json object, added table to be printed --- .../model_validation/validation_config.json | 7 +- .../model_validation/validation_tool.py | 90 +++++++++---------- 2 files changed, 47 insertions(+), 50 deletions(-) diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json index 6dabb5a89d..e7d2e6104e 100644 --- a/tools/python/model_validation/validation_config.json +++ b/tools/python/model_validation/validation_config.json @@ -1,7 +1,6 @@ { "models": [ - "Qwen/Qwen2-0.5B", - "mistralai/Mistral-7B-Instruct-v0.2" + "Qwen/Qwen2-0.5B" ], "inputs": [ "Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", @@ -11,9 +10,9 @@ "min_length": 0, "do_sample": false, "top_p": 0.0, - "top_k": 0, + "top_k": 1, "temperature": 1.0, - "reptition_penalty": 1.0, + "repetition_penalty": 1.0, "verbose": false, "timings": false } \ No newline at end of file diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py index 203f4790fb..ad79fe0ec6 100644 --- a/tools/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -4,51 +4,43 @@ from onnxruntime_genai.models.builder import create_model import json import os +import pandas as pd +def create_table(output): + df = pd.DataFrame(output, columns=['Model Name', 'Validation Completed', 'Exceptions / Failures']) + return df -class validationConfigObject: - def __init__(self, models, inputs, max_length, min_length, do_sample, top_p, top_k, temperature, reptition_penalty, verbose, timings): - self.models = models - self.inputs = inputs - self.max_length = max_length - self.min_length = min_length - self.do_sample = do_sample - self.top_p = top_p - self.top_k = top_k - self.temperature = temperature - self.reptition_penalty = reptition_penalty - self.verbose = verbose - self.timings = timings - -# Return true or false -def validate_model(args, model_directory, validationConfigObject): - if validationConfigObject.verbose: print("Loading model...") - if validationConfigObject.timings: +def validate_model(config, model_directory): + if config["verbose"]: print("Loading model...") + if config["timings"]: started_timestamp = 0 first_token_timestamp = 0 model = og.Model(f'{model_directory}') - if 
validationConfigObject.verbose: print("Model loaded") + if config["verbose"]: print("Model loaded") tokenizer = og.Tokenizer(model) tokenizer_stream = tokenizer.create_stream() - if validationConfigObject.verbose: print("Tokenizer created") - if validationConfigObject.verbose: print() - search_options = {name: getattr(validationConfigObject, name, None) for name in ['do_sample', 'max_length', 'min_length', 'top_p', 'top_k', 'temperature']} + if config["verbose"]: print("Tokenizer created") + if config["verbose"]: print() + + search_option_keys = [ + 'do_sample', 'max_length', 'min_length', 'top_p', 'top_k', + 'temperature', 'repetition_penalty' + ] + + search_options = {key: config[key] for key in search_option_keys} chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - for input in validationConfigObject.inputs: + for input in config["inputs"]: complete_text = '' - if validationConfigObject.timings: started_timestamp = time.time() + if config["timings"]: started_timestamp = time.time() - # If there is a chat template, use it prompt = f'{chat_template.format(input=input)}' - - # Tokenizer has interesting behavior, it creates multiple inputs input_tokens = tokenizer.encode(prompt) @@ -57,21 +49,23 @@ def validate_model(args, model_directory, validationConfigObject): params.input_ids = input_tokens generator = og.Generator(model, params) - if validationConfigObject.verbose: print("Generator created") + if config["verbose"]: print("Generator created") - if validationConfigObject.verbose: print("Running generation loop ...") - if validationConfigObject.timings: + if config["verbose"]: print("Running generation loop ...") + if config["timings"]: first = True new_tokens = [] print() - print("Output: ", end='', flush=True) + # print("Output: ", end='', flush=True) + + generation_successful = True try: while not generator.is_done(): generator.compute_logits() generator.generate_next_token() - if validationConfigObject.timings: + if config["timings"]: if first: first_token_timestamp = time.time() first = False @@ -82,11 +76,16 @@ def validate_model(args, model_directory, validationConfigObject): complete_text += value_to_save - print(tokenizer_stream.decode(new_token), end='', flush=True) + # print(tokenizer_stream.decode(new_token), end='', flush=True) - if validationConfigObject.timings: new_tokens.append(new_token) + if config["timings"]: new_tokens.append(new_token) except KeyboardInterrupt: print(" --control+c pressed, aborting generation--") + generation_successful = False + except Exception as e: + print(f"An error occurred: {e}") + generation_successful = False + print() print() @@ -94,15 +93,16 @@ def validate_model(args, model_directory, validationConfigObject): with open('output.txt', 'a') as file: file.write(complete_text) - # Delete the generator to free the captured graph for the next generator, if graph capture is enabled del generator - if validationConfigObject.timings: + if config["timings"]: prompt_time = first_token_timestamp - started_timestamp run_time = time.time() - first_token_timestamp print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(prompt_time):.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps") + return generation_successful + def folder_exists(folder_path): return os.path.isdir(folder_path) @@ -118,13 +118,7 @@ def create_folder(folder_path): args = parser.parse_args() with open(args.json, 'r') as file: - data = json.load(file) - - 
validationConfigObject = validationConfigObject(**data) - - - # Create a json object that holds all this information and - # then pass that in + config = json.load(file) # Check and see if the folder exists, if not create the folder model_output_dir = "../../../models_outputs/" @@ -136,11 +130,15 @@ def create_folder(folder_path): if not folder_exists(model_cache_dir): create_folder(model_cache_dir) - for model in validationConfigObject.models: + output = [] + + for model in config["models"]: # Wrap in a try catch create_model(model, '', model_output_dir+f'/{model}', 'int4', 'cpu', model_cache_dir+f'/{model}') - validate_model(args, model_output_dir, validationConfigObject) + generation_successful = validate_model(config, model_output_dir) #Table values + output.append([model, generation_successful, "no"]) #columns, model name, validation complete (y/n), third - exception / failure msgs - # Print the table out once loop is completed \ No newline at end of file + df = create_table(output) + print(df) \ No newline at end of file From e1cb1b70d8fa733108c53c330e0c5f93d135002e Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Mon, 30 Sep 2024 12:03:54 -0700 Subject: [PATCH 06/26] fix return statement for validate_model --- tools/python/model_validation/validation_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py index ad79fe0ec6..132ae35c09 100644 --- a/tools/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -101,7 +101,7 @@ def validate_model(config, model_directory): run_time = time.time() - first_token_timestamp print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(prompt_time):.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps") - return generation_successful + return generation_successful def folder_exists(folder_path): From 6063694ad3d1c0dac5a7f6dc70cb7eb6a4d6b6fc Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Mon, 30 Sep 2024 13:01:32 -0700 Subject: [PATCH 07/26] updated validation_tool and add exception messages --- tools/python/model_validation/validation_tool.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py index 132ae35c09..5504aae85d 100644 --- a/tools/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -103,7 +103,6 @@ def validate_model(config, model_directory): return generation_successful - def folder_exists(folder_path): return os.path.isdir(folder_path) @@ -133,12 +132,14 @@ def create_folder(folder_path): output = [] for model in config["models"]: - # Wrap in a try catch - create_model(model, '', model_output_dir+f'/{model}', 'int4', 'cpu', model_cache_dir+f'/{model}') - generation_successful = validate_model(config, model_output_dir) - #Table values - output.append([model, generation_successful, "no"]) - #columns, model name, validation complete (y/n), third - exception / failure msgs + try: + create_model(model, '', model_output_dir+f'/{model}', 'int4', 'cpu', model_cache_dir+f'/{model}') + generation_successful = validate_model(config, model_output_dir) + exception_message = None + except Exception as e: + exception_message = str(e) + + output.append([model, generation_successful, exception_message]) 
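+ # Each row appended above becomes one line of the summary table printed
+ # below: model name, whether generation completed, and the exception
+ # message captured in the except branch (None when the run was clean).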
df = create_table(output) print(df) \ No newline at end of file From b2b27fb3eb6ae381d11b4db7fcdd938288bbb1f4 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Wed, 2 Oct 2024 16:40:50 -0700 Subject: [PATCH 08/26] fixing the config file --- tools/python/model_validation/validation_config.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json index e7d2e6104e..6dc863d5d8 100644 --- a/tools/python/model_validation/validation_config.json +++ b/tools/python/model_validation/validation_config.json @@ -1,10 +1,7 @@ { "models": [ - "Qwen/Qwen2-0.5B" ], "inputs": [ - "Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", - "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight" ], "max_length": 512, "min_length": 0, @@ -14,5 +11,6 @@ "temperature": 1.0, "repetition_penalty": 1.0, "verbose": false, - "timings": false + "output_directory": "", + "cache_directory": "" } \ No newline at end of file From d6e9aede7d47b05a2e783b0dfe82f95320a697e3 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Thu, 3 Oct 2024 15:49:14 -0700 Subject: [PATCH 09/26] added the precision and executive provider to the config --- tools/python/model_validation/validation_config.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json index 6dc863d5d8..caf926518e 100644 --- a/tools/python/model_validation/validation_config.json +++ b/tools/python/model_validation/validation_config.json @@ -12,5 +12,7 @@ "repetition_penalty": 1.0, "verbose": false, "output_directory": "", - "cache_directory": "" + "cache_directory": "", + "precision": "", + "executive_provider": "" } \ No newline at end of file From 8f291baab4d5afd5e6bbf95fc173b4774481e51d Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Fri, 4 Oct 2024 14:01:18 -0700 Subject: [PATCH 10/26] adding chat template --- .../model_validation/validation_tool.py | 95 ++++++++----------- 1 file changed, 38 insertions(+), 57 deletions(-) diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py index 5504aae85d..824e45c074 100644 --- a/tools/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -12,52 +12,35 @@ def create_table(output): def validate_model(config, model_directory): if config["verbose"]: print("Loading model...") - if config["timings"]: - started_timestamp = 0 - first_token_timestamp = 0 model = og.Model(f'{model_directory}') if config["verbose"]: print("Model loaded") tokenizer = og.Tokenizer(model) - tokenizer_stream = tokenizer.create_stream() + tokenizer_stream = tokenizer.create_stream() if config["verbose"]: print("Tokenizer created") - if config["verbose"]: print() + if config["verbose"]: print() - search_option_keys = [ - 'do_sample', 'max_length', 'min_length', 'top_p', 'top_k', - 'temperature', 'repetition_penalty' - ] - - search_options = {key: config[key] for key in search_option_keys} - - chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' + chat_template = get_chat_template(model_directory) for input in config["inputs"]: complete_text = '' - if config["timings"]: started_timestamp = time.time() - prompt = 
f'{chat_template.format(input=input)}' input_tokens = tokenizer.encode(prompt) - params = og.GeneratorParams(model) - params.set_search_options(**search_options) params.input_ids = input_tokens generator = og.Generator(model, params) if config["verbose"]: print("Generator created") if config["verbose"]: print("Running generation loop ...") - if config["timings"]: - first = True - new_tokens = [] print() - # print("Output: ", end='', flush=True) + print("Output: ", end='', flush=True) generation_successful = True @@ -65,10 +48,6 @@ def validate_model(config, model_directory): while not generator.is_done(): generator.compute_logits() generator.generate_next_token() - if config["timings"]: - if first: - first_token_timestamp = time.time() - first = False new_token = generator.get_next_tokens()[0] @@ -76,9 +55,8 @@ def validate_model(config, model_directory): complete_text += value_to_save - # print(tokenizer_stream.decode(new_token), end='', flush=True) + print(tokenizer_stream.decode(new_token), end='', flush=True) - if config["timings"]: new_tokens.append(new_token) except KeyboardInterrupt: print(" --control+c pressed, aborting generation--") generation_successful = False @@ -86,28 +64,20 @@ def validate_model(config, model_directory): print(f"An error occurred: {e}") generation_successful = False - print() - print() - - - with open('output.txt', 'a') as file: + with open(f'{model_directory}/output.txt', 'a') as file: file.write(complete_text) # Delete the generator to free the captured graph for the next generator, if graph capture is enabled del generator - if config["timings"]: - prompt_time = first_token_timestamp - started_timestamp - run_time = time.time() - first_token_timestamp - print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(prompt_time):.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps") - return generation_successful -def folder_exists(folder_path): - return os.path.isdir(folder_path) +def get_chat_template(output_directory): + tokenizer_json = output_directory + '/tokenizer_config.json' + with open(tokenizer_json, 'r') as file: + config = json.load(file) + return config["chat_template"] -def create_folder(folder_path): - os.mkdir(folder_path) if __name__ == "__main__": parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai") @@ -119,27 +89,38 @@ def create_folder(folder_path): with open(args.json, 'r') as file: config = json.load(file) - # Check and see if the folder exists, if not create the folder - model_output_dir = "../../../models_outputs/" - model_cache_dir = "../../../cache_models" - - if not folder_exists(model_output_dir): - create_folder(model_output_dir) - - if not folder_exists(model_cache_dir): - create_folder(model_cache_dir) + os.makedirs(config["output_directory"], exist_ok=True) + os.makedirs(config["cache_directory"], exist_ok=True) output = [] + validation_complete = False + for model in config["models"]: + + print(f"We are validating {model}") + adjusted_model = model.replace("/", "_") + output_path = config["output_directory"] + f'/{adjusted_model}' + # From the output directory, there exist a file named tokenizer_config.json which contains the chat + cache_path = config["cache_directory"] + f'/{adjusted_model}' + try: - create_model(model, '', model_output_dir+f'/{model}', 'int4', 'cpu', model_cache_dir+f'/{model}') - generation_successful = 
validate_model(config, model_output_dir) - exception_message = None + create_model(model, '', output_path, config["precision"], config["executive_provider"], cache_path) except Exception as e: - exception_message = str(e) + print(f'Failure after create model {e}') + output.append([model, validation_complete, e]) + continue + try: + validation_complete = validate_model(config, output_path) + except Exception as e: + print(f'Failure after validation model {e}') + output.append([model, validation_complete, e]) - output.append([model, generation_successful, exception_message]) - + df = create_table(output) - print(df) \ No newline at end of file + + df.to_csv("models.csv") + + print(df) + + # From the folder name, get the chat template \ No newline at end of file From 9b94bc7b0f5b43fa96f92ab96fc37d133aa0bcd7 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Mon, 7 Oct 2024 11:08:19 -0700 Subject: [PATCH 11/26] Add the README.md --- tools/python/model_validation/README.md | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tools/python/model_validation/README.md diff --git a/tools/python/model_validation/README.md b/tools/python/model_validation/README.md new file mode 100644 index 0000000000..c449984a8e --- /dev/null +++ b/tools/python/model_validation/README.md @@ -0,0 +1,33 @@ +# ONNX Runtime GenAI Model Validation Example + +## Setup + +Clone this repository and navigate to the `tools/python/model_validation folder`. + +```bash +git clone https://github.com/microsoft/onnxruntime-genai.git +cd tools/python/model_validation +``` + +In the model_validation folder, you should find the validation_tool.py script, validation_config.json file, and this README.md. + +### Current Support +* Gemma +* Llama +* Mistral +* Phi +* Qwen + +### Usage - Build the Model +This step creates optimized and quantized ONNX models that run with ONNX Runtime GenAI. + +1. In the validation_config.json file, enter the supported Hugging Face model name. Models can be found here. +2. Include the path to the output folder, precision, and execution provider. + +Once the model is built, you can find it in path_to_output_folder/{model_name}. This should include the ONNX model data and tokenizer. 
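+
+For reference, a rough sketch of the call the tool makes under the hood (the argument order follows how validation_tool.py invokes create_model; the model name and paths below are examples, not requirements):
+
+```python
+from onnxruntime_genai.models.builder import create_model
+
+# Build an int4 CPU model for an example Hugging Face model. The empty string
+# means no local input folder, so the weights are fetched from the Hub.
+create_model("Qwen/Qwen2-0.5B", "", "./models_outputs/Qwen_Qwen2-0.5B",
+             "int4", "cpu", "./cache_models/Qwen_Qwen2-0.5B")
+```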
+ +### Run the Model Validation Script +```bash +python validation_tool.py -j validation_config.json +``` + From d25d69f8fe91251237bc83d3bdce6c328b11d873 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Mon, 7 Oct 2024 14:42:01 -0700 Subject: [PATCH 12/26] reformatting the config file --- .../model_validation/validation_config.json | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json index caf926518e..a8261c7724 100644 --- a/tools/python/model_validation/validation_config.json +++ b/tools/python/model_validation/validation_config.json @@ -1,18 +1,23 @@ { "models": [ - ], + "Qwen/Qwen2-0.5B" + ], "inputs": [ + "Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", + "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight" ], - "max_length": 512, - "min_length": 0, - "do_sample": false, - "top_p": 0.0, - "top_k": 1, - "temperature": 1.0, - "repetition_penalty": 1.0, + "output_directory": "../../../models_outputs/", + "cache_directory": "../../../cache_models", + "precision": "int4", + "executive_provider": "cpu", "verbose": false, - "output_directory": "", - "cache_directory": "", - "precision": "", - "executive_provider": "" + "search_options": { + "max_length": 512, + "min_length": 0, + "do_sample": false, + "top_p": 0.0, + "top_k": 1, + "temperature": 1.0, + "repetition_penalty": 1.0 + } } \ No newline at end of file From 2c5bad9c0f60652afa4711aaf0b4ef674572fb2f Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Tue, 8 Oct 2024 10:15:51 -0700 Subject: [PATCH 13/26] updating the chat templates --- .../model_validation/validation_tool.py | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py index 824e45c074..b62c9f3a6f 100644 --- a/tools/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -1,6 +1,5 @@ import onnxruntime_genai as og import argparse -import time from onnxruntime_genai.models.builder import create_model import json import os @@ -18,20 +17,24 @@ def validate_model(config, model_directory): if config["verbose"]: print("Model loaded") tokenizer = og.Tokenizer(model) tokenizer_stream = tokenizer.create_stream() + if config["verbose"]: print("Tokenizer created") if config["verbose"]: print() - chat_template = get_chat_template(model_directory) - - for input in config["inputs"]: + chat_template = get_chat_template(model_directory.lower()) + + search_options = config["search_options"] + + for text in config["inputs"]: complete_text = '' - prompt = f'{chat_template.format(input=input)}' + prompt = f'{chat_template.format(input=text)}' input_tokens = tokenizer.encode(prompt) params = og.GeneratorParams(model) + params.set_search_options(**search_options) params.input_ids = input_tokens generator = og.Generator(model, params) @@ -55,7 +58,7 @@ def validate_model(config, model_directory): complete_text += value_to_save - print(tokenizer_stream.decode(new_token), end='', flush=True) + # print(tokenizer_stream.decode(new_token), end='', flush=True) except KeyboardInterrupt: print(" --control+c pressed, aborting generation--") @@ -64,7 +67,7 @@ def validate_model(config, model_directory): print(f"An 
error occurred: {e}") generation_successful = False - with open(f'{model_directory}/output.txt', 'a') as file: + with open(f'{model_directory}/output.txt', 'a', encoding='utf-8') as file: file.write(complete_text) # Delete the generator to free the captured graph for the next generator, if graph capture is enabled @@ -72,11 +75,15 @@ def validate_model(config, model_directory): return generation_successful -def get_chat_template(output_directory): - tokenizer_json = output_directory + '/tokenizer_config.json' - with open(tokenizer_json, 'r') as file: - config = json.load(file) - return config["chat_template"] +def get_chat_template(model_name): + if 'phi' in model_name: + return '<|user|>\n{input} <|end|>\n<|assistant|>' + elif 'qwen' in model_name: + return '\n<|user|>\n{input} <|end|>\n<|assistant|>' + elif 'mistral' in model_name: + return '<|im_start|> <|user|> \n {input} <|im_end>|\n' + # elif model_name.contains("llama"): return + # elif model_name.contains("gemma"): return if __name__ == "__main__": @@ -95,32 +102,32 @@ def get_chat_template(output_directory): output = [] validation_complete = False + e = None + exception = False for model in config["models"]: print(f"We are validating {model}") adjusted_model = model.replace("/", "_") output_path = config["output_directory"] + f'/{adjusted_model}' - # From the output directory, there exist a file named tokenizer_config.json which contains the chat cache_path = config["cache_directory"] + f'/{adjusted_model}' - try: create_model(model, '', output_path, config["precision"], config["executive_provider"], cache_path) except Exception as e: print(f'Failure after create model {e}') output.append([model, validation_complete, e]) + exception = True continue try: validation_complete = validate_model(config, output_path) except Exception as e: print(f'Failure after validation model {e}') - output.append([model, validation_complete, e]) + exception = True + output.append([model, validation_complete, e]) + if not exception: + output.append([model, validation_complete, e]) df = create_table(output) - df.to_csv("models.csv") - - print(df) - - # From the folder name, get the chat template \ No newline at end of file + df.to_csv("validation_summary.csv") From e8756edcf2f6362f7b1f45bb0e7db6cd971bf6b9 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Tue, 8 Oct 2024 15:20:25 -0700 Subject: [PATCH 14/26] updated README --- tools/python/model_validation/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/python/model_validation/README.md b/tools/python/model_validation/README.md index c449984a8e..b44096f5e7 100644 --- a/tools/python/model_validation/README.md +++ b/tools/python/model_validation/README.md @@ -18,9 +18,7 @@ In the model_validation folder, you should find the validation_tool.py script, v * Phi * Qwen -### Usage - Build the Model -This step creates optimized and quantized ONNX models that run with ONNX Runtime GenAI. - +### Usage 1. In the validation_config.json file, enter the supported Hugging Face model name. Models can be found here. 2. Include the path to the output folder, precision, and execution provider. 
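+
+For illustration, a minimal config with example values follows (assuming the key names that validation_tool.py reads; any supported Hugging Face model can be substituted):
+
+```python
+import json
+
+# Example values only -- adjust the models, prompts, and paths for your setup.
+config = {
+    "models": ["Qwen/Qwen2-0.5B"],
+    "inputs": ["Explain the process of photosynthesis in plants."],
+    "output_directory": "./models_outputs",
+    "cache_directory": "./cache_models",
+    "precision": "int4",
+    "executive_provider": "cpu",  # key spelling as used by the tool
+    "verbose": False,
+    "search_options": {"max_length": 512}
+}
+
+with open("validation_config.json", "w") as f:
+    json.dump(config, f, indent=4)
+```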
From 3f707c4ce7d360ed524ec0fbb454aa88a34b60c6 Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Tue, 8 Oct 2024 16:39:13 -0700
Subject: [PATCH 15/26] updated chat templates + README.md

---
 tools/python/model_validation/README.md | 4 ++++
 tools/python/model_validation/validation_tool.py | 6 ++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/python/model_validation/README.md b/tools/python/model_validation/README.md
index b44096f5e7..1f9ea96a2c 100644
--- a/tools/python/model_validation/README.md
+++ b/tools/python/model_validation/README.md
@@ -9,6 +9,10 @@ git clone https://github.com/microsoft/onnxruntime-genai.git
 cd tools/python/model_validation
 ```

+### Requirements
+protobuf version must be at least 5.28.2
+sentencepiece
+
 In the model_validation folder, you should find the validation_tool.py script, validation_config.json file, and this README.md.

 ### Current Support
diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py
index b62c9f3a6f..507f993ddc 100644
--- a/tools/python/model_validation/validation_tool.py
+++ b/tools/python/model_validation/validation_tool.py
@@ -82,8 +82,10 @@ def get_chat_template(model_name):
         return '\n<|user|>\n{input} <|end|>\n<|assistant|>'
     elif 'mistral' in model_name:
         return '<|im_start|> <|user|> \n {input} <|im_end>|\n'
-    # elif model_name.contains("llama"): return
-    # elif model_name.contains("gemma"): return
+    elif model_name.contains("llama"):
+        return '[INST]<>\n{input}<>[INST]'
+    elif model_name.contains("gemma"):
+        return '' + 'user' '\n' + {input} + '\n'


 if __name__ == "__main__":
From c4f503e33ede5669855906a5019c9443c430fcd6 Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 9 Oct 2024 10:47:29 -0700
Subject: [PATCH 16/26] updated README.md and requirements.txt file

---
 tools/python/model_validation/README.md | 29 ++++++++++---------
 .../python/model_validation/requirements.txt | 12 ++++++++
 2 files changed, 28 insertions(+), 13 deletions(-)
 create mode 100644 tools/python/model_validation/requirements.txt

diff --git a/tools/python/model_validation/README.md b/tools/python/model_validation/README.md
index 1f9ea96a2c..d8ec3b4eca 100644
--- a/tools/python/model_validation/README.md
+++ b/tools/python/model_validation/README.md
@@ -1,35 +1,38 @@
-# ONNX Runtime GenAI Model Validation Example
+# ONNX Runtime GenAI Model Validation Tutorial

-## Setup
+## Background
+Gen-AI is an API framework for running generative models with ONNX Runtime. As the variety of models expands, there is a growing need for a tool chain that can assess how well Gen-AI supports different model variants.

+## Setup and Requirements
 Clone this repository and navigate to the `tools/python/model_validation folder`.

 ```bash
 git clone https://github.com/microsoft/onnxruntime-genai.git
 cd tools/python/model_validation
+pip install -r requirements.txt
 ```
+Within the model_validation directory, you'll find the validation_tool.py script alongside the validation_config.json configuration file and this README.md document.

-### Requirements
-protobuf version must be at least 5.28.2
-sentencepiece
-
-In the model_validation folder, you should find the validation_tool.py script, validation_config.json file, and this README.md.
-
-### Current Support
+### Current Supported Model Architectures
 * Gemma
 * Llama
 * Mistral
-* Phi
+* Phi (language + vision)
 * Qwen

-### Usage
-1. In the validation_config.json file, enter the supported Hugging Face model name.
Models can be found here. -2. Include the path to the output folder, precision, and execution provider. +1. Input the name of the Hugging Face model you're using into the validation_config.json file. You can find a list of supported models via this link: (https://huggingface.co) + + * Also, add the chat_template associated with your model. This is located in the tokenizer_config.json file on the Hugging Face website. Make sure to replace ``` message['content'] ``` with ``` {input} ```. + +2. Specify the path for the output folder you prefer, along with the precision and execution provider details. -Once the model is built, you can find it in path_to_output_folder/{model_name}. This should include the ONNX model data and tokenizer. +After the model has been created, it will be located in the path_to_output_folder/{model_name} directory. This directory will contain both the ONNX model data and the tokenizer. ### Run the Model Validation Script ```bash python validation_tool.py -j validation_config.json ``` +### Output +Once the tool has been executed successfully, it generates a file named model_validation.csv. This file contains the Model Name, the validation tool's completion status, and details of any exceptions or failures encountered by the model during the validation process. diff --git a/tools/python/model_validation/requirements.txt b/tools/python/model_validation/requirements.txt new file mode 100644 index 0000000000..96431e1c39 --- /dev/null +++ b/tools/python/model_validation/requirements.txt @@ -0,0 +1,12 @@ +coloredlogs +flatbuffers +numpy<2 +packaging +protobuf>=5.28.2 +sympy +pytest +onnx +transformers +huggingface_hub[cli] +onnxruntime-genai +sentencepiece \ No newline at end of file From 876d22f792bfdab0047c7630631d97a8530c7b48 Mon Sep 17 00:00:00 2001 From: ayissi-msft Date: Wed, 9 Oct 2024 11:36:28 -0700 Subject: [PATCH 17/26] updated requirements.txt, config file, and validation tool script --- .../python/model_validation/requirements.txt | 3 +- .../model_validation/validation_config.json | 17 ++--- .../model_validation/validation_tool.py | 69 ++++++++----------- 3 files changed, 39 insertions(+), 50 deletions(-) diff --git a/tools/python/model_validation/requirements.txt b/tools/python/model_validation/requirements.txt index 96431e1c39..393080317d 100644 --- a/tools/python/model_validation/requirements.txt +++ b/tools/python/model_validation/requirements.txt @@ -9,4 +9,5 @@ onnx transformers huggingface_hub[cli] onnxruntime-genai -sentencepiece \ No newline at end of file +sentencepiece +pandas \ No newline at end of file diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json index a8261c7724..d7ac0725b9 100644 --- a/tools/python/model_validation/validation_config.json +++ b/tools/python/model_validation/validation_config.json @@ -1,15 +1,16 @@ { "models": [ - "Qwen/Qwen2-0.5B" - ], + { + "name": "", + "chat_template": "" + } + ], "inputs": [ - "Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.", - "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight" ], - "output_directory": "../../../models_outputs/", - "cache_directory": "../../../cache_models", - "precision": "int4", - "executive_provider": "cpu", + "output_directory": "", + "cache_directory": "", + "precision": "", + "executive_provider": "", "verbose": false, 
"search_options": { "max_length": 512, diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py index 507f993ddc..7d5a4943d4 100644 --- a/tools/python/model_validation/validation_tool.py +++ b/tools/python/model_validation/validation_tool.py @@ -9,23 +9,23 @@ def create_table(output): df = pd.DataFrame(output, columns=['Model Name', 'Validation Completed', 'Exceptions / Failures']) return df -def validate_model(config, model_directory): - if config["verbose"]: print("Loading model...") +def validate_model(args, model_dict, model_dir): + if args["verbose"]: print("Loading model...") - model = og.Model(f'{model_directory}') + model = og.Model(f'{model_dir}') - if config["verbose"]: print("Model loaded") + if args["verbose"]: print("Model loaded") tokenizer = og.Tokenizer(model) tokenizer_stream = tokenizer.create_stream() - if config["verbose"]: print("Tokenizer created") - if config["verbose"]: print() + if args["verbose"]: print("Tokenizer created") + if args["verbose"]: print() - chat_template = get_chat_template(model_directory.lower()) + chat_template = model_dict["chat_template"] - search_options = config["search_options"] + search_options = args["search_options"] - for text in config["inputs"]: + for text in args["inputs"]: complete_text = '' @@ -38,9 +38,9 @@ def validate_model(config, model_directory): params.input_ids = input_tokens generator = og.Generator(model, params) - if config["verbose"]: print("Generator created") + if args["verbose"]: print("Generator created") - if config["verbose"]: print("Running generation loop ...") + if args["verbose"]: print("Running generation loop ...") print() print("Output: ", end='', flush=True) @@ -58,7 +58,7 @@ def validate_model(config, model_directory): complete_text += value_to_save - # print(tokenizer_stream.decode(new_token), end='', flush=True) + print(tokenizer_stream.decode(new_token), end='', flush=True) except KeyboardInterrupt: print(" --control+c pressed, aborting generation--") @@ -67,7 +67,7 @@ def validate_model(config, model_directory): print(f"An error occurred: {e}") generation_successful = False - with open(f'{model_directory}/output.txt', 'a', encoding='utf-8') as file: + with open(f'{model_dir}/output.txt', 'a', encoding='utf-8') as file: file.write(complete_text) # Delete the generator to free the captured graph for the next generator, if graph capture is enabled @@ -75,31 +75,16 @@ def validate_model(config, model_directory): return generation_successful -def get_chat_template(model_name): - if 'phi' in model_name: - return '<|user|>\n{input} <|end|>\n<|assistant|>' - elif 'qwen' in model_name: - return '\n<|user|>\n{input} <|end|>\n<|assistant|>' - elif 'mistral' in model_name: - return '<|im_start|> <|user|> \n {input} <|im_end>|\n' - elif model_name.contains("llama"): - return '[INST]<>\n{input}<>[INST]' - elif model_name.contains("gemma"): - return '' + 'user' '\n' + {input} + '\n' - - if __name__ == "__main__": parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai") - parser.add_argument('-j', '--json', type=str, required=True, help='Path to the JSON file containing the arguments') - args = parser.parse_args() with open(args.json, 'r') as file: - config = json.load(file) + args = json.load(file) - os.makedirs(config["output_directory"], exist_ok=True) - os.makedirs(config["cache_directory"], exist_ok=True) + os.makedirs(args["output_directory"], exist_ok=True) + 
From 693bf4938f8d311b1ffaef1ff825843524441afe Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Thu, 10 Oct 2024 13:05:38 -0700
Subject: [PATCH 18/26] added default chat templates

---
 .../model_validation/validation_config.json | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json
index d7ac0725b9..b506b9bc2e 100644
--- a/tools/python/model_validation/validation_config.json
+++ b/tools/python/model_validation/validation_config.json
@@ -1,11 +1,28 @@
 {
     "models": [
         {
-            "name": "",
-            "chat_template": ""
+            "name": "Qwen/Qwen2-0.5B",
+            "chat_template": "<|im_start|>user\n{input}<|im_end|>\n"
+        },
+        {
+            "name": "google/gemma-2-9b-it",
+            "chat_template": "<start_of_turn>user\n{input}<end_of_turn>\n<start_of_turn>model\n"
+        },
+        {
+            "name": "microsoft/Phi-3.5-mini-instruct",
+            "chat_template": "<|user|>\n{input} <|end|>\n<|assistant|>"
+        },
+        {
+            "name": "mistralai/Mistral-Small-Instruct-2409",
+            "chat_template": "<|im_start|> <|user|>\n{input} <|im_end|>\n"
+        },
+        {
+            "name": "meta-llama/Llama-3.2-3B",
+            "chat_template": "[INST]<<SYS>>\n{input}<</SYS>>[/INST]"
         }
     ],
     "inputs": [
+        "Provide a detailed analysis as to why the University of Southern California is better than the University of California, Los Angeles."
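These config templates are plain `str.format` strings: the tool substitutes the prompt for `{input}`. That is worth remembering when copying a template out of a model's tokenizer_config.json, because any literal brace left in the string must be doubled as `{{`/`}}` or `format()` will raise. A quick, tool-independent illustration:

```python
# Illustrative only: how an {input}-style chat template from the config is
# applied, and how un-escaped literal braces make str.format fail.
template = "<|user|>\n{input} <|end|>\n<|assistant|>"
print(template.format(input="What is ONNX Runtime?"))

broken = '{"role": "user"} {input}'  # literal braces copied from JSON/Jinja
try:
    broken.format(input="hi")
except (KeyError, IndexError) as err:
    print(f"format() rejected the literal braces: {err!r}")

fixed = '{{"role": "user"}} {input}'  # doubled braces render as literals
print(fixed.format(input="hi"))
```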
    ],
    "output_directory": "",
    "cache_directory": "",
    "precision": "",
    "executive_provider": "",
    "verbose": false,
     "search_options": {
         "max_length": 512,
From 3ff50bb9b9e177d7f3781da6f6d0eb8fe56b35b5 Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Tue, 15 Oct 2024 13:11:57 -0700
Subject: [PATCH 19/26] updated default chat templates

---
 .../model_validation/validation_config.json      | 18 +++++++++---------
 tools/python/model_validation/validation_tool.py |  1 +
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json
index b506b9bc2e..394191ad49 100644
--- a/tools/python/model_validation/validation_config.json
+++ b/tools/python/model_validation/validation_config.json
@@ -1,11 +1,11 @@
 {
     "models": [
         {
-            "name": "Qwen/Qwen2-0.5B",
-            "chat_template": "<|im_start|>user\n{input}<|im_end|>\n"
+            "name": "Qwen/Qwen2.5-7B-Instruct",
+            "chat_template": "<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n"
         },
         {
-            "name": "google/gemma-2-9b-it",
+            "name": "google/gemma-2-2b-it",
             "chat_template": "<start_of_turn>user\n{input}<end_of_turn>\n<start_of_turn>model\n"
         },
         {
@@ -13,8 +13,8 @@
             "chat_template": "<|user|>\n{input} <|end|>\n<|assistant|>"
         },
         {
-            "name": "mistralai/Mistral-Small-Instruct-2409",
-            "chat_template": "<|im_start|> <|user|>\n{input} <|im_end|>\n"
+            "name": "mistralai/Mistral-7B-Instruct-v0.3",
+            "chat_template": "[INST] {input} [/INST]"
         },
         {
             "name": "meta-llama/Llama-3.2-3B",
@@ -24,10 +24,10 @@
     "inputs": [
         "Provide a detailed analysis as to why the University of Southern California is better than the University of California, Los Angeles."
     ],
-    "output_directory": "",
-    "cache_directory": "",
-    "precision": "",
-    "executive_provider": "",
+    "output_directory": "../../output_directory",
+    "cache_directory": "../../cache_directory",
+    "precision": "int4",
+    "executive_provider": "cpu",
     "verbose": false,
     "search_options": {
         "max_length": 512,
diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py
index 7d5a4943d4..4ee4a2472d 100644
--- a/tools/python/model_validation/validation_tool.py
+++ b/tools/python/model_validation/validation_tool.py
@@ -75,6 +75,7 @@ def validate_model(args, model_dict, model_dir):
 
     return generation_successful
 
+# Think of how to expand coverage to different variants (vision, etc)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
     parser.add_argument('-j', '--json', type=str, required=True, help='Path to the JSON file containing the arguments')
From 71556ed517091751c7152b56086ccb70fc0a52ad Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 16 Oct 2024 11:04:14 -0700
Subject: [PATCH 20/26] removing values

---
 tools/python/model_validation/validation_config.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json
index 394191ad49..6f28f95ec8 100644
--- a/tools/python/model_validation/validation_config.json
+++ b/tools/python/model_validation/validation_config.json
@@ -24,10 +24,10 @@
     "inputs": [
         "Provide a detailed analysis as to why the University of Southern California is better than the University of California, Los Angeles."
     ],
-    "output_directory": "../../output_directory",
-    "cache_directory": "../../cache_directory",
-    "precision": "int4",
-    "executive_provider": "cpu",
+    "output_directory": "",
+    "cache_directory": "",
+    "precision": "",
+    "executive_provider": "",
     "verbose": false,
     "search_options": {
         "max_length": 512,
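Patch 20 deliberately ships the directory, precision, and provider fields blank so users fill them in, but `os.makedirs("")` fails with a bare FileNotFoundError. A fail-fast guard along these lines could produce a friendlier message; this is a suggestion, not existing tool behavior, and it uses the corrected execution_provider key introduced by the next patch.

```python
# Suggested guard (not in the tool): reject blank required config fields
# before os.makedirs or model building fail with harder-to-read errors.
import json
import sys

REQUIRED = ("output_directory", "cache_directory", "precision", "execution_provider")

with open("validation_config.json", encoding="utf-8") as f:
    config = json.load(f)

missing = [key for key in REQUIRED if not config.get(key)]
if missing:
    sys.exit(f"Fill in these validation_config.json fields: {', '.join(missing)}")
print("validation_config.json looks complete")
```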
From be6ae2ca425863064b820b00a21f50e7197a4ba3 Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 16 Oct 2024 11:25:26 -0700
Subject: [PATCH 21/26] fixing ep typo

---
 tools/python/model_validation/validation_config.json | 2 +-
 tools/python/model_validation/validation_tool.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json
index 6f28f95ec8..bd814c728e 100644
--- a/tools/python/model_validation/validation_config.json
+++ b/tools/python/model_validation/validation_config.json
@@ -27,7 +27,7 @@
     "output_directory": "",
     "cache_directory": "",
     "precision": "",
-    "executive_provider": "",
+    "execution_provider": "",
     "verbose": false,
     "search_options": {
         "max_length": 512,
diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py
index 4ee4a2472d..b47125ed6b 100644
--- a/tools/python/model_validation/validation_tool.py
+++ b/tools/python/model_validation/validation_tool.py
@@ -102,7 +102,7 @@ def validate_model(args, model_dict, model_dir):
         cache_path = args["cache_directory"] + f'/{adjusted_model}'
 
         try:
-            create_model(model_dict["name"], '', output_path, args["precision"], args["executive_provider"], cache_path)
+            create_model(model_dict["name"], '', output_path, args["precision"], args["execution_provider"], cache_path)
         except Exception as e:
             print(f'Failure after create model {e}')
From b7e404162da231323c6f6d18ed9b44eadd2609a3 Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 16 Oct 2024 11:30:52 -0700
Subject: [PATCH 22/26] update README.md

---
 tools/python/model_validation/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/python/model_validation/README.md b/tools/python/model_validation/README.md
index d8ec3b4eca..46232fd779 100644
--- a/tools/python/model_validation/README.md
+++ b/tools/python/model_validation/README.md
@@ -11,6 +11,14 @@ git clone https://github.com/microsoft/onnxruntime-genai.git
 cd tools/python/model_validation
 pip install -r requirements.txt
 ```
+
+Ensure you are logged in to Hugging Face.
+
+More about the Hugging Face CLI [here](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli).
+```bash
+huggingface-cli login
+```
+
 The model_validation directory contains the validation_tool.py script, the validation_config.json configuration file, and this README.md.
 
 ### Current Supported Model Architectures
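Gated checkpoints such as the Llama and Gemma models need an accepted license and a stored token, and an unauthenticated run otherwise dies mid-download. A pre-flight identity check is one way to fail earlier; the sketch below uses `huggingface_hub` (already in requirements.txt) and catches broadly because the exact exception raised without a token has varied across huggingface_hub versions.

```python
# Hedged pre-flight sketch: verify a Hugging Face token is usable before the
# tool starts downloading models. Not part of the validation tool itself.
from huggingface_hub import whoami

try:
    print(f"Authenticated to Hugging Face as {whoami()['name']}")
except Exception as err:  # missing or invalid tokens surface here
    raise SystemExit(f"Not authenticated ({err}); run 'huggingface-cli login' first")
```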
From e47b9a4bf06e06a175b57a75f67bf633b6ce2380 Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 16 Oct 2024 11:43:38 -0700
Subject: [PATCH 23/26] added additional resources for chat templates

---
 tools/python/model_validation/README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/python/model_validation/README.md b/tools/python/model_validation/README.md
index 46232fd779..1823d4e1ae 100644
--- a/tools/python/model_validation/README.md
+++ b/tools/python/model_validation/README.md
@@ -28,10 +28,13 @@
 * Phi (language + vision)
 * Qwen
 
-### Usage
+## Usage
+### Steps to Configure
 1. Enter the name of the Hugging Face model you are using in the validation_config.json file. Supported models are listed on [Hugging Face](https://huggingface.co).
 
    * Also add the chat_template associated with your model. It is located in the model's tokenizer_config.json file on Hugging Face; make sure to replace `message['content']` with `{input}`.
+   * Discover more about chat templates [here](https://huggingface.co/docs/transformers/main/chat_templating).
+
 2. Specify the path to your preferred output folder, along with the precision and execution provider.
From 6279a1c6003029626548ca6c1d66780e9830994b Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 16 Oct 2024 11:44:39 -0700
Subject: [PATCH 24/26] removing unnecessary comments

---
 tools/python/model_validation/validation_tool.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py
index b47125ed6b..fad36aa360 100644
--- a/tools/python/model_validation/validation_tool.py
+++ b/tools/python/model_validation/validation_tool.py
@@ -75,7 +75,6 @@ def validate_model(args, model_dict, model_dir):
 
     return generation_successful
 
-# Think of how to expand coverage to different variants (vision, etc)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
     parser.add_argument('-j', '--json', type=str, required=True, help='Path to the JSON file containing the arguments')
From 856f5eb80fa6de2481a3ae013059b4cc914e6d4b Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 16 Oct 2024 15:41:11 -0700
Subject: [PATCH 25/26] adding the correct chat template for llama 2

---
 .../model_validation/validation_config.json | 20 ++-----------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json
index bd814c728e..b6bccc8922 100644
--- a/tools/python/model_validation/validation_config.json
+++ b/tools/python/model_validation/validation_config.json
@@ -1,23 +1,7 @@
 {
     "models": [
         {
-            "name": "Qwen/Qwen2.5-7B-Instruct",
-            "chat_template": "<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n"
-        },
-        {
-            "name": "google/gemma-2-2b-it",
-            "chat_template": "<start_of_turn>user\n{input}<end_of_turn>\n<start_of_turn>model\n"
-        },
-        {
-            "name": "microsoft/Phi-3.5-mini-instruct",
-            "chat_template": "<|user|>\n{input} <|end|>\n<|assistant|>"
-        },
-        {
-            "name": "mistralai/Mistral-7B-Instruct-v0.3",
-            "chat_template": "[INST] {input} [/INST]"
-        },
-        {
-            "name": "meta-llama/Llama-3.2-3B",
+            "name": "meta-llama/Llama-2-7b-chat-hf",
             "chat_template": "[INST]<<SYS>>\n{input}<</SYS>>[/INST]"
         }
     ],
     "inputs": [
@@ -38,4 +22,4 @@
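The README's guidance boils down to: fetch the model's tokenizer_config.json, read its chat_template field, and rewrite the per-message body as `{input}`. The first two steps can be scripted for public repositories; a sketch, with the model chosen purely as an example:

```python
# Illustrative helper (assumed, not part of the tool): print the raw Jinja
# chat_template from a model's tokenizer_config.json on the Hugging Face Hub.
import json

from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="microsoft/Phi-3.5-mini-instruct",
                       filename="tokenizer_config.json")
with open(path, encoding="utf-8") as f:
    tokenizer_config = json.load(f)

print(tokenizer_config.get("chat_template", "<no chat_template field>"))
```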
         "temperature": 1.0,
         "repetition_penalty": 1.0
     }
-}
\ No newline at end of file
+}
From 7bb1996dfc92207d83b1f30c81d0a4b2fe53f27c Mon Sep 17 00:00:00 2001
From: ayissi-msft
Date: Wed, 16 Oct 2024 15:51:50 -0700
Subject: [PATCH 26/26] added all the supported model families' chat_templates

---
 .../model_validation/validation_config.json | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json
index b6bccc8922..c7259481da 100644
--- a/tools/python/model_validation/validation_config.json
+++ b/tools/python/model_validation/validation_config.json
@@ -1,8 +1,24 @@
 {
     "models": [
+        {
+            "name": "Qwen/Qwen2.5-7B-Instruct",
+            "chat_template": "<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n"
+        },
         {
             "name": "meta-llama/Llama-2-7b-chat-hf",
             "chat_template": "[INST]<<SYS>>\n{input}<</SYS>>[/INST]"
+        },
+        {
+            "name": "mistralai/Mistral-7B-Instruct-v0.3",
+            "chat_template": "[INST] {input} [/INST]"
+        },
+        {
+            "name": "microsoft/Phi-3.5-mini-instruct",
+            "chat_template": "<|user|>\n{input} <|end|>\n<|assistant|>"
+        },
+        {
+            "name": "google/gemma-2-2b-it",
+            "chat_template": "<start_of_turn>user\n{input}<end_of_turn>\n<start_of_turn>model\n"
         }
     ],
     "inputs": [
@@ -22,4 +38,4 @@
         "temperature": 1.0,
         "repetition_penalty": 1.0
     }
-}
+}
\ No newline at end of file
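A closing editorial note rather than part of the patch series: hand-written templates like the ones in this final validation_config.json can silently drift from a model's real chat format. One way to audit them is to render the tokenizer's own template with transformers and compare; this sketch assumes network access and, for gated models, a prior `huggingface-cli login`.

```python
# Hedged audit sketch: compare a hand-written {input} template against the
# tokenizer's built-in chat template. The model choice is illustrative.
from transformers import AutoTokenizer

hand_written = "<|user|>\n{input} <|end|>\n<|assistant|>"
user_message = "What is ONNX Runtime?"

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
reference = tokenizer.apply_chat_template(
    [{"role": "user", "content": user_message}],
    tokenize=False,
    add_generation_prompt=True,
)

print("hand-written:", repr(hand_written.format(input=user_message)))
print("reference:   ", repr(reference))
```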