README.md (2 additions & 4 deletions)

@@ -1,18 +1,16 @@
-# ONNX Runtime generate() API
+# ONNX Runtime GenAI
## *The main branch contains new API changes, and the examples here reflect them. For example scripts compatible with the current release (0.5.2), [see the release branch](https://github.com/microsoft/onnxruntime-genai/tree/rel-0.5.2).*
This API gives you an easy, flexible and performant way of running LLMs on device.
It implements the generative AI loop for ONNX models, including pre and post processing, inference with ONNX Runtime, logits processing, search and sampling, and KV cache management.
-You can call a high level `generate()` method to generate all of the output at once, or stream the output one token at a time.
-
See documentation at https://onnxruntime.ai/docs/genai.
|Support matrix|Supported now|Under development|On the roadmap|
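For a sense of what that loop looks like through the Python bindings, here is a rough sketch based on this repo's examples, not a verbatim excerpt: exact call names vary across releases (releases up to 0.5.x set `params.input_ids` instead of calling `append_tokens`), and the model path is a placeholder.

```python
import onnxruntime_genai as og

model = og.Model("path/to/model")   # folder containing genai_config.json and model.onnx
tokenizer = og.Tokenizer(model)
stream = tokenizer.create_stream()  # incremental detokenizer for streaming output

params = og.GeneratorParams(model)
params.set_search_options(max_length=50)

generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("my favorite movie is"))

# Emit one token at a time until max_length or an end-of-sequence token is hit.
while not generator.is_done():
    generator.generate_next_token()
    token = generator.get_next_tokens()[0]
    print(stream.decode(token), end="", flush=True)
print()
```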
Activation-aware Weight Quantization (AWQ) works by identifying the top 1% of weights that are most salient for maintaining accuracy, and quantizing the remaining 99%. This results in less accuracy loss than many other quantization techniques. For more on AWQ, see [the AWQ paper](https://arxiv.org/abs/2306.00978).
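As a toy illustration of the selection step just described (a NumPy sketch of the idea, not the AutoAWQ implementation; the shapes and the mean-absolute-activation salience measure are simplifying assumptions):

```python
import numpy as np

rng = np.random.default_rng(0)
acts = rng.standard_normal((1024, 4096))  # calibration activations: [samples, in_features]
W = rng.standard_normal((4096, 4096))     # layer weights: [in_features, out_features]

# Rank input channels by how strongly they are activated on calibration data;
# the top 1% are the "salient" channels to protect during quantization.
salience = np.abs(acts).mean(axis=0)
k = max(1, int(0.01 * salience.size))
salient_channels = np.argsort(salience)[-k:]

protect = np.zeros(W.shape[0], dtype=bool)
protect[salient_channels] = True
print(f"protecting {protect.sum()} of {protect.size} input channels; "
      f"the remaining {(~protect).sum()} get low-precision quantization")
```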
-This tutorial downloads the Phi-3 mini short context PyTorch model, applies AWQ quantization, generates the corresponding optimized & quantized ONNX model, and runs the ONNX model with ONNX Runtime's generate() API. If you would like to use another model, please change the model name in the instructions below.
+This tutorial downloads the Phi-3 mini short context PyTorch model, applies AWQ quantization, generates the corresponding optimized & quantized ONNX model, and runs the ONNX model with ONNX Runtime GenAI. If you would like to use another model, please change the model name in the instructions below.
## 1. Download your PyTorch model
@@ -47,7 +47,7 @@ $ pip install -e .
Note: You can try to install AutoAWQ directly with `pip install autoawq`. However, AutoAWQ will try to auto-detect the CUDA version installed on your machine, and if it detects the wrong version, `pip` will choose the wrong `.whl` file, which causes a runtime error when quantizing. It is therefore recommended to install AutoAWQ from source to get the right `.whl` file.
-## 3. Install the generate() API
+## 3. Install ONNX Runtime GenAI
Based on your desired hardware target, pick from one of the following options to install ONNX Runtime GenAI.
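Whichever option you pick, the updated example scripts below detect provider support by probing for the `Config` class, so a quick sanity check of an install (not part of the tutorial itself) is:

```python
import onnxruntime_genai as og

# Newer builds expose og.Config for per-run execution-provider selection;
# the example scripts fall back to og.Model(path) when it is absent.
print("Config API available:", hasattr(og, "Config"))
```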
-python3 model-generate.py -m genai_models/phi2-int4-cpu -pr "my favorite movie is" "write a function that always returns True" "I am very happy" -p 0.0 -k 1 -v
+python3 model-generate.py -m genai_models/phi2-int4-cpu -e cpu -pr "my favorite movie is" "write a function that always returns True" "I am very happy" -p 0.0 -k 1 -v
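In both the old and new invocations, `-p 0.0 -k 1` presumably map to the script's top-p and top-k sampling flags (their definitions are truncated from this view); with a top-k of 1 the sampler can only ever pick the highest-probability token, so runs are deterministic. A sketch of the same options through the Python API, reusing the model path from the command above:

```python
import onnxruntime_genai as og

model = og.Model("genai_models/phi2-int4-cpu")  # model folder from the command above
params = og.GeneratorParams(model)
# top_k=1 restricts sampling to the single most likely token (greedy decoding).
params.set_search_options(top_p=0.0, top_k=1, max_length=50)
```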
examples/python/model-generate.py (5 additions & 5 deletions)

@@ -7,10 +7,10 @@ def main(args):
    if hasattr(og, 'Config'):
        config = og.Config(args.model_path)
        config.clear_providers()
-        if args.provider != "cpu":
+        if args.execution_provider != "cpu":
            if args.verbose:
-                print(f"Setting model to {args.provider}...")
-            config.append_provider(args.provider)
+                print(f"Setting model to {args.execution_provider}...")
+            config.append_provider(args.execution_provider)
        model = og.Model(config)
    else:
        model = og.Model(args.model_path)
@@ -80,8 +80,8 @@ def main(args):
if __name__ == "__main__":
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end token generation loop example for gen-ai")
-    parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain config.json and model.onnx)')
-    parser.add_argument("-p", "--provider", type=str, required=True, choices=["cpu", "cuda", "dml"], help="Provider to run model")
+    parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
+    parser.add_argument("-e", "--execution_provider", type=str, required=True, choices=["cpu", "cuda", "dml"], help="Provider to run model")
    parser.add_argument('-pr', '--prompts', nargs='*', required=False, help='Input prompts to generate tokens from. Provide this parameter multiple times to batch multiple prompts')
    parser.add_argument('-i', '--min_length', type=int, default=25, help='Min number of tokens to generate including the prompt')
    parser.add_argument('-l', '--max_length', type=int, default=50, help='Max number of tokens to generate including the prompt')
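The help-text correction matters in practice: a GenAI model folder is identified by `genai_config.json`, not `config.json`. A hypothetical pre-flight check along those lines (not part of this PR):

```python
import os

def looks_like_genai_model(folder: str) -> bool:
    # A usable model folder needs the runtime config plus an ONNX graph.
    if not os.path.isdir(folder):
        return False
    has_config = os.path.exists(os.path.join(folder, "genai_config.json"))
    has_onnx = any(f.endswith(".onnx") for f in os.listdir(folder))
    return has_config and has_onnx

print(looks_like_genai_model("genai_models/phi2-int4-cpu"))
```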
examples/python/model-qa.py (1 addition & 1 deletion)

@@ -98,7 +98,7 @@ def main(args):
if __name__ == "__main__":
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
-    parser.add_argument('-m', '--model', type=str, required=True, help='Onnx model folder path (must contain config.json and model.onnx)')
+    parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
    parser.add_argument('-e', '--execution_provider', type=str, required=True, choices=["cpu", "cuda", "dml"], help="Execution provider to run ONNX model with")
    parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt')
    parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt')