Skip to content

Commit 496cb05

Browse files
authored
[llm] Add sentencepiece tokenizer support to llm runner
Differential Revision: D76789606 Pull Request resolved: #11645
1 parent 5c91435 commit 496cb05

File tree

10 files changed

+61
-56
lines changed

10 files changed

+61
-56
lines changed

examples/models/llama/main.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,16 @@ DEFINE_int32(
4242
-1,
4343
"Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");
4444

45+
DEFINE_int32(
46+
num_bos,
47+
0,
48+
"Number of BOS tokens to prepend to the prompt. Defaults to 0. If > 0, the prompt will be prepended with BOS tokens. This is useful for models that expect one or more BOS token at the start.");
49+
50+
DEFINE_int32(
51+
num_eos,
52+
0,
53+
"Number of EOS tokens to append to the prompt. Defaults to 0. If > 0, the prompt will be appended with EOS tokens. This is useful for models that expect one or more EOS token at the end.");
54+
4555
DEFINE_bool(warmup, false, "Whether to run a warmup run.");
4656

4757
int32_t main(int32_t argc, char** argv) {

examples/qualcomm/CMakeLists.txt

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,10 @@ find_package(gflags REQUIRED)
3535
set(_common_compile_options -Wno-deprecated-declarations -fPIC)
3636

3737
# Let files say "include <executorch/path/to/header.h>".
38-
set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json/single_include)
38+
set(_common_include_directories
39+
${EXECUTORCH_ROOT}/..
40+
${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json/single_include
41+
)
3942

4043
#
4144
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
@@ -72,20 +75,11 @@ target_include_directories(
7275
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
7376
)
7477

75-
# find RE2 for tokenizer
76-
set(ABSL_ENABLE_INSTALL ON)
77-
set(ABSL_PROPAGATE_CXX_STD ON)
78-
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
79-
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
80-
add_subdirectory(
81-
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/abseil-cpp
82-
${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
83-
)
78+
# add tokenizers
8479
add_subdirectory(
85-
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/re2
86-
${CMAKE_CURRENT_BINARY_DIR}/re2
80+
${EXECUTORCH_ROOT}/extension/llm/tokenizers
81+
${CMAKE_CURRENT_BINARY_DIR}/../../extension/llm/tokenizers
8782
)
88-
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
8983

9084
# build qnn_executor_runner
9185
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/executor_runner)

examples/qualcomm/oss_scripts/llama/CMakeLists.txt

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,16 +6,16 @@
66

77
# model sharding with custom op
88
set(CUSTOM_OP_SRCS_FILE
9-
"${EXECUTORCH_SOURCE_DIR}/extension/llm/custom_ops/op_fallback.cpp"
9+
"${EXECUTORCH_SOURCE_DIR}/extension/llm/custom_ops/op_fallback.cpp"
1010
)
11+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../..)
12+
1113
add_library(custom_ops ${CUSTOM_OP_SRCS_FILE})
1214
target_include_directories(custom_ops PUBLIC "${_common_include_directories}")
1315
target_include_directories(
1416
custom_ops PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/../../include"
1517
)
16-
target_link_libraries(
17-
custom_ops PUBLIC full_portable_ops_lib
18-
)
18+
target_link_libraries(custom_ops PUBLIC full_portable_ops_lib)
1919
target_link_options_shared_lib(custom_ops)
2020

2121
# preprocess qnn runner src files for llama
@@ -44,17 +44,15 @@ list(
4444
${CMAKE_CURRENT_LIST_DIR}/runner/kv_manager.h
4545
)
4646

47-
list(
48-
APPEND
49-
_llama_runner__srcs
50-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/src/tiktoken.cpp
51-
${CMAKE_CURRENT_SOURCE_DIR}/../../../models/llama/tokenizer/llama_tiktoken.cpp
52-
)
47+
list(APPEND _llama_runner__srcs)
5348

5449
# build qnn llama runner
5550
add_executable(qnn_llama_runner ${_llama_runner__srcs})
5651
target_include_directories(
57-
qnn_llama_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
52+
qnn_llama_runner
53+
PUBLIC
54+
${_common_include_directories}
55+
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
5856
)
5957

6058
target_link_options_shared_lib(quantized_ops_lib)
@@ -68,14 +66,12 @@ target_link_libraries(
6866
extension_module
6967
extension_tensor
7068
gflags
71-
re2::re2
7269
custom_ops
7370
quantized_ops_lib
7471
quantized_kernels
72+
tokenizers
7573
)
76-
target_compile_options(
77-
qnn_llama_runner PUBLIC ${_common_compile_options}
78-
)
74+
target_compile_options(qnn_llama_runner PUBLIC ${_common_compile_options})
7975
set_target_properties(
8076
qnn_llama_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
8177
)

examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt

Lines changed: 11 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66

77
# preprocess qaihub runner src files for llama2,3
88
set(_qaihub_llama_runner__srcs ${_llama_runner__srcs})
9+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../..)
10+
911
list(TRANSFORM _qaihub_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
1012
list(FILTER _qaihub_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
1113
list(
@@ -26,13 +28,11 @@ list(PREPEND _qaihub_llama2_7b_runner__srcs
2628

2729
# build qaihub llama2 7b runner
2830
add_executable(qaihub_llama2_7b_runner ${_qaihub_llama2_7b_runner__srcs})
31+
2932
target_include_directories(
30-
qaihub_llama2_7b_runner PUBLIC
31-
${_common_include_directories}
32-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
33-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
34-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
35-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
33+
qaihub_llama2_7b_runner
34+
PUBLIC ${_common_include_directories}
35+
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
3636
)
3737
target_link_libraries(
3838
qaihub_llama2_7b_runner
@@ -43,7 +43,7 @@ target_link_libraries(
4343
extension_module
4444
extension_tensor
4545
gflags
46-
re2::re2
46+
tokenizers
4747
)
4848
target_compile_options(
4949
qaihub_llama2_7b_runner PUBLIC ${_common_compile_options}
@@ -62,25 +62,13 @@ list(PREPEND _qaihub_llama3_8b_runner__srcs
6262
# Adding a compile option to differentiate llama2 with llama3 logic
6363
list(APPEND _common_compile_options -DQAIHUB_LLAMA3_RUNNER)
6464

65-
list(
66-
APPEND _qaihub_llama3_8b_runner__srcs
67-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/src/tiktoken.cpp
68-
)
69-
list(
70-
APPEND
71-
_qaihub_llama3_8b_runner__srcs
72-
${CMAKE_CURRENT_SOURCE_DIR}/../../../models/llama/tokenizer/llama_tiktoken.cpp
73-
)
74-
7565
# build qaihub llama3 8b runner
7666
add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
7767
target_include_directories(
78-
qaihub_llama3_8b_runner PUBLIC
79-
${_common_include_directories}
68+
qaihub_llama3_8b_runner
69+
PUBLIC
70+
${_common_include_directories}
8071
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
81-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
82-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
83-
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
8472
)
8573

8674
target_link_libraries(
@@ -92,7 +80,7 @@ target_link_libraries(
9280
extension_module
9381
extension_tensor
9482
gflags
95-
re2::re2
83+
tokenizers
9684
)
9785
target_compile_options(
9886
qaihub_llama3_8b_runner PUBLIC ${_common_compile_options}

extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; };
3939
F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; };
4040
F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B01A2D88AF3500BE6839 /* tiktoken.cpp */; };
41+
F2E1B5172E03AC19002C9718 /* sentencepiece.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */; };
4142
/* End PBXBuildFile section */
4243

4344
/* Begin PBXContainerItemProxy section */
@@ -110,6 +111,7 @@
110111
F292B0292D88AF4800BE6839 /* result.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = result.h; sourceTree = "<group>"; };
111112
F292B02B2D88AF4800BE6839 /* tiktoken.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = "<group>"; };
112113
F292B02D2D88AF4800BE6839 /* tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = "<group>"; };
114+
F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = sentencepiece.cpp; path = src/sentencepiece.cpp; sourceTree = "<group>"; };
113115
/* End PBXFileReference section */
114116

115117
/* Begin PBXFrameworksBuildPhase section */
@@ -183,6 +185,7 @@
183185
032A74022CAFBB7800932D36 /* tokenizers */ = {
184186
isa = PBXGroup;
185187
children = (
188+
F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */,
186189
3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */,
187190
30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */,
188191
30AA4B5A2DC0766800B1BE50 /* pcre2_regex.cpp */,
@@ -426,6 +429,7 @@
426429
F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */,
427430
F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */,
428431
F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */,
432+
F2E1B5172E03AC19002C9718 /* sentencepiece.cpp in Sources */,
429433
03E7E6792CBDCAE900205E71 /* CoreMLTests.mm in Sources */,
430434
032A74232CAFC1B300932D36 /* runner.cpp in Sources */,
431435
03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */,

extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,9 @@ OTHER_LDFLAGS = $(inherited) \
1717
HEADER_SEARCH_PATHS = $(inherited) \
1818
$(SRCROOT)/../../../../.. \
1919
$(TEMP_DIR)/cmake/include \
20-
$(SRCROOT)/../../../../extension/llm/tokenizers/include
20+
$(SRCROOT)/../../../../extension/llm/tokenizers/include \
21+
$(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece \
22+
$(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece/src
2123

2224
LIBRARY_SEARCH_PATHS = $(inherited) \
2325
$(TEMP_DIR)/cmake/lib

extension/llm/runner/irunner.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,10 @@ struct GenerationConfig {
4949
// Temperature for sampling (higher = more random)
5050
float temperature = 0.8f;
5151

52+
// Number of eos and bos to add to the prompt
53+
int32_t num_bos = 0;
54+
int32_t num_eos = 0;
55+
5256
/**
5357
* Resolve the maximum number of new tokens to generate based on constraints.
5458
*

extension/llm/runner/targets.bzl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ def define_common_targets():
103103
":text_token_generator" + aten_suffix,
104104
"//pytorch/tokenizers:hf_tokenizer",
105105
"//pytorch/tokenizers:llama2c_tokenizer",
106-
# "//pytorch/tokenizers:sentencepiece", # TODO(larryliu0820) Make sure this compiles in xplat.
106+
"//pytorch/tokenizers:sentencepiece",
107107
"//pytorch/tokenizers:tiktoken",
108108
],
109109
)

extension/llm/runner/text_llm_runner.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
#include <executorch/extension/llm/runner/util.h>
1515
#include <pytorch/tokenizers/hf_tokenizer.h>
1616
#include <pytorch/tokenizers/llama2c_tokenizer.h>
17+
#include <pytorch/tokenizers/sentencepiece.h>
1718
#include <pytorch/tokenizers/tiktoken.h>
1819

1920
namespace executorch::extension::llm {
@@ -116,8 +117,8 @@ Error TextLLMRunner::generate_from_pos(
116117

117118
::tokenizers::Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
118119
prompt,
119-
/* bos */ 0,
120-
/* eos */ 0);
120+
/*bos=*/config.num_bos,
121+
/*eos=*/config.num_eos);
121122

122123
ET_CHECK_TK_OK_OR_RETURN_ERROR(
123124
encode_res.error(), "Failed to encode prompt %s", prompt.c_str());
@@ -278,6 +279,12 @@ std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
278279
return tiktoken_tokenizer;
279280
}
280281

282+
auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>();
283+
if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
284+
ET_LOG(Info, "Loaded Sentencepiece tokenizer");
285+
return sp_tokenizer;
286+
}
287+
281288
auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
282289
if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
283290
ET_LOG(Info, "Loaded BPE tokenizer");

0 commit comments

Comments (0)