diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj index 08c4fb9f906..5679b55ce1e 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj @@ -64,6 +64,7 @@ 306A71512DC1DC3D00936B1F /* pre_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 306A71472DC1DC3D00936B1F /* pre_tokenizer.cpp */; }; 306A71522DC1DC3D00936B1F /* token_decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 306A714B2DC1DC3D00936B1F /* token_decoder.cpp */; }; 3072D5232DC3EA280083FC83 /* Constants.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3072D5222DC3EA280083FC83 /* Constants.swift */; }; + F24909E82E207004001E5B69 /* normalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F24909E72E207004001E5B69 /* normalizer.cpp */; }; F292B0752D88B0C200BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06F2D88B0C200BE6839 /* tiktoken.cpp */; }; F292B0762D88B0C200BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06C2D88B0C200BE6839 /* llama2c_tokenizer.cpp */; }; F292B0772D88B0C200BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */; }; @@ -152,6 +153,7 @@ 306A714A2DC1DC3D00936B1F /* std_regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = std_regex.cpp; path = src/std_regex.cpp; sourceTree = ""; }; 306A714B2DC1DC3D00936B1F /* token_decoder.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = token_decoder.cpp; path = src/token_decoder.cpp; sourceTree = ""; }; 3072D5222DC3EA280083FC83 /* Constants.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Constants.swift; sourceTree = ""; }; + F24909E72E207004001E5B69 /* normalizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = normalizer.cpp; path = src/normalizer.cpp; sourceTree = ""; }; F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = ""; }; F292B06C2D88B0C200BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = ""; }; F292B06F2D88B0C200BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = ""; }; @@ -309,6 +311,7 @@ 03729F0E2BB203D700152F2E /* tokenizers */ = { isa = PBXGroup; children = ( + F24909E72E207004001E5B69 /* normalizer.cpp */, F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */, 306A71452DC1DC3D00936B1F /* hf_tokenizer.cpp */, F292B1002D88B20C00BE6839 /* llama_tiktoken.cpp */, @@ -598,6 +601,7 @@ files = ( 03D151B82E0E0908007A38BE /* LLaVARunner.mm in Sources */, 03729EE12BB1F93800152F2E /* LLaMARunner.mm in Sources */, + F24909E82E207004001E5B69 /* normalizer.cpp in Sources */, 0372C3152C89418E00CD942A /* llava_runner.cpp in Sources */, 03D151CA2E0E98C4007A38BE /* sentencepiece.cpp in Sources */, 03D151D92E0E9E43007A38BE /* ExecuTorchTextLLMRunner.mm in Sources */, diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj index c9b68f250c1..dd776a1feb3 100644 --- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj +++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj @@ -35,6 +35,7 @@ 30AA4B662DC0766800B1BE50 /* re2_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5C2DC0766800B1BE50 /* re2_regex.cpp */; }; 3C6ABD332DFA27DE0015DE55 /* regex_lookahead.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */; }; F22E9E1A2DF2CBB900EC5425 /* text_llm_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */; }; + F24909E22E206FBA001E5B69 /* normalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F24909E12E206FBA001E5B69 /* normalizer.cpp */; }; F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; }; F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; }; F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B01A2D88AF3500BE6839 /* tiktoken.cpp */; }; @@ -100,6 +101,7 @@ 3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = regex_lookahead.cpp; path = src/regex_lookahead.cpp; sourceTree = ""; }; F22E9E182DF2CBB900EC5425 /* text_llm_runner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = text_llm_runner.h; sourceTree = ""; }; F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = text_llm_runner.cpp; sourceTree = ""; }; + F24909E12E206FBA001E5B69 /* normalizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = normalizer.cpp; path = src/normalizer.cpp; sourceTree = ""; }; F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = ""; }; F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = ""; }; F292B01A2D88AF3500BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = ""; }; @@ -185,6 +187,7 @@ 032A74022CAFBB7800932D36 /* tokenizers */ = { isa = PBXGroup; children = ( + F24909E12E206FBA001E5B69 /* normalizer.cpp */, F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */, 3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */, 30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */, @@ -430,6 +433,7 @@ F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */, F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */, F2E1B5172E03AC19002C9718 /* sentencepiece.cpp in Sources */, + F24909E22E206FBA001E5B69 /* normalizer.cpp in Sources */, 03E7E6792CBDCAE900205E71 /* CoreMLTests.mm in Sources */, 032A74232CAFC1B300932D36 /* runner.cpp in Sources */, 03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */, diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index d202b36fe00..23359bdce7b 160000 --- a/extension/llm/tokenizers +++ b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit d202b36fe006457c2139a423ef183ca4ce7c410c +Subproject commit 23359bdce7bedc084e101c39e89506dff459dab8 diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt index 7456d749f34..ff61a36e6fe 100644 --- a/third-party/CMakeLists.txt +++ b/third-party/CMakeLists.txt @@ -4,6 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +set(CMAKE_POLICY_VERSION_MINIMUM 3.5) add_subdirectory(json) add_subdirectory(gflags) @@ -86,6 +87,7 @@ ExternalProject_Add( -DFLATCC_REFLECTION=OFF -DFLATCC_DEBUG_CLANG_SANITIZE=OFF -DFLATCC_INSTALL=ON + -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_INSTALL_PREFIX:PATH= -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_TOOLCHAIN_FILE=