1 file changed: +5 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -936,7 +936,11 @@ def _create_vocab_sentencepiece(self):
936
936
scores : list [float ] = [- 10000.0 ] * vocab_size
937
937
toktypes : list [int ] = [SentencePieceTokenTypes .UNUSED ] * vocab_size
938
938
939
- for token_id in range (vocab_size ):
939
+ for token_id in range (tokenizer .vocab_size ()):
940
+ if token_id >= vocab_size :
941
+ logger .warning (f'ignore tokens from { token_id } : id is out of range, max={ vocab_size - 1 } ' )
942
+ break
943
+
940
944
piece = tokenizer .IdToPiece (token_id )
941
945
text = piece .encode ("utf-8" )
942
946
score = tokenizer .GetScore (token_id )
@@ -951,10 +955,6 @@ def _create_vocab_sentencepiece(self):
951
955
elif tokenizer .IsByte (token_id ):
952
956
toktype = SentencePieceTokenTypes .BYTE
953
957
954
- if token_id >= vocab_size :
955
- logger .warning (f'ignore tokens from { token_id } : id is out of range, max={ vocab_size - 1 } ' )
956
- break
957
-
958
958
tokens [token_id ] = text
959
959
scores [token_id ] = score
960
960
toktypes [token_id ] = toktype
You can’t perform that action at this time.
0 commit comments