@@ -89,6 +89,8 @@ public uint BatchThreads
8989 /// Get the maximum batch size for this context
9090 /// </summary>
9191 public uint BatchSize => NativeHandle . BatchSize ;
92+
93+ private LLamaTokenData [ ] ? _samplingBuffer ; // Scratch array kept between ApplyPenalty calls: ApplyPenalty (re)allocates it only when it is null or shorter than the logits, then passes it to LLamaTokenDataArray.Create. NOTE(review): the returned candidates presumably alias this array, so a later call may overwrite an earlier result — confirm against LLamaTokenDataArray.Create.
9294
9395 /// <summary>
9496 /// Create a new LLamaContext for the given LLamaWeights
@@ -496,7 +498,9 @@ public LLamaTokenDataArray ApplyPenalty(int logits_i, IEnumerable<LLamaToken> la
496498 var nl_logit = logits [ ( int ? ) nl_token ?? 0 ] ;
497499
498500 // Convert logits into token candidates
499- var candidates_p = LLamaTokenDataArray . Create ( logits ) ;
501+ if ( _samplingBuffer == null || _samplingBuffer . Length < logits . Length )
502+ _samplingBuffer = new LLamaTokenData [ logits . Length ] ;
503+ var candidates_p = LLamaTokenDataArray . Create ( logits , _samplingBuffer ) ;
500504
501505 // Extract most recently returned tokens
502506 var last_n_repeat = Math . Min ( ( int ) ContextSize , repeatLastTokensCount ) ;
@@ -508,14 +512,14 @@ public LLamaTokenDataArray ApplyPenalty(int logits_i, IEnumerable<LLamaToken> la
508512 // Restore newline token logit value if necessary
509513 if ( ! penalizeNL && nl_token . HasValue )
510514 {
511- var candidatesSpan = candidates_p . data . Span ;
512- for ( var i = 0 ; i < candidates_p . data . Length ; i ++ )
515+ var candidatesSpan = candidates_p . Data . Span ;
516+ for ( var i = 0 ; i < candidates_p . Data . Length ; i ++ )
513517 {
514518 ref var item = ref candidatesSpan [ i ] ;
515519 if ( item . id == nl_token )
516520 item . logit = nl_logit ;
517521 }
518- candidates_p . sorted = false ;
522+ candidates_p . Sorted = false ;
519523 }
520524
521525 return candidates_p ;
0 commit comments