Skip to content

Commit 3582d82

Browse files
authored
Merge pull request #736 from martindevans/llama_get_timings
Exposed basic timing information from llama.cpp
2 parents e2a770e + e38d96b commit 3582d82

File tree

3 files changed

+147
-2
lines changed

3 files changed

+147
-2
lines changed

LLama.Examples/Examples/BatchedExecutorFork.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using LLama.Batched;
1+
using LLama.Batched;
22
using LLama.Common;
33
using LLama.Native;
44
using LLama.Sampling;
@@ -67,6 +67,13 @@ await AnsiConsole
6767
root.Display(display);
6868
AnsiConsole.Write(display);
6969
});
70+
71+
// Print some stats
72+
var timings = executor.Context.NativeHandle.GetTimings();
73+
AnsiConsole.MarkupLine($"Total Tokens Evaluated: {timings.TokensEvaluated}");
74+
AnsiConsole.MarkupLine($"Total Tokens Sampled: {timings.TokensSampled}");
75+
AnsiConsole.MarkupLine($"Eval Time: {(timings.Eval + timings.PromptEval).TotalMilliseconds}ms");
76+
AnsiConsole.MarkupLine($"Sample Time: {timings.Sampling.TotalMilliseconds}ms");
7077
}
7178

7279
private class Node

LLama/Native/LLamaTimings.cs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
using System;
2+
using System.Runtime.InteropServices;
3+
4+
namespace LLama.Native;
5+
6+
/// <summary>
/// LLama performance information. Mirrors the native <c>llama_timings</c> struct
/// returned by <c>llama_get_timings</c>; field order and types (6 doubles then
/// 3 ints, sequential layout) must match the native definition exactly.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaTimings
{
    /// <summary>
    /// Timestamp when reset was last called
    /// </summary>
    private double t_start_ms;

    /// <summary>
    /// Timestamp when these timings were read
    /// </summary>
    private double t_end_ms;

    /// <summary>
    /// Loading milliseconds
    /// </summary>
    private double t_load_ms;

    /// <summary>
    /// Total sampling milliseconds
    /// </summary>
    private double t_sample_ms;

    /// <summary>
    /// Total milliseconds spent prompt processing
    /// </summary>
    private double t_p_eval_ms;

    /// <summary>
    /// Total milliseconds in eval/decode calls
    /// </summary>
    private double t_eval_ms;

    /// <summary>
    /// Number of tokens sampled
    /// </summary>
    private int n_sample;

    /// <summary>
    /// Number of tokens in eval calls for the prompt (with batch size > 1)
    /// </summary>
    private int n_p_eval;

    /// <summary>
    /// Number of eval calls
    /// </summary>
    private int n_eval;


    /// <summary>
    /// Timestamp when reset was last called
    /// </summary>
    public readonly TimeSpan ResetTimestamp => TimeSpan.FromMilliseconds(t_start_ms);

    /// <summary>
    /// Timestamp when these timings were read
    /// </summary>
    public readonly TimeSpan ReadTimestamp => TimeSpan.FromMilliseconds(t_end_ms); // fixed: previously read t_start_ms

    /// <summary>
    /// Time spent loading
    /// </summary>
    public readonly TimeSpan Loading => TimeSpan.FromMilliseconds(t_load_ms);

    /// <summary>
    /// Time spent sampling
    /// </summary>
    public readonly TimeSpan Sampling => TimeSpan.FromMilliseconds(t_sample_ms); // fixed: previously read t_load_ms

    /// <summary>
    /// Time spent prompt processing
    /// </summary>
    public readonly TimeSpan PromptEval => TimeSpan.FromMilliseconds(t_p_eval_ms); // readonly added for consistency with siblings

    /// <summary>
    /// Total time in eval/decode calls
    /// </summary>
    public readonly TimeSpan Eval => TimeSpan.FromMilliseconds(t_eval_ms);

    /// <summary>
    /// Total number of tokens sampled
    /// </summary>
    public readonly int TokensSampled => n_sample;

    /// <summary>
    /// Number of tokens in eval calls for the prompt (with batch size > 1)
    /// </summary>
    public readonly int PromptTokensEvaluated => n_p_eval;

    /// <summary>
    /// Number of tokens in eval calls for the prompt. Misspelled name kept for
    /// backward compatibility; prefer <see cref="PromptTokensEvaluated"/>.
    /// </summary>
    public readonly int PrompTokensEvaluated => n_p_eval;

    /// <summary>
    /// Number of eval calls
    /// </summary>
    public readonly int TokensEvaluated => n_eval; // fixed: previously returned n_p_eval
}

LLama/Native/SafeLLamaContextHandle.cs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,21 @@ static SafeLLamaContextHandle()
282282
/// </summary>
283283
/// <param name="ctx"></param>
284284
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
285-
public static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);
285+
private static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);
286+
287+
/// <summary>
/// Get performance information (timing totals and token counts) for a context.
/// The struct is returned by value and marshalled from the native <c>llama_timings</c> layout.
/// </summary>
/// <param name="ctx">Context handle to query</param>
/// <returns>Snapshot of the native timing counters</returns>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
private static extern LLamaTimings llama_get_timings(SafeLLamaContextHandle ctx);
293+
294+
/// <summary>
/// Reset performance information for a context.
/// </summary>
/// <param name="ctx">Context handle whose timing counters are reset</param>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
private static extern void llama_reset_timings(SafeLLamaContextHandle ctx);
286300
#endregion
287301

288302
/// <summary>
@@ -510,6 +524,26 @@ public void SetThreads(uint threads, uint threadsBatch)
510524
{
511525
llama_set_n_threads(this, threads, threadsBatch);
512526
}
527+
528+
#region timing
529+
/// <summary>
/// Get performance information (timing totals and token counts) for this context
/// </summary>
/// <returns>A by-value snapshot of the native timing counters</returns>
public LLamaTimings GetTimings() => llama_get_timings(this);
537+
538+
/// <summary>
/// Reset all performance information for this context
/// </summary>
public void ResetTimings() => llama_reset_timings(this);
545+
#endregion
546+
513547

514548
#region KV Cache Management
515549
/// <summary>

0 commit comments

Comments
 (0)