
Commit 745f11f

memory : correctly handle failure in apply() (#14438)
ggml-ci
1 parent 5dd942d

File tree

6 files changed: +32 −4 lines changed

  src/llama-kv-cache-unified-iswa.cpp
  src/llama-kv-cache-unified.cpp
  src/llama-memory-hybrid.cpp
  src/llama-memory-recurrent.cpp
  src/llama-memory.cpp
  src/llama-memory.h

src/llama-kv-cache-unified-iswa.cpp

Lines changed: 1 addition & 1 deletion

@@ -246,7 +246,7 @@ bool llama_kv_cache_unified_iswa_context::next() {
 }

 bool llama_kv_cache_unified_iswa_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));

     bool res = true;

src/llama-kv-cache-unified.cpp

Lines changed: 1 addition & 1 deletion

@@ -1776,7 +1776,7 @@ bool llama_kv_cache_unified_context::next() {
 }

 bool llama_kv_cache_unified_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));

     // no ubatches -> this is a KV cache update
     if (ubatches.empty()) {

src/llama-memory-hybrid.cpp

Lines changed: 1 addition & 1 deletion

@@ -218,7 +218,7 @@ bool llama_memory_hybrid_context::next() {
 }

 bool llama_memory_hybrid_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));

     bool res = true;

src/llama-memory-recurrent.cpp

Lines changed: 9 additions & 1 deletion

@@ -1071,7 +1071,15 @@ bool llama_memory_recurrent_context::next() {
 }

 bool llama_memory_recurrent_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
+
+    // no ubatches -> this is an update
+    if (ubatches.empty()) {
+        // recurrent cache never performs updates
+        assert(status == LLAMA_MEMORY_STATUS_NO_UPDATE);
+
+        return true;
+    }

     mem->find_slot(ubatches[i_next]);
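
The recurrent hunk is the behavioral core of the commit: apply() also serves as the entry point for memory updates, signaled by an empty ubatch list, and for the recurrent cache such a call must succeed as a no-op rather than trip the old SUCCESS-only assert when the status is NO_UPDATE. Below is a hypothetical, self-contained sketch of that contract; the is_fail stand-in, the recurrent_context struct, and the main() driver are illustrative, not code from this commit:

#include <cassert>
#include <vector>

// status values mirror the names in src/llama-memory.h
enum llama_memory_status {
    LLAMA_MEMORY_STATUS_SUCCESS,
    LLAMA_MEMORY_STATUS_NO_UPDATE,
    LLAMA_MEMORY_STATUS_FAILED_PREPARE,
    LLAMA_MEMORY_STATUS_FAILED_COMPUTE,
};

// inline stand-in for llama_memory_status_is_fail(), which the commit
// defines in the src/llama-memory.cpp hunk below
static bool is_fail(llama_memory_status s) {
    return s == LLAMA_MEMORY_STATUS_FAILED_PREPARE ||
           s == LLAMA_MEMORY_STATUS_FAILED_COMPUTE;
}

// hypothetical, stripped-down stand-in for llama_memory_recurrent_context
struct recurrent_context {
    llama_memory_status status;
    std::vector<int>    ubatches; // element type is irrelevant for the sketch

    bool apply() const {
        // the commit's new guard: any non-failure status may reach apply()
        assert(!is_fail(status));

        // no ubatches -> this is an update; the recurrent cache never
        // performs updates, so report success without doing anything
        if (ubatches.empty()) {
            assert(status == LLAMA_MEMORY_STATUS_NO_UPDATE);
            return true;
        }

        // ... per-ubatch slot handling would go here ...
        return true;
    }
};

int main() {
    // before this commit, the SUCCESS-only assert fired on this path even
    // though NO_UPDATE is not an error
    recurrent_context ctx = { LLAMA_MEMORY_STATUS_NO_UPDATE, {} };
    assert(ctx.apply());
    return 0;
}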

src/llama-memory.cpp

Lines changed: 17 additions & 0 deletions

@@ -40,3 +40,20 @@ llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1) {
     // if either status has an update, then the combined status has an update
     return has_update ? LLAMA_MEMORY_STATUS_SUCCESS : LLAMA_MEMORY_STATUS_NO_UPDATE;
 }
+
+bool llama_memory_status_is_fail(llama_memory_status status) {
+    switch (status) {
+        case LLAMA_MEMORY_STATUS_SUCCESS:
+        case LLAMA_MEMORY_STATUS_NO_UPDATE:
+            {
+                return false;
+            }
+        case LLAMA_MEMORY_STATUS_FAILED_PREPARE:
+        case LLAMA_MEMORY_STATUS_FAILED_COMPUTE:
+            {
+                return true;
+            }
+    }
+
+    return false;
+}
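
As a quick sanity check of the new helper's classification, here is a minimal standalone sketch (not part of the commit); the enum mirrors the names in src/llama-memory.h and the helper body is equivalent to the hunk above:

#include <cassert>

enum llama_memory_status {
    LLAMA_MEMORY_STATUS_SUCCESS,
    LLAMA_MEMORY_STATUS_NO_UPDATE,
    LLAMA_MEMORY_STATUS_FAILED_PREPARE,
    LLAMA_MEMORY_STATUS_FAILED_COMPUTE,
};

bool llama_memory_status_is_fail(llama_memory_status status) {
    switch (status) {
        case LLAMA_MEMORY_STATUS_SUCCESS:
        case LLAMA_MEMORY_STATUS_NO_UPDATE:
            return false;
        case LLAMA_MEMORY_STATUS_FAILED_PREPARE:
        case LLAMA_MEMORY_STATUS_FAILED_COMPUTE:
            return true;
    }
    return false; // unreachable for valid enum values
}

int main() {
    // the two non-failure statuses satisfy the new apply() guard ...
    assert(!llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_SUCCESS));
    assert(!llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_NO_UPDATE));
    // ... and the two failure statuses do not
    assert(llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_FAILED_PREPARE));
    assert(llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_FAILED_COMPUTE));
    return 0;
}

The trailing return false after the switch is presumably a defensive default: it avoids missing-return warnings from compilers that do not treat the enum switch as exhaustive, and it classifies any out-of-range value as a non-failure.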

src/llama-memory.h

Lines changed: 3 additions & 0 deletions

@@ -31,6 +31,9 @@ enum llama_memory_status {
 // useful for implementing hybrid memory types (e.g. iSWA)
 llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);

+// helper function for checking if a memory status indicates a failure
+bool llama_memory_status_is_fail(llama_memory_status status);
+
 // the interface for managing the memory context during batch processing
 // this interface is implemented per memory type. see:
 //   - llama_kv_cache_unified_context
