Skip to content

Commit c3fd345

Browse files
authored
Fix/memory leaks, add functions to report context usage, add unittest (#20)
* fix(chat): avoid memory leaks and unnecessary re-initialization in llm_chat_check_context. Adds a check to return early if the chat struct is already initialized, preventing memory leaks and unnecessary re-initialization, and ensuring correct chat context handling. * fix(chat): remove the duplicate_content arg in llm_messages_append to avoid a crash on llm_chat_free(). Refactored llm_messages_append to always duplicate message content using sqlite_strdup, removing the duplicate_content parameter. This option was causing a double-free crash in llm_chat_free with non-duplicated content. Also, the ai->chat.response buffer is reset on new prompts in the same chat, so we cannot rely on that buffer to store previous response messages. * test: add C unit tests. Introduces a new C unit test runner at tests/c/unittest.c with basic tests for the SQLite AI extension. The Makefile is updated to build and run these tests, including logic to download a test model if needed and link against the appropriate SQLite libraries. * ci: add GGUF model caching to workflow. Introduces a new 'download-model' job in the GitHub Actions workflow to cache and restore the GGUF model, reducing redundant downloads. Updates the Makefile to use GGUF model variables, simplifies test model handling, and ensures the test binary is built with the correct SQLite source. Adds the SQLite amalgamation for testing. * feat: add llm_context_used and llm_context_size functions to report context usage. Adds two functions: llm_context_size, which returns the total context size, and llm_context_used, which returns the number of tokens used.
1 parent 7e3b8c1 commit c3fd345

File tree

7 files changed

+256457
-14
lines changed

7 files changed

+256457
-14
lines changed

.github/workflows/main.yml

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,48 @@ permissions:
77
contents: write
88
id-token: write
99

10+
env:
11+
GGUF_MODEL_DIR: tests/models/unsloth/gemma-3-270m-it-GGUF
12+
GGUF_MODEL_NAME: gemma-3-270m-it-UD-IQ2_M.gguf
13+
GGUF_MODEL_URL: https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
14+
1015
jobs:
16+
download-model:
17+
outputs:
18+
cache-key: gguf-${{ steps.meta.outputs.hash }}
19+
model-path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
20+
name: Download GGUF model
21+
runs-on: ubuntu-22.04
22+
steps:
23+
- name: Compute model URL hash
24+
id: meta
25+
run: |
26+
if command -v sha256sum >/dev/null 2>&1; then
27+
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | sha256sum | cut -d' ' -f1)
28+
else
29+
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | shasum -a 256 | cut -d' ' -f1)
30+
fi
31+
echo "hash=$hash" >> "$GITHUB_OUTPUT"
32+
33+
- name: Prepare model directory
34+
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"
35+
36+
- name: Restore GGUF cache
37+
id: cache
38+
uses: actions/cache@v4
39+
with:
40+
path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
41+
key: gguf-${{ steps.meta.outputs.hash }}
42+
43+
- name: Download GGUF model
44+
if: steps.cache.outputs.cache-hit != 'true'
45+
run: |
46+
curl -L --fail --retry 3 "${{ env.GGUF_MODEL_URL }}" -o "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
47+
- name: Verify GGUF model
48+
run: test -f "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
49+
1150
build:
51+
needs: download-model
1252
runs-on: ${{ matrix.os }}
1353
container: ${{ matrix.container && matrix.container || '' }}
1454
name: ${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} build${{ matrix.arch != 'arm64-v8a' && matrix.name != 'ios-sim' && matrix.name != 'ios' && matrix.name != 'apple-xcframework' && matrix.name != 'android-aar' && ( matrix.name != 'macos' || matrix.arch != 'x86_64' ) && ' + test' || ''}}
@@ -48,12 +88,12 @@ jobs:
4888
arch: x86_64
4989
name: linux-musl-cpu
5090
container: alpine:latest
51-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
91+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
5292
- os: ubuntu-22.04
5393
arch: x86_64
5494
name: linux-musl-gpu
5595
container: alpine:latest
56-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
96+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
5797
- os: ubuntu-22.04-arm
5898
arch: arm64
5999
name: linux-musl-cpu
@@ -65,20 +105,20 @@ jobs:
65105
- os: windows-2022
66106
arch: x86_64
67107
name: windows-cpu
68-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
108+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
69109
- os: windows-2022
70110
arch: x86_64
71111
name: windows-gpu
72-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
112+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
73113
- os: ubuntu-22.04
74114
arch: x86_64
75115
name: android
76-
make: PLATFORM=android ARCH=x86_64
116+
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=x86_64
77117
sqlite-amalgamation-zip: https://sqlite.org/2025/sqlite-amalgamation-3490100.zip
78118
- os: ubuntu-22.04
79119
arch: arm64-v8a
80120
name: android
81-
make: PLATFORM=android ARCH=arm64-v8a
121+
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=arm64-v8a
82122
- os: macos-15
83123
name: ios
84124
make: PLATFORM=ios LLAMA="-DGGML_NATIVE=OFF -DGGML_METAL=ON -DGGML_ACCELERATE=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Apple" WHISPER="-DWHISPER_COREML=ON -DWHISPER_COREML_ALLOW_FALLBACK=ON"
@@ -106,6 +146,15 @@ jobs:
106146
with:
107147
submodules: true
108148

149+
- name: Prepare GGUF model directory
150+
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"
151+
152+
- name: Restore GGUF cache
153+
uses: actions/cache@v4
154+
with:
155+
path: ${{ needs.download-model.outputs.model-path }}
156+
key: ${{ needs.download-model.outputs.cache-key }}
157+
109158
- name: android setup java
110159
if: matrix.name == 'android-aar'
111160
uses: actions/setup-java@v4

API.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,35 @@ Frees the current inference context.
245245
SELECT llm_context_free();
246246
```
247247

248+
---
249+
## `llm_context_size()`
250+
251+
**Returns:** `INTEGER`
252+
253+
**Description:**
254+
Returns the total token capacity (context window) of the current llama context. Use this after `llm_context_create` to confirm the configured `context_size`. Raises an error if no context is active.
255+
256+
```sql
257+
SELECT llm_context_size();
258+
-- 4096
259+
```
260+
261+
---
262+
263+
## `llm_context_used()`
264+
265+
**Returns:** `INTEGER`
266+
267+
**Description:**
268+
Returns how many tokens of the current llama context have already been consumed. Combine this with `llm_context_size()` to monitor usage. Raises an error if no context is active.
269+
270+
**Example:**
271+
272+
```sql
273+
SELECT llm_context_used();
274+
-- 1024
275+
```
276+
248277
---
249278

250279
## `llm_sampler_create()`
@@ -546,6 +575,22 @@ SELECT llm_sampler_init_penalties(64, 1.2, 0.5, 0.8);
546575

547576
---
548577

578+
## `llm_token_count(text TEXT)`
579+
580+
**Returns:** `INTEGER`
581+
582+
**Description:**
583+
Returns how many tokens the current model would consume for the supplied `text`, using the active context’s vocabulary. Requires a context created via `llm_context_create`.
584+
585+
**Example:**
586+
587+
```sql
588+
SELECT llm_token_count('Hello world!');
589+
-- 5
590+
```
591+
592+
---
593+
549594
## `llm_embed_generate(text TEXT, options TEXT)`
550595

551596
**Returns:** `BLOB` or `TEXT`

Makefile

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ BUILD_LLAMA = $(BUILD_DIR)/llama.cpp
3737
BUILD_WHISPER = $(BUILD_DIR)/whisper.cpp
3838
BUILD_MINIAUDIO = $(BUILD_DIR)/miniaudio
3939

40+
# Test
41+
# gemma-3-270m-it-UD-IQ2_M.gguf is just a lightweight model to use for testing
42+
CTEST_BIN = $(BUILD_DIR)/tests/sqlite_ai_tests
43+
GGUF_MODEL_DIR ?= tests/models/unsloth/gemma-3-270m-it-GGUF
44+
GGUF_MODEL_NAME ?= gemma-3-270m-it-UD-IQ2_M.gguf
45+
GGUF_MODEL_URL ?= https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
46+
GGUF_MODEL_PATH := $(GGUF_MODEL_DIR)/$(GGUF_MODEL_NAME)
47+
SKIP_UNITTEST ?= 0
48+
4049
# Compiler and flags
4150
CC = gcc
4251
CXX = g++
@@ -55,6 +64,14 @@ LLAMA_LDFLAGS = -L./$(BUILD_LLAMA)/common -L./$(BUILD_GGML)/lib -L./$(BUILD_LLAM
5564
WHISPER_LDFLAGS = -L./$(BUILD_WHISPER)/src -lwhisper
5665
MINIAUDIO_LDFLAGS = -L./$(BUILD_MINIAUDIO) -lminiaudio -lminiaudio_channel_combiner_node -lminiaudio_channel_separator_node -lminiaudio_ltrim_node -lminiaudio_reverb_node -lminiaudio_vocoder_node
5766
LDFLAGS = $(LLAMA_LDFLAGS) $(WHISPER_LDFLAGS) $(MINIAUDIO_LDFLAGS)
67+
SQLITE_TEST_LIBS =
68+
ifneq ($(PLATFORM),windows)
69+
SQLITE_TEST_LIBS += -lpthread -lm
70+
ifneq ($(PLATFORM),macos)
71+
SQLITE_TEST_LIBS += -ldl
72+
endif
73+
endif
74+
SQLITE_TEST_SRC = tests/c/sqlite3.c
5875

5976
# Files
6077
SRC_FILES = $(wildcard $(SRC_DIR)/*.c)
@@ -210,8 +227,27 @@ endif
210227
$(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/llama.cpp.stamp
211228
$(CC) $(CFLAGS) -O3 -fPIC -c $< -o $@
212229

213-
test: $(TARGET)
214-
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
230+
$(CTEST_BIN): tests/c/unittest.c $(SQLITE_TEST_SRC)
231+
@mkdir -p $(dir $@)
232+
$(CC) -std=c11 -Wall -Wextra -DSQLITE_ENABLE_LOAD_EXTENSION -I$(SRC_DIR) tests/c/unittest.c $(SQLITE_TEST_SRC) -o $@ $(SQLITE_TEST_LIBS)
233+
234+
$(GGUF_MODEL_PATH):
235+
@mkdir -p $(GGUF_MODEL_DIR)
236+
curl -L --fail --retry 3 -o $@ $(GGUF_MODEL_URL)
237+
238+
TEST_DEPS := $(TARGET)
239+
ifeq ($(SKIP_UNITTEST),0)
240+
TEST_DEPS += $(CTEST_BIN) $(GGUF_MODEL_PATH)
241+
endif
242+
243+
test: $(TEST_DEPS)
244+
@echo "Running sqlite3 CLI smoke test (ensures .load works)..."
245+
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
246+
ifeq ($(SKIP_UNITTEST),0)
247+
$(CTEST_BIN) --extension "$(TARGET)" --model "$(GGUF_MODEL_PATH)"
248+
else
249+
@echo "Skipping C unit tests (SKIP_UNITTEST=$(SKIP_UNITTEST))."
250+
endif
215251

216252
# Build submodules
217253
ifeq ($(PLATFORM),windows)

src/sqlite-ai.c

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,7 @@ static bool llm_check_context (sqlite3_context *context) {
784784

785785
// MARK: - Chat Messages -
786786

787-
bool llm_messages_append (ai_messages *list, const char *role, const char *content, bool duplicate_content) {
787+
bool llm_messages_append (ai_messages *list, const char *role, const char *content) {
788788
if (list->count >= list->capacity) {
789789
size_t new_cap = list->capacity ? list->capacity * 2 : MIN_ALLOC_MESSAGES;
790790
llama_chat_message *new_items = sqlite3_realloc64(list->items, new_cap * sizeof(llama_chat_message));
@@ -796,7 +796,7 @@ bool llm_messages_append (ai_messages *list, const char *role, const char *conte
796796

797797
bool duplicate_role = ((role != ROLE_USER) && (role != ROLE_ASSISTANT));
798798
list->items[list->count].role = (duplicate_role) ? sqlite_strdup(role) : role;
799-
list->items[list->count].content = (duplicate_content) ? sqlite_strdup(content) : content;
799+
list->items[list->count].content = sqlite_strdup(content);
800800
list->count += 1;
801801
return true;
802802
}
@@ -1490,6 +1490,9 @@ static bool llm_chat_check_context (ai_context *ai) {
14901490
llama_sampler_chain_add(ai->sampler, llama_sampler_init_dist((uint32_t)LLAMA_DEFAULT_SEED));
14911491
}
14921492

1493+
// return early if the chat struct has already been initialized
1494+
if (ai->chat.uuid[0] != '\0') return true;
1495+
14931496
// create history structs
14941497
ai_uuid_v7_string(ai->chat.uuid, true);
14951498

@@ -1509,7 +1512,7 @@ static bool llm_chat_save_response (ai_context *ai, ai_messages *messages, const
15091512
char *response = ai->chat.response.data;
15101513
if (!response) return false;
15111514

1512-
if (!llm_messages_append(messages, ROLE_ASSISTANT, response, false)) {
1515+
if (!llm_messages_append(messages, ROLE_ASSISTANT, response)) {
15131516
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
15141517
return false;
15151518
}
@@ -1640,7 +1643,7 @@ static bool llm_chat_run (ai_context *ai, ai_cursor *c, const char *user_prompt)
16401643
buffer_t *formatted = &ai->chat.formatted;
16411644

16421645
// save prompt input in history
1643-
if (!llm_messages_append(messages, ROLE_USER, user_prompt, true)) {
1646+
if (!llm_messages_append(messages, ROLE_USER, user_prompt)) {
16441647
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append message");
16451648
return false;
16461649
}
@@ -1976,7 +1979,7 @@ static void llm_chat_restore (sqlite3_context *context, int argc, sqlite3_value
19761979
const char *role = (const char *)sqlite3_column_text(vm, 0);
19771980
const char *content = (const char *)sqlite3_column_text(vm, 1);
19781981

1979-
if (!llm_messages_append(messages, role, content, true)) {
1982+
if (!llm_messages_append(messages, role, content)) {
19801983
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
19811984
rc = SQLITE_OK;
19821985
goto abort_restore;
@@ -2369,6 +2372,27 @@ static void llm_context_create_textgen (sqlite3_context *context, int argc, sqli
23692372
llm_context_create_with_options(context, ai, options, options2);
23702373
}
23712374

2375+
static void llm_context_size (sqlite3_context *context, int argc, sqlite3_value **argv) {
2376+
ai_context *ai = (ai_context *)sqlite3_user_data(context);
2377+
if (!ai->ctx) {
2378+
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
2379+
return;
2380+
}
2381+
uint32_t n_ctx = llama_n_ctx(ai->ctx);
2382+
sqlite3_result_int(context, n_ctx);
2383+
}
2384+
2385+
static void llm_context_used (sqlite3_context *context, int argc, sqlite3_value **argv) {
2386+
ai_context *ai = (ai_context *)sqlite3_user_data(context);
2387+
if (!ai->ctx) {
2388+
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
2389+
return;
2390+
}
2391+
int32_t n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ai->ctx), 0) + 1;
2392+
if (n_ctx_used < 0) n_ctx_used = 0;
2393+
sqlite3_result_int(context, n_ctx_used);
2394+
}
2395+
23722396
static void llm_model_free (sqlite3_context *context, int argc, sqlite3_value **argv) {
23732397
ai_context *ai = (ai_context *)sqlite3_user_data(context);
23742398
ai_cleanup((void *)ai, true, false);
@@ -2707,6 +2731,12 @@ SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_a
27072731
rc = sqlite3_create_function(db, "llm_context_create", 1, SQLITE_UTF8, ctx, llm_context_create, NULL, NULL);
27082732
if (rc != SQLITE_OK) goto cleanup;
27092733

2734+
rc = sqlite3_create_function(db, "llm_context_size", 0, SQLITE_UTF8, ctx, llm_context_size, NULL, NULL);
2735+
if (rc != SQLITE_OK) goto cleanup;
2736+
2737+
rc = sqlite3_create_function(db, "llm_context_used", 0, SQLITE_UTF8, ctx, llm_context_used, NULL, NULL);
2738+
if (rc != SQLITE_OK) goto cleanup;
2739+
27102740
rc = sqlite3_create_function(db, "llm_context_create_embedding", 0, SQLITE_UTF8, ctx, llm_context_create_embedding, NULL, NULL);
27112741
if (rc != SQLITE_OK) goto cleanup;
27122742

src/sqlite-ai.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
extern "C" {
2525
#endif
2626

27-
#define SQLITE_AI_VERSION "0.7.57"
27+
#define SQLITE_AI_VERSION "0.7.58"
2828

2929
SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);
3030

0 commit comments

Comments
 (0)