Skip to content

Commit c3fd345

Browse files
authored
Fix/memory leaks, add functions to report context usage, add unittest (#20)
* fix(chat): avoid memory leaks and unnecessary re-initialization in llm_chat_check_context. Adds a check to return early if the chat struct is already initialized, preventing memory leaks and unnecessary re-initialization, and ensuring correct chat context handling. * fix(chat): remove the duplicate_content arg in llm_messages_append to avoid a crash on llm_chat_free(). Refactored llm_messages_append to always duplicate message content using sqlite_strdup, removing the duplicate_content parameter. This option was causing a double-free crash in llm_chat_free with non-duplicated content. Also, the ai->chat.response buffer is reset on new prompts in the same chat, so we cannot rely on that buffer to store previous response messages. * test: add C unit tests. Introduces a new C unit test runner at tests/c/unittest.c with basic tests for the SQLite AI extension. The Makefile is updated to build and run these tests, including logic to download a test model if needed and link against the appropriate SQLite libraries. * ci: add GGUF model caching to workflow. Introduces a new 'download-model' job in the GitHub Actions workflow to cache and restore the GGUF model, reducing redundant downloads. Updates the Makefile to use GGUF model variables, simplifies test model handling, and ensures the test binary is built with the correct SQLite source. Adds the SQLite amalgamation for testing. * feat: add llm_context_used and llm_context_size functions to report context usage. Adds two functions: llm_context_size, which returns the total context size, and llm_context_used, which returns the number of tokens used.
1 parent 7e3b8c1 commit c3fd345

File tree

7 files changed

+256457
-14
lines changed

7 files changed

+256457
-14
lines changed

.github/workflows/main.yml

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,48 @@ permissions:
77
contents: write
88
id-token: write
99

10+
env:
11+
GGUF_MODEL_DIR: tests/models/unsloth/gemma-3-270m-it-GGUF
12+
GGUF_MODEL_NAME: gemma-3-270m-it-UD-IQ2_M.gguf
13+
GGUF_MODEL_URL: https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
14+
1015
jobs:
16+
download-model:
17+
outputs:
18+
cache-key: gguf-${{ steps.meta.outputs.hash }}
19+
model-path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
20+
name: Download GGUF model
21+
runs-on: ubuntu-22.04
22+
steps:
23+
- name: Compute model URL hash
24+
id: meta
25+
run: |
26+
if command -v sha256sum >/dev/null 2>&1; then
27+
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | sha256sum | cut -d' ' -f1)
28+
else
29+
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | shasum -a 256 | cut -d' ' -f1)
30+
fi
31+
echo "hash=$hash" >> "$GITHUB_OUTPUT"
32+
33+
- name: Prepare model directory
34+
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"
35+
36+
- name: Restore GGUF cache
37+
id: cache
38+
uses: actions/cache@v4
39+
with:
40+
path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
41+
key: gguf-${{ steps.meta.outputs.hash }}
42+
43+
- name: Download GGUF model
44+
if: steps.cache.outputs.cache-hit != 'true'
45+
run: |
46+
curl -L --fail --retry 3 "${{ env.GGUF_MODEL_URL }}" -o "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
47+
- name: Verify GGUF model
48+
run: test -f "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
49+
1150
build:
51+
needs: download-model
1252
runs-on: ${{ matrix.os }}
1353
container: ${{ matrix.container && matrix.container || '' }}
1454
name: ${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} build${{ matrix.arch != 'arm64-v8a' && matrix.name != 'ios-sim' && matrix.name != 'ios' && matrix.name != 'apple-xcframework' && matrix.name != 'android-aar' && ( matrix.name != 'macos' || matrix.arch != 'x86_64' ) && ' + test' || ''}}
@@ -48,12 +88,12 @@ jobs:
4888
arch: x86_64
4989
name: linux-musl-cpu
5090
container: alpine:latest
51-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
91+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
5292
- os: ubuntu-22.04
5393
arch: x86_64
5494
name: linux-musl-gpu
5595
container: alpine:latest
56-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
96+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
5797
- os: ubuntu-22.04-arm
5898
arch: arm64
5999
name: linux-musl-cpu
@@ -65,20 +105,20 @@ jobs:
65105
- os: windows-2022
66106
arch: x86_64
67107
name: windows-cpu
68-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
108+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
69109
- os: windows-2022
70110
arch: x86_64
71111
name: windows-gpu
72-
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
112+
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
73113
- os: ubuntu-22.04
74114
arch: x86_64
75115
name: android
76-
make: PLATFORM=android ARCH=x86_64
116+
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=x86_64
77117
sqlite-amalgamation-zip: https://sqlite.org/2025/sqlite-amalgamation-3490100.zip
78118
- os: ubuntu-22.04
79119
arch: arm64-v8a
80120
name: android
81-
make: PLATFORM=android ARCH=arm64-v8a
121+
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=arm64-v8a
82122
- os: macos-15
83123
name: ios
84124
make: PLATFORM=ios LLAMA="-DGGML_NATIVE=OFF -DGGML_METAL=ON -DGGML_ACCELERATE=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Apple" WHISPER="-DWHISPER_COREML=ON -DWHISPER_COREML_ALLOW_FALLBACK=ON"
@@ -106,6 +146,15 @@ jobs:
106146
with:
107147
submodules: true
108148

149+
- name: Prepare GGUF model directory
150+
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"
151+
152+
- name: Restore GGUF cache
153+
uses: actions/cache@v4
154+
with:
155+
path: ${{ needs.download-model.outputs.model-path }}
156+
key: ${{ needs.download-model.outputs.cache-key }}
157+
109158
- name: android setup java
110159
if: matrix.name == 'android-aar'
111160
uses: actions/setup-java@v4

API.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,35 @@ Frees the current inference context.
245245
SELECT llm_context_free();
246246
```
247247

248+
---
249+
## `llm_context_size()`
250+
251+
**Returns:** `INTEGER`
252+
253+
**Description:**
254+
Returns the total token capacity (context window) of the current llama context. Use this after `llm_context_create` to confirm the configured `context_size`. Raises an error if no context is active.
255+
256+
```sql
257+
SELECT llm_context_size();
258+
-- 4096
259+
```
260+
261+
---
262+
263+
## `llm_context_used()`
264+
265+
**Returns:** `INTEGER`
266+
267+
**Description:**
268+
Returns how many tokens of the current llama context have already been consumed. Combine this with `llm_context_size()` to monitor usage. Raises an error if no context is active.
269+
270+
**Example:**
271+
272+
```sql
273+
SELECT llm_context_used();
274+
-- 1024
275+
```
276+
248277
---
249278

250279
## `llm_sampler_create()`
@@ -546,6 +575,22 @@ SELECT llm_sampler_init_penalties(64, 1.2, 0.5, 0.8);
546575

547576
---
548577

578+
## `llm_token_count(text TEXT)`
579+
580+
**Returns:** `INTEGER`
581+
582+
**Description:**
583+
Returns how many tokens the current model would consume for the supplied `text`, using the active context’s vocabulary. Requires a context created via `llm_context_create`.
584+
585+
**Example:**
586+
587+
```sql
588+
SELECT llm_token_count('Hello world!');
589+
-- 5
590+
```
591+
592+
---
593+
549594
## `llm_embed_generate(text TEXT, options TEXT)`
550595

551596
**Returns:** `BLOB` or `TEXT`

Makefile

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ BUILD_LLAMA = $(BUILD_DIR)/llama.cpp
3737
BUILD_WHISPER = $(BUILD_DIR)/whisper.cpp
3838
BUILD_MINIAUDIO = $(BUILD_DIR)/miniaudio
3939

40+
# Test
41+
# gemma-3-270m-it-UD-IQ2_M.gguf is just a lightweight model to use for testing
42+
CTEST_BIN = $(BUILD_DIR)/tests/sqlite_ai_tests
43+
GGUF_MODEL_DIR ?= tests/models/unsloth/gemma-3-270m-it-GGUF
44+
GGUF_MODEL_NAME ?= gemma-3-270m-it-UD-IQ2_M.gguf
45+
GGUF_MODEL_URL ?= https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
46+
GGUF_MODEL_PATH := $(GGUF_MODEL_DIR)/$(GGUF_MODEL_NAME)
47+
SKIP_UNITTEST ?= 0
48+
4049
# Compiler and flags
4150
CC = gcc
4251
CXX = g++
@@ -55,6 +64,14 @@ LLAMA_LDFLAGS = -L./$(BUILD_LLAMA)/common -L./$(BUILD_GGML)/lib -L./$(BUILD_LLAM
5564
WHISPER_LDFLAGS = -L./$(BUILD_WHISPER)/src -lwhisper
5665
MINIAUDIO_LDFLAGS = -L./$(BUILD_MINIAUDIO) -lminiaudio -lminiaudio_channel_combiner_node -lminiaudio_channel_separator_node -lminiaudio_ltrim_node -lminiaudio_reverb_node -lminiaudio_vocoder_node
5766
LDFLAGS = $(LLAMA_LDFLAGS) $(WHISPER_LDFLAGS) $(MINIAUDIO_LDFLAGS)
67+
SQLITE_TEST_LIBS =
68+
ifneq ($(PLATFORM),windows)
69+
SQLITE_TEST_LIBS += -lpthread -lm
70+
ifneq ($(PLATFORM),macos)
71+
SQLITE_TEST_LIBS += -ldl
72+
endif
73+
endif
74+
SQLITE_TEST_SRC = tests/c/sqlite3.c
5875

5976
# Files
6077
SRC_FILES = $(wildcard $(SRC_DIR)/*.c)
@@ -210,8 +227,27 @@ endif
210227
$(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/llama.cpp.stamp
211228
$(CC) $(CFLAGS) -O3 -fPIC -c $< -o $@
212229

213-
test: $(TARGET)
214-
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
230+
$(CTEST_BIN): tests/c/unittest.c $(SQLITE_TEST_SRC)
231+
@mkdir -p $(dir $@)
232+
$(CC) -std=c11 -Wall -Wextra -DSQLITE_ENABLE_LOAD_EXTENSION -I$(SRC_DIR) tests/c/unittest.c $(SQLITE_TEST_SRC) -o $@ $(SQLITE_TEST_LIBS)
233+
234+
$(GGUF_MODEL_PATH):
235+
@mkdir -p $(GGUF_MODEL_DIR)
236+
curl -L --fail --retry 3 -o $@ $(GGUF_MODEL_URL)
237+
238+
TEST_DEPS := $(TARGET)
239+
ifeq ($(SKIP_UNITTEST),0)
240+
TEST_DEPS += $(CTEST_BIN) $(GGUF_MODEL_PATH)
241+
endif
242+
243+
test: $(TEST_DEPS)
244+
@echo "Running sqlite3 CLI smoke test (ensures .load works)..."
245+
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
246+
ifeq ($(SKIP_UNITTEST),0)
247+
$(CTEST_BIN) --extension "$(TARGET)" --model "$(GGUF_MODEL_PATH)"
248+
else
249+
@echo "Skipping C unit tests (SKIP_UNITTEST=$(SKIP_UNITTEST))."
250+
endif
215251

216252
# Build submodules
217253
ifeq ($(PLATFORM),windows)

src/sqlite-ai.c

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,7 @@ static bool llm_check_context (sqlite3_context *context) {
784784

785785
// MARK: - Chat Messages -
786786

787-
bool llm_messages_append (ai_messages *list, const char *role, const char *content, bool duplicate_content) {
787+
bool llm_messages_append (ai_messages *list, const char *role, const char *content) {
788788
if (list->count >= list->capacity) {
789789
size_t new_cap = list->capacity ? list->capacity * 2 : MIN_ALLOC_MESSAGES;
790790
llama_chat_message *new_items = sqlite3_realloc64(list->items, new_cap * sizeof(llama_chat_message));
@@ -796,7 +796,7 @@ bool llm_messages_append (ai_messages *list, const char *role, const char *conte
796796

797797
bool duplicate_role = ((role != ROLE_USER) && (role != ROLE_ASSISTANT));
798798
list->items[list->count].role = (duplicate_role) ? sqlite_strdup(role) : role;
799-
list->items[list->count].content = (duplicate_content) ? sqlite_strdup(content) : content;
799+
list->items[list->count].content = sqlite_strdup(content);
800800
list->count += 1;
801801
return true;
802802
}
@@ -1490,6 +1490,9 @@ static bool llm_chat_check_context (ai_context *ai) {
14901490
llama_sampler_chain_add(ai->sampler, llama_sampler_init_dist((uint32_t)LLAMA_DEFAULT_SEED));
14911491
}
14921492

1493+
// return early if the chat struct has already been initialized
1494+
if (ai->chat.uuid[0] != '\0') return true;
1495+
14931496
// create history structs
14941497
ai_uuid_v7_string(ai->chat.uuid, true);
14951498

@@ -1509,7 +1512,7 @@ static bool llm_chat_save_response (ai_context *ai, ai_messages *messages, const
15091512
char *response = ai->chat.response.data;
15101513
if (!response) return false;
15111514

1512-
if (!llm_messages_append(messages, ROLE_ASSISTANT, response, false)) {
1515+
if (!llm_messages_append(messages, ROLE_ASSISTANT, response)) {
15131516
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
15141517
return false;
15151518
}
@@ -1640,7 +1643,7 @@ static bool llm_chat_run (ai_context *ai, ai_cursor *c, const char *user_prompt)
16401643
buffer_t *formatted = &ai->chat.formatted;
16411644

16421645
// save prompt input in history
1643-
if (!llm_messages_append(messages, ROLE_USER, user_prompt, true)) {
1646+
if (!llm_messages_append(messages, ROLE_USER, user_prompt)) {
16441647
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append message");
16451648
return false;
16461649
}
@@ -1976,7 +1979,7 @@ static void llm_chat_restore (sqlite3_context *context, int argc, sqlite3_value
19761979
const char *role = (const char *)sqlite3_column_text(vm, 0);
19771980
const char *content = (const char *)sqlite3_column_text(vm, 1);
19781981

1979-
if (!llm_messages_append(messages, role, content, true)) {
1982+
if (!llm_messages_append(messages, role, content)) {
19801983
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
19811984
rc = SQLITE_OK;
19821985
goto abort_restore;
@@ -2369,6 +2372,27 @@ static void llm_context_create_textgen (sqlite3_context *context, int argc, sqli
23692372
llm_context_create_with_options(context, ai, options, options2);
23702373
}
23712374

2375+
static void llm_context_size (sqlite3_context *context, int argc, sqlite3_value **argv) {
2376+
ai_context *ai = (ai_context *)sqlite3_user_data(context);
2377+
if (!ai->ctx) {
2378+
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
2379+
return;
2380+
}
2381+
uint32_t n_ctx = llama_n_ctx(ai->ctx);
2382+
sqlite3_result_int(context, n_ctx);
2383+
}
2384+
2385+
static void llm_context_used (sqlite3_context *context, int argc, sqlite3_value **argv) {
2386+
ai_context *ai = (ai_context *)sqlite3_user_data(context);
2387+
if (!ai->ctx) {
2388+
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
2389+
return;
2390+
}
2391+
int32_t n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ai->ctx), 0) + 1;
2392+
if (n_ctx_used < 0) n_ctx_used = 0;
2393+
sqlite3_result_int(context, n_ctx_used);
2394+
}
2395+
23722396
static void llm_model_free (sqlite3_context *context, int argc, sqlite3_value **argv) {
23732397
ai_context *ai = (ai_context *)sqlite3_user_data(context);
23742398
ai_cleanup((void *)ai, true, false);
@@ -2707,6 +2731,12 @@ SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_a
27072731
rc = sqlite3_create_function(db, "llm_context_create", 1, SQLITE_UTF8, ctx, llm_context_create, NULL, NULL);
27082732
if (rc != SQLITE_OK) goto cleanup;
27092733

2734+
rc = sqlite3_create_function(db, "llm_context_size", 0, SQLITE_UTF8, ctx, llm_context_size, NULL, NULL);
2735+
if (rc != SQLITE_OK) goto cleanup;
2736+
2737+
rc = sqlite3_create_function(db, "llm_context_used", 0, SQLITE_UTF8, ctx, llm_context_used, NULL, NULL);
2738+
if (rc != SQLITE_OK) goto cleanup;
2739+
27102740
rc = sqlite3_create_function(db, "llm_context_create_embedding", 0, SQLITE_UTF8, ctx, llm_context_create_embedding, NULL, NULL);
27112741
if (rc != SQLITE_OK) goto cleanup;
27122742

src/sqlite-ai.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
extern "C" {
2525
#endif
2626

27-
#define SQLITE_AI_VERSION "0.7.57"
27+
#define SQLITE_AI_VERSION "0.7.58"
2828

2929
SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);
3030

0 commit comments

Comments
 (0)