Skip to content

Commit 6e2fa75

Browse files
committed
Add managed local embedding helper
1 parent 273ec40 commit 6e2fa75

24 files changed

Lines changed: 1397 additions & 59 deletions

AGENTS.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,16 @@ The most important implementation seams are:
262262
- Semantic query embeddings must use a real provider. QMD uses a local GGUF
263263
embedding model with explicit query/document formatting; deterministic hash
264264
embeddings are acceptable only as test doubles, never as product behavior.
265+
- Pure local embedding model work means downloading and caching the GGUF
266+
artifact itself. Do not substitute Ollama or another local service shim when
267+
the requirement is "pure GGUF".
268+
- Local GGUF embeddings are an explicit provider override using a managed Node
269+
helper with `node-llama-cpp`, matching QMD's model lifecycle. Do not swap this
270+
back to per-call `llama-embedding` process invocations.
265271
- Semantic query provider/model settings are global runtime settings, not
266272
workspace config. Embeddings should be treated as on by default; if provider
267-
credentials are missing, return/report unavailable status without hard-failing
268-
normal query flows.
273+
runtime dependencies are missing, return/report unavailable status without
274+
hard-failing normal query flows.
269275
- `AFS_EMBED_MODEL`, `AFS_EMBED_PROVIDER`, `AFS_EMBED_DIMENSIONS`, and
270276
`OPENAI_API_KEY` are read by the control-plane process. CLI help and
271277
troubleshooting copy must not imply that setting them only on an `afs query`

README.md

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,12 +216,16 @@ If you want to search workspace contents:
216216
currently falls back to keyword-ranked results until hybrid vector/rerank is
217217
complete. Keyword ranking uses RedisSearch BM25 query chunks when available,
218218
then falls back to direct content ranking. Use `query --semantic` for
219-
vector-only retrieval. Semantic embeddings are globally enabled and use OpenAI
220-
when `OPENAI_API_KEY` is available in the control-plane environment. Override
221-
the default `openai:text-embedding-3-small` model with `AFS_EMBED_MODEL` in the
222-
same environment, then restart the control plane. Semantic queries read
223-
existing embedding indexes; imports start embedding creation in the background,
224-
and existing workspaces can be prepared with
219+
vector-only retrieval. Semantic embeddings are globally enabled and default to
220+
OpenAI when `OPENAI_API_KEY` is set in the control-plane environment. For the
221+
explicit local provider, `afs query model download` asks the control plane to
222+
resolve, download, and load
223+
`hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf` through the
224+
same `node-llama-cpp` helper used for local indexing. On AFS Cloud, only an
225+
admin identity can trigger the server-side local model warm-up. Override model
226+
selection with `AFS_EMBED_MODEL` in the control-plane environment, then restart
227+
the control plane. Semantic queries read existing embedding indexes; imports start
228+
embedding creation in the background, and existing workspaces can be prepared with
225229
`afs fs <workspace> query index create --embeddings --wait`.
226230

227231
If you want commands with an optional workspace argument to use `my-repo` by

cmd/afs/afs_commands_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,14 @@ func (s stubAFSControlPlane) RebuildQueryIndex(context.Context, string, controlp
138138
return controlplane.WorkspaceQueryIndexRebuildResponse{}, fmt.Errorf("unexpected RebuildQueryIndex call")
139139
}
140140

141+
func (s stubAFSControlPlane) QueryModelStatus(context.Context, controlplane.QueryModelStatusRequest) (controlplane.QueryModelStatus, error) {
142+
return controlplane.QueryModelStatus{}, fmt.Errorf("unexpected QueryModelStatus call")
143+
}
144+
145+
func (s stubAFSControlPlane) DownloadQueryModel(context.Context, controlplane.QueryModelDownloadRequest) (controlplane.QueryModelDownloadResult, error) {
146+
return controlplane.QueryModelDownloadResult{}, fmt.Errorf("unexpected DownloadQueryModel call")
147+
}
148+
141149
func (s stubAFSControlPlane) DiffWorkspace(context.Context, string, string, string) (controlplane.WorkspaceDiffResponse, error) {
142150
return controlplane.WorkspaceDiffResponse{}, fmt.Errorf("unexpected DiffWorkspace call")
143151
}

cmd/afs/afs_query_commands.go

Lines changed: 151 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ func runWorkspaceQuery(mode, workspace string, args []string) error {
6868
if mode == mcptools.FileQueryModeHybrid && isWorkspaceQueryIndexInvocation(args) {
6969
return runWorkspaceQueryIndex(workspace, args[1:])
7070
}
71+
if mode == mcptools.FileQueryModeHybrid && workspace == "" && isWorkspaceQueryModelInvocation(args) {
72+
return runWorkspaceQueryModel(args[1:])
73+
}
7174
opts, err := parseWorkspaceQueryArgs(mode, args)
7275
if err != nil {
7376
return err
@@ -84,6 +87,121 @@ func runWorkspaceQuery(mode, workspace string, args []string) error {
8487
return runWorkspaceQueryRequest(ctx, remote, opts, request)
8588
}
8689

90+
func isWorkspaceQueryModelInvocation(args []string) bool {
91+
if len(args) == 0 || args[0] != "model" {
92+
return false
93+
}
94+
if len(args) == 1 || isHelpArg(args[1]) {
95+
return true
96+
}
97+
switch strings.TrimSpace(args[1]) {
98+
case "status", "download":
99+
return true
100+
default:
101+
return false
102+
}
103+
}
104+
105+
func runWorkspaceQueryModel(args []string) error {
106+
if len(args) == 0 || isHelpArg(args[0]) {
107+
fmt.Fprint(os.Stderr, workspaceQueryModelUsageText(filepath.Base(os.Args[0])))
108+
return nil
109+
}
110+
switch strings.TrimSpace(args[0]) {
111+
case "status":
112+
return runWorkspaceQueryModelStatus(args[1:])
113+
case "download":
114+
return runWorkspaceQueryModelDownload(args[1:])
115+
default:
116+
return fmt.Errorf("unknown query model subcommand %q\n\n%s", args[0], workspaceQueryModelUsageText(filepath.Base(os.Args[0])))
117+
}
118+
}
119+
120+
func runWorkspaceQueryModelStatus(args []string) error {
121+
fs := flag.NewFlagSet("query model status", flag.ContinueOnError)
122+
fs.SetOutput(io.Discard)
123+
var jsonOut bool
124+
var model string
125+
fs.BoolVar(&jsonOut, "json", false, "write JSON output")
126+
fs.StringVar(&model, "model", "", "local model id")
127+
if err := fs.Parse(args); err != nil || fs.NArg() != 0 {
128+
return fmt.Errorf("%s", workspaceQueryModelUsageText(filepath.Base(os.Args[0])))
129+
}
130+
ctx := context.Background()
131+
_, service, closeFn, err := openAFSControlPlane(ctx)
132+
if err != nil {
133+
return err
134+
}
135+
defer closeFn()
136+
status, err := service.QueryModelStatus(ctx, controlplane.QueryModelStatusRequest{Model: model})
137+
if err != nil {
138+
return workspaceQueryModelControlPlaneError(err)
139+
}
140+
if jsonOut {
141+
enc := json.NewEncoder(os.Stdout)
142+
enc.SetIndent("", " ")
143+
return enc.Encode(status)
144+
}
145+
fmt.Fprintln(os.Stdout, "Query model")
146+
fmt.Fprintln(os.Stdout)
147+
fmt.Fprintf(os.Stdout, "model %s\n", status.Spec.ID)
148+
fmt.Fprintf(os.Stdout, "cache_dir %s\n", status.CacheDir)
149+
fmt.Fprintf(os.Stdout, "path %s\n", status.Path)
150+
fmt.Fprintf(os.Stdout, "downloaded %t\n", status.Exists)
151+
if status.SizeBytes > 0 {
152+
fmt.Fprintf(os.Stdout, "size %s\n", formatBytes(status.SizeBytes))
153+
}
154+
return nil
155+
}
156+
157+
func runWorkspaceQueryModelDownload(args []string) error {
158+
fs := flag.NewFlagSet("query model download", flag.ContinueOnError)
159+
fs.SetOutput(io.Discard)
160+
var jsonOut bool
161+
var model string
162+
fs.BoolVar(&jsonOut, "json", false, "write JSON output")
163+
fs.StringVar(&model, "model", "", "local model id")
164+
if err := fs.Parse(args); err != nil || fs.NArg() != 0 {
165+
return fmt.Errorf("%s", workspaceQueryModelUsageText(filepath.Base(os.Args[0])))
166+
}
167+
ctx := context.Background()
168+
_, service, closeFn, err := openAFSControlPlane(ctx)
169+
if err != nil {
170+
return err
171+
}
172+
defer closeFn()
173+
if !jsonOut {
174+
fmt.Fprintln(os.Stderr, "Resolving local embedding model on the control plane...")
175+
}
176+
result, err := service.DownloadQueryModel(ctx, controlplane.QueryModelDownloadRequest{Model: model})
177+
if err != nil {
178+
return workspaceQueryModelControlPlaneError(err)
179+
}
180+
if jsonOut {
181+
enc := json.NewEncoder(os.Stdout)
182+
enc.SetIndent("", " ")
183+
return enc.Encode(result)
184+
}
185+
fmt.Fprintln(os.Stdout, "Query model")
186+
fmt.Fprintln(os.Stdout)
187+
fmt.Fprintf(os.Stdout, "model %s\n", result.Spec.ID)
188+
fmt.Fprintf(os.Stdout, "cache_dir %s\n", result.CacheDir)
189+
fmt.Fprintf(os.Stdout, "path %s\n", result.Path)
190+
fmt.Fprintf(os.Stdout, "cached %t\n", result.Exists)
191+
fmt.Fprintf(os.Stdout, "resolved %t\n", result.Downloaded || result.Exists)
192+
if result.SizeBytes > 0 {
193+
fmt.Fprintf(os.Stdout, "size %s\n", formatBytes(result.SizeBytes))
194+
}
195+
return nil
196+
}
197+
198+
// workspaceQueryModelControlPlaneError translates errors returned by the query
// model control-plane calls.
//
// An error matching os.ErrNotExist is replaced by a fixed message telling the
// user to rebuild and restart the control plane; the replacement does not wrap
// the original error. Every other value, including nil, is returned unchanged.
func workspaceQueryModelControlPlaneError(err error) error {
	if !errors.Is(err, os.ErrNotExist) {
		return err
	}
	return fmt.Errorf("query model routes are not available on this control plane; rebuild and restart afs-control-plane")
}
204+
87205
func isWorkspaceQueryIndexInvocation(args []string) bool {
88206
if len(args) == 0 || args[0] != "index" {
89207
return false
@@ -726,16 +844,18 @@ func workspaceQueryUsageText(bin, mode string) string {
726844
return brandHeaderString() + fmt.Sprintf(`Usage:
727845
%s query [flags] <query>
728846
%s fs [workspace] query [flags] <query>
729-
%s query index <status|rebuild|clean> [flags]
847+
%s query index <status|create|rebuild|clean> [flags]
848+
%s query model <status|download> [flags]
730849
731850
QMD-style hybrid + rerank workspace query.
732851
Plain text runs hybrid retrieval by default. Use --keyword for keyword-ranked
733852
retrieval only, or --semantic for vector-only semantic search.
734853
735854
Default query currently falls back to keyword ranked results until hybrid
736855
vector/rerank is complete. Use --semantic for vector-only retrieval. Semantic
737-
embeddings are globally enabled and use OpenAI when OPENAI_API_KEY is set in
738-
the control-plane environment.
856+
embeddings are globally enabled and default to OpenAI when OPENAI_API_KEY is set
857+
in the control-plane environment. Set AFS_EMBED_PROVIDER=local to use the local
858+
GGUF helper.
739859
Use grep when you know the exact text.
740860
741861
Typed query documents:
@@ -770,6 +890,33 @@ Examples:
770890
%s query --keyword "checkpoint savepoint"
771891
%s query --semantic "how do I save a snapshot?"
772892
%s query index status
893+
%s query model download
773894
%s fs repo query $'lex: checkpoint\nvec: how do I save a snapshot?'
774-
`, bin, bin, bin, bin, bin, bin, bin, bin)
895+
`, bin, bin, bin, bin, bin, bin, bin, bin, bin, bin)
896+
}
897+
898+
func workspaceQueryModelUsageText(bin string) string {
899+
return brandHeaderString() + fmt.Sprintf(`Usage:
900+
%[1]s query model <status|download> [flags]
901+
902+
Manage the control-plane global local GGUF embedding model cache.
903+
904+
Subcommands:
905+
status Show the configured local model and expected cache path
906+
download Ask the Node helper to resolve/download/load the model now
907+
908+
Flags:
909+
--model <model> Model id, default hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf
910+
--json Write JSON output
911+
912+
Environment:
913+
AFS_EMBED_MODEL_DIR Control-plane cache directory override
914+
AFS_EMBED_HELPER_CMD Control-plane Node.js command override
915+
AFS_NODE_LLAMA_CPP_MODULE
916+
node-llama-cpp module specifier override
917+
918+
Examples:
919+
%[1]s query model status
920+
%[1]s query model download
921+
`, bin)
775922
}

cmd/afs/afs_query_commands_test.go

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"net/http"
77
"net/http/httptest"
88
"os"
9+
"path/filepath"
910
"strings"
1011
"testing"
1112

@@ -124,6 +125,40 @@ func TestWorkspaceQueryIndexInvocationIncludesCreate(t *testing.T) {
124125
}
125126
}
126127

128+
func TestWorkspaceQueryModelInvocationAndStatus(t *testing.T) {
129+
for _, args := range [][]string{
130+
{"model"},
131+
{"model", "status"},
132+
{"model", "download"},
133+
} {
134+
if !isWorkspaceQueryModelInvocation(args) {
135+
t.Fatalf("isWorkspaceQueryModelInvocation(%#v) = false, want true", args)
136+
}
137+
}
138+
if isWorkspaceQueryModelInvocation([]string{"model", "files"}) {
139+
t.Fatal("isWorkspaceQueryModelInvocation(model files) = true, want natural query")
140+
}
141+
142+
_, _, closeStore := setupAFSGrepTest(t)
143+
defer closeStore()
144+
t.Setenv("AFS_EMBED_MODEL_DIR", t.TempDir())
145+
output, err := captureStdout(t, func() error {
146+
return cmdQuery([]string{"query", "model", "status"})
147+
})
148+
if err != nil {
149+
t.Fatalf("cmdQuery(model status) returned error: %v", err)
150+
}
151+
for _, want := range []string{
152+
"Query model",
153+
"hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf",
154+
"downloaded false",
155+
} {
156+
if !strings.Contains(output, want) {
157+
t.Fatalf("output = %q, missing %q", output, want)
158+
}
159+
}
160+
}
161+
127162
func TestParseWorkspaceQueryArgsRejectsModeFlagsWithTypedDocuments(t *testing.T) {
128163
_, err := parseWorkspaceQueryArgs(mcptools.FileQueryModeHybrid, []string{
129164
"--semantic",
@@ -151,8 +186,9 @@ func TestParseWorkspaceQueryArgsRejectsKeywordAndSemanticTogether(t *testing.T)
151186
}
152187
}
153188

154-
func TestCmdQuerySemanticMissingKeyFailsClearly(t *testing.T) {
155-
t.Setenv("OPENAI_API_KEY", "")
189+
func TestCmdQuerySemanticMissingLocalHelperFailsClearly(t *testing.T) {
190+
t.Setenv("AFS_EMBED_PROVIDER", "local")
191+
t.Setenv("AFS_EMBED_HELPER_CMD", filepath.Join(t.TempDir(), "missing-node"))
156192
_, _, closeStore := setupAFSGrepTest(t)
157193
defer closeStore()
158194

@@ -162,8 +198,8 @@ func TestCmdQuerySemanticMissingKeyFailsClearly(t *testing.T) {
162198
if err == nil {
163199
t.Fatal("cmdQuery(--semantic) returned nil error, want unavailable")
164200
}
165-
if !strings.Contains(err.Error(), "OPENAI_API_KEY") {
166-
t.Fatalf("error = %q, want OPENAI_API_KEY guidance", err)
201+
if !strings.Contains(err.Error(), "local embedding helper runtime") {
202+
t.Fatalf("error = %q, want local helper guidance", err)
167203
}
168204
}
169205

@@ -643,7 +679,7 @@ func TestCmdQueryIndexStatusReportsGlobalEmbeddingStatus(t *testing.T) {
643679
if status.Workspace != "repo" || status.Keyword.Files != 0 {
644680
t.Fatalf("status = %+v, want empty repo keyword status", status)
645681
}
646-
if !status.Embeddings.Enabled || status.Embeddings.Model != "openai:text-embedding-3-small" || status.Embeddings.Available {
682+
if !status.Embeddings.Enabled || status.Embeddings.Provider != "openai" || status.Embeddings.Model != "openai:text-embedding-3-small" || status.Embeddings.Available {
647683
t.Fatalf("embedding status = %+v, want global OpenAI unavailable without key", status)
648684
}
649685
}
@@ -844,6 +880,7 @@ func TestCmdQueryContractCoversHybridFallbacksAndIndexDisambiguation(t *testing.
844880
})
845881

846882
t.Run("semantic JSON reports unavailable without failing command", func(t *testing.T) {
883+
t.Setenv("AFS_EMBED_PROVIDER", "")
847884
t.Setenv("OPENAI_API_KEY", "")
848885
output, err := captureStdout(t, func() error {
849886
return cmdQuery([]string{"query", "--semantic", "--json", "workspace recovery"})

cmd/afs/backend.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ type afsControlPlane interface {
144144
QueryWorkspace(ctx context.Context, workspace string, request mcptools.FileQueryRequest) (mcptools.FileQueryResponse, error)
145145
QueryIndexStatus(ctx context.Context, workspace string, request controlplane.WorkspaceQueryIndexStatusRequest) (controlplane.WorkspaceQueryIndexStatus, error)
146146
RebuildQueryIndex(ctx context.Context, workspace string, request controlplane.WorkspaceQueryIndexRebuildRequest) (controlplane.WorkspaceQueryIndexRebuildResponse, error)
147+
QueryModelStatus(ctx context.Context, request controlplane.QueryModelStatusRequest) (controlplane.QueryModelStatus, error)
148+
DownloadQueryModel(ctx context.Context, request controlplane.QueryModelDownloadRequest) (controlplane.QueryModelDownloadResult, error)
147149
DiffWorkspace(ctx context.Context, workspace, baseView, headView string) (controlplane.WorkspaceDiffResponse, error)
148150
RestoreCheckpoint(ctx context.Context, workspace, checkpointID string) error
149151
SaveCheckpoint(ctx context.Context, input controlplane.SaveCheckpointRequest) (bool, error)

cmd/afs/controlplane_http_client.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,26 @@ func (c *httpControlPlaneClient) RebuildQueryIndex(ctx context.Context, workspac
416416
return out, err
417417
}
418418

419+
func (c *httpControlPlaneClient) QueryModelStatus(ctx context.Context, request controlplane.QueryModelStatusRequest) (controlplane.QueryModelStatus, error) {
420+
params := url.Values{}
421+
if strings.TrimSpace(request.Model) != "" {
422+
params.Set("model", strings.TrimSpace(request.Model))
423+
}
424+
rel := "/v1/query/model/status"
425+
if encoded := params.Encode(); encoded != "" {
426+
rel += "?" + encoded
427+
}
428+
var out controlplane.QueryModelStatus
429+
err := c.doJSON(ctx, http.MethodGet, rel, nil, &out, http.StatusOK)
430+
return out, err
431+
}
432+
433+
func (c *httpControlPlaneClient) DownloadQueryModel(ctx context.Context, request controlplane.QueryModelDownloadRequest) (controlplane.QueryModelDownloadResult, error) {
434+
var out controlplane.QueryModelDownloadResult
435+
err := c.doJSONWithClient(ctx, c.queryer, http.MethodPost, "/v1/query/model/download", request, &out, http.StatusOK)
436+
return out, err
437+
}
438+
419439
func (c *httpControlPlaneClient) DiffWorkspace(ctx context.Context, workspace, baseView, headView string) (controlplane.WorkspaceDiffResponse, error) {
420440
params := url.Values{}
421441
if strings.TrimSpace(baseView) != "" {

0 commit comments

Comments
 (0)