-
Notifications
You must be signed in to change notification settings - Fork 0
Enable Local LLM Inference on Android #703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -554,11 +554,12 @@ async fn load_model(app: tauri::AppHandle, state: tauri::State<'_, SynapseState> | |
|
|
||
| #[cfg(feature = "cognition")] | ||
| #[tauri::command] | ||
| async fn infer(state: tauri::State<'_, SynapseState>, input: String) -> Result<String, String> { | ||
| // 1. Think (Cognitive Layer) - Note: We store user input AFTER thinking if using Interaction entity, | ||
| // or we could store partial. But Interaction requires both. | ||
| // For this architecture, we process then store the pair. | ||
|
|
||
| async fn infer( | ||
| state: tauri::State<'_, SynapseState>, | ||
| input: String, | ||
| on_token: tauri::ipc::Channel<String>, | ||
| ) -> Result<String, String> { | ||
| // 1. Enrich input with HiRAG | ||
| let enriched_input = { | ||
| let hirag_guard = state.hirag.lock().await; | ||
| if let Some(hirag) = &*hirag_guard { | ||
|
|
@@ -574,55 +575,60 @@ async fn infer(state: tauri::State<'_, SynapseState>, input: String) -> Result<S | |
| } | ||
| }; | ||
|
|
||
| let thought = { | ||
| // 2. Perform streaming inference | ||
| let mut full_response = String::new(); | ||
|
|
||
| let llm_adapter_opt = { | ||
| let cog_guard = state.cognition.lock().await; | ||
| if let Some(adapter) = &*cog_guard { | ||
| adapter.think(&enriched_input, "system").await.map_err(|e| e.to_string())? | ||
| if let Some(cog) = &*cog_guard { | ||
| // We need access to the underlying LLM adapter for streaming | ||
| // CandleCognitiveAdapter currently only exposes 'think' | ||
| // For streaming, we'll try to get it directly from metabolism or just use the trait. | ||
| // For now, let's assume we can get it from the state. | ||
| None // Placeholder: we'll use metabolism's LLM | ||
| } else { | ||
| // Fallback to mock/logic engine if no model is loaded | ||
| let adapter = CandleCognitiveAdapter::new(None, None).map_err(|e| e.to_string())?; | ||
| adapter.think(&enriched_input, "system").await.map_err(|e| e.to_string())? | ||
| None | ||
| } | ||
| }; | ||
|
|
||
| // 2. Award Karma (Proof of Sentience: Chat Interaction) | ||
| let meta_guard = state.metabolism.lock().await; | ||
| if let Some(meta) = &*meta_guard { | ||
| let llm = meta.llm_port(); | ||
| let mut stream = llm.generate_stream(&enriched_input, 512).await.map_err(|e| e.to_string())?; | ||
|
|
||
| while let Some(token_res) = stream.next().await { | ||
| let token = token_res.map_err(|e| e.to_string())?; | ||
| full_response.push_str(&token); | ||
| let _ = on_token.send(token); | ||
| } | ||
|
Comment on lines
+599
to
+603
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

**Stop streaming when the UI channel is closed.** Line 602 discards channel send failures. If the frontend disconnects, generation keeps running pointlessly. Break the loop on a send error to save compute and battery.

Suggested fix:
- let _ = on_token.send(token);
+ if on_token.send(token).is_err() {
+     break;
+ }

🤖 Prompt for AI Agents |
||
| } else { | ||
| return Err("Metabolism not initialized".to_string()); | ||
| } | ||
|
|
||
| // 3. Award Karma (Proof of Sentience: Chat Interaction) | ||
| { | ||
| let tok_guard = state.tokenomics.lock().await; | ||
| if let Some(service) = &*tok_guard { | ||
| // Async reward, don't block thought return significantly | ||
| let _ = service.award_karma("0xSynapseUserPrototype", synapse_core::tokenomics::structs::ActionType::ChatInteraction).await; | ||
| } | ||
| } | ||
|
|
||
| // 3. Store in Metabolism (Short Term Buffer) | ||
| let meta_guard = state.metabolism.lock().await; | ||
| // 4. Store in Metabolism (Short Term Buffer) | ||
| if let Some(meta) = &*meta_guard { | ||
| let interaction = Interaction { | ||
| id: Uuid::new_v4().to_string(), | ||
| user_input: input.clone(), | ||
| ai_response: thought.content.clone(), | ||
| ai_response: full_response.clone(), | ||
| timestamp: Utc::now().timestamp(), | ||
| session_id: "default".to_string(), | ||
| processed: false, | ||
| }; | ||
| if let Err(e) = meta.push_interaction(interaction).await { | ||
| println!("Failed to push to metabolism: {}", e); | ||
| } | ||
| } else { | ||
| // Fallback: Legacy Direct Storage | ||
| let memory_guard = state.memory.lock().await; | ||
| if let Some(mem) = &*memory_guard { | ||
| let mut node = MemoryNode::new(input.clone()); | ||
| node.source = "user".to_string(); | ||
| mem.store(node).await.ok(); | ||
|
|
||
| let mut node_ai = MemoryNode::new(thought.content.clone()); | ||
| node_ai.source = "ai".to_string(); | ||
| mem.store(node_ai).await.ok(); | ||
| } | ||
| } | ||
|
Comment on lines
+578
to
629
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

This block has a couple of issues: the `llm_adapter_opt` value is computed but never used (both branches produce `None`, so it is dead placeholder code), and the `metabolism` lock is acquired for the entire streaming loop and is still held through the karma-award and storage steps that follow, which is a much wider scope than necessary.
Here's a suggested refactoring that addresses both points by removing the unused code and scoping the locks correctly: // 2. Perform streaming inference
let mut full_response = String::new();
let llm = {
let meta_guard = state.metabolism.lock().await;
if let Some(meta) = &*meta_guard {
meta.llm_port()
} else {
return Err("Metabolism not initialized".to_string());
}
};
let mut stream = llm.generate_stream(&enriched_input, 512).await.map_err(|e| e.to_string())?;
while let Some(token_res) = stream.next().await {
let token = token_res.map_err(|e| e.to_string())?;
full_response.push_str(&token);
let _ = on_token.send(token);
}
// 3. Award Karma (Proof of Sentience: Chat Interaction)
{
let tok_guard = state.tokenomics.lock().await;
if let Some(service) = &*tok_guard {
let _ = service.award_karma("0xSynapseUserPrototype", synapse_core::tokenomics::structs::ActionType::ChatInteraction).await;
}
}
// 4. Store in Metabolism (Short Term Buffer)
{
let meta_guard = state.metabolism.lock().await;
if let Some(meta) = &*meta_guard {
let interaction = Interaction {
id: Uuid::new_v4().to_string(),
user_input: input.clone(),
ai_response: full_response.clone(),
timestamp: Utc::now().timestamp(),
session_id: "default".to_string(),
processed: false,
};
if let Err(e) = meta.push_interaction(interaction).await {
println!("Failed to push to metabolism: {}", e);
}
}
} |
||
|
|
||
| Ok(thought.content) | ||
| Ok(full_response) | ||
| } | ||
|
|
||
| #[tauri::command] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
**`infer` currently bypasses the runtime-loaded cognition model.** Line 588 hardcodes `None`, and Line 596 then always sources the LLM from `metabolism`. That makes the model loaded by `load_model` effectively unused for inference streaming.

Also applies to: lines 594-597
🤖 Prompt for AI Agents