Commit fd3466b

Merge pull request #129 from snexus/feature/bump-version
This PR updates package dependencies to uv-based management, removes support for llama-cpp, and introduces an MCP server for semantic search and RAG answer operations:

* Dependency upgrades and configuration updates
* Removal of llama-cpp support and introduction of an MCP server
* Updates to configuration and cache handling throughout the codebase
2 parents 13927a9 + 864c22c commit fd3466b

20 files changed: +6379 -418 lines

.flake8

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 119

.github/workflows/release.yml

Lines changed: 3 additions & 3 deletions
@@ -8,7 +8,7 @@ jobs:
     name: Build source distribution
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4

       - uses: actions/[email protected]
         with:
@@ -22,7 +22,7 @@ jobs:
       - name: Run build
         run: python -m build

-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
           path: ./dist/*

@@ -34,7 +34,7 @@ jobs:
     runs-on: ubuntu-latest

     steps:
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
           name: artifact
           path: ./dist

README.md

Lines changed: 9 additions & 6 deletions
@@ -4,10 +4,13 @@

 [Documentation](https://llm-search.readthedocs.io/en/latest/)

-The purpose of this package is to offer a convenient question-answering (RAG) system with a simple YAML-based configuration that enables interaction with multiple collections of local documents. Special attention is given to improvements in various components of the system **in addition to basic LLM-based RAGs** - better document parsing, hybrid search, HyDE enabled search, chat history, deep linking, re-ranking, the ability to customize embeddings, and more. The package is designed to work with custom Large Language Models (LLMs) – whether from OpenAI or installed locally.
+The purpose of this package is to offer an advanced question-answering (RAG) system with a simple YAML-based configuration that enables interaction with a collection of local documents. Special attention is given to improvements in various components of the system **in addition to basic LLM-based RAGs** - better document parsing, hybrid search, HyDE, chat history, deep linking, re-ranking, the ability to customize embeddings, and more. The package is designed to work with custom Large Language Models (LLMs) – whether from OpenAI or installed locally.
+
+Interaction with the package is supported through the built-in frontend, or by exposing an MCP server, allowing clients like Cursor, Windsurf or VSCode GH Copilot to interact with the RAG system.

 ## Features

+* Fast, incremental parsing and embedding of medium-size document bases (tested on up to a few gigabytes of markdown and PDFs)
 * Supported document formats
   * Built-in parsers:
     * `.md` - Divides files based on logical components such as headings, subheadings, and code blocks. Supports additional features like cleaning image links, adding custom metadata, and more.
@@ -17,12 +20,13 @@ The purpose of this package is to offer a convenient question-answering (RAG) sy
   * For the list of formats, see [here](https://unstructured-io.github.io/unstructured/core/partition.html).

 * Allows interaction with embedded documents, internally supporting the following models and methods (including locally hosted):
-  * OpenAI models (ChatGPT 3.5/4 and Azure OpenAI).
+  * OpenAI compatible models and APIs.
   * HuggingFace models.
-  * Llama cpp supported models - for full list see [here](https://github.com/ggerganov/llama.cpp#description).

 * Interoperability with LiteLLM + Ollama via OpenAI API, supporting hundreds of different models (see [Model configuration for LiteLLM](sample_templates/llm/litellm.yaml))

+* SSE MCP Server enabling interface with popular MCP clients.
+
 * Generates dense embeddings from a folder of documents and stores them in a vector database ([ChromaDB](https://github.com/chroma-core/chroma)).
   * The following embedding models are supported:
     * Hugging Face embeddings.
@@ -50,12 +54,11 @@ The purpose of this package is to offer a convenient question-answering (RAG) sy

 * Supports optional chat history with question contextualization

-
 * Other features
-  * Simple CLI and web interfaces.
+  * Simple web interfaces.
   * Deep linking into document sections - jump to an individual PDF page or a header in a markdown file.
   * Ability to save responses to an offline database for future analysis.
-  * Experimental API
+  * FastAPI based API + MCP server, allowing interaction from MCP clients.


 ## Demo
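
The "FastAPI based API + MCP server" bullet above is only a one-liner, so here is a brief sketch (not part of the commit) of how the REST side could be inspected once the server is running. The endpoint paths are not shown in this diff, but `/openapi.json` is the default schema route for any FastAPI app, so the routes registered by the server can be discovered from it; the host and port below are assumptions based on the usage docs further down.

```python
# A minimal sketch, not from this commit: assumes a running server on
# localhost:8000. FastAPI serves its OpenAPI schema at /openapi.json by
# default; the interactive /docs page renders this same schema.
import requests

schema = requests.get("http://localhost:8000/openapi.json").json()

# List every route and the HTTP methods it supports.
for path, operations in schema.get("paths", {}).items():
    print(path, "->", ", ".join(op.upper() for op in operations))
```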

docker/Dockerfile

Lines changed: 0 additions & 81 deletions
This file was deleted.

docker/entrypoint.sh

Lines changed: 0 additions & 27 deletions
This file was deleted.

docs/installation.rst

Lines changed: 12 additions & 20 deletions
@@ -8,6 +8,7 @@ Prerequisites
 * Tested with CUDA 11.8 to 12.4 - https://developer.nvidia.com/cuda-toolkit
 * To interact with OpenAI models, create `.env` in the root directory of the repository, containing the OpenAI API key. A template for the `.env` file is provided in `.env_template`
 * For parsing `.epub` documents, Pandoc is required - https://pandoc.org/installing.html
+* `uv` - https://github.com/astral-sh/uv#installation


@@ -16,21 +17,16 @@ Install Latest Version

 .. code-block:: bash
-
     # Create a new environment
-    python3 -m venv .venv
+    uv venv

     # Activate new environment
     source .venv/bin/activate

-    # Install packages using pip
-    pip install pyllmsearch
-
     # Optional dependencies for Azure parser
-    pip install "pyllmsearch[azureparser]"
+    uv pip install "pyllmsearch[azureparser]"

     # Preferred method (much faster) - install packages using uv
-    pip install uv
     uv pip install pyllmsearch

@@ -45,20 +41,16 @@ Install from source

     git clone https://github.com/snexus/llm-search.git
     cd llm-search
+    # Create a new environment
+    uv venv
+    # Activate new environment
+    source .venv/bin/activate
+    # Install packages using uv

-    # Optional - Set variables for llama-cpp to compile with CUDA.
-    # Assuming Nvidia CUDA Toolkit is installed and pointing to `usr/local/cuda` on Ubuntu
-
-    source ./setvars.sh
-
-    # Optional - Install newest stable torch for CUDA 11.x
-    # pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu118
-
-    # or for CUDA 12.x version
-    # pip3 install torch torchvision
+    uv sync

-    # Install the package
-    pip install . # or `pip install -e .` for development
+    # Optional - install in development mode
+    uv pip install -e .

     # For Azure parser, install with optional dependencies
-    pip install ."[azureparser]"
+    uv pip install ."[azureparser]"

docs/usage.rst

Lines changed: 7 additions & 4 deletions
@@ -46,11 +46,14 @@ To interact with the documents using one of the supported LLMs, follow these ste
 Here `path/to/config/folder` points to a folder with one or more document config files. The tool scans the configs and allows switching between them.


-API (Experimental)
------------------
+API and MCP Server
+------------------

-To launch an api, supply a path config file in the `FASTAPI_LLM_CONFIG` environment variable and launch `llmsearchapi`
+To launch the FastAPI/MCP server, supply a path to the semantic search config file in `FASTAPI_RAG_CONFIG` and a path to the LLM config in `FASTAPI_LLM_CONFIG`, then launch `llmsearchapi`:

 .. code-block:: bash

-    FASTAPI_LLM_CONFIG="/path/to/config.yaml" llmsearchapi
+    FASTAPI_RAG_CONFIG="/path/to/config.yaml" FASTAPI_LLM_CONFIG="/path/to/llm.yaml" llmsearchapi
+
+1. The API server will be available at `http://localhost:8000/docs` and can be used to interact with the documents using the LLMs.
+2. The MCP server will be available at `http://localhost:8000/mcp` and can be used from any MCP client that supports SSE servers, pointed at that URL.
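
The commit adds only the server side. As a client-side illustration, here is a minimal sketch of connecting to the SSE endpoint above, assuming the official MCP Python SDK (the `mcp` package on PyPI); the tool names the server registers are not visible in this diff, so the sketch simply lists whatever the server advertises rather than calling a specific tool.

```python
# A minimal sketch (not from this commit), assuming the official MCP Python
# SDK ("mcp" on PyPI) and the SSE endpoint documented above.
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main() -> None:
    # Open an SSE transport to the MCP endpoint exposed by llmsearchapi.
    async with sse_client("http://localhost:8000/mcp") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Discover the semantic search / RAG answer tools the server exposes.
            tools = await session.list_tools()
            for tool in tools.tools:
                print(tool.name, "-", tool.description)


asyncio.run(main())
```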

pyproject.toml

Lines changed: 55 additions & 4 deletions
@@ -5,17 +5,43 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pyllmsearch"
 description = "LLM Powered Advanced RAG Application"
-dynamic = ["dependencies", "version"]
+# dynamic = ["dependencies", "version"]
+dynamic = ["version"]
 keywords = ["llm", "rag", "retrieval-augemented-generation","large-language-models", "local", "splade", "hyde", "reranking", "chroma", "openai"]
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 classifiers = [
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
+dependencies = [
+    "langchain-community>=0.3.22",
+    "langchain>=0.3.24",
+    "langchain-huggingface>=0.1.2",
+    "langchain-chroma>=0.2.3",
+    "python-dotenv>=1.1.0",
+    "loguru>=0.7.3",
+    "click>=8.1.8",
+    "openai>=1.76.0",
+    "streamlit>=1.44.1",
+    "tenacity>=9.1.2",
+    "tqdm>=4.67.1",
+    "gmft==0.2.1",
+    "pypdf2>=3.0.1",
+    "pydantic>=2.11.3",
+    "instructorembedding>=1.0.1",
+    "unstructured>=0.17.2",
+    "tiktoken>=0.9.0",
+    "tokenizers>=0.21.1",
+    "langchain-openai>=0.3.14",
+    "python-docx>=1.1.2",
+    "pymupdf>=1.25.5",
+    "termcolor>=3.0.1",
+    "fastapi-mcp>=0.3.3",
+]

 [project.optional-dependencies]

@@ -35,6 +61,10 @@ azureparser = [
     "azure-identity==1.17.1"
 ]

+googleparser = [
+    "google-generativeai>=0.8.5",
+]
+
 [project.urls]
 Homepage = "https://github.com/snexus/llm-search"
 Documentation = "https://llm-search.readthedocs.io/en/latest/"
@@ -46,9 +76,30 @@ local_scheme = "no-local-version"
 [tool.setuptools.packages.find]
 where = ["src"]

-[tool.setuptools.dynamic]
-dependencies = {file = ["requirements.txt"]}
+# [tool.setuptools.dynamic]
+# dependencies = {file = ["requirements.txt"]}

+[tool.flake8]
+docstring-convention = "all"
+ignore = [
+    "D107",
+    "D212",
+    "E501",
+    "W503",
+    "W605",
+    "D203",
+    "D100",
+    "D400",
+    "D415",
+    "D104",
+    "D203",
+    "D213",
+    "D401",
+    "D406",
+    "D417",
+]
+exclude = [ "venv" ]
+max-line-length = 119

 [tool.ruff]
 # Decrease the maximum line length to 79 characters.
