|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Sample MCP Server for Deep Research API Integration |
| 4 | +
|
| 5 | +This server implements the Model Context Protocol (MCP) with search and fetch |
| 6 | +capabilities designed to work with ChatGPT's deep research feature. |
| 7 | +""" |
| 8 | + |
import logging
import os
from typing import Any, Dict, List

from fastmcp import FastMCP
from openai import OpenAI
| 13 | + |
# Configure logging: INFO level so server start/search/fetch activity is visible.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)
| 17 | + |
# OpenAI configuration — read from the environment so secrets never live in
# source control. main() already instructs the user to set OPENAI_API_KEY;
# previously these were hard-coded empty strings and the env var was ignored.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
# OpenAI Vector Store ID (e.g. "vs_...") from
# https://platform.openai.com/storage/vector_stores/
VECTOR_STORE_ID = os.environ.get("VECTOR_STORE_ID", "")

# Initialize the OpenAI client only when a key is available; the tools check
# for None and raise a clear error otherwise.
openai_client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
| 24 | + |
| 25 | +# No local data storage needed - using OpenAI Vector Store only |
| 26 | + |
| 27 | + |
def create_server():
    """Create and configure the MCP server with search and fetch tools.

    Returns:
        A FastMCP instance exposing `search` and `fetch` tools backed by the
        OpenAI Vector Store identified by VECTOR_STORE_ID.
    """
    # Initialize the FastMCP server. The instructions string is surfaced to
    # the MCP client (e.g. ChatGPT deep research) as usage guidance.
    mcp = FastMCP(name="Sample Deep Research MCP Server",
                  instructions="""
        This MCP server provides search and document retrieval capabilities for deep research.
        Use the search tool to find relevant documents based on keywords, then use the fetch
        tool to retrieve complete document content with citations.
        """)

    @mcp.tool()
    async def search(query: str) -> Dict[str, List[Dict[str, Any]]]:
        """
        Search for documents using OpenAI Vector Store search.

        This tool searches through the vector store to find semantically relevant matches.
        Returns a list of search results with basic information. Use the fetch tool to get
        complete document content.

        Args:
            query: Search query string. Natural language queries work best for semantic search.

        Returns:
            Dictionary with 'results' key containing list of matching documents.
            Each result includes id, title, text snippet, and optional URL.
        """
        # Empty/whitespace-only queries return no results rather than erroring.
        if not query or not query.strip():
            return {"results": []}

        if not openai_client:
            logger.error("OpenAI client not initialized - API key missing")
            raise ValueError(
                "OpenAI API key is required for vector store search")

        # Lazy %-style args avoid f-string formatting when the level is off.
        logger.info("Searching vector store %s for query: '%s'",
                    VECTOR_STORE_ID, query)

        response = openai_client.vector_stores.search(
            vector_store_id=VECTOR_STORE_ID, query=query)

        results = []

        # Process the vector store search results; a missing/empty `data`
        # attribute simply yields no results.
        for i, item in enumerate(getattr(response, 'data', None) or []):
            # Each item is a VectorStoreSearchResponse; fall back to synthetic
            # ids/titles when fields are absent.
            item_id = getattr(item, 'file_id', f"vs_{i}")
            item_filename = getattr(item, 'filename', f"Document {i+1}")

            # Text lives in the first entry of the content array; entries may
            # be objects with a .text attribute or plain dicts.
            text_content = ""
            content_list = getattr(item, 'content', [])
            if content_list:
                first_content = content_list[0]
                if hasattr(first_content, 'text'):
                    text_content = first_content.text
                elif isinstance(first_content, dict):
                    text_content = first_content.get('text', '')

            if not text_content:
                text_content = "No content available"

            # Truncate long content to a 200-character snippet for the result
            # list; fetch returns the full text.
            text_snippet = (text_content[:200] + "..."
                            if len(text_content) > 200 else text_content)

            results.append({
                "id": item_id,
                "title": item_filename,
                "text": text_snippet,
                "url": f"https://platform.openai.com/storage/files/{item_id}"
            })

        logger.info("Vector store search returned %d results", len(results))
        return {"results": results}

    @mcp.tool()
    async def fetch(id: str) -> Dict[str, Any]:
        """
        Retrieve complete document content by ID for detailed analysis and citation.

        This tool fetches the full document content from OpenAI Vector Store or local storage.
        Use this after finding relevant documents with the search tool to get complete
        information for analysis and proper citation.

        Args:
            id: File ID from vector store (file-xxx) or local document ID

        Returns:
            Complete document with id, title, full text content, optional URL, and metadata

        Raises:
            ValueError: If the specified ID is not found
        """
        if not id:
            raise ValueError("Document ID is required")

        if not openai_client:
            logger.error("OpenAI client not initialized - API key missing")
            raise ValueError(
                "OpenAI API key is required for vector store file retrieval")

        logger.info("Fetching content from vector store for file ID: %s", id)

        # Fetch file content (paginated chunks) from the vector store.
        content_response = openai_client.vector_stores.files.content(
            vector_store_id=VECTOR_STORE_ID, file_id=id)

        # Get file metadata (filename, attributes).
        file_info = openai_client.vector_stores.files.retrieve(
            vector_store_id=VECTOR_STORE_ID, file_id=id)

        # Combine all content chunks from the FileContentResponse objects.
        if hasattr(content_response, 'data') and content_response.data:
            file_content = "\n".join(
                content_item.text for content_item in content_response.data
                if hasattr(content_item, 'text'))
        else:
            file_content = "No content available"

        # Use the filename as title and a platform URL for citations.
        filename = getattr(file_info, 'filename', f"Document {id}")

        result = {
            "id": id,
            "title": filename,
            "text": file_content,
            "url": f"https://platform.openai.com/storage/files/{id}",
            "metadata": None
        }

        # Add metadata if available from file info.
        if hasattr(file_info, 'attributes') and file_info.attributes:
            result["metadata"] = file_info.attributes

        logger.info("Successfully fetched vector store file: %s", id)
        return result

    return mcp
| 176 | + |
| 177 | + |
def main():
    """Start the MCP server over SSE transport on 0.0.0.0:8000.

    Raises:
        ValueError: If the OpenAI API key is not configured.
    """
    # Verify the OpenAI client is initialized before starting the server.
    if not openai_client:
        logger.error(
            "OpenAI API key not found. Please set OPENAI_API_KEY environment variable."
        )
        raise ValueError("OpenAI API key is required")

    logger.info("Using vector store: %s", VECTOR_STORE_ID)

    # Create the MCP server with its search/fetch tools.
    server = create_server()

    logger.info("Starting MCP server on 0.0.0.0:8000")
    logger.info("Server will be accessible via SSE transport")
    logger.info("Connect this server to ChatGPT Deep Research for testing")

    try:
        # Use FastMCP's built-in run method with SSE transport; blocks until
        # shutdown.
        server.run(transport="sse", host="0.0.0.0", port=8000)
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as e:
        # logger.exception records the full traceback (logger.error did not);
        # re-raise so the failure propagates to the caller.
        logger.exception(f"Server error: {e}")
        raise
| 205 | + |
| 206 | + |
# Script entry point: start the server only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()