Deep research api #1921

Merged 11 commits on Jun 26, 2025
5 changes: 5 additions & 0 deletions authors.yaml

@@ -376,3 +376,8 @@ alexl-oai:
name: "Alex Lowden"
website: "https://www.linkedin.com/in/alex-lowden01/"
avatar: "https://avatars.githubusercontent.com/u/215167546"

glojain:
name: "Glory Jain"
website: "https://www.linkedin.com/in/gloryjain/"
avatar: "https://media.licdn.com/dms/image/v2/C4E03AQH72n6Sm5q69Q/profile-displayphoto-shrink_400_400/profile-displayphoto-shrink_400_400/0/1557995338725?e=1756339200&v=beta&t=FGTXiCZwTZvqHCY-wd8It15EDf11Rex1oLlBKRGHNtY"
@@ -0,0 +1,123 @@
# MCP for Deep Research

This is a minimal example of a Deep Research-style MCP server for searching and fetching files from the OpenAI file storage service.

For a reference on _how_ to call this server from the Responses API with Deep Research, see [this cookbook](https://cookbook.openai.com/examples/deep_research_api/introduction_to_deep_research_api). To see how to call the MCP server with the Agents SDK, check out [this cookbook](https://cookbook.openai.com/examples/deep_research_api/how_to_use_deep_research_API_agents)!

The Deep Research agent relies on two tools: Search and Fetch. Search should look through your object store and return the top-k most relevant object IDs for a query. Fetch takes an object ID as an argument and pulls back the corresponding resource.
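
Concretely, the tool contract this example implements returns shapes like the following (mirroring `main.py` below; the IDs and titles are illustrative):

```python
# Shape of a result returned by the `search` tool:
search_response = {
    "results": [
        {
            "id": "file-abc123",                # vector store file ID
            "title": "semaglutide_study.pdf",   # filename used as the title
            "text": "First ~200 characters of the matched content...",
            "url": "https://platform.openai.com/storage/files/file-abc123",
        }
    ]
}

# Shape of a document returned by the `fetch` tool:
fetch_response = {
    "id": "file-abc123",
    "title": "semaglutide_study.pdf",
    "text": "Full document text...",
    "url": "https://platform.openai.com/storage/files/file-abc123",
    "metadata": None,  # populated from file attributes when available
}
```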

## Set up & run

Store your internal file(s) in [OpenAI Vector Storage](https://platform.openai.com/storage/vector_stores/).
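
If you would rather create the store from code than the dashboard, here is a minimal sketch with the OpenAI Python SDK (the store name and file path are placeholders; `upload_and_poll` is the SDK's blocking upload helper):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Create a vector store and upload a document to it.
vector_store = client.vector_stores.create(name="internal-research-docs")
with open("internal_report.pdf", "rb") as f:
    client.vector_stores.files.upload_and_poll(
        vector_store_id=vector_store.id, file=f
    )

print(vector_store.id)  # use this as the vector store ID in main.py
```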

Python setup:

```shell
python3 -m venv env
source env/bin/activate
pip install -r requirements.txt
```

Run the server:

```shell
python main.py
```

The server will start on `http://0.0.0.0:8000/sse/` using SSE transport. If you want to reach the server from the public internet, there are a variety of ways to do that, including ngrok:

```shell
brew install ngrok
ngrok config add-authtoken <your_token>
ngrok http 8000
```

You should now be able to reach your local server from your client.
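
As a quick sanity check, you can open the SSE endpoint directly. A minimal sketch using `httpx` (already listed in `requirements.txt`), assuming the default host and port from `main.py`:

```python
import httpx

# Open the SSE endpoint and print the first event the server sends.
# An MCP server using SSE transport should answer with an endpoint handshake.
with httpx.stream("GET", "http://localhost:8000/sse/", timeout=10) as response:
    response.raise_for_status()
    for line in response.iter_lines():
        if line:
            print(line)
            break
```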

## Files

- `main.py`: Main server code

## Example flow diagram for the MCP server

```mermaid
flowchart TD
subgraph Connection_Setup
A1[MCP Server starts up<br/>listening on /sse/] --> A2[Client opens SSE connection]
A2 --> A3[Server confirms SSE connection]
end

subgraph Tool_Discovery
A3 --> B1[Client asks 'What tools do you support?']
B1 --> B2[Server replies with Search & Fetch schemas]
B2 --> B3[Client stores schemas in context]
end

subgraph Search_Fetch_Loop
B3 --> C1[Client issues search call]
C1 --> C2[MCP Server routes to Search Tool]
C2 --> C3[Search Tool queries Data Store<br/>returns one hit]
C3 --> C4[Client issues fetch call]
C4 --> C5[MCP Server routes to Fetch Tool]
C5 --> C6[Fetch Tool retrieves document text]
C6 --> C7[Client refines/repeats search<br/> cost-effectiveness, market revenue…]
C7 --> C1
end
```
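
To exercise this search/fetch loop without Deep Research in the middle, you can drive the tools directly from a small test client. This is a sketch, assuming the `Client` class shipped with recent `fastmcp` releases; the tool names match `main.py`:

```python
import asyncio
from fastmcp import Client


async def main():
    # Connect to the running server over SSE.
    async with Client("http://localhost:8000/sse/") as client:
        # Tool discovery: the server should report `search` and `fetch`.
        tools = await client.list_tools()
        print([tool.name for tool in tools])

        # Issue a search call -- the same first step Deep Research takes.
        hits = await client.call_tool("search", {"query": "semaglutide cost data"})
        print(hits)


asyncio.run(main())
```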

## Example request

```python
from openai import OpenAI

client = OpenAI()

# system_message includes reference to internal file lookups for MCP.
system_message = """
You are a professional researcher preparing a structured, data-driven report on behalf of a global health economics team. Your task is to analyze the health question the user poses.

Do:
- Focus on data-rich insights: include specific figures, trends, statistics, and measurable outcomes (e.g., reduction in hospitalization costs, market size, pricing trends, payer adoption).
- When appropriate, summarize data in a way that could be turned into charts or tables, and call this out in the response (e.g., "this would work well as a bar chart comparing per-patient costs across regions").
- Prioritize reliable, up-to-date sources: peer-reviewed research, health organizations (e.g., WHO, CDC), regulatory agencies, or pharmaceutical earnings reports.
- Include an internal file lookup tool to retrieve information from our own internal data sources. If you've already retrieved a file, do not call fetch again for that same file. Prioritize inclusion of that data.
- Include inline citations and return all source metadata.

Be analytical, avoid generalities, and ensure that each section supports data-backed reasoning that could inform healthcare policy or financial modeling.
"""

user_query = "Research the economic impact of semaglutide on global healthcare systems."

response = client.responses.create(
model="o3-deep-research-2025-06-26",
input=[
{
"role": "developer",
"content": [
{
"type": "input_text",
"text": system_message,
}
]
},
{
"role": "user",
"content": [
{
"type": "input_text",
"text": user_query,
}
]
}
],
reasoning={
"summary": "auto"
},
tools=[
{
"type": "web_search_preview"
},
{ # ADD MCP TOOL SUPPORT
"type": "mcp",
"server_label": "internal_file_lookup",
"server_url": "http://0.0.0.0:8000/sse/", # Update to the location of *your* MCP server
"require_approval": "never"
}
]
)
```
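
Once the call completes, the final report lives on the response object. A minimal way to inspect it, assuming the Python SDK's `output_text` convenience property and the usual output-item layout:

```python
# Print the final report text.
print(response.output_text)

# Inspect annotations (citations) attached to the final message, if any.
final_message = response.output[-1]
for item in final_message.content:
    for annotation in getattr(item, "annotations", []):
        print(annotation)
```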
208 additions & 0 deletions main.py
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""
Sample MCP Server for Deep Research API Integration

This server implements the Model Context Protocol (MCP) with search and fetch
capabilities designed to work with ChatGPT's deep research feature.
"""

import logging
import os
from typing import Any, Dict, List
from fastmcp import FastMCP
from openai import OpenAI

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# OpenAI configuration (set these in your environment)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
VECTOR_STORE_ID = os.environ.get("VECTOR_STORE_ID", "")  # OpenAI Vector Store ID: https://platform.openai.com/storage/vector_stores/

# Initialize OpenAI client
openai_client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None

# No local data storage needed - using OpenAI Vector Store only


def create_server():
"""Create and configure the MCP server with search and fetch tools."""

# Initialize the FastMCP server
mcp = FastMCP(name="Sample Deep Research MCP Server",
instructions="""
This MCP server provides search and document retrieval capabilities for deep research.
Use the search tool to find relevant documents based on keywords, then use the fetch
tool to retrieve complete document content with citations.
""")

@mcp.tool()
async def search(query: str) -> Dict[str, List[Dict[str, Any]]]:
"""
Search for documents using OpenAI Vector Store search.

This tool searches through the vector store to find semantically relevant matches.
Returns a list of search results with basic information. Use the fetch tool to get
complete document content.

Args:
query: Search query string. Natural language queries work best for semantic search.

Returns:
Dictionary with 'results' key containing list of matching documents.
Each result includes id, title, text snippet, and optional URL.
"""
if not query or not query.strip():
return {"results": []}

if not openai_client:
logger.error("OpenAI client not initialized - API key missing")
raise ValueError(
"OpenAI API key is required for vector store search")

# Search the vector store using OpenAI API
logger.info(
f"Searching vector store {VECTOR_STORE_ID} for query: '{query}'")

response = openai_client.vector_stores.search(
vector_store_id=VECTOR_STORE_ID, query=query)

results = []

# Process the vector store search results
if hasattr(response, 'data') and response.data:
for i, item in enumerate(response.data):
# Extract file_id, filename, and content from the VectorStoreSearchResponse
item_id = getattr(item, 'file_id', f"vs_{i}")
item_filename = getattr(item, 'filename', f"Document {i+1}")

# Extract text content from the content array
content_list = getattr(item, 'content', [])
text_content = ""
if content_list and len(content_list) > 0:
# Get text from the first content item
first_content = content_list[0]
if hasattr(first_content, 'text'):
text_content = first_content.text
elif isinstance(first_content, dict):
text_content = first_content.get('text', '')

if not text_content:
text_content = "No content available"

# Create a snippet from content
text_snippet = text_content[:200] + "..." if len(
text_content) > 200 else text_content

result = {
"id": item_id,
"title": item_filename,
"text": text_snippet,
"url": f"https://platform.openai.com/storage/files/{item_id}"
}

results.append(result)

logger.info(f"Vector store search returned {len(results)} results")
return {"results": results}

@mcp.tool()
async def fetch(id: str) -> Dict[str, Any]:
"""
Retrieve complete document content by ID for detailed analysis and citation.

        This tool fetches the full document content from the OpenAI Vector Store.
        Use this after finding relevant documents with the search tool to get complete
        information for analysis and proper citation.

Args:
            id: File ID from the vector store (file-xxx)

Returns:
Complete document with id, title, full text content, optional URL, and metadata

Raises:
ValueError: If the specified ID is not found
"""
if not id:
raise ValueError("Document ID is required")

if not openai_client:
logger.error("OpenAI client not initialized - API key missing")
raise ValueError(
"OpenAI API key is required for vector store file retrieval")

logger.info(f"Fetching content from vector store for file ID: {id}")

# Fetch file content from vector store
content_response = openai_client.vector_stores.files.content(
vector_store_id=VECTOR_STORE_ID, file_id=id)

# Get file metadata
file_info = openai_client.vector_stores.files.retrieve(
vector_store_id=VECTOR_STORE_ID, file_id=id)

# Extract content from paginated response
file_content = ""
if hasattr(content_response, 'data') and content_response.data:
# Combine all content chunks from FileContentResponse objects
content_parts = []
for content_item in content_response.data:
if hasattr(content_item, 'text'):
content_parts.append(content_item.text)
file_content = "\n".join(content_parts)
else:
file_content = "No content available"

# Use filename as title and create proper URL for citations
filename = getattr(file_info, 'filename', f"Document {id}")

result = {
"id": id,
"title": filename,
"text": file_content,
"url": f"https://platform.openai.com/storage/files/{id}",
"metadata": None
}

# Add metadata if available from file info
if hasattr(file_info, 'attributes') and file_info.attributes:
result["metadata"] = file_info.attributes

logger.info(f"Successfully fetched vector store file: {id}")
return result

return mcp


def main():
"""Main function to start the MCP server."""
# Verify OpenAI client is initialized
if not openai_client:
logger.error(
"OpenAI API key not found. Please set OPENAI_API_KEY environment variable."
)
raise ValueError("OpenAI API key is required")

logger.info(f"Using vector store: {VECTOR_STORE_ID}")

# Create the MCP server
server = create_server()

# Configure and start the server
logger.info("Starting MCP server on 0.0.0.0:8000")
logger.info("Server will be accessible via SSE transport")
logger.info("Connect this server to ChatGPT Deep Research for testing")

try:
# Use FastMCP's built-in run method with SSE transport
server.run(transport="sse", host="0.0.0.0", port=8000)
except KeyboardInterrupt:
logger.info("Server stopped by user")
except Exception as e:
logger.error(f"Server error: {e}")
raise


if __name__ == "__main__":
main()
15 additions & 0 deletions requirements.txt
@@ -0,0 +1,15 @@
# Core dependencies for the Deep Research MCP Server
fastmcp>=2.9.0
openai>=1.88.0
uvicorn>=0.34.3

# Additional dependencies that may be required
pydantic>=2.0.0
typing-extensions>=4.0.0
httpx>=0.23.0
python-multipart>=0.0.9
sse-starlette>=1.6.1
starlette>=0.27.0

# Optional but recommended for production
python-dotenv>=1.0.0