Skip to content

Added firecrawl as a tool call to crawl website contents from a URL #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion apps/studio/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ VITE_SUPABASE_ANON_KEY=
VITE_MIXPANEL_TOKEN=

# Add your keys here to use Anthropic directly
VITE_ANTHROPIC_API_KEY=
VITE_ANTHROPIC_API_KEY=
# Add your Firecrawl API key here
VITE_FIRECRAWL_API_KEY=
42 changes: 42 additions & 0 deletions apps/studio/electron/main/chat/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { z } from 'zod';
import { mainWindow } from '..';
import { PersistentStorage } from '../storage';
import { initModel } from './llmProvider';
import { extractUrls } from '@onlook/ai/src/tools/helpers';

class LlmManager {
private static instance: LlmManager;
Expand Down Expand Up @@ -59,6 +60,43 @@ class LlmManager {
return LlmManager.instance;
}

private async processUrls(content: string): Promise<string> {
const urls = extractUrls(content);

if (urls.length === 0) {
return content;
}

try {
const result = await streamText({
model: await initModel(LLMProvider.ANTHROPIC, CLAUDE_MODELS.SONNET, {
requestType: StreamRequestType.SUGGESTIONS,
}),
messages: [
{
role: 'user',
content: content,
},
],
tools: { crawl_urls: chatToolSet.crawl_urls },
maxTokens: 4000,
});

const crawledContent = `
Original request:
${content}

Referenced content from URLs:
${JSON.stringify(result, null, 2)}
`;

return crawledContent;
} catch (error) {
console.error('Error processing URLs:', error);
return content;
}
}

public async stream(
messages: CoreMessage[],
requestType: StreamRequestType,
Expand All @@ -67,6 +105,10 @@ class LlmManager {
skipSystemPrompt?: boolean;
},
): Promise<CompletedStreamResponse> {
const lastUserMessage = messages.findLast((m) => m.role === 'user');
if (lastUserMessage && typeof lastUserMessage.content === 'string') {
lastUserMessage.content = await this.processUrls(lastUserMessage.content);
}
const { abortController, skipSystemPrompt } = options || {};
this.abortController = abortController || new AbortController();
try {
Expand Down
1 change: 1 addition & 0 deletions apps/studio/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"@emotion/react": "^11.13.3",
"@emotion/styled": "^11.13.0",
"@fontsource-variable/inter": "^5.1.0",
"@mendable/firecrawl-js": "^1.24.0",
"@onlook/foundation": "*",
"@onlook/supabase": "*",
"@onlook/ui": "*",
Expand Down
105 changes: 105 additions & 0 deletions packages/ai/src/tools/crawler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import FirecrawlApp from '@mendable/firecrawl-js';

// Options forwarded to Firecrawl's crawl API.
// NOTE(review): field semantics mirror the Firecrawl SDK — confirm against
// @mendable/firecrawl-js before extending.
export interface CrawlOptions {
    // Maximum number of pages to crawl (presumably Firecrawl's page limit — verify).
    limit?: number;
    scrapeOptions?: {
        // Output formats requested for each scraped page.
        formats?: (
            | 'markdown'
            | 'html'
            | 'rawHtml'
            | 'content'
            | 'links'
            | 'screenshot'
            | 'screenshot@fullPage'
            | 'extract'
            | 'json'
            | 'changeTracking'
        )[];
    };
}

// Shape of a successful crawl response as consumed by this package.
export interface CrawlerResponse {
    success: boolean;
    error?: string;
    data: Array<{
        html?: string;
        markdown?: string;
    }>;
}

// A single crawled page's content.
export interface CrawledContent {
    markdown?: string;
    html?: string;
}

/**
 * Runtime type guard for {@link CrawlerResponse}.
 *
 * Accepts only a non-null object with a boolean `success`, a non-empty `data`
 * array, and a first item that carries at least one of `html`/`markdown`,
 * each of which — when present — must be a string.
 */
export function validateCrawlerResponse(response: unknown): response is CrawlerResponse {
    if (response === null || typeof response !== 'object') {
        return false;
    }

    const candidate = response as { success?: unknown; data?: unknown };

    if (typeof candidate.success !== 'boolean') {
        return false;
    }

    if (!Array.isArray(candidate.data) || candidate.data.length === 0) {
        return false;
    }

    const first: unknown = candidate.data[0];
    if (first === null || typeof first !== 'object') {
        return false;
    }

    const item = first as { html?: unknown; markdown?: unknown };
    const hasContentField = 'html' in item || 'markdown' in item;
    const htmlOk = item.html === undefined || typeof item.html === 'string';
    const markdownOk = item.markdown === undefined || typeof item.markdown === 'string';

    return hasContentField && htmlOk && markdownOk;
}

/**
 * Lazily-constructed singleton wrapper around the Firecrawl SDK client.
 * Construction throws when VITE_FIRECRAWL_API_KEY is not configured.
 */
export class CrawlerService {
    private static instance: CrawlerService;

    private app: FirecrawlApp;

    private constructor() {
        // NOTE(review): import.meta.env is Vite-specific — confirm this
        // resolves when the package is consumed from the Electron main process.
        const apiKey = import.meta.env.VITE_FIRECRAWL_API_KEY;
        if (!apiKey) {
            throw new Error(
                'VITE_FIRECRAWL_API_KEY is not defined. Please provide a valid API key.',
            );
        }
        this.app = new FirecrawlApp({ apiKey });
    }

    /** Returns the shared instance, creating it on first use. */
    static getInstance(): CrawlerService {
        if (!CrawlerService.instance) {
            CrawlerService.instance = new CrawlerService();
        }
        return CrawlerService.instance;
    }

    /**
     * Crawl `url` via Firecrawl. By default crawls up to 100 pages and
     * requests markdown + html output. Failures are logged and rethrown.
     */
    async crawlUrl(
        url: string,
        options: CrawlOptions = {
            limit: 100,
            scrapeOptions: {
                formats: ['markdown', 'html'],
            },
        },
    ) {
        try {
            const response = await this.app.crawlUrl(url, options);

            if (response.success) {
                return response;
            }
            // Thrown here so the catch below logs it before rethrowing,
            // matching the handling of SDK-raised errors.
            throw new Error(`Failed to crawl: ${response.error}`);
        } catch (error) {
            console.error('Error during crawling:', error);
            throw error;
        }
    }
}
37 changes: 37 additions & 0 deletions packages/ai/src/tools/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,40 @@ export async function getAllFiles(
return { success: false, error: error instanceof Error ? error.message : 'Unknown error' };
}
}

/**
 * Extract all unique, absolute URLs referenced in free-form text.
 *
 * Recognizes plain http(s) URLs, bare `www.` URLs (preceded by start-of-text
 * or whitespace), and markdown links `[text](url)`. Every returned URL is
 * normalized to carry an explicit scheme (`https://` is prepended when
 * missing) and validated with the WHATWG `URL` parser; malformed candidates
 * are dropped.
 *
 * @param text - Arbitrary text that may contain URLs.
 * @returns De-duplicated list of well-formed absolute URLs, in first-seen order.
 */
export function extractUrls(text: string): string[] {
    // Regular URLs with an explicit http/https scheme.
    const httpPattern =
        /https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)/gi;

    // URLs starting with www.
    const wwwPattern =
        /(?:^|\s)www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)/gi;

    // Markdown links [text](url) — capture group 2 is the URL.
    const markdownPattern = /\[([^\]]+)\]\(([^)]+)\)/g;

    const httpUrls = text.match(httpPattern) ?? [];
    // The www match includes the leading whitespace consumed by (?:^|\s),
    // so trim before prefixing the scheme.
    const wwwUrls = (text.match(wwwPattern) ?? []).map((url) => 'https://' + url.trim());
    const markdownUrls = Array.from(text.matchAll(markdownPattern), (match) => match[2]);

    // Normalize every candidate to an absolute URL. (Bug fix: markdown URLs
    // without a scheme were previously validated with an added https:// prefix
    // but returned WITHOUT it, inconsistent with the www handling.)
    const normalized = [...httpUrls, ...wwwUrls, ...markdownUrls].map((url) =>
        url.startsWith('http') ? url : `https://${url}`,
    );

    // Keep only candidates the WHATWG URL parser accepts. (Bug fix: the
    // filter callback previously returned the URL string rather than a
    // boolean — truthy by accident, but not the declared contract.)
    const validUrls = normalized.filter((url) => {
        try {
            new URL(url);
            return true;
        } catch {
            return false;
        }
    });

    // De-duplicate while preserving first-seen order.
    return Array.from(new Set(validUrls));
}
58 changes: 58 additions & 0 deletions packages/ai/src/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { readFile } from 'fs/promises';
import { z } from 'zod';
import { ONLOOK_PROMPT } from '../prompt/onlook';
import { getAllFiles } from './helpers';
import { CrawlerService } from './crawler';

export const listFilesTool = tool({
description: 'List all files in the current directory, including subdirectories',
Expand Down Expand Up @@ -130,8 +131,65 @@ export const getStrReplaceEditorTool = (handlers: FileOperationHandlers) => {
return strReplaceEditorTool;
};

// Tool that crawls each URL in `urls` via CrawlerService and returns per-URL
// results. Per-URL failures are captured as { url, error } entries rather
// than failing the whole batch.
export const crawlUrlTool = tool({
    description: 'Crawl webpage content from provided URL',
    parameters: z.object({
        urls: z.array(z.string()).describe('Array of URLs to crawl'),
        // Optional crawl settings; mirrors CrawlOptions in crawler.ts.
        options: z
            .object({
                limit: z.number().optional(),
                scrapeOptions: z
                    .object({
                        formats: z
                            .array(
                                z.enum([
                                    'markdown',
                                    'html',
                                    'rawHtml',
                                    'content',
                                    'links',
                                    'screenshot',
                                    'screenshot@fullPage',
                                    'extract',
                                    'json',
                                    'changeTracking',
                                ]),
                            )
                            .optional(),
                    })
                    .optional(),
            })
            .optional(),
    }),
    execute: async ({ urls, options }) => {
        try {
            const crawler = CrawlerService.getInstance();
            // Crawl all URLs concurrently; each mapped promise resolves (never
            // rejects) because errors are converted to result entries below.
            const results = await Promise.all(
                urls.map(async (url) => {
                    try {
                        const result = await crawler.crawlUrl(url, options);
                        // NOTE(review): CrawlerService.crawlUrl throws when
                        // !response.success, so this branch looks unreachable;
                        // also `result.status` vs `result.error` — confirm
                        // against the Firecrawl response type.
                        if (!result.success) {
                            return { url, error: result.status };
                        }
                        // Only the first crawled page is returned per URL.
                        return { url, data: result.data[0] };
                    } catch (error) {
                        return {
                            url,
                            error: error instanceof Error ? error.message : 'Unknown error',
                        };
                    }
                }),
            );
            return results;
        } catch (error) {
            // E.g. CrawlerService construction failed (missing API key).
            return `Error: ${error instanceof Error ? error.message : 'Unknown error'}`;
        }
    },
});

// Tool set exposed to the chat LLM.
// NOTE(review): the crawler tool is registered under the key 'crawl_url',
// but apps/studio/electron/main/chat/index.ts references
// `chatToolSet.crawl_urls` (plural), which is `undefined` — keep these key
// names in sync.
export const chatToolSet: ToolSet = {
    list_files: listFilesTool,
    read_files: readFilesTool,
    onlook_instructions: onlookInstructionsTool,
    crawl_url: crawlUrlTool,
};
9 changes: 8 additions & 1 deletion packages/models/src/chat/message/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export enum MessageContextType {
IMAGE = 'image',
ERROR = 'error',
PROJECT = 'project',
LINK = 'link',
}

type BaseMessageContext = {
Expand Down Expand Up @@ -38,9 +39,15 @@ export type ProjectMessageContext = BaseMessageContext & {
path: string;
};

// Context entry for a URL referenced in a chat message.
// NOTE(review): presumably consumed by the URL-crawling tool flow — verify
// against the callers that construct this context.
export type LinkMessageContext = BaseMessageContext & {
    type: MessageContextType.LINK;
    url: string;
};

// Union of every context payload a chat message can carry.
export type ChatMessageContext =
    | FileMessageContext
    | HighlightMessageContext
    | ImageMessageContext
    | ErrorMessageContext
    | ProjectMessageContext
    | LinkMessageContext;