Skip to content

Commit 1dacb20

Browse files
authored
feat(cmd): add datasource template (#449)
* feat: add template for datasource plugins Signed-off-by: jingfelix <[email protected]> * fix: description and missing category Signed-off-by: jingfelix <[email protected]> --------- Signed-off-by: jingfelix <[email protected]>
1 parent 878edde commit 1dacb20

File tree

8 files changed

+396
-1
lines changed

8 files changed

+396
-1
lines changed

cmd/commandline/plugin/category.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,13 @@ const PLUGIN_GUIDE = `Before starting, here's some basic knowledge about Plugin
2323
` + BOLD + `- Endpoint` + RESET + `: ` + GREEN + `Similar to Service API in Dify and Ingress in Kubernetes. Extend HTTP services as endpoints with custom logic.` + RESET + `
2424
` + BOLD + `- Agent Strategy` + RESET + `: ` + GREEN + `Implement your own agent strategies like Function Calling, ReAct, ToT, CoT, etc.` + RESET + `
2525
26-
Based on the ability you want to extend, Plugins are divided into four types: ` + BOLD + `Tool` + RESET + `, ` + BOLD + `Model` + RESET + `, ` + BOLD + `Extension` + RESET + `, and ` + BOLD + `Agent Strategy` + RESET + `.
26+
Based on the ability you want to extend, Plugins are divided into five types: ` + BOLD + `Tool` + RESET + `, ` + BOLD + `Model` + RESET + `, ` + BOLD + `Extension` + RESET + `, ` + BOLD + `Agent Strategy` + RESET + `, and ` + BOLD + `Datasource` + RESET + `.
2727
2828
` + BOLD + `- Tool` + RESET + `: ` + YELLOW + `A tool provider that can also implement endpoints. For example, building a Discord Bot requires both ` + BLUE + `Sending` + RESET + YELLOW + ` and ` + BLUE + `Receiving Messages` + RESET + YELLOW + `, so both ` + BOLD + `Tool` + RESET + YELLOW + ` and ` + BOLD + `Endpoint` + RESET + YELLOW + ` functionality.` + RESET + `
2929
` + BOLD + `- Model` + RESET + `: ` + YELLOW + `Strictly for model providers, no other extensions allowed.` + RESET + `
3030
` + BOLD + `- Extension` + RESET + `: ` + YELLOW + `For simple HTTP services that extend functionality.` + RESET + `
3131
` + BOLD + `- Agent Strategy` + RESET + `: ` + YELLOW + `Implement custom agent logic with a focused approach.` + RESET + `
32+
` + BOLD + `- Datasource` + RESET + `: ` + YELLOW + `Provide datasource for Dify RAG Pipeline.` + RESET + `
3233
3334
We've provided templates to help you get started. Choose one of the options below:
3435
`
@@ -47,6 +48,7 @@ var categories = []string{
4748
"speech2text",
4849
"moderation",
4950
"extension",
51+
"datasource",
5052
}
5153

5254
func newCategory() category {

cmd/commandline/plugin/init.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ func InitPluginWithFlags(
156156
"tts",
157157
"extension",
158158
"agent-strategy",
159+
"datasource",
159160
}
160161
valid := false
161162
for _, cat := range validCategories {
@@ -443,6 +444,10 @@ func (m model) createPlugin() {
443444
manifest.Plugins.AgentStrategies = []string{fmt.Sprintf("provider/%s.yaml", manifest.Name)}
444445
}
445446

447+
if categoryString == "datasource" {
448+
manifest.Plugins.Datasources = []string{fmt.Sprintf("provider/%s.yaml", manifest.Name)}
449+
}
450+
446451
manifest.Meta = plugin_entities.PluginMeta{
447452
Version: "0.0.1",
448453
Arch: []constants.Arch{

cmd/commandline/plugin/python.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,18 @@ var PYTHON_AGENT_STRATEGY_MANIFEST_TEMPLATE []byte
9191
//go:embed templates/python/agent_strategy.py
9292
var PYTHON_AGENT_STRATEGY_TEMPLATE []byte
9393

94+
//go:embed templates/python/datasource.yaml
95+
var PYTHON_DATASOURCE_MANIFEST_TEMPLATE []byte
96+
97+
//go:embed templates/python/datasource.py
98+
var PYTHON_DATASOURCE_TEMPLATE []byte
99+
100+
//go:embed templates/python/datasource_provider.yaml
101+
var PYTHON_DATASOURCE_PROVIDER_MANIFEST_TEMPLATE []byte
102+
103+
//go:embed templates/python/datasource_provider.py
104+
var PYTHON_DATASOURCE_PROVIDER_PY_TEMPLATE []byte
105+
94106
//go:embed templates/python/GUIDE.md
95107
var PYTHON_GUIDE []byte
96108

@@ -231,5 +243,11 @@ func createPythonEnvironment(
231243
}
232244
}
233245

246+
if category == "datasource" {
247+
if err := createPythonDatasource(root, manifest); err != nil {
248+
return err
249+
}
250+
}
251+
234252
return nil
235253
}

cmd/commandline/plugin/python_categories.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,25 @@ func createPythonAgentStrategy(root string, manifest *plugin_entities.PluginDecl
270270

271271
return nil
272272
}
273+
274+
func createPythonDatasource(root string, manifest *plugin_entities.PluginDeclaration) error {
275+
datasourceFileContent, err := renderTemplate(PYTHON_DATASOURCE_TEMPLATE, manifest, []string{""})
276+
if err != nil {
277+
return err
278+
}
279+
datasourceFilePath := filepath.Join(root, "datasources", fmt.Sprintf("%s.py", manifest.Name))
280+
if err := writeFile(datasourceFilePath, datasourceFileContent); err != nil {
281+
return err
282+
}
283+
284+
datasourceManifestFilePath := filepath.Join(root, "datasources", fmt.Sprintf("%s.yaml", manifest.Name))
285+
datasourceManifestFileContent, err := renderTemplate(PYTHON_DATASOURCE_MANIFEST_TEMPLATE, manifest, []string{""})
286+
if err != nil {
287+
return err
288+
}
289+
if err := writeFile(datasourceManifestFilePath, datasourceManifestFileContent); err != nil {
290+
return err
291+
}
292+
293+
return nil
294+
}
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# For online document, you can use the following code:
2+
import logging
3+
import re
4+
import urllib.parse
5+
from collections.abc import Generator
6+
from typing import Any
7+
8+
from dify_plugin.entities.datasource import (
9+
DatasourceGetPagesResponse,
10+
DatasourceMessage,
11+
GetOnlineDocumentPageContentRequest,
12+
OnlineDocumentInfo,
13+
OnlineDocumentPage,
14+
)
15+
from dify_plugin.interfaces.datasource.online_document import OnlineDocumentDatasource
16+
17+
logger = logging.getLogger(__name__)
18+
19+
20+
class {{ .PluginName | SnakeToCamel }}DataSource(OnlineDocumentDatasource):
21+
22+
def _get_pages(self, datasource_parameters: dict[str, Any]) -> DatasourceGetPagesResponse:
23+
page = OnlineDocumentPage(
24+
page_name="",
25+
page_id="",
26+
type="page",
27+
last_edited_time="",
28+
parent_id="",
29+
page_icon=None,
30+
)
31+
# Get workspace info from credentials
32+
workspace_name = self.runtime.credentials.get("workspace_name", "{{ .PluginName }}")
33+
workspace_id = self.runtime.credentials.get("workspace_id", "unknown")
34+
workspace_icon = self.runtime.credentials.get("workspace_icon", "")
35+
36+
online_document_info = OnlineDocumentInfo(
37+
workspace_name=workspace_name,
38+
workspace_icon=workspace_icon,
39+
workspace_id=workspace_id,
40+
pages=[page],
41+
total=1,
42+
)
43+
return DatasourceGetPagesResponse(result=[online_document_info])
44+
45+
def _get_content(self, page: GetOnlineDocumentPageContentRequest) -> Generator[DatasourceMessage, None, None]:
46+
47+
yield self.create_variable_message("content", "")
48+
yield self.create_variable_message("page_id", "")
49+
yield self.create_variable_message("workspace_id", "")
50+
51+
52+
# For website crawl, you can use the following code:
53+
# from typing import Any, Generator
54+
55+
# from dify_plugin.entities.datasource import WebSiteInfo, WebSiteInfoDetail
56+
# from dify_plugin.entities.tool import ToolInvokeMessage
57+
# from dify_plugin.interfaces.datasource.website import WebsiteCrawlDatasource
58+
59+
60+
# class {{ .PluginName | SnakeToCamel }}DataSource(WebsiteCrawlDatasource):
61+
62+
# def _get_website_crawl(
63+
# self, datasource_parameters: dict[str, Any]
64+
# ) -> Generator[ToolInvokeMessage, None, None]:
65+
66+
# crawl_res = WebSiteInfo(web_info_list=[], status="", total=0, completed=0)
67+
# crawl_res.status = "processing"
68+
# yield self.create_crawl_message(crawl_res)
69+
70+
# crawl_res.status = "completed"
71+
# crawl_res.web_info_list = [
72+
# WebSiteInfoDetail(
73+
# title="",
74+
# source_url="",
75+
# description="",
76+
# content="",
77+
# )
78+
# ]
79+
# crawl_res.total = 1
80+
# crawl_res.completed = 1
81+
82+
# yield self.create_crawl_message(crawl_res)
83+
84+
85+
# For online drive, you can use the following code:
86+
# from collections.abc import Generator
87+
88+
# from dify_plugin.entities.datasource import (
89+
# DatasourceMessage,
90+
# OnlineDriveBrowseFilesRequest,
91+
# OnlineDriveBrowseFilesResponse,
92+
# OnlineDriveDownloadFileRequest,
93+
# OnlineDriveFile,
94+
# OnlineDriveFileBucket,
95+
# )
96+
# from dify_plugin.interfaces.datasource.online_drive import OnlineDriveDatasource
97+
98+
99+
# class {{ .PluginName | SnakeToCamel }}DataSource(OnlineDriveDatasource):
100+
101+
# def _browse_files(
102+
# self, request: OnlineDriveBrowseFilesRequest
103+
# ) -> OnlineDriveBrowseFilesResponse:
104+
105+
# credentials = self.runtime.credentials
106+
# bucket_name = request.bucket
107+
# prefix = request.prefix or "" # Allow empty prefix for root folder; When you browse the folder, the prefix is the folder id
108+
# max_keys = request.max_keys or 10
109+
# next_page_parameters = request.next_page_parameters or {}
110+
111+
# files = []
112+
# files.append(OnlineDriveFile(
113+
# id="",
114+
# name="",
115+
# size=0,
116+
# type="folder" # or "file"
117+
# ))
118+
119+
# return OnlineDriveBrowseFilesResponse(result=[
120+
# OnlineDriveFileBucket(
121+
# bucket="",
122+
# files=files,
123+
# is_truncated=False,
124+
# next_page_parameters={}
125+
# )
126+
# ])
127+
128+
# # if file.type is "file", the plugin will download the file content
129+
# def _download_file(self, request: OnlineDriveDownloadFileRequest) -> Generator[DatasourceMessage, None, None]:
130+
# credentials = self.runtime.credentials
131+
# file_id = request.id
132+
133+
# file_content = bytes()
134+
# file_name = ""
135+
136+
# mime_type = self._get_mime_type_from_filename(file_name)
137+
138+
# yield self.create_blob_message(file_content, meta={
139+
# "file_name": file_name,
140+
# "mime_type": mime_type
141+
# })
142+
143+
# def _get_mime_type_from_filename(self, filename: str) -> str:
144+
# """Determine MIME type from file extension."""
145+
# import mimetypes
146+
# mime_type, _ = mimetypes.guess_type(filename)
147+
# return mime_type or "application/octet-stream"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
identity:
2+
name: "{{ .PluginName }}"
3+
author: "{{ .Author }}"
4+
label:
5+
en_US: "{{ .PluginName }}"
6+
zh_Hans: "{{ .PluginName }}"
7+
pt_BR: "{{ .PluginName }}"
8+
description:
9+
human:
10+
en_US: "{{ .PluginDescription }}"
11+
zh_Hans: "{{ .PluginDescription }}"
12+
pt_BR: "{{ .PluginDescription }}"
13+
llm: "{{ .PluginDescription }}"
14+
15+
# For website crawl, you can use the following code:
16+
parameters:
17+
- name: query
18+
type: string
19+
required: true
20+
label:
21+
en_US: Query string
22+
zh_Hans: 查询语句
23+
pt_BR: Query string
24+
human_description:
25+
en_US: "{{ .PluginDescription }}"
26+
zh_Hans: "{{ .PluginDescription }}"
27+
pt_BR: "{{ .PluginDescription }}"
28+
llm_description: "{{ .PluginDescription }}"
29+
form: llm
30+
output_schema:
31+
type: object
32+
properties:
33+
source_url:
34+
type: string
35+
description: the source url of the website
36+
content:
37+
type: string
38+
description: the content from the website
39+
title:
40+
type: string
41+
description: the title of the website
42+
"description":
43+
type: string
44+
description: the description of the website
45+
46+
# For online document, you can use the following code:
47+
parameters:
48+
output_schema:
49+
type: object
50+
properties:
51+
workspace_id:
52+
type: string
53+
description: workspace id
54+
page_id:
55+
type: string
56+
description: page id
57+
content:
58+
type: string
59+
description: page content
60+
61+
62+
# For online drive, you can use the following code:
63+
parameters:
64+
output_schema:
65+
type: object
66+
properties:
67+
file:
68+
$ref: "https://dify.ai/schemas/v1/file.json"
69+
70+
extra:
71+
python:
72+
source: datasources/{{ .PluginName }}.py
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from typing import Any, Mapping
2+
import secrets
3+
import urllib.parse
4+
5+
from dify_plugin.errors.tool import ToolProviderCredentialValidationError, DatasourceOAuthError
6+
from dify_plugin.interfaces.datasource import DatasourceProvider, DatasourceOAuthCredentials
7+
8+
9+
10+
class {{ .PluginName | SnakeToCamel }}Provider(DatasourceProvider):
11+
12+
def _validate_credentials(self, credentials: Mapping[str, Any]) -> None:
13+
try:
14+
"""
15+
IMPLEMENT YOUR VALIDATION HERE
16+
"""
17+
except Exception as e:
18+
raise ToolProviderCredentialValidationError(str(e))
19+
20+
21+
#########################################################################################
22+
# If OAuth is supported, uncomment the following functions.
23+
# Warning: please make sure that the sdk version is 0.5.0 or higher.
24+
#########################################################################################
25+
# def _oauth_get_authorization_url(self, redirect_uri: str, system_credentials: Mapping[str, Any]) -> str:
26+
# """
27+
# Generate the authorization URL for {{ .PluginName }} OAuth.
28+
# """
29+
# try:
30+
# """
31+
# IMPLEMENT YOUR AUTHORIZATION URL GENERATION HERE
32+
# """
33+
# except Exception as e:
34+
# raise DatasourceOAuthError(str(e))
35+
# return ""
36+
37+
# def _oauth_get_credentials(
38+
# self, redirect_uri: str, system_credentials: Mapping[str, Any], request: Request
39+
# ) -> DatasourceOAuthCredentials:
40+
# """
41+
# Exchange code for access_token.
42+
# """
43+
# try:
44+
# """
45+
# IMPLEMENT YOUR CREDENTIALS EXCHANGE HERE
46+
# """
47+
# except Exception as e:
48+
# raise DatasourceOAuthError(str(e))
49+
# return DatasourceOAuthCredentials(
50+
# name="",
51+
# avatar_url="",
52+
# expires_at=-1,
53+
# credentials={},
54+
# )
55+
56+
# def _oauth_refresh_credentials(
57+
# self, redirect_uri: str, system_credentials: Mapping[str, Any], credentials: Mapping[str, Any]
58+
# ) -> DatasourceOAuthCredentials:
59+
# """
60+
# Refresh the credentials
61+
# """
62+
# return DatasourceOAuthCredentials(
63+
# name="",
64+
# avatar_url="",
65+
# expires_at=-1,
66+
# credentials={},
67+
# )
68+

0 commit comments

Comments
 (0)