mirror of
https://github.com/langgenius/dify.git
synced 2026-05-09 12:59:18 +08:00
feat: add Perplexity Search built-in tool
Adds a new built-in tool provider `perplexity` with a single tool `perplexity_search` that calls the Perplexity Search API (POST https://api.perplexity.ai/search) and returns ranked web results (title, url, snippet, date) as a JSON message, link messages, and a human-readable text message.

The provider mirrors the existing Tavily reference pattern:
- `perplexity.yaml` with a `perplexity_api_key` secret-input credential
- `perplexity.py` provider whose credential validation runs a tiny search query through the tool
- `tools/perplexity_search.{yaml,py}` exposing `query`, `max_results`, `search_domain_filter`, `search_recency_filter`, `search_after_date_filter`, and `search_before_date_filter`
- minimal SVG icon under `_assets/`

Registered in `builtin_tool/_position.yaml` so the provider shows up in the same UI ordering as the other built-ins.

Tests: nine unit tests in `api/tests/unit_tests/core/tools/test_perplexity_search.py` covering payload construction, default/override behavior, domain filter parsing, result rendering, missing-query and missing-credentials paths, HTTP error mapping to ToolInvokeError, and end-to-end message generation with the HTTP layer mocked.

Docs:
- https://docs.perplexity.ai/docs/search/quickstart
- https://docs.perplexity.ai/api-reference/search-post
This commit is contained in:
parent
9f47317032
commit
d605ff5764
@ -2,3 +2,4 @@
|
||||
- code
|
||||
- time
|
||||
- webscraper
|
||||
- perplexity
|
||||
|
||||
@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64">
|
||||
<rect width="64" height="64" rx="12" fill="#1F1F1F"/>
|
||||
<text x="32" y="42" font-family="Helvetica, Arial, sans-serif" font-size="34" font-weight="700" fill="#20B8CD" text-anchor="middle">P</text>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 290 B |
@ -0,0 +1,26 @@
|
||||
from typing import Any
|
||||
|
||||
from core.tools.__base.tool_runtime import ToolRuntime
|
||||
from core.tools.builtin_tool.provider import BuiltinToolProviderController
|
||||
from core.tools.errors import ToolProviderCredentialValidationError
|
||||
|
||||
|
||||
class PerplexityProvider(BuiltinToolProviderController):
    """Built-in tool provider for the Perplexity Search API."""

    def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
        """Validate the credentials by running a minimal live search.

        Args:
            user_id: Identifier forwarded to the tool invocation.
            credentials: Provider credentials; ``perplexity_api_key`` is read
                from this mapping.

        Raises:
            ToolProviderCredentialValidationError: If the API key is empty,
                the ``perplexity_search`` tool is not registered, or the
                probe search fails for any reason.
        """
        # The search tool answers a missing key with a plain text message
        # instead of raising, so an empty key has to be rejected here;
        # otherwise the probe below would "succeed" without any HTTP call.
        api_key = credentials.get("perplexity_api_key") if credentials else None
        if not str(api_key or "").strip():
            raise ToolProviderCredentialValidationError("Perplexity API key is required.")

        try:
            tool = self.get_tool("perplexity_search")
            if tool is None:
                raise ToolProviderCredentialValidationError("perplexity_search tool is not registered")
            forked = tool.fork_tool_runtime(
                runtime=ToolRuntime(tenant_id="", credentials=credentials),
            )
            for _ in forked.invoke(
                user_id=user_id,
                tool_parameters={"query": "ping", "max_results": 1},
            ):
                # Drain the generator so any HTTP error is surfaced.
                pass
        except ToolProviderCredentialValidationError:
            # Already the right type; do not wrap it a second time.
            raise
        except Exception as e:
            raise ToolProviderCredentialValidationError(str(e)) from e
|
||||
@ -0,0 +1,31 @@
|
||||
identity:
|
||||
author: Perplexity
|
||||
name: perplexity
|
||||
label:
|
||||
en_US: Perplexity Search
|
||||
zh_Hans: Perplexity 搜索
|
||||
pt_BR: Perplexity Search
|
||||
description:
|
||||
en_US: Search the web for up-to-date information using the Perplexity Search API. Returns ranked results with snippets, titles, URLs, and dates.
|
||||
zh_Hans: 使用 Perplexity Search API 搜索最新的网络信息,返回包含摘要、标题、链接和日期的排序结果。
|
||||
pt_BR: Search the web for up-to-date information using the Perplexity Search API. Returns ranked results with snippets, titles, URLs, and dates.
|
||||
icon: icon.svg
|
||||
tags:
|
||||
- search
|
||||
credentials_for_provider:
|
||||
perplexity_api_key:
|
||||
type: secret-input
|
||||
required: true
|
||||
label:
|
||||
en_US: Perplexity API key
|
||||
zh_Hans: Perplexity API key
|
||||
pt_BR: Perplexity API key
|
||||
placeholder:
|
||||
en_US: Please input your Perplexity API key
|
||||
zh_Hans: 请输入你的 Perplexity API key
|
||||
pt_BR: Please input your Perplexity API key
|
||||
help:
|
||||
en_US: Get your Perplexity API key from the Perplexity dashboard.
|
||||
zh_Hans: 在 Perplexity 控制台获取你的 API key。
|
||||
pt_BR: Get your Perplexity API key from the Perplexity dashboard.
|
||||
url: https://www.perplexity.ai/account/api/keys
|
||||
@ -0,0 +1,127 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from core.tools.builtin_tool.tool import BuiltinTool
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
from core.tools.errors import ToolInvokeError
|
||||
|
||||
PERPLEXITY_SEARCH_URL = "https://api.perplexity.ai/search"
|
||||
DEFAULT_MAX_RESULTS = 5
|
||||
HTTP_TIMEOUT = 30
|
||||
|
||||
|
||||
def _split_domains(value: str) -> list[str]:
|
||||
return [d.strip() for d in value.replace(",", " ").split() if d.strip()]
|
||||
|
||||
|
||||
def _build_payload(tool_parameters: dict[str, Any]) -> dict[str, Any]:
    """Translate tool parameters into a Perplexity Search request body.

    Args:
        tool_parameters: Raw tool parameters. ``query`` is mandatory;
            ``max_results``, ``search_domain_filter``,
            ``search_recency_filter``, ``search_after_date_filter`` and
            ``search_before_date_filter`` are optional.

    Returns:
        A dict holding ``query``, an integer ``max_results`` and every
        optional filter that was supplied with a non-blank value.

    Raises:
        KeyError: If ``query`` is absent from ``tool_parameters``.
    """
    # Bounds advertised by the tool definition (min: 1, max: 20).
    lowest, highest = 1, 20

    payload: dict[str, Any] = {"query": tool_parameters["query"]}

    raw_max = tool_parameters.get("max_results")
    if raw_max in (None, ""):
        raw_max = DEFAULT_MAX_RESULTS
    try:
        max_results = int(raw_max)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers float("inf"), which int() rejects with a
        # different exception than other malformed values.
        max_results = DEFAULT_MAX_RESULTS
    # Clamp instead of forwarding an out-of-range value to the API.
    payload["max_results"] = min(max(max_results, lowest), highest)

    domain_filter = tool_parameters.get("search_domain_filter")
    if isinstance(domain_filter, str):
        domains = _split_domains(domain_filter)
    elif isinstance(domain_filter, list):
        # Skip None so it is not stringified into the bogus domain "None".
        cleaned = (str(entry).strip() for entry in domain_filter if entry is not None)
        domains = [entry for entry in cleaned if entry]
    else:
        domains = []
    if domains:
        payload["search_domain_filter"] = domains

    # These filters are forwarded verbatim (trimmed) when non-blank strings.
    for key in (
        "search_recency_filter",
        "search_after_date_filter",
        "search_before_date_filter",
    ):
        value = tool_parameters.get(key)
        if isinstance(value, str) and value.strip():
            payload[key] = value.strip()

    return payload
|
||||
|
||||
|
||||
class PerplexitySearchTool(BuiltinTool):
    """Built-in tool that queries the Perplexity Search API for web results."""

    def _invoke(
        self,
        user_id: str,
        tool_parameters: dict[str, Any],
        conversation_id: str | None = None,
        app_id: str | None = None,
        message_id: str | None = None,
    ) -> Generator[ToolInvokeMessage, None, None]:
        """Run one search and yield the results as tool messages.

        Args:
            user_id: Caller identifier (not used by this method).
            tool_parameters: Must contain ``query``; may contain
                ``max_results`` and the optional search filters.
            conversation_id: Not used by this method.
            app_id: Not used by this method.
            message_id: Not used by this method.

        Yields:
            A single text message when the query or API key is missing or
            when no results come back. Otherwise one JSON message with the
            raw response, one link message per result URL, and a final
            markdown text summary.

        Raises:
            ToolInvokeError: On HTTP/transport failure, an undecodable JSON
                body, or a response that is not a JSON object.
        """
        # str() keeps a non-string query (e.g. a number) from crashing .strip().
        query = str(tool_parameters.get("query") or "").strip()
        if not query:
            yield self.create_text_message("Please input a query.")
            return

        api_key = (self.runtime.credentials or {}).get("perplexity_api_key") if self.runtime else None
        if not api_key:
            yield self.create_text_message(
                "Perplexity API key is missing. Please set 'perplexity_api_key' in credentials."
            )
            return

        payload = _build_payload({**tool_parameters, "query": query})

        try:
            response = requests.post(
                PERPLEXITY_SEARCH_URL,
                json=payload,
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                timeout=HTTP_TIMEOUT,
            )
            response.raise_for_status()
        except requests.HTTPError as e:
            raise ToolInvokeError(f"Perplexity Search request failed: {e}") from e
        except requests.RequestException as e:
            raise ToolInvokeError(f"Perplexity Search request error: {e}") from e

        # Decoded in its own try block: in recent requests releases the JSON
        # decode error is also a RequestException, so sharing the block above
        # would report a malformed body as a generic request error.
        try:
            data = response.json()
        except ValueError as e:
            raise ToolInvokeError(f"Perplexity Search returned invalid JSON: {e}") from e

        if not isinstance(data, dict):
            raise ToolInvokeError("Perplexity Search returned an unexpected response format.")

        # Keep only well-formed entries so rendering below cannot fail on
        # a non-list "results" field or a non-dict item.
        raw_results = data.get("results")
        results = [item for item in raw_results if isinstance(item, dict)] if isinstance(raw_results, list) else []
        if not results:
            yield self.create_text_message(f"No results found for '{query}'.")
            return

        yield self.create_json_message(data)

        for result in results:
            url = result.get("url")
            if isinstance(url, str) and url:
                yield self.create_link_message(url)

        yield self.create_text_message(self._format_results_as_text(results))

    @staticmethod
    def _format_results_as_text(results: list[dict[str, Any]]) -> str:
        """Render search results as a markdown summary.

        Args:
            results: Result dicts; ``title``, ``url``, ``snippet`` and
                ``date`` are read when present.

        Returns:
            One section per result (heading, optional date, URL and snippet),
            each followed by a ``---`` separator, joined by newlines.
        """
        lines: list[str] = []
        for idx, result in enumerate(results, 1):
            title = result.get("title") or "Untitled"
            url = result.get("url") or ""
            snippet = result.get("snippet") or ""
            date = result.get("date") or ""

            # Only build a markdown link when there is a target; "[title]()"
            # would render as a dead link.
            heading = f"[{title}]({url})" if url else f"{title}"
            lines.append(f"### Result {idx}: {heading}")
            if date:
                lines.append(f"**Date:** {date}")
            if url:
                lines.append(f"**URL:** {url}")
            if snippet:
                lines.append(f"{snippet}")
            lines.append("---")
        return "\n".join(lines)
|
||||
@ -0,0 +1,116 @@
|
||||
identity:
|
||||
name: perplexity_search
|
||||
author: Perplexity
|
||||
label:
|
||||
en_US: Perplexity Search
|
||||
zh_Hans: Perplexity 搜索
|
||||
pt_BR: Perplexity Search
|
||||
description:
|
||||
human:
|
||||
en_US: Search the web for up-to-date information using the Perplexity Search API. Returns ranked results with snippets, titles, URLs, and dates.
|
||||
zh_Hans: 使用 Perplexity Search API 搜索最新的网络信息,返回包含摘要、标题、链接和日期的排序结果。
|
||||
pt_BR: Search the web for up-to-date information using the Perplexity Search API. Returns ranked results with snippets, titles, URLs, and dates.
|
||||
llm: A search engine that returns ranked web results (title, url, snippet, date) for the given query. Use it to fetch current information from the web.
|
||||
parameters:
|
||||
- name: query
|
||||
type: string
|
||||
required: true
|
||||
label:
|
||||
en_US: Query
|
||||
zh_Hans: 查询
|
||||
pt_BR: Query
|
||||
human_description:
|
||||
en_US: The search query string.
|
||||
zh_Hans: 搜索查询字符串。
|
||||
pt_BR: The search query string.
|
||||
llm_description: The search query to send to Perplexity.
|
||||
form: llm
|
||||
- name: max_results
|
||||
type: number
|
||||
required: false
|
||||
label:
|
||||
en_US: Max Results
|
||||
zh_Hans: 最大结果数
|
||||
pt_BR: Max Results
|
||||
human_description:
|
||||
en_US: Maximum number of search results to return (1-20).
|
||||
zh_Hans: 返回的最大搜索结果数(1-20)。
|
||||
pt_BR: Maximum number of search results to return (1-20).
|
||||
form: form
|
||||
default: 5
|
||||
min: 1
|
||||
max: 20
|
||||
- name: search_domain_filter
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: Search Domain Filter
|
||||
zh_Hans: 搜索域过滤
|
||||
pt_BR: Search Domain Filter
|
||||
human_description:
|
||||
en_US: Comma-separated list of domains. Use a leading "-" to deny a domain (e.g. "-pinterest.com"). Do not mix allow and deny entries.
|
||||
zh_Hans: 用逗号分隔的域名列表。使用前缀 "-" 进行拒绝(如 "-pinterest.com")。不要混合允许和拒绝条目。
|
||||
pt_BR: Comma-separated list of domains. Use a leading "-" to deny a domain (e.g. "-pinterest.com"). Do not mix allow and deny entries.
|
||||
form: form
|
||||
- name: search_recency_filter
|
||||
type: select
|
||||
required: false
|
||||
label:
|
||||
en_US: Search Recency Filter
|
||||
zh_Hans: 时间范围
|
||||
pt_BR: Search Recency Filter
|
||||
human_description:
|
||||
en_US: Restrict results to a recent time window.
|
||||
zh_Hans: 将结果限制在最近的时间范围。
|
||||
pt_BR: Restrict results to a recent time window.
|
||||
form: form
|
||||
options:
|
||||
- value: hour
|
||||
label:
|
||||
en_US: Past hour
|
||||
zh_Hans: 过去一小时
|
||||
pt_BR: Past hour
|
||||
- value: day
|
||||
label:
|
||||
en_US: Past day
|
||||
zh_Hans: 过去一天
|
||||
pt_BR: Past day
|
||||
- value: week
|
||||
label:
|
||||
en_US: Past week
|
||||
zh_Hans: 过去一周
|
||||
pt_BR: Past week
|
||||
- value: month
|
||||
label:
|
||||
en_US: Past month
|
||||
zh_Hans: 过去一月
|
||||
pt_BR: Past month
|
||||
- value: year
|
||||
label:
|
||||
en_US: Past year
|
||||
zh_Hans: 过去一年
|
||||
pt_BR: Past year
|
||||
- name: search_after_date_filter
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: After Date
|
||||
zh_Hans: 起始日期
|
||||
pt_BR: After Date
|
||||
human_description:
|
||||
en_US: Only return results published on or after this date. Format m/d/yyyy (e.g. 1/1/2025).
|
||||
zh_Hans: 只返回在该日期之后发布的结果。格式为 m/d/yyyy(例如 1/1/2025)。
|
||||
pt_BR: Only return results published on or after this date. Format m/d/yyyy (e.g. 1/1/2025).
|
||||
form: form
|
||||
- name: search_before_date_filter
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: Before Date
|
||||
zh_Hans: 截止日期
|
||||
pt_BR: Before Date
|
||||
human_description:
|
||||
en_US: Only return results published on or before this date. Format m/d/yyyy (e.g. 12/31/2025).
|
||||
zh_Hans: 只返回在该日期之前发布的结果。格式为 m/d/yyyy(例如 12/31/2025)。
|
||||
pt_BR: Only return results published on or before this date. Format m/d/yyyy (e.g. 12/31/2025).
|
||||
form: form
|
||||
191
api/tests/unit_tests/core/tools/test_perplexity_search.py
Normal file
191
api/tests/unit_tests/core/tools/test_perplexity_search.py
Normal file
@ -0,0 +1,191 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||
from core.tools.__base.tool_runtime import ToolRuntime
|
||||
from core.tools.builtin_tool.providers.perplexity.tools.perplexity_search import (
|
||||
PERPLEXITY_SEARCH_URL,
|
||||
PerplexitySearchTool,
|
||||
_build_payload,
|
||||
)
|
||||
from core.tools.entities.common_entities import I18nObject
|
||||
from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage
|
||||
from core.tools.errors import ToolInvokeError
|
||||
|
||||
|
||||
def _make_tool(api_key: str | None = "test-key") -> PerplexitySearchTool:
    """Build a search tool bound to a debugger runtime.

    Passing ``api_key=None`` leaves the runtime credentials empty.
    """
    credentials: dict[str, Any] = {} if api_key is None else {"perplexity_api_key": api_key}
    identity = ToolIdentity(
        author="Perplexity",
        name="perplexity_search",
        label=I18nObject(en_US="Perplexity Search"),
        provider="perplexity",
    )
    return PerplexitySearchTool(
        provider="perplexity",
        entity=ToolEntity(identity=identity, parameters=[]),
        runtime=ToolRuntime(tenant_id="t1", credentials=credentials, invoke_from=InvokeFrom.DEBUGGER),
    )
|
||||
|
||||
|
||||
def _mock_response(payload: dict[str, Any], status: int = 200) -> MagicMock:
    """Create a fake ``requests.Response`` that returns ``payload`` from ``json()``.

    For statuses of 400 and above, ``raise_for_status()`` raises ``HTTPError``.
    """
    fake = MagicMock(spec=requests.Response)
    fake.status_code = status
    fake.json.return_value = payload
    if status < 400:
        fake.raise_for_status.return_value = None
    else:
        fake.raise_for_status.side_effect = requests.HTTPError(f"HTTP {status}")
    return fake
|
||||
|
||||
|
||||
def test_build_payload_defaults_and_overrides():
    """Defaults apply for a bare query; explicit values are coerced and kept."""
    assert _build_payload({"query": "hello"}) == {"query": "hello", "max_results": 5}

    overrides = {
        "query": "hello",
        "max_results": "12",
        "search_recency_filter": "week",
        "search_after_date_filter": "1/1/2025",
        "search_before_date_filter": "12/31/2025",
    }
    built = _build_payload(overrides)

    # The numeric string is converted to an int; filters pass through as-is.
    assert built["max_results"] == 12
    assert built["search_recency_filter"] == "week"
    assert built["search_after_date_filter"] == "1/1/2025"
    assert built["search_before_date_filter"] == "12/31/2025"
|
||||
|
||||
|
||||
def test_build_payload_domain_filter_supports_string_and_list():
    """Domain filters are accepted as a delimited string or a list; blanks are dropped."""
    from_string = _build_payload({"query": "x", "search_domain_filter": "nytimes.com, -pinterest.com"})
    assert from_string["search_domain_filter"] == ["nytimes.com", "-pinterest.com"]

    from_list = _build_payload({"query": "x", "search_domain_filter": ["arxiv.org", " ", "nature.com"]})
    assert from_list["search_domain_filter"] == ["arxiv.org", "nature.com"]

    # An empty filter must not appear in the payload at all.
    from_empty = _build_payload({"query": "x", "search_domain_filter": ""})
    assert "search_domain_filter" not in from_empty
|
||||
|
||||
|
||||
def test_invoke_returns_messages_for_results():
    """A successful search posts once and yields JSON, one link per result, then text."""
    tool = _make_tool()
    search_results = [
        {"title": "T1", "url": "https://example.com/1", "snippet": "s1", "date": "2025-01-01"},
        {"title": "T2", "url": "https://example.com/2", "snippet": "s2"},
    ]
    canned = _mock_response({"id": "abc", "results": search_results})

    with patch(
        "core.tools.builtin_tool.providers.perplexity.tools.perplexity_search.requests.post",
        return_value=canned,
    ) as mock_post:
        messages = list(tool.invoke(user_id="u1", tool_parameters={"query": "test", "max_results": 2}))

    # Exactly one request, aimed at the search endpoint with the right body/auth.
    assert mock_post.call_count == 1
    positional, keyword = mock_post.call_args
    assert positional[0] == PERPLEXITY_SEARCH_URL
    request_body = keyword["json"]
    assert request_body["query"] == "test"
    assert request_body["max_results"] == 2
    assert keyword["headers"]["Authorization"] == "Bearer test-key"

    kinds = [message.type for message in messages]
    assert ToolInvokeMessage.MessageType.JSON in kinds
    assert kinds.count(ToolInvokeMessage.MessageType.LINK) == 2
    assert kinds[-1] == ToolInvokeMessage.MessageType.TEXT
|
||||
|
||||
|
||||
def test_invoke_with_no_results_returns_friendly_text():
    """An empty result list produces a single explanatory text message."""
    tool = _make_tool()
    empty_response = _mock_response({"results": []})

    with patch(
        "core.tools.builtin_tool.providers.perplexity.tools.perplexity_search.requests.post",
        return_value=empty_response,
    ):
        messages = list(tool.invoke(user_id="u1", tool_parameters={"query": "obscure"}))

    assert len(messages) == 1
    only = messages[0]
    assert only.type == ToolInvokeMessage.MessageType.TEXT
    assert "No results" in only.message.text
|
||||
|
||||
|
||||
def test_invoke_missing_query_yields_prompt():
    """A whitespace-only query asks for input and never reaches the network."""
    tool = _make_tool()
    blank_query = {"query": "   "}

    with patch("core.tools.builtin_tool.providers.perplexity.tools.perplexity_search.requests.post") as mock_post:
        messages = list(tool.invoke(user_id="u1", tool_parameters=blank_query))

    mock_post.assert_not_called()
    assert len(messages) == 1
    prompt_text = messages[0].message.text
    assert "query" in prompt_text.lower()
|
||||
|
||||
|
||||
def test_invoke_missing_api_key_yields_prompt():
    """Without an API key the tool names the missing credential and skips the request."""
    keyless_tool = _make_tool(api_key=None)

    with patch("core.tools.builtin_tool.providers.perplexity.tools.perplexity_search.requests.post") as mock_post:
        messages = list(keyless_tool.invoke(user_id="u1", tool_parameters={"query": "anything"}))

    mock_post.assert_not_called()
    assert len(messages) == 1
    prompt_text = messages[0].message.text
    assert "perplexity_api_key" in prompt_text
|
||||
|
||||
|
||||
def test_invoke_http_error_raises_tool_invoke_error():
    """An HTTP 500 from the API surfaces as ToolInvokeError."""
    tool = _make_tool()
    failing_response = _mock_response({}, status=500)

    # Both context managers are entered together; the error must be raised
    # while the generator is being consumed.
    with (
        patch(
            "core.tools.builtin_tool.providers.perplexity.tools.perplexity_search.requests.post",
            return_value=failing_response,
        ),
        pytest.raises(ToolInvokeError),
    ):
        list(tool.invoke(user_id="u1", tool_parameters={"query": "boom"}))
|
||||
|
||||
|
||||
def test_invoke_passes_filter_parameters_through():
    """Every supplied filter reaches the request body, with domains split into a list."""
    tool = _make_tool()
    parameters = {
        "query": "ai",
        "max_results": 3,
        "search_domain_filter": "nytimes.com,-pinterest.com",
        "search_recency_filter": "month",
        "search_after_date_filter": "1/1/2025",
        "search_before_date_filter": "12/31/2025",
    }
    one_hit = _mock_response({"results": [{"title": "x", "url": "https://x.test"}]})

    with patch(
        "core.tools.builtin_tool.providers.perplexity.tools.perplexity_search.requests.post",
        return_value=one_hit,
    ) as mock_post:
        # Consume the generator so the request is actually issued.
        list(tool.invoke(user_id="u1", tool_parameters=parameters))

    expected_body = {
        "query": "ai",
        "max_results": 3,
        "search_domain_filter": ["nytimes.com", "-pinterest.com"],
        "search_recency_filter": "month",
        "search_after_date_filter": "1/1/2025",
        "search_before_date_filter": "12/31/2025",
    }
    assert mock_post.call_args.kwargs["json"] == expected_body
|
||||
|
||||
|
||||
def test_format_results_as_text_renders_each_result():
    """The text rendering contains a numbered section for each result."""
    sample_results = [
        {"title": "Title", "url": "https://example.com", "snippet": "Snippet", "date": "2025-04-01"},
        {"title": "Other", "url": "https://other.example.com"},
    ]
    rendered = PerplexitySearchTool._format_results_as_text(sample_results)

    expected_fragments = (
        "Result 1",
        "[Title](https://example.com)",
        "Snippet",
        "2025-04-01",
        "Result 2",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||
Loading…
Reference in New Issue
Block a user