mirror of
https://github.com/langgenius/dify.git
synced 2026-04-29 12:37:20 +08:00
add credential id
This commit is contained in:
parent
bd1d7f8652
commit
ae3addb922
@ -127,7 +127,6 @@ class DatasourceEntity(BaseModel):
|
|||||||
description: I18nObject = Field(..., description="The label of the datasource")
|
description: I18nObject = Field(..., description="The label of the datasource")
|
||||||
output_schema: Optional[dict] = None
|
output_schema: Optional[dict] = None
|
||||||
|
|
||||||
|
|
||||||
@field_validator("parameters", mode="before")
|
@field_validator("parameters", mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:
|
def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
from core.rag.extractor.extractor_base import BaseExtractor
|
from core.rag.extractor.extractor_base import BaseExtractor
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from services.website_service import WebsiteService
|
from services.website_service import WebsiteService
|
||||||
@ -35,9 +34,7 @@ class FirecrawlWebExtractor(BaseExtractor):
|
|||||||
"""Extract content from the URL."""
|
"""Extract content from the URL."""
|
||||||
documents = []
|
documents = []
|
||||||
if self.mode == "crawl":
|
if self.mode == "crawl":
|
||||||
crawl_data = WebsiteService.get_crawl_url_data(
|
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "firecrawl", self._url, self.tenant_id)
|
||||||
self.job_id, "firecrawl", self._url, self.tenant_id
|
|
||||||
)
|
|
||||||
if crawl_data is None:
|
if crawl_data is None:
|
||||||
return []
|
return []
|
||||||
document = Document(
|
document = Document(
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
from core.rag.extractor.extractor_base import BaseExtractor
|
from core.rag.extractor.extractor_base import BaseExtractor
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from services.website_service import WebsiteService
|
from services.website_service import WebsiteService
|
||||||
@ -28,9 +27,7 @@ class JinaReaderWebExtractor(BaseExtractor):
|
|||||||
"""Extract content from the URL."""
|
"""Extract content from the URL."""
|
||||||
documents = []
|
documents = []
|
||||||
if self.mode == "crawl":
|
if self.mode == "crawl":
|
||||||
crawl_data = WebsiteService.get_crawl_url_data(
|
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "jinareader", self._url, self.tenant_id)
|
||||||
self.job_id, "jinareader", self._url, self.tenant_id
|
|
||||||
)
|
|
||||||
if crawl_data is None:
|
if crawl_data is None:
|
||||||
return []
|
return []
|
||||||
document = Document(
|
document = Document(
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
from core.rag.extractor.extractor_base import BaseExtractor
|
from core.rag.extractor.extractor_base import BaseExtractor
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from services.website_service import WebsiteService
|
from services.website_service import WebsiteService
|
||||||
@ -36,9 +35,7 @@ class WaterCrawlWebExtractor(BaseExtractor):
|
|||||||
"""Extract content from the URL."""
|
"""Extract content from the URL."""
|
||||||
documents = []
|
documents = []
|
||||||
if self.mode == "crawl":
|
if self.mode == "crawl":
|
||||||
crawl_data = WebsiteService.get_crawl_url_data(
|
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "watercrawl", self._url, self.tenant_id)
|
||||||
self.job_id, "watercrawl", self._url, self.tenant_id
|
|
||||||
)
|
|
||||||
if crawl_data is None:
|
if crawl_data is None:
|
||||||
return []
|
return []
|
||||||
document = Document(
|
document = Document(
|
||||||
|
|||||||
@ -233,9 +233,11 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
|
|||||||
dataset_process_rule = DatasetProcessRule(
|
dataset_process_rule = DatasetProcessRule(
|
||||||
dataset_id=dataset.id,
|
dataset_id=dataset.id,
|
||||||
mode="hierarchical",
|
mode="hierarchical",
|
||||||
rules=json.dumps({
|
rules=json.dumps(
|
||||||
"parent_mode": parent_childs.parent_mode,
|
{
|
||||||
}),
|
"parent_mode": parent_childs.parent_mode,
|
||||||
|
}
|
||||||
|
),
|
||||||
created_by=document.created_by,
|
created_by=document.created_by,
|
||||||
)
|
)
|
||||||
db.session.add(dataset_process_rule)
|
db.session.add(dataset_process_rule)
|
||||||
|
|||||||
@ -816,7 +816,7 @@ class DocumentSegment(Base):
|
|||||||
base_url = f"/files/{upload_file_id}/file-preview"
|
base_url = f"/files/{upload_file_id}/file-preview"
|
||||||
signed_url = f"{base_url}?{params}"
|
signed_url = f"{base_url}?{params}"
|
||||||
signed_urls.append((match.start(), match.end(), signed_url))
|
signed_urls.append((match.start(), match.end(), signed_url))
|
||||||
|
|
||||||
# For tools directory - direct file formats (e.g., .png, .jpg, etc.)
|
# For tools directory - direct file formats (e.g., .png, .jpg, etc.)
|
||||||
pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?.*?)?"
|
pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?.*?)?"
|
||||||
matches = re.finditer(pattern, text)
|
matches = re.finditer(pattern, text)
|
||||||
|
|||||||
@ -37,7 +37,7 @@ class DatasourceProvider(Base):
|
|||||||
encrypted_credentials: Mapped[dict] = db.Column(JSONB, nullable=False)
|
encrypted_credentials: Mapped[dict] = db.Column(JSONB, nullable=False)
|
||||||
avatar_url: Mapped[str] = db.Column(db.String(255), nullable=True, default="default")
|
avatar_url: Mapped[str] = db.Column(db.String(255), nullable=True, default="default")
|
||||||
is_default: Mapped[bool] = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
|
is_default: Mapped[bool] = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
|
||||||
expires_at: Mapped[int] = db.Column(db.Integer, nullable=False, server_default='-1')
|
expires_at: Mapped[int] = db.Column(db.Integer, nullable=False, server_default="-1")
|
||||||
|
|
||||||
created_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)
|
created_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)
|
||||||
updated_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)
|
updated_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)
|
||||||
|
|||||||
@ -2,7 +2,6 @@ import logging
|
|||||||
import time
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from core.plugin.impl.oauth import OAuthHandler
|
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
@ -14,6 +13,7 @@ from core.helper.provider_cache import NoOpProviderCredentialCache
|
|||||||
from core.model_runtime.entities.provider_entities import FormType
|
from core.model_runtime.entities.provider_entities import FormType
|
||||||
from core.plugin.entities.plugin import DatasourceProviderID
|
from core.plugin.entities.plugin import DatasourceProviderID
|
||||||
from core.plugin.impl.datasource import PluginDatasourceManager
|
from core.plugin.impl.datasource import PluginDatasourceManager
|
||||||
|
from core.plugin.impl.oauth import OAuthHandler
|
||||||
from core.tools.entities.tool_entities import CredentialType
|
from core.tools.entities.tool_entities import CredentialType
|
||||||
from core.tools.utils.encryption import ProviderConfigCache, ProviderConfigEncrypter, create_provider_encrypter
|
from core.tools.utils.encryption import ProviderConfigCache, ProviderConfigEncrypter, create_provider_encrypter
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
@ -143,7 +143,7 @@ class DatasourceProviderService:
|
|||||||
plugin_id=plugin_id,
|
plugin_id=plugin_id,
|
||||||
provider=provider,
|
provider=provider,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_all_datasource_credentials_by_provider(
|
def get_all_datasource_credentials_by_provider(
|
||||||
self,
|
self,
|
||||||
tenant_id: str,
|
tenant_id: str,
|
||||||
|
|||||||
@ -307,15 +307,17 @@ class RagPipelineTransformService:
|
|||||||
if file_id:
|
if file_id:
|
||||||
file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
|
file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
|
||||||
if file:
|
if file:
|
||||||
data_source_info = json.dumps({
|
data_source_info = json.dumps(
|
||||||
"real_file_id": file_id,
|
{
|
||||||
"name": file.name,
|
"real_file_id": file_id,
|
||||||
"size": file.size,
|
"name": file.name,
|
||||||
"extension": file.extension,
|
"size": file.size,
|
||||||
"mime_type": file.mime_type,
|
"extension": file.extension,
|
||||||
"url": "",
|
"mime_type": file.mime_type,
|
||||||
"transfer_method": "local_file",
|
"url": "",
|
||||||
})
|
"transfer_method": "local_file",
|
||||||
|
}
|
||||||
|
)
|
||||||
document.data_source_info = data_source_info
|
document.data_source_info = data_source_info
|
||||||
document_pipeline_execution_log = DocumentPipelineExecutionLog(
|
document_pipeline_execution_log = DocumentPipelineExecutionLog(
|
||||||
document_id=document.id,
|
document_id=document.id,
|
||||||
@ -331,17 +333,19 @@ class RagPipelineTransformService:
|
|||||||
db.session.add(document_pipeline_execution_log)
|
db.session.add(document_pipeline_execution_log)
|
||||||
elif document.data_source_type == "notion_import":
|
elif document.data_source_type == "notion_import":
|
||||||
document.data_source_type = "online_document"
|
document.data_source_type = "online_document"
|
||||||
data_source_info = json.dumps({
|
data_source_info = json.dumps(
|
||||||
"workspace_id": data_source_info_dict.get("notion_workspace_id"),
|
{
|
||||||
"page": {
|
"workspace_id": data_source_info_dict.get("notion_workspace_id"),
|
||||||
"page_id": data_source_info_dict.get("notion_page_id"),
|
"page": {
|
||||||
"page_name": document.name,
|
"page_id": data_source_info_dict.get("notion_page_id"),
|
||||||
"page_icon": data_source_info_dict.get("notion_page_icon"),
|
"page_name": document.name,
|
||||||
"type": data_source_info_dict.get("type"),
|
"page_icon": data_source_info_dict.get("notion_page_icon"),
|
||||||
"last_edited_time": data_source_info_dict.get("last_edited_time"),
|
"type": data_source_info_dict.get("type"),
|
||||||
"parent_id": None,
|
"last_edited_time": data_source_info_dict.get("last_edited_time"),
|
||||||
},
|
"parent_id": None,
|
||||||
})
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
document.data_source_info = data_source_info
|
document.data_source_info = data_source_info
|
||||||
document_pipeline_execution_log = DocumentPipelineExecutionLog(
|
document_pipeline_execution_log = DocumentPipelineExecutionLog(
|
||||||
document_id=document.id,
|
document_id=document.id,
|
||||||
@ -357,12 +361,14 @@ class RagPipelineTransformService:
|
|||||||
db.session.add(document_pipeline_execution_log)
|
db.session.add(document_pipeline_execution_log)
|
||||||
elif document.data_source_type == "website_crawl":
|
elif document.data_source_type == "website_crawl":
|
||||||
document.data_source_type = "website_crawl"
|
document.data_source_type = "website_crawl"
|
||||||
data_source_info = json.dumps({
|
data_source_info = json.dumps(
|
||||||
"source_url": data_source_info_dict.get("url"),
|
{
|
||||||
"content": "",
|
"source_url": data_source_info_dict.get("url"),
|
||||||
"title": document.name,
|
"content": "",
|
||||||
"description": "",
|
"title": document.name,
|
||||||
})
|
"description": "",
|
||||||
|
}
|
||||||
|
)
|
||||||
document.data_source_info = data_source_info
|
document.data_source_info = data_source_info
|
||||||
if data_source_info_dict.get("provider") == "firecrawl":
|
if data_source_info_dict.get("provider") == "firecrawl":
|
||||||
datasource_node_id = firecrawl_node_id
|
datasource_node_id = firecrawl_node_id
|
||||||
@ -381,4 +387,4 @@ class RagPipelineTransformService:
|
|||||||
datasource_node_id=datasource_node_id,
|
datasource_node_id=datasource_node_id,
|
||||||
)
|
)
|
||||||
db.session.add(document)
|
db.session.add(document)
|
||||||
db.session.add(document_pipeline_execution_log)
|
db.session.add(document_pipeline_execution_log)
|
||||||
|
|||||||
@ -98,6 +98,7 @@ class WebsiteCrawlStatusApiRequest:
|
|||||||
|
|
||||||
provider: str
|
provider: str
|
||||||
job_id: str
|
job_id: str
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_args(cls, args: dict, job_id: str) -> "WebsiteCrawlStatusApiRequest":
|
def from_args(cls, args: dict, job_id: str) -> "WebsiteCrawlStatusApiRequest":
|
||||||
"""Create from Flask-RESTful parsed arguments."""
|
"""Create from Flask-RESTful parsed arguments."""
|
||||||
@ -114,9 +115,7 @@ class WebsiteService:
|
|||||||
"""Service class for website crawling operations using different providers."""
|
"""Service class for website crawling operations using different providers."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _get_credentials_and_config(
|
def _get_credentials_and_config(cls, tenant_id: str, provider: str) -> tuple[Any, Any]:
|
||||||
cls, tenant_id: str, provider: str
|
|
||||||
) -> tuple[Any, Any]:
|
|
||||||
"""Get and validate credentials for a provider."""
|
"""Get and validate credentials for a provider."""
|
||||||
if provider == "firecrawl":
|
if provider == "firecrawl":
|
||||||
plugin_id = "langgenius/firecrawl_datasource"
|
plugin_id = "langgenius/firecrawl_datasource"
|
||||||
@ -158,9 +157,7 @@ class WebsiteService:
|
|||||||
"""Crawl a URL using the specified provider with typed request."""
|
"""Crawl a URL using the specified provider with typed request."""
|
||||||
request = api_request.to_crawl_request()
|
request = api_request.to_crawl_request()
|
||||||
|
|
||||||
api_key, config = cls._get_credentials_and_config(
|
api_key, config = cls._get_credentials_and_config(current_user.current_tenant_id, request.provider)
|
||||||
current_user.current_tenant_id, request.provider
|
|
||||||
)
|
|
||||||
|
|
||||||
if request.provider == "firecrawl":
|
if request.provider == "firecrawl":
|
||||||
return cls._crawl_with_firecrawl(request=request, api_key=api_key, config=config)
|
return cls._crawl_with_firecrawl(request=request, api_key=api_key, config=config)
|
||||||
@ -250,9 +247,7 @@ class WebsiteService:
|
|||||||
@classmethod
|
@classmethod
|
||||||
def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
|
def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
|
||||||
"""Get crawl status using typed request."""
|
"""Get crawl status using typed request."""
|
||||||
api_key, config = cls._get_credentials_and_config(
|
api_key, config = cls._get_credentials_and_config(current_user.current_tenant_id, api_request.provider)
|
||||||
current_user.current_tenant_id, api_request.provider
|
|
||||||
)
|
|
||||||
|
|
||||||
if api_request.provider == "firecrawl":
|
if api_request.provider == "firecrawl":
|
||||||
return cls._get_firecrawl_status(api_request.job_id, api_key, config)
|
return cls._get_firecrawl_status(api_request.job_id, api_key, config)
|
||||||
@ -325,9 +320,7 @@ class WebsiteService:
|
|||||||
return crawl_status_data
|
return crawl_status_data
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_crawl_url_data(
|
def get_crawl_url_data(cls, job_id: str, provider: str, url: str, tenant_id: str) -> dict[str, Any] | None:
|
||||||
cls, job_id: str, provider: str, url: str, tenant_id: str
|
|
||||||
) -> dict[str, Any] | None:
|
|
||||||
api_key, config = cls._get_credentials_and_config(tenant_id, provider)
|
api_key, config = cls._get_credentials_and_config(tenant_id, provider)
|
||||||
|
|
||||||
if provider == "firecrawl":
|
if provider == "firecrawl":
|
||||||
@ -398,14 +391,10 @@ class WebsiteService:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_scrape_url_data(
|
def get_scrape_url_data(cls, provider: str, url: str, tenant_id: str, only_main_content: bool) -> dict[str, Any]:
|
||||||
cls, provider: str, url: str, tenant_id: str, only_main_content: bool
|
|
||||||
) -> dict[str, Any]:
|
|
||||||
request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)
|
request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)
|
||||||
|
|
||||||
api_key, config = cls._get_credentials_and_config(
|
api_key, config = cls._get_credentials_and_config(tenant_id=request.tenant_id, provider=request.provider)
|
||||||
tenant_id=request.tenant_id, provider=request.provider
|
|
||||||
)
|
|
||||||
|
|
||||||
if request.provider == "firecrawl":
|
if request.provider == "firecrawl":
|
||||||
return cls._scrape_with_firecrawl(request=request, api_key=api_key, config=config)
|
return cls._scrape_with_firecrawl(request=request, api_key=api_key, config=config)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user