mirror of https://github.com/langgenius/dify.git
add credential id
This commit is contained in:
parent bd1d7f8652
commit ae3addb922
@@ -127,7 +127,6 @@ class DatasourceEntity(BaseModel):
     description: I18nObject = Field(..., description="The label of the datasource")
     output_schema: Optional[dict] = None

     @field_validator("parameters", mode="before")
     @classmethod
     def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:

@@ -1,4 +1,3 @@
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from services.website_service import WebsiteService

@@ -35,9 +34,7 @@ class FirecrawlWebExtractor(BaseExtractor):
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
-            crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "firecrawl", self._url, self.tenant_id
-            )
+            crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "firecrawl", self._url, self.tenant_id)
             if crawl_data is None:
                 return []
             document = Document(

@@ -1,4 +1,3 @@
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from services.website_service import WebsiteService

@@ -28,9 +27,7 @@ class JinaReaderWebExtractor(BaseExtractor):
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
-            crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "jinareader", self._url, self.tenant_id
-            )
+            crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "jinareader", self._url, self.tenant_id)
             if crawl_data is None:
                 return []
             document = Document(

@@ -1,4 +1,3 @@
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from services.website_service import WebsiteService

@@ -36,9 +35,7 @@ class WaterCrawlWebExtractor(BaseExtractor):
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
-            crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "watercrawl", self._url, self.tenant_id
-            )
+            crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "watercrawl", self._url, self.tenant_id)
             if crawl_data is None:
                 return []
             document = Document(

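The three extractor hunks above apply the same change: the multi-line WebsiteService.get_crawl_url_data call is collapsed onto a single line. A minimal sketch of the shared extract() pattern, assuming illustrative scaffolding around the lines shown in the diff (the constructor, the crawl payload key "markdown", and the Document metadata below are assumptions, not part of this commit):

# Sketch of the pattern shared by FirecrawlWebExtractor, JinaReaderWebExtractor and
# WaterCrawlWebExtractor; "firecrawl" is one of the three provider names seen above.
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
from services.website_service import WebsiteService


class ExampleWebExtractor(BaseExtractor):
    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl"):
        self._url = url
        self.job_id = job_id
        self.tenant_id = tenant_id
        self.mode = mode

    def extract(self) -> list[Document]:
        """Extract content from the URL."""
        documents = []
        if self.mode == "crawl":
            # Single-line call, as reformatted in the hunks above.
            crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "firecrawl", self._url, self.tenant_id)
            if crawl_data is None:
                return []
            # Illustrative fields; the real extractors build the Document from the
            # crawl payload returned by the provider.
            document = Document(
                page_content=crawl_data.get("markdown", ""),
                metadata={"source_url": self._url},
            )
            documents.append(document)
        return documents
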
@@ -233,9 +233,11 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
             dataset_process_rule = DatasetProcessRule(
                 dataset_id=dataset.id,
                 mode="hierarchical",
-                rules=json.dumps({
-                    "parent_mode": parent_childs.parent_mode,
-                }),
+                rules=json.dumps(
+                    {
+                        "parent_mode": parent_childs.parent_mode,
+                    }
+                ),
                 created_by=document.created_by,
             )
             db.session.add(dataset_process_rule)

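The hunk above only reformats the call; the serialized rules payload is unchanged. A tiny illustration (the parent_mode value "paragraph" is a placeholder, not taken from this diff):

import json

# The old single-line and the new expanded json.dumps(...) layouts produce the
# same string for the same input dict.
rules = json.dumps(
    {
        "parent_mode": "paragraph",  # placeholder value
    }
)
assert rules == '{"parent_mode": "paragraph"}'
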
@@ -816,7 +816,7 @@ class DocumentSegment(Base):
             base_url = f"/files/{upload_file_id}/file-preview"
             signed_url = f"{base_url}?{params}"
             signed_urls.append((match.start(), match.end(), signed_url))

         # For tools directory - direct file formats (e.g., .png, .jpg, etc.)
         pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?.*?)?"
         matches = re.finditer(pattern, text)

@@ -37,7 +37,7 @@ class DatasourceProvider(Base):
     encrypted_credentials: Mapped[dict] = db.Column(JSONB, nullable=False)
     avatar_url: Mapped[str] = db.Column(db.String(255), nullable=True, default="default")
     is_default: Mapped[bool] = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
-    expires_at: Mapped[int] = db.Column(db.Integer, nullable=False, server_default='-1')
+    expires_at: Mapped[int] = db.Column(db.Integer, nullable=False, server_default="-1")

     created_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)
     updated_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)

@@ -2,7 +2,6 @@ import logging
 import time
 from typing import Any

-from core.plugin.impl.oauth import OAuthHandler
 from flask_login import current_user
 from sqlalchemy.orm import Session

@@ -14,6 +13,7 @@ from core.helper.provider_cache import NoOpProviderCredentialCache
 from core.model_runtime.entities.provider_entities import FormType
 from core.plugin.entities.plugin import DatasourceProviderID
 from core.plugin.impl.datasource import PluginDatasourceManager
+from core.plugin.impl.oauth import OAuthHandler
 from core.tools.entities.tool_entities import CredentialType
 from core.tools.utils.encryption import ProviderConfigCache, ProviderConfigEncrypter, create_provider_encrypter
 from extensions.ext_database import db

@@ -143,7 +143,7 @@ class DatasourceProviderService:
             plugin_id=plugin_id,
             provider=provider,
         )

     def get_all_datasource_credentials_by_provider(
         self,
         tenant_id: str,

@@ -307,15 +307,17 @@ class RagPipelineTransformService:
             if file_id:
                 file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
                 if file:
-                    data_source_info = json.dumps({
-                        "real_file_id": file_id,
-                        "name": file.name,
-                        "size": file.size,
-                        "extension": file.extension,
-                        "mime_type": file.mime_type,
-                        "url": "",
-                        "transfer_method": "local_file",
-                    })
+                    data_source_info = json.dumps(
+                        {
+                            "real_file_id": file_id,
+                            "name": file.name,
+                            "size": file.size,
+                            "extension": file.extension,
+                            "mime_type": file.mime_type,
+                            "url": "",
+                            "transfer_method": "local_file",
+                        }
+                    )
                     document.data_source_info = data_source_info
                     document_pipeline_execution_log = DocumentPipelineExecutionLog(
                         document_id=document.id,

@@ -331,17 +333,19 @@ class RagPipelineTransformService:
                     db.session.add(document_pipeline_execution_log)
         elif document.data_source_type == "notion_import":
             document.data_source_type = "online_document"
-            data_source_info = json.dumps({
-                "workspace_id": data_source_info_dict.get("notion_workspace_id"),
-                "page": {
-                    "page_id": data_source_info_dict.get("notion_page_id"),
-                    "page_name": document.name,
-                    "page_icon": data_source_info_dict.get("notion_page_icon"),
-                    "type": data_source_info_dict.get("type"),
-                    "last_edited_time": data_source_info_dict.get("last_edited_time"),
-                    "parent_id": None,
-                },
-            })
+            data_source_info = json.dumps(
+                {
+                    "workspace_id": data_source_info_dict.get("notion_workspace_id"),
+                    "page": {
+                        "page_id": data_source_info_dict.get("notion_page_id"),
+                        "page_name": document.name,
+                        "page_icon": data_source_info_dict.get("notion_page_icon"),
+                        "type": data_source_info_dict.get("type"),
+                        "last_edited_time": data_source_info_dict.get("last_edited_time"),
+                        "parent_id": None,
+                    },
+                }
+            )
             document.data_source_info = data_source_info
             document_pipeline_execution_log = DocumentPipelineExecutionLog(
                 document_id=document.id,

@@ -357,12 +361,14 @@ class RagPipelineTransformService:
             db.session.add(document_pipeline_execution_log)
         elif document.data_source_type == "website_crawl":
             document.data_source_type = "website_crawl"
-            data_source_info = json.dumps({
-                "source_url": data_source_info_dict.get("url"),
-                "content": "",
-                "title": document.name,
-                "description": "",
-            })
+            data_source_info = json.dumps(
+                {
+                    "source_url": data_source_info_dict.get("url"),
+                    "content": "",
+                    "title": document.name,
+                    "description": "",
+                }
+            )
             document.data_source_info = data_source_info
             if data_source_info_dict.get("provider") == "firecrawl":
                 datasource_node_id = firecrawl_node_id

@@ -381,4 +387,4 @@ class RagPipelineTransformService:
                 datasource_node_id=datasource_node_id,
             )
             db.session.add(document)
-            db.session.add(document_pipeline_execution_log)
+            db.session.add(document_pipeline_execution_log)

@@ -98,6 +98,7 @@ class WebsiteCrawlStatusApiRequest:

     provider: str
     job_id: str

     @classmethod
     def from_args(cls, args: dict, job_id: str) -> "WebsiteCrawlStatusApiRequest":
         """Create from Flask-RESTful parsed arguments."""

@@ -114,9 +115,7 @@ class WebsiteService:
     """Service class for website crawling operations using different providers."""

     @classmethod
-    def _get_credentials_and_config(
-        cls, tenant_id: str, provider: str
-    ) -> tuple[Any, Any]:
+    def _get_credentials_and_config(cls, tenant_id: str, provider: str) -> tuple[Any, Any]:
         """Get and validate credentials for a provider."""
         if provider == "firecrawl":
             plugin_id = "langgenius/firecrawl_datasource"

@@ -158,9 +157,7 @@ class WebsiteService:
         """Crawl a URL using the specified provider with typed request."""
         request = api_request.to_crawl_request()

-        api_key, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, request.provider
-        )
+        api_key, config = cls._get_credentials_and_config(current_user.current_tenant_id, request.provider)

         if request.provider == "firecrawl":
             return cls._crawl_with_firecrawl(request=request, api_key=api_key, config=config)

@@ -250,9 +247,7 @@ class WebsiteService:
     @classmethod
     def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
         """Get crawl status using typed request."""
-        api_key, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, api_request.provider
-        )
+        api_key, config = cls._get_credentials_and_config(current_user.current_tenant_id, api_request.provider)

         if api_request.provider == "firecrawl":
             return cls._get_firecrawl_status(api_request.job_id, api_key, config)

@@ -325,9 +320,7 @@ class WebsiteService:
         return crawl_status_data

     @classmethod
-    def get_crawl_url_data(
-        cls, job_id: str, provider: str, url: str, tenant_id: str
-    ) -> dict[str, Any] | None:
+    def get_crawl_url_data(cls, job_id: str, provider: str, url: str, tenant_id: str) -> dict[str, Any] | None:
         api_key, config = cls._get_credentials_and_config(tenant_id, provider)

         if provider == "firecrawl":

@@ -398,14 +391,10 @@ class WebsiteService:
         return None

     @classmethod
-    def get_scrape_url_data(
-        cls, provider: str, url: str, tenant_id: str, only_main_content: bool
-    ) -> dict[str, Any]:
+    def get_scrape_url_data(cls, provider: str, url: str, tenant_id: str, only_main_content: bool) -> dict[str, Any]:
         request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)

-        api_key, config = cls._get_credentials_and_config(
-            tenant_id=request.tenant_id, provider=request.provider
-        )
+        api_key, config = cls._get_credentials_and_config(tenant_id=request.tenant_id, provider=request.provider)

         if request.provider == "firecrawl":
             return cls._scrape_with_firecrawl(request=request, api_key=api_key, config=config)
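For reference, a hedged sketch of how the collapsed WebsiteService classmethod signatures above might be called from service-level code; the IDs and URL are placeholders, not values from this commit:

# Illustrative only: exercises get_crawl_url_data and get_scrape_url_data exactly as
# their signatures appear in the hunks above.
from services.website_service import WebsiteService

tenant_id = "tenant-id-placeholder"
job_id = "crawl-job-id-placeholder"
url = "https://example.com/docs"

# Fetch the stored result for one URL of a finished crawl job.
crawl_data = WebsiteService.get_crawl_url_data(job_id, "firecrawl", url, tenant_id)
if crawl_data is None:
    print("URL not found in crawl results")

# Scrape a single URL directly, keeping only the main content.
scraped = WebsiteService.get_scrape_url_data(
    provider="firecrawl",
    url=url,
    tenant_id=tenant_id,
    only_main_content=True,
)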