add credential id

jyong 2025-08-12 15:43:11 +08:00
parent bd1d7f8652
commit ae3addb922
10 changed files with 52 additions and 65 deletions

View File

@@ -127,7 +127,6 @@ class DatasourceEntity(BaseModel):
description: I18nObject = Field(..., description="The label of the datasource")
output_schema: Optional[dict] = None
@field_validator("parameters", mode="before")
@classmethod
def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:

View File

@@ -1,4 +1,3 @@
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
from services.website_service import WebsiteService
@@ -35,9 +34,7 @@ class FirecrawlWebExtractor(BaseExtractor):
"""Extract content from the URL."""
documents = []
if self.mode == "crawl":
crawl_data = WebsiteService.get_crawl_url_data(
self.job_id, "firecrawl", self._url, self.tenant_id
)
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "firecrawl", self._url, self.tenant_id)
if crawl_data is None:
return []
document = Document(

View File

@@ -1,4 +1,3 @@
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
from services.website_service import WebsiteService
@@ -28,9 +27,7 @@ class JinaReaderWebExtractor(BaseExtractor):
"""Extract content from the URL."""
documents = []
if self.mode == "crawl":
crawl_data = WebsiteService.get_crawl_url_data(
self.job_id, "jinareader", self._url, self.tenant_id
)
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "jinareader", self._url, self.tenant_id)
if crawl_data is None:
return []
document = Document(

View File

@@ -1,4 +1,3 @@
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
from services.website_service import WebsiteService
@@ -36,9 +35,7 @@ class WaterCrawlWebExtractor(BaseExtractor):
"""Extract content from the URL."""
documents = []
if self.mode == "crawl":
crawl_data = WebsiteService.get_crawl_url_data(
self.job_id, "watercrawl", self._url, self.tenant_id
)
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "watercrawl", self._url, self.tenant_id)
if crawl_data is None:
return []
document = Document(

View File

@@ -233,9 +233,11 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
dataset_process_rule = DatasetProcessRule(
dataset_id=dataset.id,
mode="hierarchical",
rules=json.dumps({
"parent_mode": parent_childs.parent_mode,
}),
rules=json.dumps(
{
"parent_mode": parent_childs.parent_mode,
}
),
created_by=document.created_by,
)
db.session.add(dataset_process_rule)

View File

@@ -816,7 +816,7 @@ class DocumentSegment(Base):
base_url = f"/files/{upload_file_id}/file-preview"
signed_url = f"{base_url}?{params}"
signed_urls.append((match.start(), match.end(), signed_url))
# For tools directory - direct file formats (e.g., .png, .jpg, etc.)
pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?.*?)?"
matches = re.finditer(pattern, text)

View File

@@ -37,7 +37,7 @@ class DatasourceProvider(Base):
encrypted_credentials: Mapped[dict] = db.Column(JSONB, nullable=False)
avatar_url: Mapped[str] = db.Column(db.String(255), nullable=True, default="default")
is_default: Mapped[bool] = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
expires_at: Mapped[int] = db.Column(db.Integer, nullable=False, server_default='-1')
expires_at: Mapped[int] = db.Column(db.Integer, nullable=False, server_default="-1")
created_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)
updated_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, default=datetime.now)

View File

@@ -2,7 +2,6 @@ import logging
import time
from typing import Any
from core.plugin.impl.oauth import OAuthHandler
from flask_login import current_user
from sqlalchemy.orm import Session
@@ -14,6 +13,7 @@ from core.helper.provider_cache import NoOpProviderCredentialCache
from core.model_runtime.entities.provider_entities import FormType
from core.plugin.entities.plugin import DatasourceProviderID
from core.plugin.impl.datasource import PluginDatasourceManager
from core.plugin.impl.oauth import OAuthHandler
from core.tools.entities.tool_entities import CredentialType
from core.tools.utils.encryption import ProviderConfigCache, ProviderConfigEncrypter, create_provider_encrypter
from extensions.ext_database import db
@@ -143,7 +143,7 @@ class DatasourceProviderService:
plugin_id=plugin_id,
provider=provider,
)
def get_all_datasource_credentials_by_provider(
self,
tenant_id: str,

View File

@@ -307,15 +307,17 @@ class RagPipelineTransformService:
if file_id:
file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
if file:
data_source_info = json.dumps({
"real_file_id": file_id,
"name": file.name,
"size": file.size,
"extension": file.extension,
"mime_type": file.mime_type,
"url": "",
"transfer_method": "local_file",
})
data_source_info = json.dumps(
{
"real_file_id": file_id,
"name": file.name,
"size": file.size,
"extension": file.extension,
"mime_type": file.mime_type,
"url": "",
"transfer_method": "local_file",
}
)
document.data_source_info = data_source_info
document_pipeline_execution_log = DocumentPipelineExecutionLog(
document_id=document.id,
@@ -331,17 +333,19 @@ class RagPipelineTransformService:
db.session.add(document_pipeline_execution_log)
elif document.data_source_type == "notion_import":
document.data_source_type = "online_document"
data_source_info = json.dumps({
"workspace_id": data_source_info_dict.get("notion_workspace_id"),
"page": {
"page_id": data_source_info_dict.get("notion_page_id"),
"page_name": document.name,
"page_icon": data_source_info_dict.get("notion_page_icon"),
"type": data_source_info_dict.get("type"),
"last_edited_time": data_source_info_dict.get("last_edited_time"),
"parent_id": None,
},
})
data_source_info = json.dumps(
{
"workspace_id": data_source_info_dict.get("notion_workspace_id"),
"page": {
"page_id": data_source_info_dict.get("notion_page_id"),
"page_name": document.name,
"page_icon": data_source_info_dict.get("notion_page_icon"),
"type": data_source_info_dict.get("type"),
"last_edited_time": data_source_info_dict.get("last_edited_time"),
"parent_id": None,
},
}
)
document.data_source_info = data_source_info
document_pipeline_execution_log = DocumentPipelineExecutionLog(
document_id=document.id,
@@ -357,12 +361,14 @@ class RagPipelineTransformService:
db.session.add(document_pipeline_execution_log)
elif document.data_source_type == "website_crawl":
document.data_source_type = "website_crawl"
data_source_info = json.dumps({
"source_url": data_source_info_dict.get("url"),
"content": "",
"title": document.name,
"description": "",
})
data_source_info = json.dumps(
{
"source_url": data_source_info_dict.get("url"),
"content": "",
"title": document.name,
"description": "",
}
)
document.data_source_info = data_source_info
if data_source_info_dict.get("provider") == "firecrawl":
datasource_node_id = firecrawl_node_id
@@ -381,4 +387,4 @@ class RagPipelineTransformService:
datasource_node_id=datasource_node_id,
)
db.session.add(document)
db.session.add(document_pipeline_execution_log)
db.session.add(document_pipeline_execution_log)

View File

@@ -98,6 +98,7 @@ class WebsiteCrawlStatusApiRequest:
provider: str
job_id: str
@classmethod
def from_args(cls, args: dict, job_id: str) -> "WebsiteCrawlStatusApiRequest":
"""Create from Flask-RESTful parsed arguments."""
@@ -114,9 +115,7 @@ class WebsiteService:
"""Service class for website crawling operations using different providers."""
@classmethod
def _get_credentials_and_config(
cls, tenant_id: str, provider: str
) -> tuple[Any, Any]:
def _get_credentials_and_config(cls, tenant_id: str, provider: str) -> tuple[Any, Any]:
"""Get and validate credentials for a provider."""
if provider == "firecrawl":
plugin_id = "langgenius/firecrawl_datasource"
@@ -158,9 +157,7 @@ class WebsiteService:
"""Crawl a URL using the specified provider with typed request."""
request = api_request.to_crawl_request()
api_key, config = cls._get_credentials_and_config(
current_user.current_tenant_id, request.provider
)
api_key, config = cls._get_credentials_and_config(current_user.current_tenant_id, request.provider)
if request.provider == "firecrawl":
return cls._crawl_with_firecrawl(request=request, api_key=api_key, config=config)
@@ -250,9 +247,7 @@ class WebsiteService:
@classmethod
def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
"""Get crawl status using typed request."""
api_key, config = cls._get_credentials_and_config(
current_user.current_tenant_id, api_request.provider
)
api_key, config = cls._get_credentials_and_config(current_user.current_tenant_id, api_request.provider)
if api_request.provider == "firecrawl":
return cls._get_firecrawl_status(api_request.job_id, api_key, config)
@@ -325,9 +320,7 @@ class WebsiteService:
return crawl_status_data
@classmethod
def get_crawl_url_data(
cls, job_id: str, provider: str, url: str, tenant_id: str
) -> dict[str, Any] | None:
def get_crawl_url_data(cls, job_id: str, provider: str, url: str, tenant_id: str) -> dict[str, Any] | None:
api_key, config = cls._get_credentials_and_config(tenant_id, provider)
if provider == "firecrawl":
@@ -398,14 +391,10 @@ class WebsiteService:
return None
@classmethod
def get_scrape_url_data(
cls, provider: str, url: str, tenant_id: str, only_main_content: bool
) -> dict[str, Any]:
def get_scrape_url_data(cls, provider: str, url: str, tenant_id: str, only_main_content: bool) -> dict[str, Any]:
request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)
api_key, config = cls._get_credentials_and_config(
tenant_id=request.tenant_id, provider=request.provider
)
api_key, config = cls._get_credentials_and_config(tenant_id=request.tenant_id, provider=request.provider)
if request.provider == "firecrawl":
return cls._scrape_with_firecrawl(request=request, api_key=api_key, config=config)