From e89398f41515195fc015158d2fe492dd27fc9ef7 Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 29 Jul 2025 14:13:50 +0800
Subject: [PATCH] add old auth transform

---
 api/controllers/console/datasets/datasets.py |  1 -
 .../console/datasets/datasets_document.py    |  1 -
 api/controllers/console/datasets/website.py  |  1 -
 api/core/indexing_runner.py                  |  1 -
 .../rag/extractor/entity/extract_setting.py  |  1 -
 .../firecrawl/firecrawl_web_extractor.py     |  6 +-
 .../rag/extractor/jina_reader_extractor.py   |  4 +-
 .../rag/extractor/watercrawl/extractor.py    |  6 +-
 api/services/datasource_provider_service.py  | 30 ++++
 api/services/website_service.py              | 70 ++++++-------
 10 files changed, 58 insertions(+), 63 deletions(-)

diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index bf4a3bac5d..09c3984356 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -464,7 +464,6 @@ class DatasetIndexingEstimateApi(Resource):
                         "tenant_id": current_user.current_tenant_id,
                         "mode": "crawl",
                         "only_main_content": website_info_list["only_main_content"],
-                        "credential_id": website_info_list["credential_id"],
                     },
                     document_model=args["doc_form"],
                 )
diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py
index 8d880a9912..1ef490705b 100644
--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@@ -529,7 +529,6 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
                         "tenant_id": current_user.current_tenant_id,
                         "mode": data_source_info["mode"],
                         "only_main_content": data_source_info["only_main_content"],
-                        "credential_id": data_source_info["credential_id"],
                     },
                     document_model=document.doc_form,
                 )
diff --git a/api/controllers/console/datasets/website.py b/api/controllers/console/datasets/website.py
index f8b1908f68..27843026bb 100644
--- a/api/controllers/console/datasets/website.py
+++ b/api/controllers/console/datasets/website.py
@@ -23,7 +23,6 @@ class WebsiteCrawlApi(Resource):
         )
         parser.add_argument("url", type=str, required=True, nullable=True, location="json")
         parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
-        parser.add_argument("credential_id", type=str, required=True, nullable=True, location="json")
         args = parser.parse_args()
 
         # Create typed request and validate
diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py
index 538c3e20af..2a5a9f344d 100644
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -392,7 +392,6 @@ class IndexingRunner:
                     "url": data_source_info["url"],
                     "mode": data_source_info["mode"],
                     "only_main_content": data_source_info["only_main_content"],
-                    "credential_id": data_source_info["credential_id"],
                 },
                 document_model=dataset_document.doc_form,
             )
diff --git a/api/core/rag/extractor/entity/extract_setting.py b/api/core/rag/extractor/entity/extract_setting.py
index d0a4a9353f..70e919210c 100644
--- a/api/core/rag/extractor/entity/extract_setting.py
+++ b/api/core/rag/extractor/entity/extract_setting.py
@@ -36,7 +36,6 @@ class WebsiteInfo(BaseModel):
     mode: str
     tenant_id: str
     only_main_content: bool = False
-    credential_id: Optional[str] = None
 
 
 class ExtractSetting(BaseModel):
diff --git a/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py b/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py
index 25454fa1d9..f655ba94a0 100644
--- a/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py
+++ b/api/core/rag/extractor/firecrawl/firecrawl_web_extractor.py
@@ -24,7 +24,6 @@ class FirecrawlWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = True,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -32,14 +31,13 @@ class FirecrawlWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id
 
     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "firecrawl", self._url, self.tenant_id, self.credential_id
+                self.job_id, "firecrawl", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []
@@ -54,7 +52,7 @@ class FirecrawlWebExtractor(BaseExtractor):
                 documents.append(document)
         elif self.mode == "scrape":
             scrape_data = WebsiteService.get_scrape_url_data(
-                "firecrawl", self._url, self.tenant_id, self.only_main_content, self.credential_id
+                "firecrawl", self._url, self.tenant_id, self.only_main_content
             )
 
             document = Document(
diff --git a/api/core/rag/extractor/jina_reader_extractor.py b/api/core/rag/extractor/jina_reader_extractor.py
index 88c240393f..096b34bb87 100644
--- a/api/core/rag/extractor/jina_reader_extractor.py
+++ b/api/core/rag/extractor/jina_reader_extractor.py
@@ -17,7 +17,6 @@ class JinaReaderWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = False,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -25,14 +24,13 @@ class JinaReaderWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id
 
     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "jinareader", self._url, self.tenant_id, self.credential_id
+                self.job_id, "jinareader", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []
diff --git a/api/core/rag/extractor/watercrawl/extractor.py b/api/core/rag/extractor/watercrawl/extractor.py
index e5805d1b64..31c0c62799 100644
--- a/api/core/rag/extractor/watercrawl/extractor.py
+++ b/api/core/rag/extractor/watercrawl/extractor.py
@@ -25,7 +25,6 @@ class WaterCrawlWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = True,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -33,14 +32,13 @@ class WaterCrawlWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id
 
     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "watercrawl", self._url, self.tenant_id, self.credential_id
+                self.job_id, "watercrawl", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []
@@ -55,7 +53,7 @@ class WaterCrawlWebExtractor(BaseExtractor):
                 documents.append(document)
         elif self.mode == "scrape":
             scrape_data = WebsiteService.get_scrape_url_data(
-                "watercrawl", self._url, self.tenant_id, self.only_main_content, self.credential_id
+                "watercrawl", self._url, self.tenant_id, self.only_main_content
             )
 
             document = Document(
diff --git a/api/services/datasource_provider_service.py b/api/services/datasource_provider_service.py
index a3600db4e5..8f81e5462d 100644
--- a/api/services/datasource_provider_service.py
+++ b/api/services/datasource_provider_service.py
@@ -82,6 +82,36 @@ class DatasourceProviderService:
             if key in credential_secret_variables:
                 copy_credentials[key] = encrypter.decrypt_token(tenant_id, value)
         return copy_credentials
+
+    def get_default_real_credential(
+        self, tenant_id: str, provider: str, plugin_id: str
+    ) -> dict[str, Any]:
+        """
+        Get the default credential for the given datasource provider.
+        """
+        with Session(db.engine) as session:
+            datasource_provider = (
+                session.query(DatasourceProvider).filter_by(tenant_id=tenant_id,
+                                                            is_default=True,
+                                                            provider=provider,
+                                                            plugin_id=plugin_id).first()
+            )
+            if not datasource_provider:
+                return {}
+            encrypted_credentials = datasource_provider.encrypted_credentials
+            # Get provider credential secret variables
+            credential_secret_variables = self.extract_secret_variables(
+                tenant_id=tenant_id,
+                provider_id=f"{plugin_id}/{provider}",
+                credential_type=CredentialType.of(datasource_provider.auth_type),
+            )
+
+            # Decrypt provider credentials
+            copy_credentials = encrypted_credentials.copy()
+            for key, value in copy_credentials.items():
+                if key in credential_secret_variables:
+                    copy_credentials[key] = encrypter.decrypt_token(tenant_id, value)
+            return copy_credentials
 
     def update_datasource_provider_name(
         self, tenant_id: str, datasource_provider_id: DatasourceProviderID, name: str, credential_id: str
diff --git a/api/services/website_service.py b/api/services/website_service.py
index a12128e248..5b14d11054 100644
--- a/api/services/website_service.py
+++ b/api/services/website_service.py
@@ -62,7 +62,6 @@ class WebsiteCrawlApiRequest:
     provider: str
     url: str
    options: dict[str, Any]
-    credential_id: Optional[str] = None
 
     def to_crawl_request(self) -> CrawlRequest:
         """Convert API request to internal CrawlRequest."""
@@ -121,29 +120,22 @@ class WebsiteService:
 
     @classmethod
     def _get_credentials_and_config(
-        cls, tenant_id: str, provider: str, credential_id: Optional[str] = None
+        cls, tenant_id: str, provider: str
     ) -> tuple[Any, Any]:
         """Get and validate credentials for a provider."""
-        if credential_id:
-            if provider == "firecrawl":
-                plugin_id = "langgenius/firecrawl_datasource"
-            elif provider == "watercrawl":
-                plugin_id = "langgenius/watercrawl_datasource"
-            elif provider == "jinareader":
-                plugin_id = "langgenius/jinareader_datasource"
-            datasource_provider_service = DatasourceProviderService()
-            credential = datasource_provider_service.get_real_credential_by_id(
-                tenant_id=tenant_id,
-                credential_id=credential_id,
-                provider=provider,
-                plugin_id=plugin_id,
-            )
-            return credential.get("api_key"), credential
-        else:
-            credentials = ApiKeyAuthService.get_auth_credentials(tenant_id, "website", provider)
-            if not credentials or "config" not in credentials:
-                raise ValueError("No valid credentials found for the provider")
-            return credentials, credentials["config"]
+        if provider == "firecrawl":
+            plugin_id = "langgenius/firecrawl_datasource"
+        elif provider == "watercrawl":
+            plugin_id = "langgenius/watercrawl_datasource"
+        elif provider == "jinareader":
+            plugin_id = "langgenius/jinareader_datasource"
+        datasource_provider_service = DatasourceProviderService()
+        credential = datasource_provider_service.get_default_real_credential(
+            tenant_id=tenant_id,
+            provider=provider,
+            plugin_id=plugin_id,
+        )
+        return credential.get("api_key"), credential
 
     @classmethod
     def _get_decrypted_api_key(cls, tenant_id: str, config: dict) -> str:
@@ -166,13 +158,9 @@
         """Crawl a URL using the specified provider with typed request."""
         request = api_request.to_crawl_request()
 
-        _, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, request.provider, api_request.credential_id
+        api_key, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, request.provider
         )
-        if api_request.credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(current_user.current_tenant_id, config)
 
         if request.provider == "firecrawl":
             return cls._crawl_with_firecrawl(request=request, api_key=api_key, config=config)
@@ -262,13 +250,9 @@ class WebsiteService:
     @classmethod
     def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
         """Get crawl status using typed request."""
-        _, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, api_request.provider, api_request.credential_id
+        api_key, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, api_request.provider
         )
-        if api_request.credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(current_user.current_tenant_id, config)
 
         if api_request.provider == "firecrawl":
             return cls._get_firecrawl_status(api_request.job_id, api_key, config)
@@ -342,13 +326,9 @@ class WebsiteService:
 
     @classmethod
     def get_crawl_url_data(
-        cls, job_id: str, provider: str, url: str, tenant_id: str, credential_id: Optional[str] = None
+        cls, job_id: str, provider: str, url: str, tenant_id: str
     ) -> dict[str, Any] | None:
-        _, config = cls._get_credentials_and_config(tenant_id, provider, credential_id)
-        if credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(tenant_id, config)
+        api_key, config = cls._get_credentials_and_config(tenant_id, provider)
 
         if provider == "firecrawl":
             return cls._get_firecrawl_url_data(job_id, url, api_key, config)
@@ -419,17 +399,13 @@ class WebsiteService:
 
     @classmethod
     def get_scrape_url_data(
-        cls, provider: str, url: str, tenant_id: str, only_main_content: bool, credential_id: Optional[str] = None
+        cls, provider: str, url: str, tenant_id: str, only_main_content: bool
     ) -> dict[str, Any]:
         request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)
 
-        _, config = cls._get_credentials_and_config(
-            tenant_id=request.tenant_id, provider=request.provider, credential_id=credential_id
+        api_key, config = cls._get_credentials_and_config(
+            tenant_id=request.tenant_id, provider=request.provider
         )
-        if credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(tenant_id=request.tenant_id, config=config)
 
         if request.provider == "firecrawl":
             return cls._scrape_with_firecrawl(request=request, api_key=api_key, config=config)