commit e89398f415
parent cc911f46f2
Author: jyong
Date:   2025-07-29 14:13:50 +08:00

    add old auth transform

10 changed files with 58 additions and 63 deletions

View File

@@ -464,7 +464,6 @@ class DatasetIndexingEstimateApi(Resource):
                 "tenant_id": current_user.current_tenant_id,
                 "mode": "crawl",
                 "only_main_content": website_info_list["only_main_content"],
-                "credential_id": website_info_list["credential_id"],
             },
             document_model=args["doc_form"],
         )

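The same one-line removal recurs in the next three hunks: callers stop passing a per-request credential and the default credential is resolved downstream. A minimal sketch of the call site after this commit (every field other than those visible in the hunk, including datasource_type and the keys marked below, is an assumption, not taken from this diff):

    # Hypothetical reconstruction of the estimate endpoint's call after this commit.
    extract_setting = ExtractSetting(
        datasource_type="website_crawl",  # assumed value
        website_info={
            "provider": args["provider"],  # assumed key
            "job_id": args["job_id"],      # assumed key
            "url": args["url"],            # assumed key
            "tenant_id": current_user.current_tenant_id,
            "mode": "crawl",
            "only_main_content": website_info_list["only_main_content"],
            # "credential_id" is gone; the tenant's default credential is looked up later
        },
        document_model=args["doc_form"],
    )
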
View File

@@ -529,7 +529,6 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
                 "tenant_id": current_user.current_tenant_id,
                 "mode": data_source_info["mode"],
                 "only_main_content": data_source_info["only_main_content"],
-                "credential_id": data_source_info["credential_id"],
             },
             document_model=document.doc_form,
         )

View File

@@ -23,7 +23,6 @@ class WebsiteCrawlApi(Resource):
         )
         parser.add_argument("url", type=str, required=True, nullable=True, location="json")
         parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
-        parser.add_argument("credential_id", type=str, required=True, nullable=True, location="json")
         args = parser.parse_args()

         # Create typed request and validate

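With credential_id dropped from the request parser, clients starting a crawl no longer send a credential handle at all. A hedged client-side sketch (the route, host, auth header, and option keys are assumptions, not taken from this diff):

    import requests

    resp = requests.post(
        "https://dify.example.com/console/api/website/crawl",  # hypothetical route
        json={
            "provider": "firecrawl",  # one of the providers handled below
            "url": "https://docs.example.com",
            "options": {"only_main_content": True},  # option keys are assumed
        },
        headers={"Authorization": "Bearer <console-token>"},
        timeout=30,
    )
    resp.raise_for_status()
    print(resp.json())
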
View File

@@ -392,7 +392,6 @@ class IndexingRunner:
                 "url": data_source_info["url"],
                 "mode": data_source_info["mode"],
                 "only_main_content": data_source_info["only_main_content"],
-                "credential_id": data_source_info["credential_id"],
             },
             document_model=dataset_document.doc_form,
         )
)

View File

@@ -36,7 +36,6 @@ class WebsiteInfo(BaseModel):
     mode: str
     tenant_id: str
     only_main_content: bool = False
-    credential_id: Optional[str] = None


 class ExtractSetting(BaseModel):

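For reference, the model after the removal, restated as a runnable sketch. Only mode, tenant_id, and only_main_content are certain from this hunk; the remaining fields are assumed from the extractor and service code later in this commit:

    from pydantic import BaseModel

    class WebsiteInfo(BaseModel):
        provider: str                    # assumed field
        job_id: str                      # assumed field
        url: str                         # assumed field
        mode: str
        tenant_id: str
        only_main_content: bool = False  # credential_id: Optional[str] was removed

    info = WebsiteInfo(provider="firecrawl", job_id="job-123",
                       url="https://example.com", mode="crawl", tenant_id="tenant-1")
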
View File

@@ -24,7 +24,6 @@ class FirecrawlWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = True,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -32,14 +31,13 @@ class FirecrawlWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id

     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "firecrawl", self._url, self.tenant_id, self.credential_id
+                self.job_id, "firecrawl", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []
@@ -54,7 +52,7 @@ class FirecrawlWebExtractor(BaseExtractor):
                 documents.append(document)
         elif self.mode == "scrape":
             scrape_data = WebsiteService.get_scrape_url_data(
-                "firecrawl", self._url, self.tenant_id, self.only_main_content, self.credential_id
+                "firecrawl", self._url, self.tenant_id, self.only_main_content
             )
             document = Document(

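Callers of the extractor no longer thread a credential through it; WebsiteService resolves the tenant's default credential internally. A minimal usage sketch, assuming url and job_id are constructor parameters and the class is importable from the module shown above (only tenant_id, mode, and only_main_content are visible in these hunks):

    extractor = FirecrawlWebExtractor(
        url="https://docs.example.com",
        job_id="job-123",      # assumed parameter; not visible in this hunk
        tenant_id="tenant-1",
        mode="crawl",          # or "scrape" for a single page
        only_main_content=True,
    )
    documents = extractor.extract()  # list[Document]; empty if the crawl job returned no data
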
View File

@@ -17,7 +17,6 @@ class JinaReaderWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = False,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -25,14 +24,13 @@ class JinaReaderWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id

     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "jinareader", self._url, self.tenant_id, self.credential_id
+                self.job_id, "jinareader", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []

View File

@@ -25,7 +25,6 @@ class WaterCrawlWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = True,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -33,14 +32,13 @@ class WaterCrawlWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id

     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "watercrawl", self._url, self.tenant_id, self.credential_id
+                self.job_id, "watercrawl", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []
@@ -55,7 +53,7 @@ class WaterCrawlWebExtractor(BaseExtractor):
                 documents.append(document)
         elif self.mode == "scrape":
             scrape_data = WebsiteService.get_scrape_url_data(
-                "watercrawl", self._url, self.tenant_id, self.only_main_content, self.credential_id
+                "watercrawl", self._url, self.tenant_id, self.only_main_content
             )
             document = Document(

View File

@@ -82,6 +82,36 @@ class DatasourceProviderService:
             if key in credential_secret_variables:
                 copy_credentials[key] = encrypter.decrypt_token(tenant_id, value)
         return copy_credentials
+
+    def get_default_real_credential(
+        self, tenant_id: str, provider: str, plugin_id: str
+    ) -> dict[str, Any]:
+        """
+        Get the tenant's default credential for a datasource provider, with secret fields decrypted.
+        """
+        with Session(db.engine) as session:
+            datasource_provider = (
+                session.query(DatasourceProvider).filter_by(tenant_id=tenant_id,
+                                                            is_default=True,
+                                                            provider=provider,
+                                                            plugin_id=plugin_id).first()
+            )
+            if not datasource_provider:
+                return {}
+            encrypted_credentials = datasource_provider.encrypted_credentials
+            # Get provider credential secret variables
+            credential_secret_variables = self.extract_secret_variables(
+                tenant_id=tenant_id,
+                provider_id=f"{plugin_id}/{provider}",
+                credential_type=CredentialType.of(datasource_provider.auth_type),
+            )
+            # Decrypt the secret fields of the stored credentials
+            copy_credentials = encrypted_credentials.copy()
+            for key, value in copy_credentials.items():
+                if key in credential_secret_variables:
+                    copy_credentials[key] = encrypter.decrypt_token(tenant_id, value)
+            return copy_credentials

     def update_datasource_provider_name(
         self, tenant_id: str, datasource_provider_id: DatasourceProviderID, name: str, credential_id: str
View File

@@ -62,7 +62,6 @@ class WebsiteCrawlApiRequest:
     provider: str
     url: str
     options: dict[str, Any]
-    credential_id: Optional[str] = None

     def to_crawl_request(self) -> CrawlRequest:
         """Convert API request to internal CrawlRequest."""
@@ -121,29 +120,22 @@ class WebsiteService:
     @classmethod
     def _get_credentials_and_config(
-        cls, tenant_id: str, provider: str, credential_id: Optional[str] = None
+        cls, tenant_id: str, provider: str
     ) -> tuple[Any, Any]:
         """Get and validate credentials for a provider."""
-        if credential_id:
-            if provider == "firecrawl":
-                plugin_id = "langgenius/firecrawl_datasource"
-            elif provider == "watercrawl":
-                plugin_id = "langgenius/watercrawl_datasource"
-            elif provider == "jinareader":
-                plugin_id = "langgenius/jinareader_datasource"
-            datasource_provider_service = DatasourceProviderService()
-            credential = datasource_provider_service.get_real_credential_by_id(
-                tenant_id=tenant_id,
-                credential_id=credential_id,
-                provider=provider,
-                plugin_id=plugin_id,
-            )
-            return credential.get("api_key"), credential
-        else:
-            credentials = ApiKeyAuthService.get_auth_credentials(tenant_id, "website", provider)
-            if not credentials or "config" not in credentials:
-                raise ValueError("No valid credentials found for the provider")
-            return credentials, credentials["config"]
+        if provider == "firecrawl":
+            plugin_id = "langgenius/firecrawl_datasource"
+        elif provider == "watercrawl":
+            plugin_id = "langgenius/watercrawl_datasource"
+        elif provider == "jinareader":
+            plugin_id = "langgenius/jinareader_datasource"
+        datasource_provider_service = DatasourceProviderService()
+        credential = datasource_provider_service.get_default_real_credential(
+            tenant_id=tenant_id,
+            provider=provider,
+            plugin_id=plugin_id,
+        )
+        return credential.get("api_key"), credential

     @classmethod
     def _get_decrypted_api_key(cls, tenant_id: str, config: dict) -> str:
@@ -166,13 +158,9 @@ class WebsiteService:
         """Crawl a URL using the specified provider with typed request."""
         request = api_request.to_crawl_request()
-        _, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, request.provider, api_request.credential_id
+        api_key, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, request.provider
         )
-        if api_request.credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(current_user.current_tenant_id, config)

         if request.provider == "firecrawl":
             return cls._crawl_with_firecrawl(request=request, api_key=api_key, config=config)
@@ -262,13 +250,9 @@ class WebsiteService:
     @classmethod
     def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
         """Get crawl status using typed request."""
-        _, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, api_request.provider, api_request.credential_id
+        api_key, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, api_request.provider
         )
-        if api_request.credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(current_user.current_tenant_id, config)

         if api_request.provider == "firecrawl":
             return cls._get_firecrawl_status(api_request.job_id, api_key, config)
@@ -342,13 +326,9 @@ class WebsiteService:
     @classmethod
     def get_crawl_url_data(
-        cls, job_id: str, provider: str, url: str, tenant_id: str, credential_id: Optional[str] = None
+        cls, job_id: str, provider: str, url: str, tenant_id: str
     ) -> dict[str, Any] | None:
-        _, config = cls._get_credentials_and_config(tenant_id, provider, credential_id)
-        if credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(tenant_id, config)
+        api_key, config = cls._get_credentials_and_config(tenant_id, provider)

         if provider == "firecrawl":
             return cls._get_firecrawl_url_data(job_id, url, api_key, config)
@@ -419,17 +399,13 @@ class WebsiteService:
     @classmethod
     def get_scrape_url_data(
-        cls, provider: str, url: str, tenant_id: str, only_main_content: bool, credential_id: Optional[str] = None
+        cls, provider: str, url: str, tenant_id: str, only_main_content: bool
     ) -> dict[str, Any]:
         request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)
-        _, config = cls._get_credentials_and_config(
-            tenant_id=request.tenant_id, provider=request.provider, credential_id=credential_id
+        api_key, config = cls._get_credentials_and_config(
+            tenant_id=request.tenant_id, provider=request.provider
        )
-        if credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(tenant_id=request.tenant_id, config=config)

         if request.provider == "firecrawl":
             return cls._scrape_with_firecrawl(request=request, api_key=api_key, config=config)
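
After this commit, every WebsiteService entry point resolves credentials the same way. One caveat the new helper leaves open: the if/elif chain never binds plugin_id for an unrecognized provider, so the call that follows would raise NameError where the old code raised a descriptive ValueError. A defensive restatement of the helper as a free function (the mapping-plus-guard structure is a suggestion here, not code from the commit):

    from typing import Any

    _PLUGIN_IDS = {
        "firecrawl": "langgenius/firecrawl_datasource",
        "watercrawl": "langgenius/watercrawl_datasource",
        "jinareader": "langgenius/jinareader_datasource",
    }

    def get_credentials_and_config(tenant_id: str, provider: str) -> tuple[Any, Any]:
        # Free-function sketch of WebsiteService._get_credentials_and_config.
        plugin_id = _PLUGIN_IDS.get(provider)
        if plugin_id is None:
            # Guard not present in the commit: fail fast instead of hitting a
            # NameError on an unknown provider.
            raise ValueError(f"Unsupported website crawl provider: {provider}")
        credential = DatasourceProviderService().get_default_real_credential(
            tenant_id=tenant_id, provider=provider, plugin_id=plugin_id
        )
        # get_default_real_credential returns {} when no default credential exists,
        # so api_key may be None here; callers should validate before use.
        return credential.get("api_key"), credential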