mirror of https://github.com/langgenius/dify.git
add old auth transform
This commit is contained in:
parent cc911f46f2
commit e89398f415

@@ -464,7 +464,6 @@ class DatasetIndexingEstimateApi(Resource):
                     "tenant_id": current_user.current_tenant_id,
                     "mode": "crawl",
                     "only_main_content": website_info_list["only_main_content"],
-                    "credential_id": website_info_list["credential_id"],
                 },
                 document_model=args["doc_form"],
             )

@@ -529,7 +529,6 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
                     "tenant_id": current_user.current_tenant_id,
                     "mode": data_source_info["mode"],
                     "only_main_content": data_source_info["only_main_content"],
-                    "credential_id": data_source_info["credential_id"],
                 },
                 document_model=document.doc_form,
             )

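Review note: every call site in this commit (the two estimate endpoints above, and IndexingRunner below) now builds the website extract setting without a credential_id. A minimal sketch of the resulting payload shape, using plain dicts in place of the real ExtractSetting/WebsiteInfo models that appear later in this diff; the datasource_type, provider, job_id, and document_model values are illustrative assumptions:

```python
# Illustrative payload after this commit; plain dicts stand in for the
# real ExtractSetting/WebsiteInfo models. All concrete values are made up.
extract_setting = {
    "datasource_type": "website_crawl",  # assumed discriminator
    "website_info": {
        "provider": "firecrawl",         # assumed
        "job_id": "job-123",             # assumed
        "url": "https://example.com",
        "tenant_id": "tenant-abc",
        "mode": "crawl",
        "only_main_content": True,
        # no "credential_id": the service layer now resolves the
        # tenant's default datasource credential on its own
    },
    "document_model": "text_model",      # assumed doc_form value
}
```
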
@@ -23,7 +23,6 @@ class WebsiteCrawlApi(Resource):
         )
         parser.add_argument("url", type=str, required=True, nullable=True, location="json")
         parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
-        parser.add_argument("credential_id", type=str, required=True, nullable=True, location="json")
         args = parser.parse_args()

         # Create typed request and validate

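The endpoint simply stops accepting the argument. A self-contained sketch of the reduced parser; the provider argument is an assumption about the elided lines, and parse_args() must run inside a Flask request context:

```python
from flask_restful import reqparse

def parse_crawl_args():
    # Mirrors the hunk above minus "credential_id"; call this from
    # within a Flask request context, e.g. inside a Resource handler.
    parser = reqparse.RequestParser()
    parser.add_argument("provider", type=str, required=True, nullable=True, location="json")  # assumed
    parser.add_argument("url", type=str, required=True, nullable=True, location="json")
    parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
    return parser.parse_args()
```
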
@@ -392,7 +392,6 @@ class IndexingRunner:
                     "url": data_source_info["url"],
                     "mode": data_source_info["mode"],
                     "only_main_content": data_source_info["only_main_content"],
-                    "credential_id": data_source_info["credential_id"],
                 },
                 document_model=dataset_document.doc_form,
             )

@@ -36,7 +36,6 @@ class WebsiteInfo(BaseModel):
     mode: str
     tenant_id: str
     only_main_content: bool = False
-    credential_id: Optional[str] = None


 class ExtractSetting(BaseModel):

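A runnable sketch of the trimmed model after this hunk; the provider, job_id, and url fields are assumptions about the elided part of the class:

```python
from pydantic import BaseModel

class WebsiteInfo(BaseModel):
    provider: str   # assumed field, elided in the hunk
    job_id: str     # assumed field
    url: str        # assumed field
    mode: str
    tenant_id: str
    only_main_content: bool = False
    # credential_id: Optional[str] = None  <- removed by this commit

info = WebsiteInfo(provider="firecrawl", job_id="j1",
                   url="https://example.com", mode="crawl", tenant_id="t1")
print(info.model_dump())
```
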
@@ -24,7 +24,6 @@ class FirecrawlWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = True,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -32,14 +31,13 @@ class FirecrawlWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id

     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "firecrawl", self._url, self.tenant_id, self.credential_id
+                self.job_id, "firecrawl", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []
@@ -54,7 +52,7 @@ class FirecrawlWebExtractor(BaseExtractor):
                 documents.append(document)
         elif self.mode == "scrape":
             scrape_data = WebsiteService.get_scrape_url_data(
-                "firecrawl", self._url, self.tenant_id, self.only_main_content, self.credential_id
+                "firecrawl", self._url, self.tenant_id, self.only_main_content
             )

             document = Document(

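The same two-branch extract flow survives in all three extractors below; only the service calls lose their trailing credential argument. A standalone sketch of the crawl branch, with WebsiteService stubbed out so the example runs in isolation:

```python
from dataclasses import dataclass
from typing import Any, Optional

class WebsiteServiceStub:
    """Stand-in for services.website_service.WebsiteService."""
    @classmethod
    def get_crawl_url_data(cls, job_id: str, provider: str, url: str,
                           tenant_id: str) -> Optional[dict[str, Any]]:
        return {"markdown": "# Example page", "source_url": url}

@dataclass
class CrawlExtractorSketch:
    job_id: str
    url: str
    tenant_id: str
    mode: str = "crawl"

    def extract(self) -> list[dict[str, Any]]:
        # Note the four-argument call: credential resolution now happens
        # inside the service, not in the extractor.
        if self.mode != "crawl":
            return []
        data = WebsiteServiceStub.get_crawl_url_data(
            self.job_id, "firecrawl", self.url, self.tenant_id
        )
        return [] if data is None else [data]

print(CrawlExtractorSketch("job-1", "https://example.com", "t1").extract())
```
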
@@ -17,7 +17,6 @@ class JinaReaderWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = False,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -25,14 +24,13 @@ class JinaReaderWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id

     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "jinareader", self._url, self.tenant_id, self.credential_id
+                self.job_id, "jinareader", self._url, self.tenant_id
             )
             if crawl_data is None:
                 return []

@@ -25,7 +25,6 @@ class WaterCrawlWebExtractor(BaseExtractor):
         tenant_id: str,
         mode: str = "crawl",
         only_main_content: bool = True,
-        credential_id: Optional[str] = None,
     ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
@@ -33,14 +32,13 @@ class WaterCrawlWebExtractor(BaseExtractor):
         self.tenant_id = tenant_id
         self.mode = mode
         self.only_main_content = only_main_content
-        self.credential_id = credential_id

     def extract(self) -> list[Document]:
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
             crawl_data = WebsiteService.get_crawl_url_data(
-                self.job_id, "watercrawl", self._url, self.tenant_id, self.credential_id
+                self.job_id, "watercrawl", self._url, self.tenant_id
            )
             if crawl_data is None:
                 return []
@@ -55,7 +53,7 @@ class WaterCrawlWebExtractor(BaseExtractor):
                 documents.append(document)
         elif self.mode == "scrape":
             scrape_data = WebsiteService.get_scrape_url_data(
-                "watercrawl", self._url, self.tenant_id, self.only_main_content, self.credential_id
+                "watercrawl", self._url, self.tenant_id, self.only_main_content
             )

             document = Document(

@@ -82,6 +82,36 @@ class DatasourceProviderService:
             if key in credential_secret_variables:
                 copy_credentials[key] = encrypter.decrypt_token(tenant_id, value)
         return copy_credentials
+
+    def get_default_real_credential(
+        self, tenant_id: str, provider: str, plugin_id: str
+    ) -> dict[str, Any]:
+        """
+        get default credential
+        """
+        with Session(db.engine) as session:
+            datasource_provider = (
+                session.query(DatasourceProvider).filter_by(tenant_id=tenant_id,
+                                                            is_default=True,
+                                                            provider=provider,
+                                                            plugin_id=plugin_id).first()
+            )
+            if not datasource_provider:
+                return {}
+            encrypted_credentials = datasource_provider.encrypted_credentials
+            # Get provider credential secret variables
+            credential_secret_variables = self.extract_secret_variables(
+                tenant_id=tenant_id,
+                provider_id=f"{plugin_id}/{provider}",
+                credential_type=CredentialType.of(datasource_provider.auth_type),
+            )
+
+            # Decrypt provider credentials (secret fields only)
+            copy_credentials = encrypted_credentials.copy()
+            for key, value in copy_credentials.items():
+                if key in credential_secret_variables:
+                    copy_credentials[key] = encrypter.decrypt_token(tenant_id, value)
+            return copy_credentials

     def update_datasource_provider_name(
         self, tenant_id: str, datasource_provider_id: DatasourceProviderID, name: str, credential_id: str

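The decrypt loop at the end of the new method is the same one the service already uses for per-id lookups: only keys registered as secret variables pass through the token decrypter. A standalone sketch of just that step, with decrypt_token stubbed:

```python
from typing import Any

def decrypt_token(tenant_id: str, value: str) -> str:
    # Stub for core encrypter.decrypt_token; placeholder transform only.
    return value[::-1]

def reveal_credentials(tenant_id: str, encrypted: dict[str, Any],
                       secret_keys: set[str]) -> dict[str, Any]:
    revealed = encrypted.copy()
    for key, value in revealed.items():
        if key in secret_keys:
            revealed[key] = decrypt_token(tenant_id, value)
    return revealed

print(reveal_credentials("t1", {"api_key": "yek-omed", "base_url": "https://api"}, {"api_key"}))
```
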
@@ -62,7 +62,6 @@ class WebsiteCrawlApiRequest:
     provider: str
     url: str
     options: dict[str, Any]
-    credential_id: Optional[str] = None

     def to_crawl_request(self) -> CrawlRequest:
         """Convert API request to internal CrawlRequest."""
@@ -121,29 +120,22 @@ class WebsiteService:

     @classmethod
     def _get_credentials_and_config(
-        cls, tenant_id: str, provider: str, credential_id: Optional[str] = None
+        cls, tenant_id: str, provider: str
     ) -> tuple[Any, Any]:
         """Get and validate credentials for a provider."""
-        if credential_id:
-            if provider == "firecrawl":
-                plugin_id = "langgenius/firecrawl_datasource"
-            elif provider == "watercrawl":
-                plugin_id = "langgenius/watercrawl_datasource"
-            elif provider == "jinareader":
-                plugin_id = "langgenius/jinareader_datasource"
-            datasource_provider_service = DatasourceProviderService()
-            credential = datasource_provider_service.get_real_credential_by_id(
-                tenant_id=tenant_id,
-                credential_id=credential_id,
-                provider=provider,
-                plugin_id=plugin_id,
-            )
-            return credential.get("api_key"), credential
-        else:
-            credentials = ApiKeyAuthService.get_auth_credentials(tenant_id, "website", provider)
-            if not credentials or "config" not in credentials:
-                raise ValueError("No valid credentials found for the provider")
-            return credentials, credentials["config"]
+        if provider == "firecrawl":
+            plugin_id = "langgenius/firecrawl_datasource"
+        elif provider == "watercrawl":
+            plugin_id = "langgenius/watercrawl_datasource"
+        elif provider == "jinareader":
+            plugin_id = "langgenius/jinareader_datasource"
+        datasource_provider_service = DatasourceProviderService()
+        credential = datasource_provider_service.get_default_real_credential(
+            tenant_id=tenant_id,
+            provider=provider,
+            plugin_id=plugin_id,
+        )
+        return credential.get("api_key"), credential

     @classmethod
     def _get_decrypted_api_key(cls, tenant_id: str, config: dict) -> str:
@@ -166,13 +158,9 @@ class WebsiteService:
         """Crawl a URL using the specified provider with typed request."""
         request = api_request.to_crawl_request()

-        _, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, request.provider, api_request.credential_id
+        api_key, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, request.provider
         )
-        if api_request.credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(current_user.current_tenant_id, config)

         if request.provider == "firecrawl":
             return cls._crawl_with_firecrawl(request=request, api_key=api_key, config=config)
@@ -262,13 +250,9 @@ class WebsiteService:
     @classmethod
     def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
         """Get crawl status using typed request."""
-        _, config = cls._get_credentials_and_config(
-            current_user.current_tenant_id, api_request.provider, api_request.credential_id
+        api_key, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, api_request.provider
         )
-        if api_request.credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(current_user.current_tenant_id, config)

         if api_request.provider == "firecrawl":
             return cls._get_firecrawl_status(api_request.job_id, api_key, config)
@@ -342,13 +326,9 @@ class WebsiteService:

     @classmethod
     def get_crawl_url_data(
-        cls, job_id: str, provider: str, url: str, tenant_id: str, credential_id: Optional[str] = None
+        cls, job_id: str, provider: str, url: str, tenant_id: str
     ) -> dict[str, Any] | None:
-        _, config = cls._get_credentials_and_config(tenant_id, provider, credential_id)
-        if credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(tenant_id, config)
+        api_key, config = cls._get_credentials_and_config(tenant_id, provider)

         if provider == "firecrawl":
             return cls._get_firecrawl_url_data(job_id, url, api_key, config)
@@ -419,17 +399,13 @@ class WebsiteService:

     @classmethod
     def get_scrape_url_data(
-        cls, provider: str, url: str, tenant_id: str, only_main_content: bool, credential_id: Optional[str] = None
+        cls, provider: str, url: str, tenant_id: str, only_main_content: bool
     ) -> dict[str, Any]:
         request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)

-        _, config = cls._get_credentials_and_config(
-            tenant_id=request.tenant_id, provider=request.provider, credential_id=credential_id
+        api_key, config = cls._get_credentials_and_config(
+            tenant_id=request.tenant_id, provider=request.provider
         )
-        if credential_id:
-            api_key = _
-        else:
-            api_key = cls._get_decrypted_api_key(tenant_id=request.tenant_id, config=config)

         if request.provider == "firecrawl":
             return cls._scrape_with_firecrawl(request=request, api_key=api_key, config=config)

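Taken together, the WebsiteService hunks collapse the old two-path logic (explicit credential_id vs. ApiKeyAuthService fallback) into a single default-credential lookup. A standalone sketch of the new resolution, with the provider-to-plugin table taken from the hunk above and the service call stubbed; note the committed code has no else branch, so an unknown provider would hit an UnboundLocalError where this sketch raises explicitly:

```python
from typing import Any, Optional

PLUGIN_IDS = {
    "firecrawl": "langgenius/firecrawl_datasource",
    "watercrawl": "langgenius/watercrawl_datasource",
    "jinareader": "langgenius/jinareader_datasource",
}

def get_default_real_credential(tenant_id: str, provider: str,
                                plugin_id: str) -> dict[str, Any]:
    # Stub for DatasourceProviderService.get_default_real_credential;
    # the real method queries the tenant's default DatasourceProvider row.
    return {"api_key": "demo-key", "base_url": "https://api.example.com"}

def get_credentials_and_config(tenant_id: str, provider: str) -> tuple[Optional[str], dict[str, Any]]:
    plugin_id = PLUGIN_IDS.get(provider)
    if plugin_id is None:
        raise ValueError(f"unsupported provider: {provider}")
    credential = get_default_real_credential(tenant_id, provider, plugin_id)
    # Callers now receive the api_key directly instead of decrypting it
    # themselves via _get_decrypted_api_key.
    return credential.get("api_key"), credential

api_key, config = get_credentials_and_config("tenant-abc", "firecrawl")
print(api_key, config)
```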