WIP: test(api): fix broken tests for WebsiteService

This commit is contained in:
QuantumGhost 2025-09-18 03:00:57 +08:00
parent fda15ef018
commit a678dd1a32

View File

@ -5,6 +5,7 @@ import pytest
from faker import Faker from faker import Faker
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from services.datasource_provider_service import DatasourceProviderService
from services.website_service import ( from services.website_service import (
CrawlOptions, CrawlOptions,
ScrapeRequest, ScrapeRequest,
@ -21,19 +22,27 @@ class TestWebsiteService:
def mock_external_service_dependencies(self): def mock_external_service_dependencies(self):
"""Mock setup for external service dependencies.""" """Mock setup for external service dependencies."""
with ( with (
patch("services.website_service.ApiKeyAuthService") as mock_api_key_auth_service, # patch("services.website_service.ApiKeyAuthService") as mock_api_key_auth_service,
patch("services.website_service.FirecrawlApp") as mock_firecrawl_app, patch("services.website_service.FirecrawlApp") as mock_firecrawl_app,
patch("services.website_service.WaterCrawlProvider") as mock_watercrawl_provider, patch("services.website_service.WaterCrawlProvider") as mock_watercrawl_provider,
patch("services.website_service.requests") as mock_requests, patch("services.website_service.requests") as mock_requests,
patch("services.website_service.redis_client") as mock_redis_client, patch("services.website_service.redis_client") as mock_redis_client,
patch("services.website_service.storage") as mock_storage, patch("services.website_service.storage") as mock_storage,
patch("services.website_service.encrypter") as mock_encrypter, patch("services.website_service.encrypter") as mock_encrypter,
patch(
"services.website_service.DatasourceProviderService",
) as mock_datasource_provider_service,
): ):
# Setup default mock returns # Setup default mock returns
mock_api_key_auth_service.get_auth_credentials.return_value = { # mock_api_key_auth_service.get_auth_credentials.return_value = {
"config": {"api_key": "encrypted_api_key", "base_url": "https://api.example.com"} # "config": {"api_key": "encrypted_api_key", "base_url": "https://api.example.com"}
# }
mock_datasource_provider_service_instance = MagicMock(spec=DatasourceProviderService)
mock_datasource_provider_service_instance.get_datasource_credentials.return_value = {
"firecrawl_api_key": "firecrawl_api_key",
"api_key": "api_key",
} }
mock_encrypter.decrypt_token.return_value = "decrypted_api_key" mock_datasource_provider_service.return_value = mock_datasource_provider_service_instance
# Mock FirecrawlApp # Mock FirecrawlApp
mock_firecrawl_instance = MagicMock() mock_firecrawl_instance = MagicMock()
@ -85,7 +94,8 @@ class TestWebsiteService:
mock_storage.load_once.return_value = None mock_storage.load_once.return_value = None
yield { yield {
"api_key_auth_service": mock_api_key_auth_service, "mock_datasource_provider_service": mock_datasource_provider_service,
"mock_datasource_provider_service_instance": mock_datasource_provider_service_instance,
"firecrawl_app": mock_firecrawl_app, "firecrawl_app": mock_firecrawl_app,
"watercrawl_provider": mock_watercrawl_provider, "watercrawl_provider": mock_watercrawl_provider,
"requests": mock_requests, "requests": mock_requests,
@ -250,6 +260,12 @@ class TestWebsiteService:
}, },
) )
mock_provider_instance = mock_external_service_dependencies["mock_datasource_provider_service_instance"]
credential = {
"firecrawl_api_key": "decrypted_api_key",
"base_url": "https://api.example.com",
}
mock_provider_instance.get_datasource_credentials.return_value = credential
# Act: Execute crawl operation # Act: Execute crawl operation
result = WebsiteService.crawl_url(api_request) result = WebsiteService.crawl_url(api_request)
@ -258,13 +274,12 @@ class TestWebsiteService:
assert result["status"] == "active" assert result["status"] == "active"
assert result["job_id"] == "test_job_id_123" assert result["job_id"] == "test_job_id_123"
mock_provider_instance.get_datasource_credentials.assert_called_once_with(
tenant_id=current_tenant.id,
provider="firecrawl",
plugin_id="langgenius/firecrawl_datasource",
)
# Verify external service interactions # Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "firecrawl"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
mock_external_service_dependencies["firecrawl_app"].assert_called_once_with( mock_external_service_dependencies["firecrawl_app"].assert_called_once_with(
api_key="decrypted_api_key", base_url="https://api.example.com" api_key="decrypted_api_key", base_url="https://api.example.com"
) )
@ -304,7 +319,12 @@ class TestWebsiteService:
"use_sitemap": False, "use_sitemap": False,
}, },
) )
mock_provider_instance = mock_external_service_dependencies["mock_datasource_provider_service_instance"]
credential = {
"api_key": "decrypted_api_key",
"base_url": "https://api.example.com",
}
mock_provider_instance.get_datasource_credentials.return_value = credential
# Act: Execute crawl operation # Act: Execute crawl operation
result = WebsiteService.crawl_url(api_request) result = WebsiteService.crawl_url(api_request)
@ -314,11 +334,10 @@ class TestWebsiteService:
assert result["job_id"] == "watercrawl_job_123" assert result["job_id"] == "watercrawl_job_123"
# Verify external service interactions # Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( mock_provider_instance.get_datasource_credentials.assert_called_once_with(
account.current_tenant.id, "website", "watercrawl" tenant_id=current_tenant.id,
) provider="watercrawl",
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( plugin_id="langgenius/watercrawl_datasource",
tenant_id=account.current_tenant.id, token="encrypted_api_key"
) )
mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with( mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with(
api_key="decrypted_api_key", base_url="https://api.example.com" api_key="decrypted_api_key", base_url="https://api.example.com"
@ -365,14 +384,6 @@ class TestWebsiteService:
assert result["status"] == "active" assert result["status"] == "active"
assert result["data"] is not None assert result["data"] is not None
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "jinareader"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify HTTP request was made # Verify HTTP request was made
mock_external_service_dependencies["requests"].get.assert_called_once_with( mock_external_service_dependencies["requests"].get.assert_called_once_with(
"https://r.jina.ai/https://example.com", "https://r.jina.ai/https://example.com",
@ -442,14 +453,6 @@ class TestWebsiteService:
assert "data" in result assert "data" in result
assert "time_consuming" in result assert "time_consuming" in result
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "firecrawl"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify Redis cache was accessed and cleaned up # Verify Redis cache was accessed and cleaned up
mock_external_service_dependencies["redis_client"].get.assert_called_once() mock_external_service_dependencies["redis_client"].get.assert_called_once()
mock_external_service_dependencies["redis_client"].delete.assert_called_once() mock_external_service_dependencies["redis_client"].delete.assert_called_once()
@ -486,14 +489,6 @@ class TestWebsiteService:
assert result["current"] == 3 assert result["current"] == 3
assert "data" in result assert "data" in result
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "watercrawl"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
def test_get_crawl_status_jinareader_success(self, db_session_with_containers, mock_external_service_dependencies): def test_get_crawl_status_jinareader_success(self, db_session_with_containers, mock_external_service_dependencies):
""" """
Test successful crawl status retrieval with JinaReader provider. Test successful crawl status retrieval with JinaReader provider.
@ -527,14 +522,6 @@ class TestWebsiteService:
assert "data" in result assert "data" in result
assert "time_consuming" in result assert "time_consuming" in result
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "jinareader"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify HTTP request was made # Verify HTTP request was made
mock_external_service_dependencies["requests"].post.assert_called_once() mock_external_service_dependencies["requests"].post.assert_called_once()
@ -582,7 +569,9 @@ class TestWebsiteService:
with patch("services.website_service.current_user", mock_current_user): with patch("services.website_service.current_user", mock_current_user):
# Mock missing credentials # Mock missing credentials
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.return_value = None mock_external_service_dependencies[
"mock_datasource_provider_service"
].get_datasource_credentials.return_value = None
# Create API request # Create API request
api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123") api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123")
@ -661,14 +650,6 @@ class TestWebsiteService:
assert result["description"] == "Test Description" assert result["description"] == "Test Description"
assert result["markdown"] == "# Test Content" assert result["markdown"] == "# Test Content"
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "firecrawl"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify storage was accessed # Verify storage was accessed
mock_external_service_dependencies["storage"].exists.assert_called_once() mock_external_service_dependencies["storage"].exists.assert_called_once()
mock_external_service_dependencies["storage"].load_once.assert_called_once() mock_external_service_dependencies["storage"].load_once.assert_called_once()
@ -703,14 +684,6 @@ class TestWebsiteService:
assert result["description"] == "Test description" assert result["description"] == "Test description"
assert result["markdown"] == "# Test Content" assert result["markdown"] == "# Test Content"
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "watercrawl"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
def test_get_crawl_url_data_jinareader_success( def test_get_crawl_url_data_jinareader_success(
self, db_session_with_containers, mock_external_service_dependencies self, db_session_with_containers, mock_external_service_dependencies
): ):
@ -751,14 +724,6 @@ class TestWebsiteService:
assert result["description"] == "Test description" assert result["description"] == "Test description"
assert result["content"] == "# Test Content" assert result["content"] == "# Test Content"
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "jinareader"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify HTTP request was made # Verify HTTP request was made
mock_external_service_dependencies["requests"].get.assert_called_once_with( mock_external_service_dependencies["requests"].get.assert_called_once_with(
"https://r.jina.ai/https://example.com", "https://r.jina.ai/https://example.com",
@ -802,14 +767,6 @@ class TestWebsiteService:
assert result["url"] == "https://example.com" assert result["url"] == "https://example.com"
assert result["description"] == "Page description" assert result["description"] == "Page description"
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "firecrawl"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify FirecrawlApp was called with correct parameters # Verify FirecrawlApp was called with correct parameters
mock_external_service_dependencies["firecrawl_app"].assert_called_once_with( mock_external_service_dependencies["firecrawl_app"].assert_called_once_with(
api_key="decrypted_api_key", base_url="https://api.example.com" api_key="decrypted_api_key", base_url="https://api.example.com"
@ -847,14 +804,6 @@ class TestWebsiteService:
assert result["content"] == "Test content" assert result["content"] == "Test content"
assert result["url"] == "https://example.com" assert result["url"] == "https://example.com"
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "watercrawl"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify WaterCrawlProvider was called with correct parameters # Verify WaterCrawlProvider was called with correct parameters
mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with( mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with(
api_key="decrypted_api_key", base_url="https://api.example.com" api_key="decrypted_api_key", base_url="https://api.example.com"
@ -1032,14 +981,6 @@ class TestWebsiteService:
assert result["status"] == "active" assert result["status"] == "active"
assert result["job_id"] == "jina_job_123" assert result["job_id"] == "jina_job_123"
# Verify external service interactions
mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with(
account.current_tenant.id, "website", "jinareader"
)
mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with(
tenant_id=account.current_tenant.id, token="encrypted_api_key"
)
# Verify HTTP POST request was made for sub-page crawling # Verify HTTP POST request was made for sub-page crawling
mock_external_service_dependencies["requests"].post.assert_called_once_with( mock_external_service_dependencies["requests"].post.assert_called_once_with(
"https://adaptivecrawl-kir3wx7b3a-uc.a.run.app", "https://adaptivecrawl-kir3wx7b3a-uc.a.run.app",