From a5709251305de47fe4f6ef9438f43365d69c474a Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Thu, 18 Sep 2025 11:24:56 +0800
Subject: [PATCH] del website test

---
 api/services/plugin/dependencies_analysis.py | 2 +-
 .../services/test_website_service.py | 1391 -----------------
 2 files changed, 1 insertion(+), 1392 deletions(-)
 delete mode 100644 api/tests/test_containers_integration_tests/services/test_website_service.py

diff --git a/api/services/plugin/dependencies_analysis.py b/api/services/plugin/dependencies_analysis.py
index 623fa2740f..2f0c5ae3af 100644
--- a/api/services/plugin/dependencies_analysis.py
+++ b/api/services/plugin/dependencies_analysis.py
@@ -59,7 +59,7 @@ class DependenciesAnalysisService:
 version_match = _VERSION_REGEX.search(unique_identifier)
 if version_match:
 dependency.value.version = version_match.group("version")
-
+
 # Create and append the dependency (same for all types)
 leaked_dependencies.append(
 PluginDependency(
diff --git a/api/tests/test_containers_integration_tests/services/test_website_service.py b/api/tests/test_containers_integration_tests/services/test_website_service.py
deleted file mode 100644
index 897e31c88a..0000000000
--- a/api/tests/test_containers_integration_tests/services/test_website_service.py
+++ /dev/null
@@ -1,1391 +0,0 @@
-from datetime import datetime
-from unittest.mock import MagicMock, create_autospec, patch
-
-import pytest
-from faker import Faker
-
-from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
-from services.datasource_provider_service import DatasourceProviderService
-from services.website_service import (
- CrawlOptions,
- ScrapeRequest,
- WebsiteCrawlApiRequest,
- WebsiteCrawlStatusApiRequest,
- WebsiteService,
-)
-
-
-class TestWebsiteService:
- """Integration tests for WebsiteService using testcontainers."""
-
- @pytest.fixture
- def mock_external_service_dependencies(self):
- """Mock setup for external service dependencies."""
- with (
- patch("services.website_service.FirecrawlApp") as mock_firecrawl_app,
- patch("services.website_service.WaterCrawlProvider") as mock_watercrawl_provider,
- patch("services.website_service.requests") as mock_requests,
- patch("services.website_service.redis_client") as mock_redis_client,
- patch("services.website_service.storage") as mock_storage,
- patch("services.website_service.encrypter") as mock_encrypter,
- patch(
- "services.website_service.DatasourceProviderService",
- ) as mock_datasource_provider_service,
- ):
- # Setup default mock returns
- mock_datasource_provider_service_instance = MagicMock(spec=DatasourceProviderService)
- mock_datasource_provider_service_instance.get_datasource_credentials.return_value = {
- "firecrawl_api_key": "firecrawl_api_key",
- "api_key": "api_key",
- }
- mock_datasource_provider_service.return_value = mock_datasource_provider_service_instance
-
- # Mock FirecrawlApp
- mock_firecrawl_instance = MagicMock()
- mock_firecrawl_instance.crawl_url.return_value = "test_job_id_123"
- mock_firecrawl_instance.check_crawl_status.return_value = {
- "status": "completed",
- "total": 5,
- "current": 5,
- "data": [{"source_url": "https://example.com", "title": "Test Page"}],
- }
- mock_firecrawl_app.return_value = mock_firecrawl_instance
-
- # Mock WaterCrawlProvider
- 
mock_watercrawl_instance = MagicMock() - mock_watercrawl_instance.crawl_url.return_value = {"status": "active", "job_id": "watercrawl_job_123"} - mock_watercrawl_instance.get_crawl_status.return_value = { - "status": "completed", - "job_id": "watercrawl_job_123", - "total": 3, - "current": 3, - "data": [], - } - mock_watercrawl_instance.get_crawl_url_data.return_value = { - "title": "WaterCrawl Page", - "source_url": "https://example.com", - "description": "Test description", - "markdown": "# Test Content", - } - mock_watercrawl_instance.scrape_url.return_value = { - "title": "Scraped Page", - "content": "Test content", - "url": "https://example.com", - } - mock_watercrawl_provider.return_value = mock_watercrawl_instance - - # Mock requests - mock_response = MagicMock() - mock_response.json.return_value = {"code": 200, "data": {"taskId": "jina_job_123"}} - mock_requests.get.return_value = mock_response - mock_requests.post.return_value = mock_response - - # Mock Redis - mock_redis_client.setex.return_value = None - mock_redis_client.get.return_value = str(datetime.now().timestamp()) - mock_redis_client.delete.return_value = None - - # Mock Storage - mock_storage.exists.return_value = False - mock_storage.load_once.return_value = None - - yield { - "mock_datasource_provider_service": mock_datasource_provider_service, - "mock_datasource_provider_service_instance": mock_datasource_provider_service_instance, - "firecrawl_app": mock_firecrawl_app, - "watercrawl_provider": mock_watercrawl_provider, - "requests": mock_requests, - "redis_client": mock_redis_client, - "storage": mock_storage, - "encrypter": mock_encrypter, - } - - def _create_test_account(self, db_session_with_containers, mock_external_service_dependencies): - """ - Helper method to create a test account with proper tenant setup. - - Args: - db_session_with_containers: Database session from testcontainers infrastructure - mock_external_service_dependencies: Mock dependencies - - Returns: - Account: Created account instance - """ - fake = Faker() - - # Create account - account = Account( - email=fake.email(), - name=fake.name(), - interface_language="en-US", - status="active", - ) - - from extensions.ext_database import db - - db.session.add(account) - db.session.commit() - - # Create tenant for the account - tenant = Tenant( - name=fake.company(), - status="normal", - ) - db.session.add(tenant) - db.session.commit() - - # Create tenant-account join - join = TenantAccountJoin( - tenant_id=tenant.id, - account_id=account.id, - role=TenantAccountRole.OWNER.value, - current=True, - ) - db.session.add(join) - db.session.commit() - - # Set current tenant for account - account.current_tenant = tenant - - return account - - def test_document_create_args_validate_success( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test successful argument validation for document creation. 
- - This test verifies: - - Valid arguments are accepted without errors - - All required fields are properly validated - - Optional fields are handled correctly - """ - # Arrange: Prepare valid arguments - valid_args = { - "provider": "firecrawl", - "url": "https://example.com", - "options": { - "limit": 5, - "crawl_sub_pages": True, - "only_main_content": False, - "includes": "blog,news", - "excludes": "admin,private", - "max_depth": 3, - "use_sitemap": True, - }, - } - - # Act: Validate arguments - WebsiteService.document_create_args_validate(valid_args) - - # Assert: No exception should be raised - # If we reach here, validation passed successfully - - def test_document_create_args_validate_missing_provider( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test argument validation fails when provider is missing. - - This test verifies: - - Missing provider raises ValueError - - Proper error message is provided - - Validation stops at first missing required field - """ - # Arrange: Prepare arguments without provider - invalid_args = {"url": "https://example.com", "options": {"limit": 5, "crawl_sub_pages": True}} - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.document_create_args_validate(invalid_args) - - assert "Provider is required" in str(exc_info.value) - - def test_document_create_args_validate_missing_url( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test argument validation fails when URL is missing. - - This test verifies: - - Missing URL raises ValueError - - Proper error message is provided - - Validation continues after provider check - """ - # Arrange: Prepare arguments without URL - invalid_args = {"provider": "firecrawl", "options": {"limit": 5, "crawl_sub_pages": True}} - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.document_create_args_validate(invalid_args) - - assert "URL is required" in str(exc_info.value) - - def test_crawl_url_firecrawl_success(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test successful URL crawling with Firecrawl provider. 
-
- This test verifies:
- - Firecrawl provider is properly initialized
- - API credentials are retrieved and decrypted
- - Crawl parameters are correctly formatted
- - Job ID is returned with active status
- - Redis cache is properly set
- """
- # Arrange: Create test account and prepare request
- account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies)
- fake = Faker()
-
- # Mock current_user for the test
- mock_current_user = create_autospec(Account, instance=True)
- mock_current_user.current_tenant_id = account.current_tenant.id
-
- with patch("services.website_service.current_user", mock_current_user):
- # Create API request
- api_request = WebsiteCrawlApiRequest(
- provider="firecrawl",
- url="https://example.com",
- options={
- "limit": 10,
- "crawl_sub_pages": True,
- "only_main_content": True,
- "includes": "blog,news",
- "excludes": "admin,private",
- "max_depth": 2,
- "use_sitemap": True,
- },
- )
-
- mock_provider_instance = mock_external_service_dependencies["mock_datasource_provider_service_instance"]
- credential = {
- "firecrawl_api_key": "decrypted_api_key",
- "base_url": "https://api.example.com",
- }
- mock_provider_instance.get_datasource_credentials.return_value = credential
- # Act: Execute crawl operation
- result = WebsiteService.crawl_url(api_request)
-
- # Assert: Verify successful operation
- assert result is not None
- assert result["status"] == "active"
- assert result["job_id"] == "test_job_id_123"
-
- mock_provider_instance.get_datasource_credentials.assert_called_once_with(
- tenant_id=account.current_tenant.id,
- provider="firecrawl",
- plugin_id="langgenius/firecrawl_datasource",
- )
- # Verify external service interactions
- mock_external_service_dependencies["firecrawl_app"].assert_called_once_with(
- api_key="decrypted_api_key", base_url="https://api.example.com"
- )
-
- # Verify Redis cache was set
- mock_external_service_dependencies["redis_client"].setex.assert_called_once()
-
- def test_crawl_url_watercrawl_success(self, db_session_with_containers, mock_external_service_dependencies):
- """
- Test successful URL crawling with WaterCrawl provider.
-
- This test verifies:
- - WaterCrawl provider is properly initialized
- - API credentials are retrieved and decrypted
- - Crawl options are correctly passed to provider
- - Provider returns expected response format
- """
- # Arrange: Create test account and prepare request
- account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies)
-
- # Mock current_user for the test
- mock_current_user = create_autospec(Account, instance=True)
- mock_current_user.current_tenant_id = account.current_tenant.id
-
- with patch("services.website_service.current_user", mock_current_user):
- # Create API request
- api_request = WebsiteCrawlApiRequest(
- provider="watercrawl",
- url="https://example.com",
- options={
- "limit": 5,
- "crawl_sub_pages": False,
- "only_main_content": False,
- "includes": None,
- "excludes": None,
- "max_depth": None,
- "use_sitemap": False,
- },
- )
- mock_provider_instance = mock_external_service_dependencies["mock_datasource_provider_service_instance"]
- credential = {
- "api_key": "decrypted_api_key",
- "base_url": "https://api.example.com",
- }
- mock_provider_instance.get_datasource_credentials.return_value = credential
- # Act: Execute crawl operation
- result = WebsiteService.crawl_url(api_request)
-
- # Assert: Verify successful operation
- assert result is not None
- assert result["status"] == "active"
- assert result["job_id"] == "watercrawl_job_123"
-
- # Verify external service interactions
- mock_provider_instance.get_datasource_credentials.assert_called_once_with(
- tenant_id=account.current_tenant.id,
- provider="watercrawl",
- plugin_id="langgenius/watercrawl_datasource",
- )
- mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with(
- api_key="decrypted_api_key", base_url="https://api.example.com"
- )
-
- def test_crawl_url_jinareader_success(self, db_session_with_containers, mock_external_service_dependencies):
- """
- Test successful URL crawling with JinaReader provider.
- - This test verifies: - - JinaReader provider handles single page crawling - - API credentials are retrieved and decrypted - - HTTP requests are made with proper headers - - Response is properly parsed and returned - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request for single page crawling - api_request = WebsiteCrawlApiRequest( - provider="jinareader", - url="https://example.com", - options={ - "limit": 1, - "crawl_sub_pages": False, - "only_main_content": True, - "includes": None, - "excludes": None, - "max_depth": None, - "use_sitemap": False, - }, - ) - - # Act: Execute crawl operation - result = WebsiteService.crawl_url(api_request) - - # Assert: Verify successful operation - assert result is not None - assert result["status"] == "active" - assert result["data"] is not None - - # Verify HTTP request was made - mock_external_service_dependencies["requests"].get.assert_called_once_with( - "https://r.jina.ai/https://example.com", - headers={"Accept": "application/json", "Authorization": "Bearer decrypted_api_key"}, - ) - - def test_crawl_url_invalid_provider(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test crawl operation fails with invalid provider. - - This test verifies: - - Invalid provider raises ValueError - - Proper error message is provided - - Service handles unsupported providers gracefully - """ - # Arrange: Create test account and prepare request with invalid provider - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request with invalid provider - api_request = WebsiteCrawlApiRequest( - provider="invalid_provider", - url="https://example.com", - options={"limit": 5, "crawl_sub_pages": False, "only_main_content": False}, - ) - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.crawl_url(api_request) - - assert "Invalid provider" in str(exc_info.value) - - def test_get_crawl_status_firecrawl_success(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test successful crawl status retrieval with Firecrawl provider. 
- - This test verifies: - - Firecrawl status is properly retrieved - - API credentials are retrieved and decrypted - - Status data includes all required fields - - Redis cache is properly managed for completed jobs - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request - api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123") - - # Act: Get crawl status - result = WebsiteService.get_crawl_status_typed(api_request) - - # Assert: Verify successful operation - assert result is not None - assert result["status"] == "completed" - assert result["job_id"] == "test_job_id_123" - assert result["total"] == 5 - assert result["current"] == 5 - assert "data" in result - assert "time_consuming" in result - - # Verify Redis cache was accessed and cleaned up - mock_external_service_dependencies["redis_client"].get.assert_called_once() - mock_external_service_dependencies["redis_client"].delete.assert_called_once() - - def test_get_crawl_status_watercrawl_success(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test successful crawl status retrieval with WaterCrawl provider. - - This test verifies: - - WaterCrawl status is properly retrieved - - API credentials are retrieved and decrypted - - Provider returns expected status format - - All required status fields are present - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request - api_request = WebsiteCrawlStatusApiRequest(provider="watercrawl", job_id="watercrawl_job_123") - - # Act: Get crawl status - result = WebsiteService.get_crawl_status_typed(api_request) - - # Assert: Verify successful operation - assert result is not None - assert result["status"] == "completed" - assert result["job_id"] == "watercrawl_job_123" - assert result["total"] == 3 - assert result["current"] == 3 - assert "data" in result - - def test_get_crawl_status_jinareader_success(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test successful crawl status retrieval with JinaReader provider. 
- - This test verifies: - - JinaReader status is properly retrieved - - API credentials are retrieved and decrypted - - HTTP requests are made with proper parameters - - Status data is properly formatted and returned - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request - api_request = WebsiteCrawlStatusApiRequest(provider="jinareader", job_id="jina_job_123") - - # Act: Get crawl status - result = WebsiteService.get_crawl_status_typed(api_request) - - # Assert: Verify successful operation - assert result is not None - assert result["status"] == "active" - assert result["job_id"] == "jina_job_123" - assert "total" in result - assert "current" in result - assert "data" in result - assert "time_consuming" in result - - # Verify HTTP request was made - mock_external_service_dependencies["requests"].post.assert_called_once() - - def test_get_crawl_status_invalid_provider(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test crawl status retrieval fails with invalid provider. - - This test verifies: - - Invalid provider raises ValueError - - Proper error message is provided - - Service handles unsupported providers gracefully - """ - # Arrange: Create test account and prepare request with invalid provider - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request with invalid provider - api_request = WebsiteCrawlStatusApiRequest(provider="invalid_provider", job_id="test_job_id_123") - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.get_crawl_status_typed(api_request) - - assert "Invalid provider" in str(exc_info.value) - - def test_get_crawl_status_missing_credentials(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test crawl status retrieval fails when credentials are missing. 
-
- This test verifies:
- - Missing credentials raises ValueError
- - Proper error message is provided
- - Service handles authentication failures gracefully
- """
- # Arrange: Create test account and prepare request
- account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies)
-
- # Mock current_user for the test
- mock_current_user = create_autospec(Account, instance=True)
- mock_current_user.current_tenant_id = account.current_tenant.id
-
- with patch("services.website_service.current_user", mock_current_user):
- # Mock missing credentials on the instance mock (the service under test
- # calls DatasourceProviderService(), which returns this instance)
- mock_external_service_dependencies[
- "mock_datasource_provider_service_instance"
- ].get_datasource_credentials.return_value = None
-
- # Create API request
- api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123")
-
- # Act & Assert: Verify proper error handling
- with pytest.raises(ValueError) as exc_info:
- WebsiteService.get_crawl_status_typed(api_request)
-
- assert "No valid credentials found for the provider" in str(exc_info.value)
-
- def test_get_crawl_status_missing_api_key(self, db_session_with_containers, mock_external_service_dependencies):
- """
- Test crawl status retrieval fails when API key is missing from config.
-
- This test verifies:
- - Missing API key raises ValueError
- - Proper error message is provided
- - Service handles configuration failures gracefully
- """
- # Arrange: Create test account and prepare request
- account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies)
-
- # Mock current_user for the test
- mock_current_user = create_autospec(Account, instance=True)
- mock_current_user.current_tenant_id = account.current_tenant.id
-
- with patch("services.website_service.current_user", mock_current_user):
- # Mock credentials that lack an API key
- mock_external_service_dependencies[
- "mock_datasource_provider_service_instance"
- ].get_datasource_credentials.return_value = {"base_url": "https://api.example.com"}
-
- # Create API request
- api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123")
-
- # Act & Assert: Verify proper error handling
- with pytest.raises(ValueError) as exc_info:
- WebsiteService.get_crawl_status_typed(api_request)
-
- assert "API key not found in configuration" in str(exc_info.value)
-
- def test_get_crawl_url_data_firecrawl_success(self, db_session_with_containers, mock_external_service_dependencies):
- """
- Test successful URL data retrieval with Firecrawl provider.
- - This test verifies: - - Firecrawl URL data is properly retrieved - - API credentials are retrieved and decrypted - - Data is returned for matching URL - - Storage fallback works when needed - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock storage to return existing data - mock_external_service_dependencies["storage"].exists.return_value = True - mock_external_service_dependencies["storage"].load_once.return_value = ( - b"[" - b'{"source_url": "https://example.com", "title": "Test Page", ' - b'"description": "Test Description", "markdown": "# Test Content"}' - b"]" - ) - - # Act: Get URL data - result = WebsiteService.get_crawl_url_data( - job_id="test_job_id_123", - provider="firecrawl", - url="https://example.com", - tenant_id=account.current_tenant.id, - ) - - # Assert: Verify successful operation - assert result is not None - assert result["source_url"] == "https://example.com" - assert result["title"] == "Test Page" - assert result["description"] == "Test Description" - assert result["markdown"] == "# Test Content" - - # Verify storage was accessed - mock_external_service_dependencies["storage"].exists.assert_called_once() - mock_external_service_dependencies["storage"].load_once.assert_called_once() - - def test_get_crawl_url_data_watercrawl_success( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test successful URL data retrieval with WaterCrawl provider. - - This test verifies: - - WaterCrawl URL data is properly retrieved - - API credentials are retrieved and decrypted - - Provider returns expected data format - - All required data fields are present - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Act: Get URL data - result = WebsiteService.get_crawl_url_data( - job_id="watercrawl_job_123", - provider="watercrawl", - url="https://example.com", - tenant_id=account.current_tenant.id, - ) - - # Assert: Verify successful operation - assert result is not None - assert result["title"] == "WaterCrawl Page" - assert result["source_url"] == "https://example.com" - assert result["description"] == "Test description" - assert result["markdown"] == "# Test Content" - - def test_get_crawl_url_data_jinareader_success( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test successful URL data retrieval with JinaReader provider. 
- - This test verifies: - - JinaReader URL data is properly retrieved - - API credentials are retrieved and decrypted - - HTTP requests are made with proper parameters - - Data is properly formatted and returned - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock successful response for JinaReader - mock_response = MagicMock() - mock_response.json.return_value = { - "code": 200, - "data": { - "title": "JinaReader Page", - "url": "https://example.com", - "description": "Test description", - "content": "# Test Content", - }, - } - mock_external_service_dependencies["requests"].get.return_value = mock_response - - # Act: Get URL data without job_id (single page scraping) - result = WebsiteService.get_crawl_url_data( - job_id="", provider="jinareader", url="https://example.com", tenant_id=account.current_tenant.id - ) - - # Assert: Verify successful operation - assert result is not None - assert result["title"] == "JinaReader Page" - assert result["url"] == "https://example.com" - assert result["description"] == "Test description" - assert result["content"] == "# Test Content" - - # Verify HTTP request was made - mock_external_service_dependencies["requests"].get.assert_called_once_with( - "https://r.jina.ai/https://example.com", - headers={"Accept": "application/json", "Authorization": "Bearer decrypted_api_key"}, - ) - - def test_get_scrape_url_data_firecrawl_success( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test successful URL scraping with Firecrawl provider. - - This test verifies: - - Firecrawl scraping is properly executed - - API credentials are retrieved and decrypted - - Scraping parameters are correctly passed - - Scraped data is returned in expected format - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock FirecrawlApp scraping response - mock_firecrawl_instance = MagicMock() - mock_firecrawl_instance.scrape_url.return_value = { - "title": "Scraped Page Title", - "content": "This is the scraped content", - "url": "https://example.com", - "description": "Page description", - } - mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance - - # Act: Scrape URL - result = WebsiteService.get_scrape_url_data( - provider="firecrawl", url="https://example.com", tenant_id=account.current_tenant.id, only_main_content=True - ) - - # Assert: Verify successful operation - assert result is not None - assert result["title"] == "Scraped Page Title" - assert result["content"] == "This is the scraped content" - assert result["url"] == "https://example.com" - assert result["description"] == "Page description" - - # Verify FirecrawlApp was called with correct parameters - mock_external_service_dependencies["firecrawl_app"].assert_called_once_with( - api_key="decrypted_api_key", base_url="https://api.example.com" - ) - mock_firecrawl_instance.scrape_url.assert_called_once_with( - url="https://example.com", params={"onlyMainContent": True} - ) - - def test_get_scrape_url_data_watercrawl_success( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test successful URL scraping with WaterCrawl provider. 
- - This test verifies: - - WaterCrawl scraping is properly executed - - API credentials are retrieved and decrypted - - Provider returns expected scraping format - - All required data fields are present - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Act: Scrape URL - result = WebsiteService.get_scrape_url_data( - provider="watercrawl", - url="https://example.com", - tenant_id=account.current_tenant.id, - only_main_content=False, - ) - - # Assert: Verify successful operation - assert result is not None - assert result["title"] == "Scraped Page" - assert result["content"] == "Test content" - assert result["url"] == "https://example.com" - - # Verify WaterCrawlProvider was called with correct parameters - mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with( - api_key="decrypted_api_key", base_url="https://api.example.com" - ) - - def test_get_scrape_url_data_invalid_provider(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test URL scraping fails with invalid provider. - - This test verifies: - - Invalid provider raises ValueError - - Proper error message is provided - - Service handles unsupported providers gracefully - """ - # Arrange: Create test account and prepare request with invalid provider - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.get_scrape_url_data( - provider="invalid_provider", - url="https://example.com", - tenant_id=account.current_tenant.id, - only_main_content=False, - ) - - assert "Invalid provider" in str(exc_info.value) - - def test_crawl_options_include_exclude_paths(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test CrawlOptions include and exclude path methods. - - This test verifies: - - Include paths are properly parsed from comma-separated string - - Exclude paths are properly parsed from comma-separated string - - Empty or None values are handled correctly - - Path lists are returned in expected format - """ - # Arrange: Create CrawlOptions with various path configurations - options_with_paths = CrawlOptions(includes="blog,news,articles", excludes="admin,private,test") - - options_without_paths = CrawlOptions(includes=None, excludes="") - - # Act: Get include and exclude paths - include_paths = options_with_paths.get_include_paths() - exclude_paths = options_with_paths.get_exclude_paths() - - empty_include_paths = options_without_paths.get_include_paths() - empty_exclude_paths = options_without_paths.get_exclude_paths() - - # Assert: Verify path parsing - assert include_paths == ["blog", "news", "articles"] - assert exclude_paths == ["admin", "private", "test"] - assert empty_include_paths == [] - assert empty_exclude_paths == [] - - def test_website_crawl_api_request_conversion(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test WebsiteCrawlApiRequest conversion to CrawlRequest. 
- - This test verifies: - - API request is properly converted to internal CrawlRequest - - All options are correctly mapped - - Default values are applied when options are missing - - Conversion maintains data integrity - """ - # Arrange: Create API request with various options - api_request = WebsiteCrawlApiRequest( - provider="firecrawl", - url="https://example.com", - options={ - "limit": 10, - "crawl_sub_pages": True, - "only_main_content": True, - "includes": "blog,news", - "excludes": "admin,private", - "max_depth": 3, - "use_sitemap": False, - }, - ) - - # Act: Convert to CrawlRequest - crawl_request = api_request.to_crawl_request() - - # Assert: Verify conversion - assert crawl_request.url == "https://example.com" - assert crawl_request.provider == "firecrawl" - assert crawl_request.options.limit == 10 - assert crawl_request.options.crawl_sub_pages is True - assert crawl_request.options.only_main_content is True - assert crawl_request.options.includes == "blog,news" - assert crawl_request.options.excludes == "admin,private" - assert crawl_request.options.max_depth == 3 - assert crawl_request.options.use_sitemap is False - - def test_website_crawl_api_request_from_args(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test WebsiteCrawlApiRequest creation from Flask arguments. - - This test verifies: - - Request is properly created from parsed arguments - - Required fields are validated - - Optional fields are handled correctly - - Validation errors are properly raised - """ - # Arrange: Prepare valid arguments - valid_args = {"provider": "watercrawl", "url": "https://example.com", "options": {"limit": 5}} - - # Act: Create request from args - request = WebsiteCrawlApiRequest.from_args(valid_args) - - # Assert: Verify request creation - assert request.provider == "watercrawl" - assert request.url == "https://example.com" - assert request.options == {"limit": 5} - - # Test missing provider - invalid_args = {"url": "https://example.com", "options": {}} - with pytest.raises(ValueError) as exc_info: - WebsiteCrawlApiRequest.from_args(invalid_args) - assert "Provider is required" in str(exc_info.value) - - # Test missing URL - invalid_args = {"provider": "watercrawl", "options": {}} - with pytest.raises(ValueError) as exc_info: - WebsiteCrawlApiRequest.from_args(invalid_args) - assert "URL is required" in str(exc_info.value) - - # Test missing options - invalid_args = {"provider": "watercrawl", "url": "https://example.com"} - with pytest.raises(ValueError) as exc_info: - WebsiteCrawlApiRequest.from_args(invalid_args) - assert "Options are required" in str(exc_info.value) - - def test_crawl_url_jinareader_sub_pages_success( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test successful URL crawling with JinaReader provider for sub-pages. 
- - This test verifies: - - JinaReader provider handles sub-page crawling correctly - - HTTP POST request is made with proper parameters - - Job ID is returned for multi-page crawling - - All required parameters are passed correctly - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request for sub-page crawling - api_request = WebsiteCrawlApiRequest( - provider="jinareader", - url="https://example.com", - options={ - "limit": 5, - "crawl_sub_pages": True, - "only_main_content": False, - "includes": None, - "excludes": None, - "max_depth": None, - "use_sitemap": True, - }, - ) - - # Act: Execute crawl operation - result = WebsiteService.crawl_url(api_request) - - # Assert: Verify successful operation - assert result is not None - assert result["status"] == "active" - assert result["job_id"] == "jina_job_123" - - # Verify HTTP POST request was made for sub-page crawling - mock_external_service_dependencies["requests"].post.assert_called_once_with( - "https://adaptivecrawl-kir3wx7b3a-uc.a.run.app", - json={"url": "https://example.com", "maxPages": 5, "useSitemap": True}, - headers={"Content-Type": "application/json", "Authorization": "Bearer decrypted_api_key"}, - ) - - def test_crawl_url_jinareader_failed_response(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test JinaReader crawling fails when API returns error. - - This test verifies: - - Failed API response raises ValueError - - Proper error message is provided - - Service handles API failures gracefully - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock failed response - mock_failed_response = MagicMock() - mock_failed_response.json.return_value = {"code": 500, "error": "Internal server error"} - mock_external_service_dependencies["requests"].get.return_value = mock_failed_response - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request - api_request = WebsiteCrawlApiRequest( - provider="jinareader", - url="https://example.com", - options={"limit": 1, "crawl_sub_pages": False, "only_main_content": True}, - ) - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.crawl_url(api_request) - - assert "Failed to crawl" in str(exc_info.value) - - def test_get_crawl_status_firecrawl_active_job( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test Firecrawl status retrieval for active (not completed) job. 
- - This test verifies: - - Active job status is properly returned - - Redis cache is not deleted for active jobs - - Time consuming is not calculated for active jobs - - All required status fields are present - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock active job status - mock_firecrawl_instance = MagicMock() - mock_firecrawl_instance.check_crawl_status.return_value = { - "status": "active", - "total": 10, - "current": 3, - "data": [], - } - mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance - - # Mock current_user for the test - mock_current_user = create_autospec(Account, instance=True) - mock_current_user.current_tenant_id = account.current_tenant.id - - with patch("services.website_service.current_user", mock_current_user): - # Create API request - api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="active_job_123") - - # Act: Get crawl status - result = WebsiteService.get_crawl_status_typed(api_request) - - # Assert: Verify active job status - assert result is not None - assert result["status"] == "active" - assert result["job_id"] == "active_job_123" - assert result["total"] == 10 - assert result["current"] == 3 - assert "data" in result - assert "time_consuming" not in result - - # Verify Redis cache was not accessed for active jobs - mock_external_service_dependencies["redis_client"].get.assert_not_called() - mock_external_service_dependencies["redis_client"].delete.assert_not_called() - - def test_get_crawl_url_data_firecrawl_storage_fallback( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test Firecrawl URL data retrieval with storage fallback. - - This test verifies: - - Storage fallback works when storage has data - - API call is not made when storage has data - - Data is properly parsed from storage - - Correct URL data is returned - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock storage to return existing data - mock_external_service_dependencies["storage"].exists.return_value = True - mock_external_service_dependencies["storage"].load_once.return_value = ( - b"[" - b'{"source_url": "https://example.com/page1", ' - b'"title": "Page 1", "description": "Description 1", "markdown": "# Page 1"}, ' - b'{"source_url": "https://example.com/page2", "title": "Page 2", ' - b'"description": "Description 2", "markdown": "# Page 2"}' - b"]" - ) - - # Act: Get URL data for specific URL - result = WebsiteService.get_crawl_url_data( - job_id="test_job_id_123", - provider="firecrawl", - url="https://example.com/page1", - tenant_id=account.current_tenant.id, - ) - - # Assert: Verify successful operation - assert result is not None - assert result["source_url"] == "https://example.com/page1" - assert result["title"] == "Page 1" - assert result["description"] == "Description 1" - assert result["markdown"] == "# Page 1" - - # Verify storage was accessed - mock_external_service_dependencies["storage"].exists.assert_called_once() - mock_external_service_dependencies["storage"].load_once.assert_called_once() - - def test_get_crawl_url_data_firecrawl_api_fallback( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test Firecrawl URL data retrieval with API fallback when storage is empty. 
- - This test verifies: - - API fallback works when storage has no data - - FirecrawlApp is called to get data - - Completed job status is checked - - Data is returned from API response - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock storage to return no data - mock_external_service_dependencies["storage"].exists.return_value = False - - # Mock FirecrawlApp for API fallback - mock_firecrawl_instance = MagicMock() - mock_firecrawl_instance.check_crawl_status.return_value = { - "status": "completed", - "data": [ - { - "source_url": "https://example.com/api_page", - "title": "API Page", - "description": "API Description", - "markdown": "# API Content", - } - ], - } - mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance - - # Act: Get URL data - result = WebsiteService.get_crawl_url_data( - job_id="test_job_id_123", - provider="firecrawl", - url="https://example.com/api_page", - tenant_id=account.current_tenant.id, - ) - - # Assert: Verify successful operation - assert result is not None - assert result["source_url"] == "https://example.com/api_page" - assert result["title"] == "API Page" - assert result["description"] == "API Description" - assert result["markdown"] == "# API Content" - - # Verify API was called - mock_external_service_dependencies["firecrawl_app"].assert_called_once() - - def test_get_crawl_url_data_firecrawl_incomplete_job( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test Firecrawl URL data retrieval fails for incomplete job. - - This test verifies: - - Incomplete job raises ValueError - - Proper error message is provided - - Service handles incomplete jobs gracefully - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock storage to return no data - mock_external_service_dependencies["storage"].exists.return_value = False - - # Mock incomplete job status - mock_firecrawl_instance = MagicMock() - mock_firecrawl_instance.check_crawl_status.return_value = {"status": "active", "data": []} - mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.get_crawl_url_data( - job_id="test_job_id_123", - provider="firecrawl", - url="https://example.com/page", - tenant_id=account.current_tenant.id, - ) - - assert "Crawl job is not completed" in str(exc_info.value) - - def test_get_crawl_url_data_jinareader_with_job_id( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test JinaReader URL data retrieval with job ID for multi-page crawling. 
- - This test verifies: - - JinaReader handles job ID-based data retrieval - - Status check is performed before data retrieval - - Processed data is properly formatted - - Correct URL data is returned - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock successful status response - mock_status_response = MagicMock() - mock_status_response.json.return_value = { - "code": 200, - "data": { - "status": "completed", - "processed": { - "https://example.com/page1": { - "data": { - "title": "Page 1", - "url": "https://example.com/page1", - "description": "Description 1", - "content": "# Content 1", - } - } - }, - }, - } - mock_external_service_dependencies["requests"].post.return_value = mock_status_response - - # Act: Get URL data with job ID - result = WebsiteService.get_crawl_url_data( - job_id="jina_job_123", - provider="jinareader", - url="https://example.com/page1", - tenant_id=account.current_tenant.id, - ) - - # Assert: Verify successful operation - assert result is not None - assert result["title"] == "Page 1" - assert result["url"] == "https://example.com/page1" - assert result["description"] == "Description 1" - assert result["content"] == "# Content 1" - - # Verify HTTP requests were made - assert mock_external_service_dependencies["requests"].post.call_count == 2 - - def test_get_crawl_url_data_jinareader_incomplete_job( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test JinaReader URL data retrieval fails for incomplete job. - - This test verifies: - - Incomplete job raises ValueError - - Proper error message is provided - - Service handles incomplete jobs gracefully - """ - # Arrange: Create test account and prepare request - account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) - - # Mock incomplete job status - mock_status_response = MagicMock() - mock_status_response.json.return_value = {"code": 200, "data": {"status": "active", "processed": {}}} - mock_external_service_dependencies["requests"].post.return_value = mock_status_response - - # Act & Assert: Verify proper error handling - with pytest.raises(ValueError) as exc_info: - WebsiteService.get_crawl_url_data( - job_id="jina_job_123", - provider="jinareader", - url="https://example.com/page", - tenant_id=account.current_tenant.id, - ) - - assert "Crawl job is not completed" in str(exc_info.value) - - def test_crawl_options_default_values(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test CrawlOptions default values and initialization. 
- - This test verifies: - - Default values are properly set - - Optional fields can be None - - Boolean fields have correct defaults - - Integer fields have correct defaults - """ - # Arrange: Create CrawlOptions with minimal parameters - options = CrawlOptions() - - # Assert: Verify default values - assert options.limit == 1 - assert options.crawl_sub_pages is False - assert options.only_main_content is False - assert options.includes is None - assert options.excludes is None - assert options.max_depth is None - assert options.use_sitemap is True - - # Test with custom values - custom_options = CrawlOptions( - limit=10, - crawl_sub_pages=True, - only_main_content=True, - includes="blog,news", - excludes="admin", - max_depth=3, - use_sitemap=False, - ) - - assert custom_options.limit == 10 - assert custom_options.crawl_sub_pages is True - assert custom_options.only_main_content is True - assert custom_options.includes == "blog,news" - assert custom_options.excludes == "admin" - assert custom_options.max_depth == 3 - assert custom_options.use_sitemap is False - - def test_website_crawl_status_api_request_from_args( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test WebsiteCrawlStatusApiRequest creation from Flask arguments. - - This test verifies: - - Request is properly created from parsed arguments - - Required fields are validated - - Job ID is properly handled - - Validation errors are properly raised - """ - # Arrange: Prepare valid arguments - valid_args = {"provider": "firecrawl"} - job_id = "test_job_123" - - # Act: Create request from args - request = WebsiteCrawlStatusApiRequest.from_args(valid_args, job_id) - - # Assert: Verify request creation - assert request.provider == "firecrawl" - assert request.job_id == "test_job_123" - - # Test missing provider - invalid_args = {} - with pytest.raises(ValueError) as exc_info: - WebsiteCrawlStatusApiRequest.from_args(invalid_args, job_id) - assert "Provider is required" in str(exc_info.value) - - # Test missing job ID - with pytest.raises(ValueError) as exc_info: - WebsiteCrawlStatusApiRequest.from_args(valid_args, "") - assert "Job ID is required" in str(exc_info.value) - - def test_scrape_request_initialization(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test ScrapeRequest dataclass initialization and properties. - - This test verifies: - - ScrapeRequest is properly initialized - - All fields are correctly set - - Boolean field works correctly - - String fields are properly assigned - """ - # Arrange: Create ScrapeRequest - request = ScrapeRequest( - provider="firecrawl", url="https://example.com", tenant_id="tenant_123", only_main_content=True - ) - - # Assert: Verify initialization - assert request.provider == "firecrawl" - assert request.url == "https://example.com" - assert request.tenant_id == "tenant_123" - assert request.only_main_content is True - - # Test with different values - request2 = ScrapeRequest( - provider="watercrawl", url="https://test.com", tenant_id="tenant_456", only_main_content=False - ) - - assert request2.provider == "watercrawl" - assert request2.url == "https://test.com" - assert request2.tenant_id == "tenant_456" - assert request2.only_main_content is False
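
The deleted fixture patches the DatasourceProviderService class, so code under test that calls DatasourceProviderService() only ever sees mock_class.return_value; per-test overrides therefore belong on the instance mock, not on the patched class. Below is a minimal, self-contained sketch of that distinction (names such as CredentialService and load_api_key are hypothetical illustrations, not part of this repository):

from unittest.mock import MagicMock, patch


class CredentialService:
    """Stand-in for a service that the code under test instantiates itself."""

    def get_credentials(self):
        raise RuntimeError("real implementation would hit the network")


def load_api_key():
    service = CredentialService()  # instantiated inside the code under test
    creds = service.get_credentials()
    return creds.get("api_key") if creds else None


def test_missing_credentials():
    with patch(f"{__name__}.CredentialService") as mock_class:
        instance = MagicMock(spec=CredentialService)
        instance.get_credentials.return_value = None
        mock_class.return_value = instance  # what CredentialService() hands back

        # Configuring mock_class.get_credentials instead would be a silent no-op:
        # the code under test only ever talks to mock_class.return_value.
        assert load_api_key() is None


test_missing_credentials()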