chore: port 2 api as deprecated (#35261)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: 99 <wh2099@pm.me>
2026-06-20 09:11:08 +08:00 · 2026-04-21 21:29:39 +09:00 · 2026-04-21 21:29:39 +09:00 · 2e1b11bdb2
commit 2e1b11bdb2
parent d65a6b4810
2 changed files with 217 additions and 133 deletions
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@ -1,4 +1,12 @@
+"""Service API endpoints for dataset document management.
+
+The canonical Service API paths use hyphenated route segments. Legacy underscore
+aliases remain registered for backward compatibility, but they must stay marked
+deprecated in generated API docs so clients migrate toward the canonical paths.
+"""
+
 import json
+from collections.abc import Mapping
 from contextlib import ExitStack
 from typing import Self
 from uuid import UUID
@ -117,12 +125,137 @@ register_schema_models(
 )


-@service_api_ns.route(
-    "/datasets/<uuid:dataset_id>/document/create_by_text",
-    "/datasets/<uuid:dataset_id>/document/create-by-text",
-)
+def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[str, object], int]:
+    """Create a document from text for both canonical and legacy routes."""
+    payload = DocumentTextCreatePayload.model_validate(service_api_ns.payload or {})
+    args = payload.model_dump(exclude_none=True)
+
+    dataset_id_str = str(dataset_id)
+    tenant_id_str = str(tenant_id)
+    dataset = db.session.scalar(
+        select(Dataset).where(Dataset.tenant_id == tenant_id_str, Dataset.id == dataset_id_str).limit(1)
+    )
+
+    if not dataset:
+        raise ValueError("Dataset does not exist.")
+
+    if not dataset.indexing_technique and not args["indexing_technique"]:
+        raise ValueError("indexing_technique is required.")
+
+    embedding_model_provider = payload.embedding_model_provider
+    embedding_model = payload.embedding_model
+    if embedding_model_provider and embedding_model:
+        DatasetService.check_embedding_model_setting(tenant_id_str, embedding_model_provider, embedding_model)
+
+    retrieval_model = payload.retrieval_model
+    if (
+        retrieval_model
+        and retrieval_model.reranking_model
+        and retrieval_model.reranking_model.reranking_provider_name
+        and retrieval_model.reranking_model.reranking_model_name
+    ):
+        DatasetService.check_reranking_model_setting(
+            tenant_id_str,
+            retrieval_model.reranking_model.reranking_provider_name,
+            retrieval_model.reranking_model.reranking_model_name,
+        )
+
+    if not current_user:
+        raise ValueError("current_user is required")
+
+    upload_file = FileService(db.engine).upload_text(
+        text=payload.text, text_name=payload.name, user_id=current_user.id, tenant_id=tenant_id_str
+    )
+    data_source = {
+        "type": "upload_file",
+        "info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
+    }
+    args["data_source"] = data_source
+    knowledge_config = KnowledgeConfig.model_validate(args)
+    DocumentService.document_create_args_validate(knowledge_config)
+
+    if not current_user:
+        raise ValueError("current_user is required")
+
+    try:
+        documents, batch = DocumentService.save_document_with_dataset_id(
+            dataset=dataset,
+            knowledge_config=knowledge_config,
+            account=current_user,
+            dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
+            created_from="api",
+        )
+    except ProviderTokenNotInitError as ex:
+        raise ProviderNotInitializeError(ex.description)
+    document = documents[0]
+
+    documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
+    return documents_and_batch_fields, 200
+
+
+def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
+    """Update a document from text for both canonical and legacy routes."""
+    payload = DocumentTextUpdate.model_validate(service_api_ns.payload or {})
+    dataset = db.session.scalar(
+        select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == str(dataset_id)).limit(1)
+    )
+    args = payload.model_dump(exclude_none=True)
+    if not dataset:
+        raise ValueError("Dataset does not exist.")
+
+    retrieval_model = payload.retrieval_model
+    if (
+        retrieval_model
+        and retrieval_model.reranking_model
+        and retrieval_model.reranking_model.reranking_provider_name
+        and retrieval_model.reranking_model.reranking_model_name
+    ):
+        DatasetService.check_reranking_model_setting(
+            tenant_id,
+            retrieval_model.reranking_model.reranking_provider_name,
+            retrieval_model.reranking_model.reranking_model_name,
+        )
+
+    # indexing_technique is already set in dataset since this is an update
+    args["indexing_technique"] = dataset.indexing_technique
+
+    if args.get("text"):
+        text = args.get("text")
+        name = args.get("name")
+        if not current_user:
+            raise ValueError("current_user is required")
+        upload_file = FileService(db.engine).upload_text(
+            text=str(text), text_name=str(name), user_id=current_user.id, tenant_id=tenant_id
+        )
+        data_source = {
+            "type": "upload_file",
+            "info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
+        }
+        args["data_source"] = data_source
+
+    args["original_document_id"] = str(document_id)
+    knowledge_config = KnowledgeConfig.model_validate(args)
+    DocumentService.document_create_args_validate(knowledge_config)
+
+    try:
+        documents, batch = DocumentService.save_document_with_dataset_id(
+            dataset=dataset,
+            knowledge_config=knowledge_config,
+            account=current_user,
+            dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
+            created_from="api",
+        )
+    except ProviderTokenNotInitError as ex:
+        raise ProviderNotInitializeError(ex.description)
+    document = documents[0]
+
+    documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
+    return documents_and_batch_fields, 200
+
+
+@service_api_ns.route("/datasets/<uuid:dataset_id>/document/create-by-text")
 class DocumentAddByTextApi(DatasetApiResource):
-    """Resource for documents."""
+    """Resource for the canonical text document creation route."""

    @service_api_ns.expect(service_api_ns.models[DocumentTextCreatePayload.__name__])
    @service_api_ns.doc("create_document_by_text")
@ -138,81 +271,43 @@ class DocumentAddByTextApi(DatasetApiResource):
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_resource_check("documents", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
-    def post(self, tenant_id, dataset_id):
+    def post(self, tenant_id: str, dataset_id: UUID):
        """Create document by text."""
-        payload = DocumentTextCreatePayload.model_validate(service_api_ns.payload or {})
-        args = payload.model_dump(exclude_none=True)
+        return _create_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id)

-        dataset_id = str(dataset_id)
-        tenant_id = str(tenant_id)
-        dataset = db.session.scalar(
-            select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
+
+@service_api_ns.route("/datasets/<uuid:dataset_id>/document/create_by_text")
+class DeprecatedDocumentAddByTextApi(DatasetApiResource):
+    """Deprecated resource alias for text document creation."""
+
+    @service_api_ns.expect(service_api_ns.models[DocumentTextCreatePayload.__name__])
+    @service_api_ns.doc("create_document_by_text_deprecated")
+    @service_api_ns.doc(deprecated=True)
+    @service_api_ns.doc(
+        description=(
+            "Deprecated legacy alias for creating a new document by providing text content. "
+            "Use /datasets/{dataset_id}/document/create-by-text instead."
        )
-
-        if not dataset:
-            raise ValueError("Dataset does not exist.")
-
-        if not dataset.indexing_technique and not args["indexing_technique"]:
-            raise ValueError("indexing_technique is required.")
-
-        embedding_model_provider = payload.embedding_model_provider
-        embedding_model = payload.embedding_model
-        if embedding_model_provider and embedding_model:
-            DatasetService.check_embedding_model_setting(tenant_id, embedding_model_provider, embedding_model)
-
-        retrieval_model = payload.retrieval_model
-        if (
-            retrieval_model
-            and retrieval_model.reranking_model
-            and retrieval_model.reranking_model.reranking_provider_name
-            and retrieval_model.reranking_model.reranking_model_name
-        ):
-            DatasetService.check_reranking_model_setting(
-                tenant_id,
-                retrieval_model.reranking_model.reranking_provider_name,
-                retrieval_model.reranking_model.reranking_model_name,
-            )
-
-        if not current_user:
-            raise ValueError("current_user is required")
-
-        upload_file = FileService(db.engine).upload_text(
-            text=payload.text, text_name=payload.name, user_id=current_user.id, tenant_id=tenant_id
-        )
-        data_source = {
-            "type": "upload_file",
-            "info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
+    )
+    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(
+        responses={
+            200: "Document created successfully",
+            401: "Unauthorized - invalid API token",
+            400: "Bad request - invalid parameters",
        }
-        args["data_source"] = data_source
-        knowledge_config = KnowledgeConfig.model_validate(args)
-        # validate args
-        DocumentService.document_create_args_validate(knowledge_config)
-
-        if not current_user:
-            raise ValueError("current_user is required")
-
-        try:
-            documents, batch = DocumentService.save_document_with_dataset_id(
-                dataset=dataset,
-                knowledge_config=knowledge_config,
-                account=current_user,
-                dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
-                created_from="api",
-            )
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        document = documents[0]
-
-        documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
-        return documents_and_batch_fields, 200
+    )
+    @cloud_edition_billing_resource_check("vector_space", "dataset")
+    @cloud_edition_billing_resource_check("documents", "dataset")
+    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
+    def post(self, tenant_id: str, dataset_id: UUID):
+        """Create document by text through the deprecated underscore alias."""
+        return _create_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id)


-@service_api_ns.route(
-    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text",
-    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-text",
-)
+@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-text")
 class DocumentUpdateByTextApi(DatasetApiResource):
-    """Resource for update documents."""
+    """Resource for the canonical text document update route."""

    @service_api_ns.expect(service_api_ns.models[DocumentTextUpdate.__name__])
    @service_api_ns.doc("update_document_by_text")
@ -229,62 +324,35 @@ class DocumentUpdateByTextApi(DatasetApiResource):
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
    def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
        """Update document by text."""
-        payload = DocumentTextUpdate.model_validate(service_api_ns.payload or {})
-        dataset = db.session.scalar(
-            select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == str(dataset_id)).limit(1)
+        return _update_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id)
+
+
+@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text")
+class DeprecatedDocumentUpdateByTextApi(DatasetApiResource):
+    """Deprecated resource alias for text document updates."""
+
+    @service_api_ns.expect(service_api_ns.models[DocumentTextUpdate.__name__])
+    @service_api_ns.doc("update_document_by_text_deprecated")
+    @service_api_ns.doc(deprecated=True)
+    @service_api_ns.doc(
+        description=(
+            "Deprecated legacy alias for updating an existing document by providing text content. "
+            "Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead."
        )
-        args = payload.model_dump(exclude_none=True)
-        if not dataset:
-            raise ValueError("Dataset does not exist.")
-
-        retrieval_model = payload.retrieval_model
-        if (
-            retrieval_model
-            and retrieval_model.reranking_model
-            and retrieval_model.reranking_model.reranking_provider_name
-            and retrieval_model.reranking_model.reranking_model_name
-        ):
-            DatasetService.check_reranking_model_setting(
-                tenant_id,
-                retrieval_model.reranking_model.reranking_provider_name,
-                retrieval_model.reranking_model.reranking_model_name,
-            )
-
-        # indexing_technique is already set in dataset since this is an update
-        args["indexing_technique"] = dataset.indexing_technique
-
-        if args.get("text"):
-            text = args.get("text")
-            name = args.get("name")
-            if not current_user:
-                raise ValueError("current_user is required")
-            upload_file = FileService(db.engine).upload_text(
-                text=str(text), text_name=str(name), user_id=current_user.id, tenant_id=tenant_id
-            )
-            data_source = {
-                "type": "upload_file",
-                "info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
-            }
-            args["data_source"] = data_source
-        # validate args
-        args["original_document_id"] = str(document_id)
-        knowledge_config = KnowledgeConfig.model_validate(args)
-        DocumentService.document_create_args_validate(knowledge_config)
-
-        try:
-            documents, batch = DocumentService.save_document_with_dataset_id(
-                dataset=dataset,
-                knowledge_config=knowledge_config,
-                account=current_user,
-                dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
-                created_from="api",
-            )
-        except ProviderTokenNotInitError as ex:
-            raise ProviderNotInitializeError(ex.description)
-        document = documents[0]
-
-        documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
-        return documents_and_batch_fields, 200
+    )
+    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(
+        responses={
+            200: "Document updated successfully",
+            401: "Unauthorized - invalid API token",
+            404: "Document not found",
+        }
+    )
+    @cloud_edition_billing_resource_check("vector_space", "dataset")
+    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
+    def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
+        """Update document by text through the deprecated underscore alias."""
+        return _update_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id)


@service_api_ns.route(
--- a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py
+++ b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py
@ -22,6 +22,8 @@ import pytest
 from werkzeug.exceptions import Forbidden, NotFound

 from controllers.service_api.dataset.document import (
+    DeprecatedDocumentAddByTextApi,
+    DeprecatedDocumentUpdateByTextApi,
    DocumentAddByFileApi,
    DocumentAddByTextApi,
    DocumentApi,
@ -1005,7 +1007,7 @@ class TestDocumentAddByTextApi:

        # Act
        with app.test_request_context(
-            f"/datasets/{mock_dataset.id}/document/create_by_text",
+            f"/datasets/{mock_dataset.id}/document/create-by-text",
            method="POST",
            json={
                "name": "Test Document",
@ -1037,7 +1039,7 @@ class TestDocumentAddByTextApi:

        # Act & Assert
        with app.test_request_context(
-            f"/datasets/{mock_dataset.id}/document/create_by_text",
+            f"/datasets/{mock_dataset.id}/document/create-by-text",
            method="POST",
            json={"name": "Test Document", "text": "Content"},
            headers={"Authorization": "Bearer test_token"},
@ -1066,7 +1068,7 @@ class TestDocumentAddByTextApi:

        # Act & Assert
        with app.test_request_context(
-            f"/datasets/{mock_dataset.id}/document/create_by_text",
+            f"/datasets/{mock_dataset.id}/document/create-by-text",
            method="POST",
            json={"name": "Test Document", "text": "Content"},
            headers={"Authorization": "Bearer test_token"},
@ -1093,6 +1095,20 @@ class TestArchivedDocumentImmutableError:
        assert error.code == 403


+class TestDocumentTextRouteDeprecation:
+    """Test that legacy underscore text routes stay marked deprecated."""
+
+    def test_create_by_text_legacy_alias_is_deprecated(self):
+        """Ensure only the legacy create-by-text alias is marked deprecated."""
+        assert DeprecatedDocumentAddByTextApi.post.__apidoc__["deprecated"] is True
+        assert DocumentAddByTextApi.post.__apidoc__.get("deprecated") is not True
+
+    def test_update_by_text_legacy_alias_is_deprecated(self):
+        """Ensure only the legacy update-by-text alias is marked deprecated."""
+        assert DeprecatedDocumentUpdateByTextApi.post.__apidoc__["deprecated"] is True
+        assert DocumentUpdateByTextApi.post.__apidoc__.get("deprecated") is not True
+
+
 # =============================================================================
 # Endpoint tests for DocumentUpdateByTextApi, DocumentAddByFileApi,
 # DocumentUpdateByFileApi.
@ -1162,7 +1178,7 @@ class TestDocumentUpdateByTextApiPost:

        doc_id = str(uuid.uuid4())
        with app.test_request_context(
-            f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_text",
+            f"/datasets/{mock_dataset.id}/documents/{doc_id}/update-by-text",
            method="POST",
            json={"name": "Updated Doc", "text": "New content"},
            headers={"Authorization": "Bearer test_token"},
@ -1195,7 +1211,7 @@ class TestDocumentUpdateByTextApiPost:

        doc_id = str(uuid.uuid4())
        with app.test_request_context(
-            f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_text",
+            f"/datasets/{mock_dataset.id}/documents/{doc_id}/update-by-text",
            method="POST",
            json={"name": "Doc", "text": "Content"},
            headers={"Authorization": "Bearer test_token"},