From 928f888ef5783883996197ad1c841e5ae650c4d6 Mon Sep 17 00:00:00 2001 From: chariri Date: Sat, 30 May 2026 22:54:01 +0900 Subject: [PATCH] refactor(api): migrate console/service_api.dataset.segment to BaseModel (#36522) Co-authored-by: WH-2099 Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- api/controllers/common/schema.py | 64 +++- .../console/datasets/datasets_segments.py | 184 ++++++++---- .../service_api/dataset/segment.py | 273 +++++++++++------- api/fields/segment_fields.py | 154 ++++++---- api/openapi/markdown/console-swagger.md | 227 +++++++++++---- api/openapi/markdown/service-swagger.md | 205 ++++++++++--- .../console/datasets/test_segments.py | 97 +++++++ .../service_api/dataset/test_segment.py | 153 ++++++++++ .../controllers/common/test_schema.py | 46 +++ .../datasets/test_datasets_segments.py | 204 +++++++++---- .../dataset/test_dataset_segment.py | 199 ++++++++----- .../api/console/datasets/orpc.gen.ts | 85 ++---- .../api/console/datasets/types.gen.ts | 141 +++++++-- .../generated/api/console/datasets/zod.gen.ts | 184 ++++++++++-- .../generated/api/service/orpc.gen.ts | 56 +--- .../generated/api/service/types.gen.ts | 128 ++++++-- .../generated/api/service/zod.gen.ts | 154 ++++++++-- 17 files changed, 1906 insertions(+), 648 deletions(-) create mode 100644 api/tests/test_containers_integration_tests/controllers/console/datasets/test_segments.py create mode 100644 api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_segment.py diff --git a/api/controllers/common/schema.py b/api/controllers/common/schema.py index a1b37d3df4..5ce1265ed8 100644 --- a/api/controllers/common/schema.py +++ b/api/controllers/common/schema.py @@ -6,10 +6,11 @@ These helpers keep that translation centralized so models registered through `register_schema_models` emit resolvable Swagger 2.0 references. """ -from collections.abc import Mapping +from collections.abc import Iterable, Mapping from enum import StrEnum -from typing import Any, Literal, NotRequired, TypedDict +from typing import Any, Literal, NotRequired, Protocol, TypedDict +from flask import request from flask_restx import Namespace from pydantic import BaseModel, TypeAdapter @@ -36,6 +37,12 @@ QueryParamDoc = TypedDict( ) +class QueryArgs(Protocol): + def to_dict(self, flat: bool = True) -> dict[str, str]: ... + + def getlist(self, key: str) -> list[str]: ... + + def _register_json_schema(namespace: Namespace, name: str, schema: dict) -> None: """Register a JSON schema and promote any nested Pydantic `$defs`.""" @@ -167,6 +174,58 @@ def query_params_from_model(model: type[BaseModel]) -> dict[str, QueryParamDoc]: return params +def query_params_from_request[ModelT: BaseModel]( + model: type[ModelT], + *, + list_fields: Iterable[str] = (), + args: QueryArgs | None = None, + use_defaults_for_malformed_ints: bool = False, +) -> ModelT: + """Validate query args with Pydantic while preserving Flask query parsing behavior. + + Repeated params need explicit ``getlist()`` handling because Werkzeug's + ``to_dict()`` keeps only one value. For malformed scalar integers, Flask's + For endpoints migrated from ``request.args.get(..., type=int, default=...)``, + set ``use_defaults_for_malformed_ints`` to preserve Flask's fallback to + defaults for malformed optional integer params. + """ + + query_args = args or request.args + params: dict[str, Any] = query_args.to_dict() + for field_name in list_fields: + params[field_name] = query_args.getlist(field_name) + + if use_defaults_for_malformed_ints: + _drop_malformed_defaulted_integer_params(model, params) + return model.model_validate(params) + + +def _drop_malformed_defaulted_integer_params(model: type[BaseModel], params: dict[str, Any]) -> None: + properties = model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0).get("properties", {}) + if not isinstance(properties, Mapping): + return + + for name, value in list(params.items()): + if not isinstance(value, str): + continue + + field = model.model_fields.get(name) + if field is None or field.is_required(): + continue + + property_schema = properties.get(name) + if not isinstance(property_schema, Mapping): + continue + + if _nullable_property_schema(property_schema).get("type") != "integer": + continue + + try: + int(value) + except ValueError: + params.pop(name) + + def _query_param_from_property(property_schema: Mapping[str, Any], *, required: bool) -> QueryParamDoc: param_schema = _nullable_property_schema(property_schema) param_doc: QueryParamDoc = {"in": "query", "required": required} @@ -239,6 +298,7 @@ __all__ = [ "DEFAULT_REF_TEMPLATE_SWAGGER_2_0", "get_or_create_model", "query_params_from_model", + "query_params_from_request", "register_enum_models", "register_response_schema_model", "register_response_schema_models", diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index a8a8a5237c..77a6462427 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -1,9 +1,10 @@ import uuid from typing import Literal +from typing import cast as type_cast from uuid import UUID from flask import request -from flask_restx import Resource, marshal +from flask_restx import Resource from pydantic import BaseModel, Field from sqlalchemy import String, case, cast, func, literal, or_, select from sqlalchemy.dialects.postgresql import JSONB @@ -13,7 +14,12 @@ import services from configs import dify_config from controllers.common.controller_schemas import ChildChunkCreatePayload, ChildChunkUpdatePayload from controllers.common.fields import SimpleResultResponse -from controllers.common.schema import register_response_schema_models, register_schema_models +from controllers.common.schema import ( + query_params_from_model, + query_params_from_request, + register_response_schema_models, + register_schema_models, +) from controllers.console import console_ns from controllers.console.app.error import ProviderNotInitializeError from controllers.console.datasets.error import ( @@ -34,9 +40,17 @@ from core.rag.index_processor.constant.index_type import IndexTechniqueType from extensions.ext_database import db from extensions.ext_redis import redis_client from fields.base import ResponseModel -from fields.segment_fields import child_chunk_fields, segment_fields +from fields.segment_fields import ( + ChildChunkDetailResponse, + ChildChunkListResponse, + ChildChunkResponse, + SegmentDetailResponse, + SegmentResponse, + segment_response_with_summary, + segment_responses_with_summaries, +) from graphon.model_runtime.entities.model_entities import ModelType -from libs.helper import escape_like_pattern +from libs.helper import dump_response, escape_like_pattern from libs.login import current_account_with_tenant, login_required from models.dataset import ChildChunk, DocumentSegment from models.model import UploadFile @@ -44,20 +58,10 @@ from services.dataset_service import DatasetService, DocumentService, SegmentSer from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingServiceError +from services.summary_index_service import SummaryIndexService from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task -def _get_segment_with_summary(segment, dataset_id): - """Helper function to marshal segment and add summary information.""" - from services.summary_index_service import SummaryIndexService - - segment_dict = dict(marshal(segment, segment_fields)) # type: ignore - # Query summary for this segment (only enabled summaries) - summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id) - segment_dict["summary"] = summary.summary_content if summary else None - return segment_dict - - class SegmentListQuery(BaseModel): limit: int = Field(default=20, ge=1, le=100) status: list[str] = Field(default_factory=list) @@ -67,6 +71,16 @@ class SegmentListQuery(BaseModel): page: int = Field(default=1, ge=1) +class SegmentIdListQuery(BaseModel): + segment_id: list[str] = Field(default_factory=list, description="Segment IDs") + + +class ChildChunkListQuery(BaseModel): + limit: int = Field(default=20, ge=1, le=100) + keyword: str | None = None + page: int = Field(default=1, ge=1) + + class SegmentCreatePayload(BaseModel): content: str answer: str | None = None @@ -92,13 +106,35 @@ class SegmentBatchImportStatusResponse(ResponseModel): job_status: str +class ConsoleSegmentListResponse(ResponseModel): + data: list[SegmentResponse] + limit: int + total: int + total_pages: int + page: int + + +class ChildChunkBatchUpdateResponse(ResponseModel): + data: list[ChildChunkResponse] + + class ChildChunkBatchUpdatePayload(BaseModel): chunks: list[ChildChunkUpdateArgs] +class SegmentDocParams: + DATASET_DOCUMENT = {"dataset_id": "Dataset ID", "document_id": "Document ID"} + DATASET_DOCUMENT_ACTION = {**DATASET_DOCUMENT, "action": "Action"} + DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Segment ID"} + DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Parent segment ID"} + DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID"} + + register_schema_models( console_ns, SegmentListQuery, + SegmentIdListQuery, + ChildChunkListQuery, SegmentCreatePayload, SegmentUpdatePayload, BatchImportPayload, @@ -107,11 +143,24 @@ register_schema_models( ChildChunkBatchUpdatePayload, ChildChunkUpdateArgs, ) -register_response_schema_models(console_ns, SegmentBatchImportStatusResponse, SimpleResultResponse) +register_response_schema_models( + console_ns, + SegmentResponse, + ConsoleSegmentListResponse, + SegmentDetailResponse, + ChildChunkDetailResponse, + ChildChunkListResponse, + ChildChunkBatchUpdateResponse, + SegmentBatchImportStatusResponse, + SimpleResultResponse, +) @console_ns.route("/datasets//documents//segments") class DatasetDocumentSegmentListApi(Resource): + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT) + @console_ns.doc(params=query_params_from_model(SegmentListQuery)) + @console_ns.response(200, "Segments retrieved successfully", console_ns.models[ConsoleSegmentListResponse.__name__]) @setup_required @login_required @account_initialization_required @@ -134,12 +183,7 @@ class DatasetDocumentSegmentListApi(Resource): if not document: raise NotFound("Document not found.") - args = SegmentListQuery.model_validate( - { - **request.args.to_dict(), - "status": request.args.getlist("status"), - } - ) + args = query_params_from_request(SegmentListQuery, list_fields=("status",)) page = args.page limit = min(args.limit, 100) @@ -205,38 +249,30 @@ class DatasetDocumentSegmentListApi(Resource): segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) - # Query summaries for all segments in this page (batch query for efficiency) - segment_ids = [segment.id for segment in segments.items] - summaries = {} + segment_list = list(segments.items) + segment_ids = [segment.id for segment in segment_list] + summaries: dict[str, str | None] = {} if segment_ids: - from services.summary_index_service import SummaryIndexService - summary_records = SummaryIndexService.get_segments_summaries( segment_ids=segment_ids, dataset_id=dataset_id_str ) - # Only include enabled summaries (already filtered by service) summaries = {chunk_id: summary.summary_content for chunk_id, summary in summary_records.items()} - # Add summary to each segment - segments_with_summary = [] - for segment in segments.items: - segment_dict = dict(marshal(segment, segment_fields)) # type: ignore - segment_dict["summary"] = summaries.get(segment.id) - segments_with_summary.append(segment_dict) - response = { - "data": segments_with_summary, + "data": segment_responses_with_summaries(segment_list, summaries), "limit": limit, "total": segments.total, "total_pages": segments.pages, "page": page, } - return response, 200 + return dump_response(ConsoleSegmentListResponse, response), 200 @setup_required @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT) + @console_ns.doc(params=query_params_from_model(SegmentIdListQuery)) @console_ns.response(204, "Segments deleted successfully") def delete(self, dataset_id: UUID, document_id: UUID): current_user, _ = current_account_with_tenant() @@ -268,6 +304,8 @@ class DatasetDocumentSegmentListApi(Resource): @console_ns.route("/datasets//documents//segment/") class DatasetDocumentSegmentApi(Resource): + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_ACTION) + @console_ns.doc(params=query_params_from_model(SegmentIdListQuery)) @setup_required @login_required @account_initialization_required @@ -321,11 +359,12 @@ class DatasetDocumentSegmentApi(Resource): SegmentService.update_segments_status(segment_ids, action, dataset, document) except Exception as e: raise InvalidActionError(str(e)) - return {"result": "success"}, 200 + return dump_response(SimpleResultResponse, {"result": "success"}), 200 @console_ns.route("/datasets//documents//segment") class DatasetDocumentSegmentAddApi(Resource): + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT) @setup_required @login_required @account_initialization_required @@ -333,6 +372,7 @@ class DatasetDocumentSegmentAddApi(Resource): @cloud_edition_billing_knowledge_limit_check("add_segment") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[SegmentCreatePayload.__name__]) + @console_ns.response(200, "Segment created successfully", console_ns.models[SegmentDetailResponse.__name__]) def post(self, dataset_id: UUID, document_id: UUID): current_user, current_tenant_id = current_account_with_tenant() @@ -372,18 +412,25 @@ class DatasetDocumentSegmentAddApi(Resource): payload = SegmentCreatePayload.model_validate(console_ns.payload or {}) payload_dict = payload.model_dump(exclude_none=True) SegmentService.segment_create_args_validate(payload_dict, document) - segment = SegmentService.create_segment(payload_dict, document, dataset) - return {"data": _get_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200 + segment = type_cast(DocumentSegment, SegmentService.create_segment(payload_dict, document, dataset)) + summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id_str) + response = { + "data": segment_response_with_summary(segment, summary.summary_content if summary else None), + "doc_form": document.doc_form, + } + return dump_response(SegmentDetailResponse, response), 200 @console_ns.route("/datasets//documents//segments/") class DatasetDocumentSegmentUpdateApi(Resource): + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT) @setup_required @login_required @account_initialization_required @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[SegmentUpdatePayload.__name__]) + @console_ns.response(200, "Segment updated successfully", console_ns.models[SegmentDetailResponse.__name__]) def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() @@ -440,12 +487,18 @@ class DatasetDocumentSegmentUpdateApi(Resource): segment = SegmentService.update_segment( SegmentUpdateArgs.model_validate(payload.model_dump(exclude_none=True)), segment, document, dataset ) - return {"data": _get_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200 + summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id_str) + response = { + "data": segment_response_with_summary(segment, summary.summary_content if summary else None), + "doc_form": document.doc_form, + } + return dump_response(SegmentDetailResponse, response), 200 @setup_required @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT) @console_ns.response(204, "Segment deleted successfully") def delete(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() @@ -523,11 +576,11 @@ class DatasetDocumentSegmentBatchImportApi(Resource): try: # async job job_id = str(uuid.uuid4()) - indexing_cache_key = f"segment_batch_import_{str(job_id)}" + indexing_cache_key = f"segment_batch_import_{job_id}" # send batch add segments task redis_client.setnx(indexing_cache_key, "waiting") batch_create_segment_to_index_task.delay( - str(job_id), + job_id, upload_file_id, dataset_id_str, document_id_str, @@ -536,7 +589,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource): ) except Exception as e: return {"error": str(e)}, 500 - return {"job_id": job_id, "job_status": "waiting"}, 200 + return dump_response(SegmentBatchImportStatusResponse, {"job_id": job_id, "job_status": "waiting"}), 200 @console_ns.response(200, "Batch import status", console_ns.models[SegmentBatchImportStatusResponse.__name__]) @setup_required @@ -551,11 +604,13 @@ class DatasetDocumentSegmentBatchImportApi(Resource): if cache_result is None: raise ValueError("The job does not exist.") - return {"job_id": job_id, "job_status": cache_result.decode()}, 200 + response = {"job_id": job_id, "job_status": cache_result.decode()} + return dump_response(SegmentBatchImportStatusResponse, response), 200 @console_ns.route("/datasets//documents//segments//child_chunks") class ChildChunkAddApi(Resource): + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT) @setup_required @login_required @account_initialization_required @@ -563,6 +618,7 @@ class ChildChunkAddApi(Resource): @cloud_edition_billing_knowledge_limit_check("add_segment") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[ChildChunkCreatePayload.__name__]) + @console_ns.response(200, "Child chunk created successfully", console_ns.models[ChildChunkDetailResponse.__name__]) def post(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() @@ -613,8 +669,11 @@ class ChildChunkAddApi(Resource): child_chunk = SegmentService.create_child_chunk(payload.content, segment, document, dataset) except ChildChunkIndexingServiceError as e: raise ChildChunkIndexingError(str(e)) - return {"data": marshal(child_chunk, child_chunk_fields)}, 200 + return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200 + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT) + @console_ns.doc(params=query_params_from_model(ChildChunkListQuery)) + @console_ns.response(200, "Child chunks retrieved successfully", console_ns.models[ChildChunkListResponse.__name__]) @setup_required @login_required @account_initialization_required @@ -642,13 +701,7 @@ class ChildChunkAddApi(Resource): ) if not segment: raise NotFound("Segment not found.") - args = SegmentListQuery.model_validate( - { - "limit": request.args.get("limit", default=20, type=int), - "keyword": request.args.get("keyword"), - "page": request.args.get("page", default=1, type=int), - } - ) + args = query_params_from_request(ChildChunkListQuery, use_defaults_for_malformed_ints=True) page = args.page limit = min(args.limit, 100) @@ -657,19 +710,27 @@ class ChildChunkAddApi(Resource): child_chunks = SegmentService.get_child_chunks( segment_id_str, document_id_str, dataset_id_str, page, limit, keyword ) - return { - "data": marshal(child_chunks.items, child_chunk_fields), + response = { + "data": child_chunks.items, "total": child_chunks.total, "total_pages": child_chunks.pages, "page": page, "limit": limit, - }, 200 + } + return dump_response(ChildChunkListResponse, response), 200 @setup_required @login_required @account_initialization_required @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT) + @console_ns.response( + 200, + "Child chunks updated successfully", + console_ns.models[ChildChunkBatchUpdateResponse.__name__], + ) + @console_ns.expect(console_ns.models[ChildChunkBatchUpdatePayload.__name__]) def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() @@ -707,7 +768,7 @@ class ChildChunkAddApi(Resource): child_chunks = SegmentService.update_child_chunks(payload.chunks, segment, document, dataset) except ChildChunkIndexingServiceError as e: raise ChildChunkIndexingError(str(e)) - return {"data": marshal(child_chunks, child_chunk_fields)}, 200 + return dump_response(ChildChunkBatchUpdateResponse, {"data": child_chunks}), 200 @console_ns.route( @@ -718,6 +779,7 @@ class ChildChunkUpdateApi(Resource): @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK) @console_ns.response(204, "Child chunk deleted successfully") def delete(self, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID): current_user, current_tenant_id = current_account_with_tenant() @@ -748,7 +810,7 @@ class ChildChunkUpdateApi(Resource): child_chunk = db.session.scalar( select(ChildChunk) .where( - ChildChunk.id == str(child_chunk_id_str), + ChildChunk.id == child_chunk_id_str, ChildChunk.tenant_id == current_tenant_id, ChildChunk.segment_id == segment.id, ChildChunk.document_id == document_id_str, @@ -775,7 +837,9 @@ class ChildChunkUpdateApi(Resource): @account_initialization_required @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") + @console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK) @console_ns.expect(console_ns.models[ChildChunkUpdatePayload.__name__]) + @console_ns.response(200, "Child chunk updated successfully", console_ns.models[ChildChunkDetailResponse.__name__]) def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID): current_user, current_tenant_id = current_account_with_tenant() @@ -805,7 +869,7 @@ class ChildChunkUpdateApi(Resource): child_chunk = db.session.scalar( select(ChildChunk) .where( - ChildChunk.id == str(child_chunk_id_str), + ChildChunk.id == child_chunk_id_str, ChildChunk.tenant_id == current_tenant_id, ChildChunk.segment_id == segment.id, ChildChunk.document_id == document_id_str, @@ -827,4 +891,4 @@ class ChildChunkUpdateApi(Resource): child_chunk = SegmentService.update_child_chunk(payload.content, child_chunk, segment, document, dataset) except ChildChunkIndexingServiceError as e: raise ChildChunkIndexingError(str(e)) - return {"data": marshal(child_chunk, child_chunk_fields)}, 200 + return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200 diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index 34e1710068..f93eb6a4bf 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -1,15 +1,18 @@ -from typing import Any +from typing import cast from uuid import UUID -from flask import request -from flask_restx import marshal -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ValidationError, field_validator from sqlalchemy import select from werkzeug.exceptions import NotFound from configs import dify_config from controllers.common.controller_schemas import ChildChunkCreatePayload, ChildChunkUpdatePayload -from controllers.common.schema import register_schema_models +from controllers.common.schema import ( + query_params_from_model, + query_params_from_request, + register_response_schema_models, + register_schema_models, +) from controllers.service_api import service_api_ns from controllers.service_api.app.error import ProviderNotInitializeError from controllers.service_api.wraps import ( @@ -22,10 +25,19 @@ from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.model_manager import ModelManager from core.rag.index_processor.constant.index_type import IndexTechniqueType from extensions.ext_database import db -from fields.segment_fields import child_chunk_fields, segment_fields +from fields.base import ResponseModel +from fields.segment_fields import ( + ChildChunkDetailResponse, + ChildChunkListResponse, + SegmentDetailResponse, + SegmentResponse, + segment_response_with_summary, + segment_responses_with_summaries, +) from graphon.model_runtime.entities.model_entities import ModelType +from libs.helper import dump_response from libs.login import current_account_with_tenant -from models.dataset import Dataset +from models.dataset import Dataset, DocumentSegment from services.dataset_service import DatasetService, DocumentService, SegmentService from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateArgs from services.errors.chunk import ChildChunkDeleteIndexError, ChildChunkIndexingError @@ -34,35 +46,27 @@ from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingS from services.summary_index_service import SummaryIndexService -def _marshal_segment_with_summary(segment, dataset_id: str) -> dict[str, Any]: - """Marshal a single segment and enrich it with summary content.""" - segment_dict: dict[str, Any] = dict(marshal(segment, segment_fields)) # type: ignore[arg-type] - summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id) - segment_dict["summary"] = summary.summary_content if summary else None - return segment_dict +class SegmentCreateItemPayload(BaseModel): + content: str = Field(min_length=1) + answer: str | None = None + keywords: list[str] | None = None + attachment_ids: list[str] | None = None - -def _marshal_segments_with_summary(segments, dataset_id: str) -> list[dict[str, Any]]: - """Marshal multiple segments and enrich them with summary content (batch query).""" - segment_ids = [segment.id for segment in segments] - summaries: dict[str, str | None] = {} - if segment_ids: - summary_records = SummaryIndexService.get_segments_summaries(segment_ids=segment_ids, dataset_id=dataset_id) - summaries = {chunk_id: record.summary_content for chunk_id, record in summary_records.items()} - - result: list[dict[str, Any]] = [] - for segment in segments: - segment_dict: dict[str, Any] = dict(marshal(segment, segment_fields)) # type: ignore[arg-type] - segment_dict["summary"] = summaries.get(segment.id) - result.append(segment_dict) - return result + @field_validator("content") + @classmethod + def validate_content(cls, value: str) -> str: + if not value.strip(): + raise ValueError("Content is empty") + return value class SegmentCreatePayload(BaseModel): - segments: list[dict[str, Any]] | None = None + segments: list[SegmentCreateItemPayload] = Field(min_length=1) class SegmentListQuery(BaseModel): + limit: int = Field(default=20, ge=1) + page: int = Field(default=1, ge=1) status: list[str] = Field(default_factory=list) keyword: str | None = None @@ -77,9 +81,31 @@ class ChildChunkListQuery(BaseModel): page: int = Field(default=1, ge=1) +class SegmentDocParams: + DATASET_DOCUMENT = {"dataset_id": "Dataset ID", "document_id": "Document ID"} + DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Segment ID"} + DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Parent segment ID"} + DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID"} + + +class SegmentCreateListResponse(ResponseModel): + data: list[SegmentResponse] + doc_form: str + + +class SegmentListResponse(ResponseModel): + data: list[SegmentResponse] + doc_form: str + total: int + has_more: bool + limit: int + page: int + + register_schema_models( service_api_ns, SegmentCreatePayload, + SegmentCreateItemPayload, SegmentListQuery, SegmentUpdateArgs, SegmentUpdatePayload, @@ -87,6 +113,15 @@ register_schema_models( ChildChunkListQuery, ChildChunkUpdatePayload, ) +register_response_schema_models( + service_api_ns, + SegmentResponse, + SegmentCreateListResponse, + SegmentListResponse, + SegmentDetailResponse, + ChildChunkDetailResponse, + ChildChunkListResponse, +) @service_api_ns.route("/datasets//documents//segments") @@ -96,7 +131,7 @@ class SegmentApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[SegmentCreatePayload.__name__]) @service_api_ns.doc("create_segments") @service_api_ns.doc(description="Create segments in a document") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT) @service_api_ns.doc( responses={ 200: "Segments created successfully", @@ -105,6 +140,11 @@ class SegmentApi(DatasetApiResource): 404: "Dataset or document not found", } ) + @service_api_ns.response( + 200, + "Segments created successfully", + service_api_ns.models[SegmentCreateListResponse.__name__], + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") @@ -144,26 +184,35 @@ class SegmentApi(DatasetApiResource): except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) # validate args - payload = SegmentCreatePayload.model_validate(service_api_ns.payload or {}) - if payload.segments is not None: - segments_limit = dify_config.DATASET_MAX_SEGMENTS_PER_REQUEST - if segments_limit > 0 and len(payload.segments) > segments_limit: - raise ValueError(f"Exceeded maximum segments limit of {segments_limit}.") + try: + payload = SegmentCreatePayload.model_validate(service_api_ns.payload or {}) + except ValidationError as e: + return {"error": str(e)}, 400 + segments_limit = dify_config.DATASET_MAX_SEGMENTS_PER_REQUEST + if segments_limit > 0 and len(payload.segments) > segments_limit: + raise ValueError(f"Exceeded maximum segments limit of {segments_limit}.") + segment_items = [segment.model_dump(exclude_none=True) for segment in payload.segments] - for args_item in payload.segments: - SegmentService.segment_create_args_validate(args_item, document) - segments = SegmentService.multi_create_segment(payload.segments, document, dataset) - return { - "data": _marshal_segments_with_summary(segments, dataset_id_str), - "doc_form": document.doc_form, - }, 200 - else: - return {"error": "Segments is required"}, 400 + for args_item in segment_items: + SegmentService.segment_create_args_validate(args_item, document) + segments = cast(list[DocumentSegment], SegmentService.multi_create_segment(segment_items, document, dataset)) + segment_ids = [segment.id for segment in segments] + summaries: dict[str, str | None] = {} + if segment_ids: + summary_records = SummaryIndexService.get_segments_summaries( + segment_ids=segment_ids, dataset_id=dataset_id_str + ) + summaries = {chunk_id: record.summary_content for chunk_id, record in summary_records.items()} + response = { + "data": segment_responses_with_summaries(segments, summaries), + "doc_form": document.doc_form, + } + return dump_response(SegmentCreateListResponse, response), 200 - @service_api_ns.expect(service_api_ns.models[SegmentListQuery.__name__]) @service_api_ns.doc("list_segments") @service_api_ns.doc(description="List segments in a document") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT) + @service_api_ns.doc(params=query_params_from_model(SegmentListQuery)) @service_api_ns.doc( responses={ 200: "Segments retrieved successfully", @@ -171,12 +220,22 @@ class SegmentApi(DatasetApiResource): 404: "Dataset or document not found", } ) + @service_api_ns.response( + 200, + "Segments retrieved successfully", + service_api_ns.models[SegmentListResponse.__name__], + ) def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID): _, current_tenant_id = current_account_with_tenant() """Get segments.""" # check dataset - page = request.args.get("page", default=1, type=int) - limit = request.args.get("limit", default=20, type=int) + args = query_params_from_request( + SegmentListQuery, + list_fields=("status",), + use_defaults_for_malformed_ints=True, + ) + page = args.page + limit = args.limit dataset_id_str = str(dataset_id) dataset = db.session.scalar( select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1) @@ -205,13 +264,6 @@ class SegmentApi(DatasetApiResource): except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) - args = SegmentListQuery.model_validate( - { - "status": request.args.getlist("status"), - "keyword": request.args.get("keyword"), - } - ) - segments, total = SegmentService.get_segments( document_id=document_id_str, tenant_id=current_tenant_id, @@ -220,9 +272,16 @@ class SegmentApi(DatasetApiResource): page=page, limit=limit, ) + segment_ids = [segment.id for segment in segments] + summaries: dict[str, str | None] = {} + if segment_ids: + summary_records = SummaryIndexService.get_segments_summaries( + segment_ids=segment_ids, dataset_id=dataset_id_str + ) + summaries = {chunk_id: record.summary_content for chunk_id, record in summary_records.items()} response = { - "data": _marshal_segments_with_summary(segments, dataset_id_str), + "data": segment_responses_with_summaries(segments, summaries), "doc_form": document.doc_form, "total": total, "has_more": len(segments) == limit, @@ -230,16 +289,14 @@ class SegmentApi(DatasetApiResource): "page": page, } - return response, 200 + return dump_response(SegmentListResponse, response), 200 @service_api_ns.route("/datasets//documents//segments/") class DatasetSegmentApi(DatasetApiResource): @service_api_ns.doc("delete_segment") @service_api_ns.doc(description="Delete a specific segment") - @service_api_ns.doc( - params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Segment ID to delete"} - ) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT) @service_api_ns.doc( responses={ 204: "Segment deleted successfully", @@ -275,9 +332,7 @@ class DatasetSegmentApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[SegmentUpdatePayload.__name__]) @service_api_ns.doc("update_segment") @service_api_ns.doc(description="Update a specific segment") - @service_api_ns.doc( - params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Segment ID to update"} - ) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT) @service_api_ns.doc( responses={ 200: "Segment updated successfully", @@ -285,6 +340,7 @@ class DatasetSegmentApi(DatasetApiResource): 404: "Dataset, document, or segment not found", } ) + @service_api_ns.response(200, "Segment updated successfully", service_api_ns.models[SegmentDetailResponse.__name__]) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID): @@ -328,13 +384,16 @@ class DatasetSegmentApi(DatasetApiResource): payload = SegmentUpdatePayload.model_validate(service_api_ns.payload or {}) updated_segment = SegmentService.update_segment(payload.segment, segment, document, dataset) - return { - "data": _marshal_segment_with_summary(updated_segment, dataset_id_str), + summary = SummaryIndexService.get_segment_summary(segment_id=updated_segment.id, dataset_id=dataset_id_str) + response = { + "data": segment_response_with_summary(updated_segment, summary.summary_content if summary else None), "doc_form": document.doc_form, - }, 200 + } + return dump_response(SegmentDetailResponse, response), 200 @service_api_ns.doc("get_segment") @service_api_ns.doc(description="Get a specific segment by ID") + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT) @service_api_ns.doc( responses={ 200: "Segment retrieved successfully", @@ -342,6 +401,11 @@ class DatasetSegmentApi(DatasetApiResource): 404: "Dataset, document, or segment not found", } ) + @service_api_ns.response( + 200, + "Segment retrieved successfully", + service_api_ns.models[SegmentDetailResponse.__name__], + ) def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID): _, current_tenant_id = current_account_with_tenant() dataset_id_str = str(dataset_id) @@ -364,7 +428,12 @@ class DatasetSegmentApi(DatasetApiResource): if not segment: raise NotFound("Segment not found.") - return {"data": _marshal_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200 + summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id_str) + response = { + "data": segment_response_with_summary(segment, summary.summary_content if summary else None), + "doc_form": document.doc_form, + } + return dump_response(SegmentDetailResponse, response), 200 @service_api_ns.route( @@ -376,9 +445,7 @@ class ChildChunkApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[ChildChunkCreatePayload.__name__]) @service_api_ns.doc("create_child_chunk") @service_api_ns.doc(description="Create a new child chunk for a segment") - @service_api_ns.doc( - params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Parent segment ID"} - ) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT) @service_api_ns.doc( responses={ 200: "Child chunk created successfully", @@ -386,6 +453,11 @@ class ChildChunkApi(DatasetApiResource): 404: "Dataset, document, or segment not found", } ) + @service_api_ns.response( + 200, + "Child chunk created successfully", + service_api_ns.models[ChildChunkDetailResponse.__name__], + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") @@ -437,14 +509,12 @@ class ChildChunkApi(DatasetApiResource): except ChildChunkIndexingServiceError as e: raise ChildChunkIndexingError(str(e)) - return {"data": marshal(child_chunk, child_chunk_fields)}, 200 + return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200 - @service_api_ns.expect(service_api_ns.models[ChildChunkListQuery.__name__]) @service_api_ns.doc("list_child_chunks") @service_api_ns.doc(description="List child chunks for a segment") - @service_api_ns.doc( - params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Parent segment ID"} - ) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT) + @service_api_ns.doc(params=query_params_from_model(ChildChunkListQuery)) @service_api_ns.doc( responses={ 200: "Child chunks retrieved successfully", @@ -452,6 +522,11 @@ class ChildChunkApi(DatasetApiResource): 404: "Dataset, document, or segment not found", } ) + @service_api_ns.response( + 200, + "Child chunks retrieved successfully", + service_api_ns.models[ChildChunkListResponse.__name__], + ) def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID): _, current_tenant_id = current_account_with_tenant() """Get child chunks.""" @@ -475,13 +550,7 @@ class ChildChunkApi(DatasetApiResource): if not segment: raise NotFound("Segment not found.") - args = ChildChunkListQuery.model_validate( - { - "limit": request.args.get("limit", default=20, type=int), - "keyword": request.args.get("keyword"), - "page": request.args.get("page", default=1, type=int), - } - ) + args = query_params_from_request(ChildChunkListQuery, use_defaults_for_malformed_ints=True) page = args.page limit = min(args.limit, 100) @@ -491,13 +560,14 @@ class ChildChunkApi(DatasetApiResource): segment_id_str, document_id_str, dataset_id_str, page, limit, keyword ) - return { - "data": marshal(child_chunks.items, child_chunk_fields), + response = { + "data": child_chunks.items, "total": child_chunks.total, "total_pages": child_chunks.pages, "page": page, "limit": limit, - }, 200 + } + return dump_response(ChildChunkListResponse, response), 200 @service_api_ns.route( @@ -508,14 +578,7 @@ class DatasetChildChunkApi(DatasetApiResource): @service_api_ns.doc("delete_child_chunk") @service_api_ns.doc(description="Delete a specific child chunk") - @service_api_ns.doc( - params={ - "dataset_id": "Dataset ID", - "document_id": "Document ID", - "segment_id": "Parent segment ID", - "child_chunk_id": "Child chunk ID to delete", - } - ) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK) @service_api_ns.doc( responses={ 204: "Child chunk deleted successfully", @@ -549,7 +612,7 @@ class DatasetChildChunkApi(DatasetApiResource): raise NotFound("Segment not found.") # validate segment belongs to the specified document - if str(segment.document_id) != str(document_id_str): + if segment.document_id != document_id_str: raise NotFound("Document not found.") child_chunk_id_str = str(child_chunk_id) @@ -561,7 +624,7 @@ class DatasetChildChunkApi(DatasetApiResource): raise NotFound("Child chunk not found.") # validate child chunk belongs to the specified segment - if str(child_chunk.segment_id) != str(segment.id): + if child_chunk.segment_id != segment.id: raise NotFound("Child chunk not found.") try: @@ -574,14 +637,7 @@ class DatasetChildChunkApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[ChildChunkUpdatePayload.__name__]) @service_api_ns.doc("update_child_chunk") @service_api_ns.doc(description="Update a specific child chunk") - @service_api_ns.doc( - params={ - "dataset_id": "Dataset ID", - "document_id": "Document ID", - "segment_id": "Parent segment ID", - "child_chunk_id": "Child chunk ID to update", - } - ) + @service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK) @service_api_ns.doc( responses={ 200: "Child chunk updated successfully", @@ -589,6 +645,11 @@ class DatasetChildChunkApi(DatasetApiResource): 404: "Dataset, document, segment, or child chunk not found", } ) + @service_api_ns.response( + 200, + "Child chunk updated successfully", + service_api_ns.models[ChildChunkDetailResponse.__name__], + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") @@ -616,7 +677,7 @@ class DatasetChildChunkApi(DatasetApiResource): raise NotFound("Segment not found.") # validate segment belongs to the specified document - if str(segment.document_id) != str(document_id_str): + if segment.document_id != document_id_str: raise NotFound("Segment not found.") child_chunk_id_str = str(child_chunk_id) @@ -628,7 +689,7 @@ class DatasetChildChunkApi(DatasetApiResource): raise NotFound("Child chunk not found.") # validate child chunk belongs to the specified segment - if str(child_chunk.segment_id) != str(segment.id): + if child_chunk.segment_id != segment.id: raise NotFound("Child chunk not found.") # validate args @@ -639,4 +700,4 @@ class DatasetChildChunkApi(DatasetApiResource): except ChildChunkIndexingServiceError as e: raise ChildChunkIndexingError(str(e)) - return {"data": marshal(child_chunk, child_chunk_fields)}, 200 + return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200 diff --git a/api/fields/segment_fields.py b/api/fields/segment_fields.py index 2ce9fb154c..b5c9975400 100644 --- a/api/fields/segment_fields.py +++ b/api/fields/segment_fields.py @@ -1,53 +1,109 @@ -from flask_restx import fields +from collections.abc import Iterable, Mapping +from dataclasses import dataclass +from datetime import datetime +from typing import Any -from libs.helper import TimestampField +from pydantic import field_serializer -child_chunk_fields = { - "id": fields.String, - "segment_id": fields.String, - "content": fields.String, - "position": fields.Integer, - "word_count": fields.Integer, - "type": fields.String, - "created_at": TimestampField, - "updated_at": TimestampField, -} +from fields.base import ResponseModel +from libs.helper import to_timestamp -attachment_fields = { - "id": fields.String, - "name": fields.String, - "size": fields.Integer, - "extension": fields.String, - "mime_type": fields.String, - "source_url": fields.String, -} -segment_fields = { - "id": fields.String, - "position": fields.Integer, - "document_id": fields.String, - "content": fields.String, - "sign_content": fields.String, - "answer": fields.String, - "word_count": fields.Integer, - "tokens": fields.Integer, - "keywords": fields.List(fields.String), - "index_node_id": fields.String, - "index_node_hash": fields.String, - "hit_count": fields.Integer, - "enabled": fields.Boolean, - "disabled_at": TimestampField, - "disabled_by": fields.String, - "status": fields.String, - "created_by": fields.String, - "created_at": TimestampField, - "updated_at": TimestampField, - "updated_by": fields.String, - "indexing_at": TimestampField, - "completed_at": TimestampField, - "error": fields.String, - "stopped_at": TimestampField, - "child_chunks": fields.List(fields.Nested(child_chunk_fields)), - "attachments": fields.List(fields.Nested(attachment_fields)), - "summary": fields.String, # Summary content for the segment -} +class SegmentAttachmentResponse(ResponseModel): + id: str + name: str + size: int + extension: str + mime_type: str | None + source_url: str + + +class ChildChunkResponse(ResponseModel): + id: str + segment_id: str + content: str + position: int + word_count: int + type: str + created_at: datetime | int + updated_at: datetime | int + + @field_serializer("created_at", "updated_at") + def serialize_timestamp(self, value: datetime | int) -> int: + return to_timestamp(value) + + +class SegmentResponse(ResponseModel): + id: str + position: int + document_id: str + content: str + sign_content: str + answer: str | None + word_count: int + tokens: int + keywords: list[str] | None + index_node_id: str | None + index_node_hash: str | None + hit_count: int + enabled: bool + disabled_at: datetime | int | None + disabled_by: str | None + status: str + created_by: str + created_at: datetime | int + updated_at: datetime | int + updated_by: str | None + indexing_at: datetime | int | None + completed_at: datetime | int | None + error: str | None + stopped_at: datetime | int | None + child_chunks: list[ChildChunkResponse] + attachments: list[SegmentAttachmentResponse] + summary: str | None + + @field_serializer("created_at", "updated_at") + def serialize_required_timestamp(self, value: datetime | int) -> int: + return to_timestamp(value) + + @field_serializer("disabled_at", "indexing_at", "completed_at", "stopped_at") + def serialize_optional_timestamp(self, value: datetime | int | None) -> int | None: + return to_timestamp(value) + + +@dataclass(frozen=True) +class SegmentWithSummary: + segment: Any + summary: str | None + + def __getattr__(self, name: str) -> Any: + return getattr(self.segment, name) + + +def segment_response_with_summary(segment: Any, summary: str | None) -> SegmentResponse: + response_source = SegmentWithSummary(segment=segment, summary=summary) + return SegmentResponse.model_validate(response_source, from_attributes=True) + + +def segment_responses_with_summaries( + segments: Iterable[Any], + summaries: Mapping[str, str | None], +) -> list[SegmentResponse]: + return [segment_response_with_summary(segment, summaries.get(segment.id)) for segment in segments] + + +class SegmentDetailResponse(ResponseModel): + data: SegmentResponse + doc_form: str + + +class ChildChunkDetailResponse(ResponseModel): + data: ChildChunkResponse + + +class ChildChunkListResponse(ResponseModel): + data: list[ChildChunkResponse] + total: int + total_pages: int + page: int + limit: int diff --git a/api/openapi/markdown/console-swagger.md b/api/openapi/markdown/console-swagger.md index a84c3a4cad..d0341a5d1c 100644 --- a/api/openapi/markdown/console-swagger.md +++ b/api/openapi/markdown/console-swagger.md @@ -5175,15 +5175,15 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | | payload | body | | Yes | [SegmentCreatePayload](#segmentcreatepayload) | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Segment created successfully | [SegmentDetailResponse](#segmentdetailresponse) | ### /datasets/{dataset_id}/documents/{document_id}/segment/{action} @@ -5192,9 +5192,10 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| action | path | | Yes | string | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | +| action | path | Action | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | query | Segment IDs | No | [ string ] | ##### Responses @@ -5209,8 +5210,9 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | query | Segment IDs | No | [ string ] | ##### Responses @@ -5223,14 +5225,20 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| enabled | query | | No | string | +| hit_count_gte | query | | No | integer | +| keyword | query | | No | string | +| limit | query | | No | integer | +| page | query | | No | integer | +| status | query | | No | [ string ] | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Segments retrieved successfully | [ConsoleSegmentListResponse](#consolesegmentlistresponse) | ### /datasets/{dataset_id}/documents/{document_id}/segments/batch_import @@ -5270,9 +5278,9 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Segment ID | Yes | string | ##### Responses @@ -5285,16 +5293,16 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | | payload | body | | Yes | [SegmentUpdatePayload](#segmentupdatepayload) | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Segment ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Segment updated successfully | [SegmentDetailResponse](#segmentdetailresponse) | ### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks @@ -5303,46 +5311,50 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Parent segment ID | Yes | string | +| keyword | query | | No | string | +| limit | query | | No | integer | +| page | query | | No | integer | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Child chunks retrieved successfully | [ChildChunkListResponse](#childchunklistresponse) | #### PATCH ##### Parameters | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | +| payload | body | | Yes | [ChildChunkBatchUpdatePayload](#childchunkbatchupdatepayload) | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Parent segment ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Child chunks updated successfully | [ChildChunkBatchUpdateResponse](#childchunkbatchupdateresponse) | #### POST ##### Parameters | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | | payload | body | | Yes | [ChildChunkCreatePayload](#childchunkcreatepayload) | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Parent segment ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Child chunk created successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) | ### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id} @@ -5351,10 +5363,10 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| child_chunk_id | path | | Yes | string | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | +| child_chunk_id | path | Child chunk ID | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Parent segment ID | Yes | string | ##### Responses @@ -5367,17 +5379,17 @@ Update document processing status (pause/resume) | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| child_chunk_id | path | | Yes | string | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | | payload | body | | Yes | [ChildChunkUpdatePayload](#childchunkupdatepayload) | +| child_chunk_id | path | Child chunk ID | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Parent segment ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Child chunk updated successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) | ### /datasets/{dataset_id}/documents/{document_id}/summary-status @@ -11718,12 +11730,55 @@ Button styles for user actions. | ---- | ---- | ----------- | -------- | | chunks | [ [ChildChunkUpdateArgs](#childchunkupdateargs) ] | | Yes | +#### ChildChunkBatchUpdateResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes | + #### ChildChunkCreatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | | content | string | | Yes | +#### ChildChunkDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ChildChunkResponse](#childchunkresponse) | | Yes | + +#### ChildChunkListQuery + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| keyword | string | | No | +| limit | integer | | No | +| page | integer | | No | + +#### ChildChunkListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | +| total_pages | integer | | Yes | + +#### ChildChunkResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| content | string | | Yes | +| created_at | integer | | Yes | +| id | string | | Yes | +| position | integer | | Yes | +| segment_id | string | | Yes | +| type | string | | Yes | +| updated_at | integer | | Yes | +| word_count | integer | | Yes | + #### ChildChunkUpdateArgs | Name | Type | Description | Required | @@ -11861,6 +11916,16 @@ Condition detail | page | integer | Page number | No | | tag_ids | [ string ] | Filter by tag IDs | No | +#### ConsoleSegmentListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [SegmentResponse](#segmentresponse) ] | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | +| total_pages | integer | | Yes | + #### Conversation | Name | Type | Description | Required | @@ -14865,6 +14930,17 @@ Form input definition. | last_id | string | | No | | limit | integer | | No | +#### SegmentAttachmentResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| extension | string | | Yes | +| id | string | | Yes | +| mime_type | string | | Yes | +| name | string | | Yes | +| size | integer | | Yes | +| source_url | string | | Yes | + #### SegmentBatchImportStatusResponse | Name | Type | Description | Required | @@ -14881,6 +14957,19 @@ Form input definition. | content | string | | Yes | | keywords | [ string ] | | No | +#### SegmentDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [SegmentResponse](#segmentresponse) | | Yes | +| doc_form | string | | Yes | + +#### SegmentIdListQuery + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| segment_id | [ string ] | Segment IDs | No | + #### SegmentListQuery | Name | Type | Description | Required | @@ -14892,6 +14981,38 @@ Form input definition. | page | integer | | No | | status | [ string ] | | No | +#### SegmentResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| answer | string | | Yes | +| attachments | [ [SegmentAttachmentResponse](#segmentattachmentresponse) ] | | Yes | +| child_chunks | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes | +| completed_at | integer | | Yes | +| content | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| disabled_at | integer | | Yes | +| disabled_by | string | | Yes | +| document_id | string | | Yes | +| enabled | boolean | | Yes | +| error | string | | Yes | +| hit_count | integer | | Yes | +| id | string | | Yes | +| index_node_hash | string | | Yes | +| index_node_id | string | | Yes | +| indexing_at | integer | | Yes | +| keywords | [ string ] | | Yes | +| position | integer | | Yes | +| sign_content | string | | Yes | +| status | string | | Yes | +| stopped_at | integer | | Yes | +| summary | string | | Yes | +| tokens | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | + #### SegmentUpdatePayload | Name | Type | Description | Required | diff --git a/api/openapi/markdown/service-swagger.md b/api/openapi/markdown/service-swagger.md index 11881c5d38..cfb67eb3bd 100644 --- a/api/openapi/markdown/service-swagger.md +++ b/api/openapi/markdown/service-swagger.md @@ -1064,17 +1064,20 @@ List segments in a document | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| payload | body | | Yes | [SegmentListQuery](#segmentlistquery) | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | +| keyword | query | | No | string | +| limit | query | | No | integer | +| page | query | | No | integer | +| status | query | | No | [ string ] | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Segments retrieved successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset or document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Segments retrieved successfully | [SegmentListResponse](#segmentlistresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset or document not found | | #### POST ##### Description @@ -1091,12 +1094,12 @@ Create segments in a document ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Segments created successfully | -| 400 | Bad request - segments data is missing | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset or document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Segments created successfully | [SegmentCreateListResponse](#segmentcreatelistresponse) | +| 400 | Bad request - segments data is missing | | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset or document not found | | ### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id} @@ -1111,7 +1114,7 @@ Delete a specific segment | ---- | ---------- | ----------- | -------- | ------ | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | -| segment_id | path | Segment ID to delete | Yes | string | +| segment_id | path | Segment ID | Yes | string | ##### Responses @@ -1130,17 +1133,17 @@ Get a specific segment by ID | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string | -| document_id | path | | Yes | string | -| segment_id | path | | Yes | string | +| dataset_id | path | Dataset ID | Yes | string | +| document_id | path | Document ID | Yes | string | +| segment_id | path | Segment ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Segment retrieved successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset, document, or segment not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Segment retrieved successfully | [SegmentDetailResponse](#segmentdetailresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset, document, or segment not found | | #### POST ##### Description @@ -1154,15 +1157,15 @@ Update a specific segment | payload | body | | Yes | [SegmentUpdatePayload](#segmentupdatepayload) | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | -| segment_id | path | Segment ID to update | Yes | string | +| segment_id | path | Segment ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Segment updated successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset, document, or segment not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Segment updated successfully | [SegmentDetailResponse](#segmentdetailresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset, document, or segment not found | | ### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks @@ -1175,18 +1178,20 @@ List child chunks for a segment | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| payload | body | | Yes | [ChildChunkListQuery](#childchunklistquery) | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | | segment_id | path | Parent segment ID | Yes | string | +| keyword | query | | No | string | +| limit | query | | No | integer | +| page | query | | No | integer | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Child chunks retrieved successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset, document, or segment not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Child chunks retrieved successfully | [ChildChunkListResponse](#childchunklistresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset, document, or segment not found | | #### POST ##### Description @@ -1204,11 +1209,11 @@ Create a new child chunk for a segment ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Child chunk created successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset, document, or segment not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Child chunk created successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset, document, or segment not found | | ### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id} @@ -1221,7 +1226,7 @@ Delete a specific child chunk | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| child_chunk_id | path | Child chunk ID to delete | Yes | string | +| child_chunk_id | path | Child chunk ID | Yes | string | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | | segment_id | path | Parent segment ID | Yes | string | @@ -1244,18 +1249,18 @@ Update a specific child chunk | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | | payload | body | | Yes | [ChildChunkUpdatePayload](#childchunkupdatepayload) | -| child_chunk_id | path | Child chunk ID to update | Yes | string | +| child_chunk_id | path | Child chunk ID | Yes | string | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | | segment_id | path | Parent segment ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Child chunk updated successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset, document, segment, or child chunk not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Child chunk updated successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset, document, segment, or child chunk not found | | ### /datasets/{dataset_id}/documents/{document_id}/update-by-file @@ -2222,6 +2227,12 @@ Returns a list of available models for the specified model type. | ---- | ---- | ----------- | -------- | | content | string | | Yes | +#### ChildChunkDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ChildChunkResponse](#childchunkresponse) | | Yes | + #### ChildChunkListQuery | Name | Type | Description | Required | @@ -2230,6 +2241,29 @@ Returns a list of available models for the specified model type. | limit | integer | | No | | page | integer | | No | +#### ChildChunkListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | +| total_pages | integer | | Yes | + +#### ChildChunkResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| content | string | | Yes | +| created_at | integer | | Yes | +| id | string | | Yes | +| position | integer | | Yes | +| segment_id | string | | Yes | +| type | string | | Yes | +| updated_at | integer | | Yes | +| word_count | integer | | Yes | + #### ChildChunkUpdatePayload | Name | Type | Description | Required | @@ -2954,19 +2988,98 @@ Metadata operation data | segmentation | [Segmentation](#segmentation) | | No | | subchunk_segmentation | [Segmentation](#segmentation) | | No | +#### SegmentAttachmentResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| extension | string | | Yes | +| id | string | | Yes | +| mime_type | string | | Yes | +| name | string | | Yes | +| size | integer | | Yes | +| source_url | string | | Yes | + +#### SegmentCreateItemPayload + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| answer | string | | No | +| attachment_ids | [ string ] | | No | +| content | string | | Yes | +| keywords | [ string ] | | No | + +#### SegmentCreateListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [SegmentResponse](#segmentresponse) ] | | Yes | +| doc_form | string | | Yes | + #### SegmentCreatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| segments | [ object ] | | No | +| segments | [ [SegmentCreateItemPayload](#segmentcreateitempayload) ] | | Yes | + +#### SegmentDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [SegmentResponse](#segmentresponse) | | Yes | +| doc_form | string | | Yes | #### SegmentListQuery | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | | keyword | string | | No | +| limit | integer | | No | +| page | integer | | No | | status | [ string ] | | No | +#### SegmentListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [SegmentResponse](#segmentresponse) ] | | Yes | +| doc_form | string | | Yes | +| has_more | boolean | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | + +#### SegmentResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| answer | string | | Yes | +| attachments | [ [SegmentAttachmentResponse](#segmentattachmentresponse) ] | | Yes | +| child_chunks | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes | +| completed_at | integer | | Yes | +| content | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| disabled_at | integer | | Yes | +| disabled_by | string | | Yes | +| document_id | string | | Yes | +| enabled | boolean | | Yes | +| error | string | | Yes | +| hit_count | integer | | Yes | +| id | string | | Yes | +| index_node_hash | string | | Yes | +| index_node_id | string | | Yes | +| indexing_at | integer | | Yes | +| keywords | [ string ] | | Yes | +| position | integer | | Yes | +| sign_content | string | | Yes | +| status | string | | Yes | +| stopped_at | integer | | Yes | +| summary | string | | Yes | +| tokens | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | + #### SegmentUpdateArgs | Name | Type | Description | Required | diff --git a/api/tests/test_containers_integration_tests/controllers/console/datasets/test_segments.py b/api/tests/test_containers_integration_tests/controllers/console/datasets/test_segments.py new file mode 100644 index 0000000000..6b9a4f2c96 --- /dev/null +++ b/api/tests/test_containers_integration_tests/controllers/console/datasets/test_segments.py @@ -0,0 +1,97 @@ +"""DB-backed integration tests for console dataset segment endpoints.""" + +from __future__ import annotations + +from uuid import uuid4 + +from flask.testing import FlaskClient +from sqlalchemy.orm import Session + +from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType +from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary +from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus, SummaryStatus +from tests.test_containers_integration_tests.controllers.console.helpers import ( + authenticate_console_client, + create_console_account_and_tenant, +) + + +def test_list_segments_uses_real_db_query_and_console_response_shape( + test_client_with_containers: FlaskClient, + db_session_with_containers: Session, +) -> None: + account, tenant = create_console_account_and_tenant(db_session_with_containers) + dataset = Dataset( + tenant_id=tenant.id, + name=f"Console Segment Dataset {uuid4()}", + description="Console segment integration dataset", + data_source_type=DataSourceType.UPLOAD_FILE, + indexing_technique=IndexTechniqueType.ECONOMY, + created_by=account.id, + permission="only_me", + provider="vendor", + ) + db_session_with_containers.add(dataset) + db_session_with_containers.commit() + + document = Document( + tenant_id=tenant.id, + dataset_id=dataset.id, + position=1, + data_source_type=DataSourceType.UPLOAD_FILE, + batch=f"batch-{uuid4()}", + name="console-segment-doc.txt", + created_from=DocumentCreatedFrom.WEB, + created_by=account.id, + enabled=True, + archived=False, + indexing_status=IndexingStatus.COMPLETED, + doc_form=IndexStructureType.PARAGRAPH_INDEX, + word_count=3, + tokens=4, + ) + db_session_with_containers.add(document) + db_session_with_containers.commit() + + segment = DocumentSegment( + tenant_id=tenant.id, + dataset_id=dataset.id, + document_id=document.id, + position=1, + content="Console integration segment", + word_count=3, + tokens=4, + keywords=["console", "integration"], + status=SegmentStatus.COMPLETED, + created_by=account.id, + ) + db_session_with_containers.add(segment) + db_session_with_containers.commit() + segment_id = segment.id + + db_session_with_containers.add( + DocumentSegmentSummary( + dataset_id=dataset.id, + document_id=document.id, + chunk_id=segment.id, + summary_content="Console DB summary", + status=SummaryStatus.COMPLETED, + ) + ) + db_session_with_containers.commit() + + response = test_client_with_containers.get( + f"/console/api/datasets/{dataset.id}/documents/{document.id}/segments" + "?page=1&limit=10&status=completed&keyword=integration&enabled=all", + headers=authenticate_console_client(test_client_with_containers, account), + ) + + assert response.status_code == 200 + body = response.get_json() + assert set(body) == {"data", "limit", "total", "total_pages", "page"} + assert body["limit"] == 10 + assert body["total"] == 1 + assert body["total_pages"] == 1 + assert "has_more" not in body + assert body["data"][0]["id"] == segment_id + assert body["data"][0]["summary"] == "Console DB summary" diff --git a/api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_segment.py b/api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_segment.py new file mode 100644 index 0000000000..050149aec1 --- /dev/null +++ b/api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_segment.py @@ -0,0 +1,153 @@ +"""DB-backed integration tests for service API dataset segment endpoints.""" + +from __future__ import annotations + +from uuid import uuid4 + +from flask.testing import FlaskClient +from sqlalchemy.orm import Session + +from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType +from models.dataset import ChildChunk, Dataset, Document, DocumentSegment, DocumentSegmentSummary +from models.enums import ( + ApiTokenType, + DataSourceType, + DocumentCreatedFrom, + IndexingStatus, + SegmentStatus, + SegmentType, + SummaryStatus, +) +from models.model import ApiToken +from tests.test_containers_integration_tests.controllers.console.helpers import create_console_account_and_tenant + + +def _create_dataset_graph(db_session: Session) -> tuple[Dataset, Document, DocumentSegment]: + account, tenant = create_console_account_and_tenant(db_session) + dataset = Dataset( + tenant_id=tenant.id, + name=f"Segment Dataset {uuid4()}", + description="Segment integration dataset", + data_source_type=DataSourceType.UPLOAD_FILE, + indexing_technique=IndexTechniqueType.ECONOMY, + created_by=account.id, + permission="only_me", + provider="vendor", + enable_api=True, + ) + db_session.add(dataset) + db_session.commit() + + document = Document( + tenant_id=tenant.id, + dataset_id=dataset.id, + position=1, + data_source_type=DataSourceType.UPLOAD_FILE, + batch=f"batch-{uuid4()}", + name="segment-doc.txt", + created_from=DocumentCreatedFrom.API, + created_by=account.id, + enabled=True, + archived=False, + indexing_status=IndexingStatus.COMPLETED, + doc_form=IndexStructureType.PARAGRAPH_INDEX, + word_count=4, + tokens=5, + ) + db_session.add(document) + db_session.commit() + + segment = DocumentSegment( + tenant_id=tenant.id, + dataset_id=dataset.id, + document_id=document.id, + position=1, + content="Segment content for integration", + word_count=4, + tokens=5, + keywords=["segment", "integration"], + status=SegmentStatus.COMPLETED, + created_by=account.id, + ) + db_session.add(segment) + db_session.commit() + + summary = DocumentSegmentSummary( + dataset_id=dataset.id, + document_id=document.id, + chunk_id=segment.id, + summary_content="DB summary", + status=SummaryStatus.COMPLETED, + ) + db_session.add(summary) + + api_token = ApiToken( + tenant_id=tenant.id, + type=ApiTokenType.DATASET, + token=f"dataset-{uuid4().hex}", + ) + db_session.add(api_token) + db_session.commit() + return dataset, document, segment + + +def _auth_headers(db_session: Session, dataset: Dataset) -> dict[str, str]: + token = db_session.query(ApiToken).filter_by(tenant_id=dataset.tenant_id, type=ApiTokenType.DATASET).one() + return {"Authorization": f"Bearer {token.token}"} + + +def test_list_segments_uses_real_services_and_service_api_shape( + test_client_with_containers: FlaskClient, + db_session_with_containers: Session, +) -> None: + dataset, document, segment = _create_dataset_graph(db_session_with_containers) + segment_id = segment.id + + response = test_client_with_containers.get( + f"/v1/datasets/{dataset.id}/documents/{document.id}/segments" + "?page=1&limit=20&status=completed&keyword=integration", + headers=_auth_headers(db_session_with_containers, dataset), + ) + + assert response.status_code == 200 + body = response.get_json() + assert set(body) == {"data", "doc_form", "total", "has_more", "limit", "page"} + assert body["doc_form"] == "text_model" + assert body["total"] == 1 + assert "total_pages" not in body + assert body["data"][0]["id"] == segment_id + assert body["data"][0]["summary"] == "DB summary" + assert body["data"][0]["attachments"] == [] + assert body["data"][0]["child_chunks"] == [] + + +def test_list_child_chunks_uses_real_segment_service( + test_client_with_containers: FlaskClient, + db_session_with_containers: Session, +) -> None: + dataset, document, segment = _create_dataset_graph(db_session_with_containers) + child_chunk = ChildChunk( + tenant_id=dataset.tenant_id, + dataset_id=dataset.id, + document_id=document.id, + segment_id=segment.id, + position=1, + content="Child integration content", + word_count=3, + type=SegmentType.CUSTOMIZED, + created_by=document.created_by, + ) + db_session_with_containers.add(child_chunk) + db_session_with_containers.commit() + + response = test_client_with_containers.get( + f"/v1/datasets/{dataset.id}/documents/{document.id}/segments/{segment.id}/child_chunks" + "?page=1&limit=20&keyword=integration", + headers=_auth_headers(db_session_with_containers, dataset), + ) + + assert response.status_code == 200 + body = response.get_json() + assert set(body) == {"data", "total", "total_pages", "page", "limit"} + assert body["total"] == 1 + assert body["data"][0]["content"] == "Child integration content" diff --git a/api/tests/unit_tests/controllers/common/test_schema.py b/api/tests/unit_tests/controllers/common/test_schema.py index 14b0833dcf..c5da7093a0 100644 --- a/api/tests/unit_tests/controllers/common/test_schema.py +++ b/api/tests/unit_tests/controllers/common/test_schema.py @@ -4,6 +4,7 @@ from typing import Literal from unittest.mock import MagicMock, patch import pytest +from flask import Flask from flask_restx import Namespace from pydantic import BaseModel, ConfigDict, Field @@ -47,6 +48,13 @@ class QueryModel(BaseModel): ambiguous: int | str | None = Field(default=None, description="Ambiguous query parameter") +class HelperQueryModel(BaseModel): + page: int = 1 + limit: int = 20 + status: list[str] = Field(default_factory=list) + keyword: str | None = None + + class NullableSchemaModel(BaseModel): name: str | None = None tags: list[str] | None = None @@ -320,3 +328,41 @@ def test_query_params_from_model_builds_flask_restx_doc_params(): "required": False, "description": "Ambiguous query parameter", } + + +def test_query_params_from_request_preserves_repeated_list_params(): + from controllers.common.schema import query_params_from_request + + app = Flask(__name__) + with app.test_request_context("/?page=2&limit=30&status=active&status=inactive&keyword=hello"): + query = query_params_from_request(HelperQueryModel, list_fields=("status",)) + + assert query.page == 2 + assert query.limit == 30 + assert query.status == ["active", "inactive"] + assert query.keyword == "hello" + + +def test_query_params_from_request_raises_for_malformed_ints_by_default(): + from controllers.common.schema import query_params_from_request + + app = Flask(__name__) + with app.test_request_context("/?page=bad&limit="): + with pytest.raises(ValueError): + query_params_from_request(HelperQueryModel, list_fields=("status",)) + + +def test_query_params_from_request_can_use_model_default_for_malformed_defaulted_ints(): + from controllers.common.schema import query_params_from_request + + app = Flask(__name__) + with app.test_request_context("/?page=bad&limit="): + query = query_params_from_request( + HelperQueryModel, + list_fields=("status",), + use_defaults_for_malformed_ints=True, + ) + + assert query.page == 1 + assert query.limit == 20 + assert query.status == [] diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets_segments.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets_segments.py index f9b11cf11f..a07c110ed9 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_datasets_segments.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets_segments.py @@ -10,13 +10,13 @@ from controllers.console import console_ns from controllers.console.app.error import ProviderNotInitializeError from controllers.console.datasets.datasets_segments import ( ChildChunkAddApi, + ChildChunkBatchUpdatePayload, ChildChunkUpdateApi, DatasetDocumentSegmentAddApi, DatasetDocumentSegmentApi, DatasetDocumentSegmentBatchImportApi, DatasetDocumentSegmentListApi, DatasetDocumentSegmentUpdateApi, - _get_segment_with_summary, ) from controllers.console.datasets.error import ( ChildChunkDeleteIndexError, @@ -25,9 +25,13 @@ from controllers.console.datasets.error import ( ) from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.rag.index_processor.constant.index_type import IndexStructureType +from fields.segment_fields import segment_response_with_summary from libs.datetime_utils import naive_utc_now from models.dataset import ChildChunk, DocumentSegment +from models.enums import SegmentStatus, SegmentType from models.model import UploadFile +from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError +from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingServiceError def unwrap(func): @@ -37,49 +41,89 @@ def unwrap(func): def _segment(): - return SimpleNamespace( - id="s1", + segment = DocumentSegment( + tenant_id="tenant-1", + dataset_id="ds-1", + document_id="doc-1", position=1, - document_id="d1", content="c", - sign_content="c", - answer="a", word_count=1, tokens=1, - keywords=[], - index_node_id="n1", - index_node_hash="h", - hit_count=0, - enabled=True, - disabled_at=None, - disabled_by=None, - status="normal", created_by="u1", - created_at=naive_utc_now(), - updated_at=naive_utc_now(), - updated_by="u1", - indexing_at=None, - completed_at=None, - error=None, - stopped_at=None, - child_chunks=[], - attachments=[], - summary=None, ) + segment.id = "seg-1" + segment.answer = "a" + segment.keywords = ["test"] + segment.index_node_id = "n1" + segment.index_node_hash = "h" + segment.status = SegmentStatus.COMPLETED + segment.created_at = naive_utc_now() + segment.updated_at = naive_utc_now() + segment.updated_by = "u1" + return segment -def test_get_segment_with_summary(monkeypatch: pytest.MonkeyPatch): +def _child_chunk(): + child_chunk = ChildChunk( + tenant_id="tenant-1", + dataset_id="ds-1", + document_id="doc-1", + segment_id="seg-1", + position=1, + content="child", + word_count=1, + created_by="u1", + ) + child_chunk.id = "cc-1" + child_chunk.type = SegmentType.CUSTOMIZED + child_chunk.created_at = naive_utc_now() + child_chunk.updated_at = naive_utc_now() + return child_chunk + + +def _segment_response_dict(): + return { + "id": "seg-1", + "position": 1, + "document_id": "doc-1", + "content": "c", + "sign_content": "c", + "answer": "a", + "word_count": 1, + "tokens": 1, + "keywords": ["test"], + "index_node_id": "n1", + "index_node_hash": "h", + "hit_count": 0, + "enabled": True, + "disabled_at": None, + "disabled_by": None, + "status": "completed", + "created_by": "u1", + "created_at": 1779678000, + "updated_at": 1779678000, + "updated_by": "u1", + "indexing_at": None, + "completed_at": None, + "error": None, + "stopped_at": None, + "child_chunks": [], + "attachments": [], + "summary": None, + } + + +def test_segment_response_with_summary(): segment = _segment() - summary = SimpleNamespace(summary_content="summary") - monkeypatch.setattr( - "services.summary_index_service.SummaryIndexService.get_segment_summary", - lambda *_args, **_kwargs: summary, - ) + with ( + patch("models.dataset.db.session.scalar", return_value=None), + patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))), + ): + result = segment_response_with_summary(segment, "summary") - result = _get_segment_with_summary(segment, dataset_id="d1") - - assert result["summary"] == "summary" + assert result.summary == "summary" + assert result.id == segment.id class TestDatasetDocumentSegmentListApi: @@ -90,8 +134,7 @@ class TestDatasetDocumentSegmentListApi: dataset = MagicMock() document = MagicMock() - segment = MagicMock(spec=DocumentSegment) - segment.id = "seg-1" + segment = _segment() pagination = MagicMock() pagination.items = [segment] @@ -124,10 +167,8 @@ class TestDatasetDocumentSegmentListApi: "services.summary_index_service.SummaryIndexService.get_segments_summaries", return_value={}, ), - patch( - "controllers.console.datasets.datasets_segments.marshal", - return_value={"id": "seg-1"}, - ), + patch("models.dataset.db.session.scalar", return_value=None), + patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))), ): response, status = method(api, "ds-1", "doc-1") @@ -370,8 +411,7 @@ class TestDatasetDocumentSegmentAddApi: document = MagicMock() document.doc_form = IndexStructureType.PARAGRAPH_INDEX - segment = MagicMock() - segment.id = "seg-1" + segment = _segment() with ( app.test_request_context("/", json=payload), @@ -401,13 +441,11 @@ class TestDatasetDocumentSegmentAddApi: return_value=segment, ), patch( - "controllers.console.datasets.datasets_segments.marshal", - return_value={"id": "seg-1"}, - ), - patch( - "controllers.console.datasets.datasets_segments._get_segment_with_summary", - return_value={"id": "seg-1"}, + "controllers.console.datasets.datasets_segments.SummaryIndexService.get_segment_summary", + return_value=None, ), + patch("models.dataset.db.session.scalar", return_value=None), + patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))), ): response, status = method(api, "ds-1", "doc-1") @@ -509,7 +547,7 @@ class TestDatasetDocumentSegmentUpdateApi: document = MagicMock() document.doc_form = IndexStructureType.PARAGRAPH_INDEX - segment = MagicMock() + segment = _segment() with ( app.test_request_context("/", json=payload), @@ -528,7 +566,7 @@ class TestDatasetDocumentSegmentUpdateApi: ), patch( "controllers.console.datasets.datasets_segments.db.session.scalar", - return_value=segment, + side_effect=[segment, None], ), patch( "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", @@ -543,9 +581,10 @@ class TestDatasetDocumentSegmentUpdateApi: return_value=segment, ), patch( - "controllers.console.datasets.datasets_segments._get_segment_with_summary", - return_value={"id": "seg-1"}, + "controllers.console.datasets.datasets_segments.SummaryIndexService.get_segment_summary", + return_value=None, ), + patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))), ): response, status = method(api, "ds-1", "doc-1", "seg-1") @@ -800,6 +839,52 @@ class TestDatasetDocumentSegmentBatchImportApi: class TestChildChunkAddApi: + def test_patch_documents_batch_update_payload(self): + api_doc = unwrap(ChildChunkAddApi.patch).__apidoc__ + expected_model = ChildChunkBatchUpdatePayload.__name__ + + assert [model.name for model in api_doc["expect"]] == [expected_model] + + def test_get_uses_default_pagination_for_malformed_ints(self, app: Flask): + api = ChildChunkAddApi() + method = unwrap(api.get) + + pagination = MagicMock(items=[], total=0, pages=0) + + with ( + app.test_request_context("/?page=bad&limit="), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.scalar", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.get_child_chunks", + return_value=pagination, + ) as get_child_chunks, + ): + response, status = method(api, "ds-1", "doc-1", "seg-1") + + assert status == 200 + assert response["page"] == 1 + assert response["limit"] == 20 + get_child_chunks.assert_called_once_with("seg-1", "doc-1", "ds-1", 1, 20, None) + def test_post_success(self, app: Flask): api = ChildChunkAddApi() method = unwrap(api.post) @@ -814,7 +899,7 @@ class TestChildChunkAddApi: document = MagicMock() segment = MagicMock() - child_chunk = MagicMock(spec=ChildChunk) + child_chunk = _child_chunk() with ( app.test_request_context("/", json=payload), @@ -843,10 +928,6 @@ class TestChildChunkAddApi: "controllers.console.datasets.datasets_segments.SegmentService.create_child_chunk", return_value=child_chunk, ), - patch( - "controllers.console.datasets.datasets_segments.marshal", - return_value={"id": "cc-1"}, - ), ): response, status = method(api, "ds-1", "doc-1", "seg-1") @@ -890,7 +971,7 @@ class TestChildChunkAddApi: ), patch( "controllers.console.datasets.datasets_segments.SegmentService.create_child_chunk", - side_effect=services.errors.chunk.ChildChunkIndexingError("fail"), + side_effect=ChildChunkIndexingServiceError("fail"), ), ): with pytest.raises(ChildChunkIndexingError): @@ -977,7 +1058,7 @@ class TestChildChunkUpdateApi: ), patch( "controllers.console.datasets.datasets_segments.SegmentService.delete_child_chunk", - side_effect=services.errors.chunk.ChildChunkDeleteIndexError("fail"), + side_effect=ChildChunkDeleteIndexServiceError("fail"), ), ): with pytest.raises(ChildChunkDeleteIndexError): @@ -992,10 +1073,7 @@ class TestSegmentListAdvancedCases: dataset = MagicMock() document = MagicMock() - segment = MagicMock(spec=DocumentSegment) - segment.id = "seg-1" - segment.keywords = ["test"] - segment.enabled = True + segment = _segment() pagination = MagicMock(items=[segment], total=1, pages=1) @@ -1025,6 +1103,8 @@ class TestSegmentListAdvancedCases: "services.summary_index_service.SummaryIndexService.get_segments_summaries", return_value={}, ), + patch("models.dataset.db.session.scalar", return_value=None), + patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))), ): result = method(api, "ds-1", "doc-1") diff --git a/api/tests/unit_tests/controllers/service_api/dataset/test_dataset_segment.py b/api/tests/unit_tests/controllers/service_api/dataset/test_dataset_segment.py index 2e1051ab6b..5eb76e309c 100644 --- a/api/tests/unit_tests/controllers/service_api/dataset/test_dataset_segment.py +++ b/api/tests/unit_tests/controllers/service_api/dataset/test_dataset_segment.py @@ -29,15 +29,67 @@ from controllers.service_api.dataset.segment import ( DatasetChildChunkApi, DatasetSegmentApi, SegmentApi, + SegmentCreateItemPayload, SegmentCreatePayload, SegmentListQuery, ) from core.rag.index_processor.constant.index_type import IndexStructureType +from libs.datetime_utils import naive_utc_now from models.dataset import ChildChunk, Dataset, Document, DocumentSegment -from models.enums import IndexingStatus +from models.enums import IndexingStatus, SegmentType from services.dataset_service import DocumentService, SegmentService +def _segment_response_dict(summary: str | None = None): + return { + "id": "seg-1", + "position": 1, + "document_id": "doc-id", + "content": "segment content", + "sign_content": "segment content", + "answer": None, + "word_count": 2, + "tokens": 3, + "keywords": ["segment"], + "index_node_id": None, + "index_node_hash": None, + "hit_count": 0, + "enabled": True, + "disabled_at": None, + "disabled_by": None, + "status": "completed", + "created_by": "account-1", + "created_at": 1779678000, + "updated_at": 1779678000, + "updated_by": None, + "indexing_at": None, + "completed_at": None, + "error": None, + "stopped_at": None, + "child_chunks": [], + "attachments": [], + "summary": summary, + } + + +def _child_chunk() -> ChildChunk: + child_chunk = ChildChunk( + tenant_id="tenant-1", + dataset_id="dataset-1", + document_id="doc-id", + segment_id="seg-id", + position=1, + content="child chunk content", + word_count=3, + created_by="account-1", + ) + child_chunk.id = "child-1" + child_chunk.type = SegmentType.CUSTOMIZED + child_chunk.created_at = naive_utc_now() + child_chunk.updated_at = naive_utc_now() + return child_chunk + + class TestSegmentCreatePayload: """Test suite for SegmentCreatePayload Pydantic model.""" @@ -48,18 +100,34 @@ class TestSegmentCreatePayload: {"content": "Second segment", "keywords": ["key1", "key2"]}, ] payload = SegmentCreatePayload(segments=segments) - assert payload.segments == segments + assert payload.segments is not None + assert [segment.model_dump(exclude_none=True) for segment in payload.segments] == segments assert len(payload.segments) == 2 def test_payload_with_none_segments(self): - """Test payload with None segments (should be valid).""" - payload = SegmentCreatePayload(segments=None) - assert payload.segments is None + """Test payload with None segments is rejected.""" + with pytest.raises(ValueError): + SegmentCreatePayload.model_validate({"segments": None}) def test_payload_with_empty_segments(self): - """Test payload with empty segments list.""" - payload = SegmentCreatePayload(segments=[]) - assert payload.segments == [] + """Test payload with empty segments list is rejected.""" + with pytest.raises(ValueError): + SegmentCreatePayload.model_validate({"segments": []}) + + def test_payload_requires_segments(self): + """Test payload requires a segments field.""" + with pytest.raises(ValueError): + SegmentCreatePayload.model_validate({}) + + def test_payload_rejects_segment_without_content(self): + """Test each segment requires content.""" + with pytest.raises(ValueError): + SegmentCreatePayload.model_validate({"segments": [{"answer": "Answer only"}]}) + + def test_payload_rejects_blank_content(self): + """Test content cannot be whitespace-only.""" + with pytest.raises(ValueError): + SegmentCreateItemPayload.model_validate({"content": " "}) def test_payload_with_complex_segment_data(self): """Test payload with complex segment structure.""" @@ -72,8 +140,9 @@ class TestSegmentCreatePayload: } ] payload = SegmentCreatePayload(segments=segments) - assert payload.segments[0]["content"] == "Complex segment" - assert payload.segments[0]["keywords"] == ["keyword1", "keyword2"] + assert payload.segments is not None + assert payload.segments[0].content == "Complex segment" + assert payload.segments[0].keywords == ["keyword1", "keyword2"] class TestSegmentListQuery: @@ -117,7 +186,7 @@ class TestChildChunkCreatePayload: def test_payload_requires_content(self): """Test that content is required.""" with pytest.raises(ValueError): - ChildChunkCreatePayload() + ChildChunkCreatePayload.model_validate({}) def test_payload_with_long_content(self): """Test payload with very long content.""" @@ -157,12 +226,12 @@ class TestChildChunkListQuery: def test_query_limit_minimum(self): """Test query limit minimum validation.""" with pytest.raises(ValueError): - ChildChunkListQuery(limit=0) + ChildChunkListQuery.model_validate({"limit": 0}) def test_query_page_minimum(self): """Test query page minimum validation.""" with pytest.raises(ValueError): - ChildChunkListQuery(page=0) + ChildChunkListQuery.model_validate({"page": 0}) def test_query_with_keyword(self): """Test query with keyword filter.""" @@ -292,6 +361,7 @@ class TestSegmentServiceMockedBehavior: segments=[{"content": "Test"}, {"content": "Test 2"}], document=mock_document, dataset=mock_dataset ) + assert result is not None assert len(result) == 2 mock_create.assert_called_once() @@ -301,7 +371,12 @@ class TestSegmentServiceMockedBehavior: mock_segments = [Mock(), Mock()] mock_get.return_value = (mock_segments, 2) - segments, count = SegmentService.get_segments(document_id=mock_document.id, page=1, limit=20) + segments, count = SegmentService.get_segments( + document_id=mock_document.id, + tenant_id=mock_document.tenant_id, + page=1, + limit=20, + ) assert len(segments) == 2 assert count == 2 @@ -429,13 +504,13 @@ class TestDocumentValidation: """Test that enabled=True is valid.""" document = Mock(spec=Document) document.enabled = True - assert document.enabled is True + assert document.enabled def test_document_enabled_false_is_invalid(self): """Test that enabled=False is invalid for segment operations.""" document = Mock(spec=Document) document.enabled = False - assert document.enabled is False + assert not document.enabled class TestDatasetModels: @@ -462,7 +537,7 @@ class TestDatasetModels: assert segment.id is not None assert segment.document_id is not None - assert segment.content is not None + assert segment.content == "Test content" def test_child_chunk_has_required_fields(self): """Test ChildChunk model has required fields.""" @@ -473,7 +548,7 @@ class TestDatasetModels: assert chunk.id is not None assert chunk.segment_id is not None - assert chunk.content is not None + assert chunk.content == "Chunk content" class TestSegmentUpdatePayload: @@ -594,6 +669,7 @@ class TestSegmentCreateArgs: from services.entities.knowledge_entities.knowledge_entities import SegmentCreateArgs args = SegmentCreateArgs(content="Test content", keywords=["machine learning", "AI", "neural networks"]) + assert args.keywords is not None assert len(args.keywords) == 3 @@ -690,7 +766,7 @@ class TestSegmentIndexingRequirements: # Both conditions must be true assert document.indexing_status == "completed" - assert document.enabled is True + assert document.enabled class TestSegmentLimits: @@ -753,7 +829,7 @@ class TestSegmentPagination: # # Strategy per decorator type: # - No billing decorator → call the method directly; only patch ``db``, -# services, ``current_account_with_tenant``, and ``marshal``. +# services, ``current_account_with_tenant``, and response helpers when needed. # - ``@cloud_edition_billing_rate_limit_check`` (preserves ``__wrapped__``) # → call via ``method.__wrapped__(self, …)`` to skip the decorator. # - ``@cloud_edition_billing_resource_check`` (no ``__wrapped__``) → patch @@ -766,11 +842,11 @@ class TestSegmentApiGet: """Test suite for SegmentApi.get() endpoint. ``get`` has no billing decorators but calls - ``current_account_with_tenant()`` and ``marshal``. + ``current_account_with_tenant()`` and response serialization. """ - @patch("controllers.service_api.dataset.segment.SummaryIndexService") - @patch("controllers.service_api.dataset.segment.marshal") + @patch("controllers.service_api.dataset.segment.segment_responses_with_summaries") + @patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segments_summaries") @patch("controllers.service_api.dataset.segment.SegmentService") @patch("controllers.service_api.dataset.segment.DocumentService") @patch("controllers.service_api.dataset.segment.current_account_with_tenant") @@ -781,8 +857,8 @@ class TestSegmentApiGet: mock_account_fn, mock_doc_svc, mock_seg_svc, - mock_marshal, - mock_summary_svc, + mock_get_summaries, + mock_dump_segments, app: Flask, mock_tenant, mock_dataset, @@ -794,8 +870,8 @@ class TestSegmentApiGet: mock_db.session.scalar.return_value = mock_dataset mock_doc_svc.get_document.return_value = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX) mock_seg_svc.get_segments.return_value = ([mock_segment], 1) - mock_marshal.return_value = {"id": mock_segment.id} - mock_summary_svc.get_segments_summaries.return_value = {} + mock_get_summaries.return_value = {} + mock_dump_segments.return_value = [_segment_response_dict()] # Act with app.test_request_context( @@ -881,8 +957,8 @@ class TestSegmentApiPost: mock_rate_limit.enabled = False mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit - @patch("controllers.service_api.dataset.segment.SummaryIndexService") - @patch("controllers.service_api.dataset.segment.marshal") + @patch("controllers.service_api.dataset.segment.segment_responses_with_summaries") + @patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segments_summaries") @patch("controllers.service_api.dataset.segment.SegmentService") @patch("controllers.service_api.dataset.segment.DocumentService") @patch("controllers.service_api.dataset.segment.current_account_with_tenant") @@ -897,8 +973,8 @@ class TestSegmentApiPost: mock_account_fn, mock_doc_svc, mock_seg_svc, - mock_marshal, - mock_summary_svc, + mock_get_summaries, + mock_dump_segments, app: Flask, mock_tenant, mock_dataset, @@ -920,8 +996,8 @@ class TestSegmentApiPost: mock_seg_svc.segment_create_args_validate.return_value = None mock_seg_svc.multi_create_segment.return_value = [mock_segment] - mock_marshal.return_value = {"id": mock_segment.id} - mock_summary_svc.get_segments_summaries.return_value = {} + mock_get_summaries.return_value = {} + mock_dump_segments.return_value = [_segment_response_dict()] segments_data = [{"content": "Test segment content", "answer": "Test answer"}] @@ -1222,8 +1298,8 @@ class TestDatasetSegmentApiUpdate: mock_rate_limit.enabled = False mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit - @patch("controllers.service_api.dataset.segment.SummaryIndexService") - @patch("controllers.service_api.dataset.segment.marshal") + @patch("controllers.service_api.dataset.segment.segment_response_with_summary") + @patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segment_summary") @patch("controllers.service_api.dataset.segment.SegmentService") @patch("controllers.service_api.dataset.segment.DocumentService") @patch("controllers.service_api.dataset.segment.DatasetService") @@ -1240,8 +1316,8 @@ class TestDatasetSegmentApiUpdate: mock_dataset_svc, mock_doc_svc, mock_seg_svc, - mock_marshal, - mock_summary_svc, + mock_get_summary, + mock_dump_segment, app: Flask, mock_tenant, mock_dataset, @@ -1253,12 +1329,13 @@ class TestDatasetSegmentApiUpdate: mock_dataset.indexing_technique = "economy" mock_db.session.scalar.return_value = mock_dataset mock_dataset_svc.check_dataset_model_setting.return_value = None - mock_doc_svc.get_document.return_value = Mock() + mock_doc_svc.get_document.return_value = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX) mock_seg_svc.get_segment_by_id.return_value = mock_segment updated = Mock() + updated.id = "updated-seg" mock_seg_svc.update_segment.return_value = updated - mock_marshal.return_value = {"id": mock_segment.id} - mock_summary_svc.get_segment_summary.return_value = None + mock_get_summary.return_value = None + mock_dump_segment.return_value = _segment_response_dict() with app.test_request_context( f"/datasets/{mock_dataset.id}/documents/doc-id/segments/{mock_segment.id}", @@ -1365,11 +1442,11 @@ class TestDatasetSegmentApiGetSingle: """Test suite for DatasetSegmentApi.get() (single segment) endpoint. ``get`` has no billing decorators but calls - ``current_account_with_tenant()`` and ``marshal``. + ``current_account_with_tenant()`` and response serialization. """ - @patch("controllers.service_api.dataset.segment.SummaryIndexService") - @patch("controllers.service_api.dataset.segment.marshal") + @patch("controllers.service_api.dataset.segment.segment_response_with_summary") + @patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segment_summary") @patch("controllers.service_api.dataset.segment.SegmentService") @patch("controllers.service_api.dataset.segment.DocumentService") @patch("controllers.service_api.dataset.segment.DatasetService") @@ -1382,8 +1459,8 @@ class TestDatasetSegmentApiGetSingle: mock_dataset_svc, mock_doc_svc, mock_seg_svc, - mock_marshal, - mock_summary_svc, + mock_get_summary, + mock_dump_segment, app: Flask, mock_tenant, mock_dataset, @@ -1396,8 +1473,8 @@ class TestDatasetSegmentApiGetSingle: mock_doc = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX) mock_doc_svc.get_document.return_value = mock_doc mock_seg_svc.get_segment_by_id.return_value = mock_segment - mock_marshal.return_value = {"id": mock_segment.id} - mock_summary_svc.get_segment_summary.return_value = None + mock_get_summary.return_value = None + mock_dump_segment.return_value = _segment_response_dict() with app.test_request_context( f"/datasets/{mock_dataset.id}/documents/doc-id/segments/{mock_segment.id}", @@ -1415,8 +1492,8 @@ class TestDatasetSegmentApiGetSingle: assert "data" in response assert response["doc_form"] == IndexStructureType.PARAGRAPH_INDEX - @patch("controllers.service_api.dataset.segment.SummaryIndexService") - @patch("controllers.service_api.dataset.segment.marshal") + @patch("controllers.service_api.dataset.segment.segment_response_with_summary") + @patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segment_summary") @patch("controllers.service_api.dataset.segment.SegmentService") @patch("controllers.service_api.dataset.segment.DocumentService") @patch("controllers.service_api.dataset.segment.DatasetService") @@ -1429,8 +1506,8 @@ class TestDatasetSegmentApiGetSingle: mock_dataset_svc, mock_doc_svc, mock_seg_svc, - mock_marshal, - mock_summary_svc, + mock_get_summary, + mock_dump_segment, app: Flask, mock_tenant, mock_dataset, @@ -1443,11 +1520,9 @@ class TestDatasetSegmentApiGetSingle: mock_doc = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX) mock_doc_svc.get_document.return_value = mock_doc mock_seg_svc.get_segment_by_id.return_value = mock_segment - mock_marshal.return_value = {"id": mock_segment.id, "summary": None} - - mock_summary_record = Mock() - mock_summary_record.summary_content = "This is the segment summary" - mock_summary_svc.get_segment_summary.return_value = mock_summary_record + mock_summary_record = Mock(summary_content="This is the segment summary") + mock_get_summary.return_value = mock_summary_record + mock_dump_segment.return_value = _segment_response_dict("This is the segment summary") with app.test_request_context( f"/datasets/{mock_dataset.id}/documents/doc-id/segments/{mock_segment.id}", @@ -1565,10 +1640,9 @@ class TestChildChunkApiGet: """Test suite for ChildChunkApi.get() endpoint. ``get`` has no billing decorators but calls - ``current_account_with_tenant()``, ``marshal``, and ``db``. + ``current_account_with_tenant()``, response serialization, and ``db``. """ - @patch("controllers.service_api.dataset.segment.marshal") @patch("controllers.service_api.dataset.segment.SegmentService") @patch("controllers.service_api.dataset.segment.DocumentService") @patch("controllers.service_api.dataset.segment.current_account_with_tenant") @@ -1579,7 +1653,6 @@ class TestChildChunkApiGet: mock_account_fn, mock_doc_svc, mock_seg_svc, - mock_marshal, app: Flask, mock_tenant, mock_dataset, @@ -1591,11 +1664,10 @@ class TestChildChunkApiGet: mock_seg_svc.get_segment_by_id.return_value = Mock() mock_pagination = Mock() - mock_pagination.items = [Mock(), Mock()] + mock_pagination.items = [_child_chunk(), _child_chunk()] mock_pagination.total = 2 mock_pagination.pages = 1 mock_seg_svc.get_child_chunks.return_value = mock_pagination - mock_marshal.return_value = [{"id": "c1"}, {"id": "c2"}] with app.test_request_context( f"/datasets/{mock_dataset.id}/documents/doc-id/segments/seg-id/child_chunks?page=1&limit=20", @@ -1727,7 +1799,6 @@ class TestChildChunkApiPost: mock_rate_limit.enabled = False mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit - @patch("controllers.service_api.dataset.segment.marshal") @patch("controllers.service_api.dataset.segment.SegmentService") @patch("controllers.service_api.dataset.segment.DocumentService") @patch("controllers.service_api.dataset.segment.current_account_with_tenant") @@ -1742,7 +1813,6 @@ class TestChildChunkApiPost: mock_account_fn, mock_doc_svc, mock_seg_svc, - mock_marshal, app: Flask, mock_tenant, mock_dataset, @@ -1754,9 +1824,8 @@ class TestChildChunkApiPost: mock_db.session.scalar.return_value = mock_dataset mock_doc_svc.get_document.return_value = Mock() mock_seg_svc.get_segment_by_id.return_value = Mock() - mock_child = Mock() + mock_child = _child_chunk() mock_seg_svc.create_child_chunk.return_value = mock_child - mock_marshal.return_value = {"id": "child-1"} with app.test_request_context( f"/datasets/{mock_dataset.id}/documents/doc-id/segments/seg-id/child_chunks", diff --git a/packages/contracts/generated/api/console/datasets/orpc.gen.ts b/packages/contracts/generated/api/console/datasets/orpc.gen.ts index c926c82620..3a53a9a7f5 100644 --- a/packages/contracts/generated/api/console/datasets/orpc.gen.ts +++ b/packages/contracts/generated/api/console/datasets/orpc.gen.ts @@ -13,6 +13,7 @@ import { zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdPath, zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse, zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath, + zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery, zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse, zDeleteDatasetsByDatasetIdDocumentsPath, zDeleteDatasetsByDatasetIdDocumentsResponse, @@ -50,8 +51,10 @@ import { zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBatchImportPath, zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBatchImportResponse, zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksPath, + zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksQuery, zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse, zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath, + zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery, zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse, zGetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusPath, zGetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusResponse, @@ -103,8 +106,10 @@ import { zPatchDatasetsByDatasetIdDocumentsByDocumentIdProcessingResumePath, zPatchDatasetsByDatasetIdDocumentsByDocumentIdProcessingResumeResponse, zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionPath, + zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionQuery, zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionResponse, zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdBody, + zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksBody, zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdBody, zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdPath, zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse, @@ -1039,23 +1044,20 @@ export const patch6 = oc path: '/datasets/{dataset_id}/documents/{document_id}/segment/{action}', tags: ['console'], }) - .input(z.object({ params: zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionPath })) + .input( + z.object({ + params: zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionPath, + query: zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionQuery.optional(), + }), + ) .output(zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionResponse) export const byAction3 = { patch: patch6, } -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const post13 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdSegment', @@ -1127,16 +1129,8 @@ export const delete3 = oc zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse, ) -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const patch7 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'PATCH', operationId: @@ -1160,16 +1154,8 @@ export const byChildChunkId = { patch: patch7, } -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const get21 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunks', @@ -1179,20 +1165,14 @@ export const get21 = oc .input( z.object({ params: zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksPath, + query: + zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksQuery.optional(), }), ) .output(zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse) -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const patch8 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'PATCH', operationId: 'patchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunks', @@ -1201,21 +1181,14 @@ export const patch8 = oc }) .input( z.object({ + body: zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksBody, params: zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksPath, }), ) .output(zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse) -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const post15 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunks', @@ -1251,16 +1224,8 @@ export const delete4 = oc ) .output(zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse) -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const patch9 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'PATCH', operationId: 'patchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentId', @@ -1290,26 +1255,28 @@ export const delete5 = oc successStatus: 204, tags: ['console'], }) - .input(z.object({ params: zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath })) + .input( + z.object({ + params: zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath, + query: zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery.optional(), + }), + ) .output(zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse) -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const get22 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdSegments', path: '/datasets/{dataset_id}/documents/{document_id}/segments', tags: ['console'], }) - .input(z.object({ params: zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath })) + .input( + z.object({ + params: zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath, + query: zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery.optional(), + }), + ) .output(zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse) export const segments = { diff --git a/packages/contracts/generated/api/console/datasets/types.gen.ts b/packages/contracts/generated/api/console/datasets/types.gen.ts index 938331f9c9..020e8fca76 100644 --- a/packages/contracts/generated/api/console/datasets/types.gen.ts +++ b/packages/contracts/generated/api/console/datasets/types.gen.ts @@ -354,6 +354,19 @@ export type SegmentCreatePayload = { keywords?: Array | null } +export type SegmentDetailResponse = { + data: SegmentResponse + doc_form: string +} + +export type ConsoleSegmentListResponse = { + data: Array + limit: number + page: number + total: number + total_pages: number +} + export type SegmentUpdatePayload = { answer?: string | null attachment_ids?: Array | null @@ -363,10 +376,30 @@ export type SegmentUpdatePayload = { summary?: string | null } +export type ChildChunkListResponse = { + data: Array + limit: number + page: number + total: number + total_pages: number +} + +export type ChildChunkBatchUpdatePayload = { + chunks: Array +} + +export type ChildChunkBatchUpdateResponse = { + data: Array +} + export type ChildChunkCreatePayload = { content: string } +export type ChildChunkDetailResponse = { + data: ChildChunkResponse +} + export type ChildChunkUpdatePayload = { content: string } @@ -651,6 +684,52 @@ export type DocumentMetadataResponse = { value?: string | null } +export type SegmentResponse = { + answer: string | null + attachments: Array + child_chunks: Array + completed_at: number | null + content: string + created_at: number + created_by: string + disabled_at: number | null + disabled_by: string | null + document_id: string + enabled: boolean + error: string | null + hit_count: number + id: string + index_node_hash: string | null + index_node_id: string | null + indexing_at: number | null + keywords: Array | null + position: number + sign_content: string + status: string + stopped_at: number | null + summary: string | null + tokens: number + updated_at: number + updated_by: string | null + word_count: number +} + +export type ChildChunkResponse = { + content: string + created_at: number + id: string + position: number + segment_id: string + type: string + updated_at: number + word_count: number +} + +export type ChildChunkUpdateArgs = { + content: string + id?: string | null +} + export type DocumentStatusResponse = { cleaning_completed_at: number | null completed_at: number | null @@ -771,6 +850,15 @@ export type MetadataDetail = { value?: unknown } +export type SegmentAttachmentResponse = { + extension: string + id: string + mime_type: string | null + name: string + size: number + source_url: string +} + export type HitTestingChildChunk = { content: string id: string @@ -1922,9 +2010,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentData = { } export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentResponses = { - 200: { - [key: string]: unknown - } + 200: SegmentDetailResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentResponse @@ -1937,7 +2023,9 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionData = { dataset_id: string document_id: string } - query?: never + query?: { + segment_id?: Array + } url: '/datasets/{dataset_id}/documents/{document_id}/segment/{action}' } @@ -1954,7 +2042,9 @@ export type DeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsData = { dataset_id: string document_id: string } - query?: never + query?: { + segment_id?: Array + } url: '/datasets/{dataset_id}/documents/{document_id}/segments' } @@ -1973,14 +2063,19 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsData = { dataset_id: string document_id: string } - query?: never + query?: { + enabled?: string + hit_count_gte?: number + keyword?: string + limit?: number + page?: number + status?: Array + } url: '/datasets/{dataset_id}/documents/{document_id}/segments' } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponses = { - 200: { - [key: string]: unknown - } + 200: ConsoleSegmentListResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse @@ -2052,9 +2147,7 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdData } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponses = { - 200: { - [key: string]: unknown - } + 200: SegmentDetailResponse } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse @@ -2067,21 +2160,23 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildC document_id: string segment_id: string } - query?: never + query?: { + keyword?: string + limit?: number + page?: number + } url: '/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponses = { - 200: { - [key: string]: unknown - } + 200: ChildChunkListResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse = GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponses[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponses] export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksData = { - body?: never + body: ChildChunkBatchUpdatePayload path: { dataset_id: string document_id: string @@ -2092,9 +2187,7 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponses = { - 200: { - [key: string]: unknown - } + 200: ChildChunkBatchUpdateResponse } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse @@ -2112,9 +2205,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChild } export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponses = { - 200: { - [key: string]: unknown - } + 200: ChildChunkDetailResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse @@ -2158,9 +2249,7 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponses = { - 200: { - [key: string]: unknown - } + 200: ChildChunkDetailResponse } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse diff --git a/packages/contracts/generated/api/console/datasets/zod.gen.ts b/packages/contracts/generated/api/console/datasets/zod.gen.ts index 082695d39d..0387e897d2 100644 --- a/packages/contracts/generated/api/console/datasets/zod.gen.ts +++ b/packages/contracts/generated/api/console/datasets/zod.gen.ts @@ -497,6 +497,60 @@ export const zDatasetAndDocumentResponse = z.object({ documents: z.array(zDocumentResponse), }) +/** + * ChildChunkResponse + */ +export const zChildChunkResponse = z.object({ + content: z.string(), + created_at: z.int(), + id: z.string(), + position: z.int(), + segment_id: z.string(), + type: z.string(), + updated_at: z.int(), + word_count: z.int(), +}) + +/** + * ChildChunkListResponse + */ +export const zChildChunkListResponse = z.object({ + data: z.array(zChildChunkResponse), + limit: z.int(), + page: z.int(), + total: z.int(), + total_pages: z.int(), +}) + +/** + * ChildChunkBatchUpdateResponse + */ +export const zChildChunkBatchUpdateResponse = z.object({ + data: z.array(zChildChunkResponse), +}) + +/** + * ChildChunkDetailResponse + */ +export const zChildChunkDetailResponse = z.object({ + data: zChildChunkResponse, +}) + +/** + * ChildChunkUpdateArgs + */ +export const zChildChunkUpdateArgs = z.object({ + content: z.string(), + id: z.string().nullish(), +}) + +/** + * ChildChunkBatchUpdatePayload + */ +export const zChildChunkBatchUpdatePayload = z.object({ + chunks: z.array(zChildChunkUpdateArgs), +}) + /** * DocumentStatusResponse */ @@ -635,6 +689,70 @@ export const zMetadataOperationData = z.object({ operation_data: z.array(zDocumentMetadataOperation), }) +/** + * SegmentAttachmentResponse + */ +export const zSegmentAttachmentResponse = z.object({ + extension: z.string(), + id: z.string(), + mime_type: z.string().nullable(), + name: z.string(), + size: z.int(), + source_url: z.string(), +}) + +/** + * SegmentResponse + */ +export const zSegmentResponse = z.object({ + answer: z.string().nullable(), + attachments: z.array(zSegmentAttachmentResponse), + child_chunks: z.array(zChildChunkResponse), + completed_at: z.int().nullable(), + content: z.string(), + created_at: z.int(), + created_by: z.string(), + disabled_at: z.int().nullable(), + disabled_by: z.string().nullable(), + document_id: z.string(), + enabled: z.boolean(), + error: z.string().nullable(), + hit_count: z.int(), + id: z.string(), + index_node_hash: z.string().nullable(), + index_node_id: z.string().nullable(), + indexing_at: z.int().nullable(), + keywords: z.array(z.string()).nullable(), + position: z.int(), + sign_content: z.string(), + status: z.string(), + stopped_at: z.int().nullable(), + summary: z.string().nullable(), + tokens: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * SegmentDetailResponse + */ +export const zSegmentDetailResponse = z.object({ + data: zSegmentResponse, + doc_form: z.string(), +}) + +/** + * ConsoleSegmentListResponse + */ +export const zConsoleSegmentListResponse = z.object({ + data: z.array(zSegmentResponse), + limit: z.int(), + page: z.int(), + total: z.int(), + total_pages: z.int(), +}) + /** * HitTestingChildChunk */ @@ -1720,12 +1838,9 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentPath = z.object }) /** - * Success + * Segment created successfully */ -export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentResponse = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentResponse = zSegmentDetailResponse export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionPath = z.object({ action: z.string(), @@ -1733,6 +1848,10 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionPath = document_id: z.string(), }) +export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentByActionQuery = z.object({ + segment_id: z.array(z.string()).optional(), +}) + /** * Success */ @@ -1744,6 +1863,10 @@ export const zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath = z.obj document_id: z.string(), }) +export const zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery = z.object({ + segment_id: z.array(z.string()).optional(), +}) + /** * Segments deleted successfully */ @@ -1757,13 +1880,20 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath = z.object document_id: z.string(), }) +export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery = z.object({ + enabled: z.string().optional().default('all'), + hit_count_gte: z.int().optional(), + keyword: z.string().optional(), + limit: z.int().gte(1).lte(100).optional().default(20), + page: z.int().gte(1).optional().default(1), + status: z.array(z.string()).optional(), +}) + /** - * Success + * Segments retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse = z.record( - z.string(), - z.unknown(), -) +export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse + = zConsoleSegmentListResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBatchImportPath = z.object({ dataset_id: z.string(), @@ -1814,12 +1944,10 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdPa }) /** - * Success + * Segment updated successfully */ -export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse = z.record( - z.string(), - z.unknown(), -) +export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse + = zSegmentDetailResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksPath = z.object({ @@ -1828,11 +1956,21 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil segment_id: z.string(), }) +export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksQuery + = z.object({ + keyword: z.string().optional(), + limit: z.int().gte(1).lte(100).optional().default(20), + page: z.int().gte(1).optional().default(1), + }) + /** - * Success + * Child chunks retrieved successfully */ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse - = z.record(z.string(), z.unknown()) + = zChildChunkListResponse + +export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksBody + = zChildChunkBatchUpdatePayload export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksPath = z.object({ @@ -1842,10 +1980,10 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdCh }) /** - * Success + * Child chunks updated successfully */ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse - = z.record(z.string(), z.unknown()) + = zChildChunkBatchUpdateResponse export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksBody = zChildChunkCreatePayload @@ -1858,10 +1996,10 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChi }) /** - * Success + * Child chunk created successfully */ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse - = z.record(z.string(), z.unknown()) + = zChildChunkDetailResponse export const zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdPath = z.object({ @@ -1889,10 +2027,10 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdCh }) /** - * Success + * Child chunk updated successfully */ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse - = z.record(z.string(), z.unknown()) + = zChildChunkDetailResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusPath = z.object({ dataset_id: z.string(), diff --git a/packages/contracts/generated/api/service/orpc.gen.ts b/packages/contracts/generated/api/service/orpc.gen.ts index 2c046d3054..a3a1e6c66f 100644 --- a/packages/contracts/generated/api/service/orpc.gen.ts +++ b/packages/contracts/generated/api/service/orpc.gen.ts @@ -1134,16 +1134,10 @@ export const delete4 = oc /** * Update a specific child chunk - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const patch3 = oc .route({ - deprecated: true, - description: - 'Update a specific child chunk\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Update a specific child chunk', inputStructure: 'detailed', method: 'PATCH', operationId: @@ -1169,16 +1163,10 @@ export const byChildChunkId = { /** * List child chunks for a segment - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get10 = oc .route({ - deprecated: true, - description: - 'List child chunks for a segment\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'List child chunks for a segment', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunks', @@ -1196,16 +1184,10 @@ export const get10 = oc /** * Create a new child chunk for a segment - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post19 = oc .route({ - deprecated: true, - description: - 'Create a new child chunk for a segment\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Create a new child chunk for a segment', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunks', @@ -1246,16 +1228,10 @@ export const delete5 = oc /** * Get a specific segment by ID - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get11 = oc .route({ - deprecated: true, - description: - 'Get a specific segment by ID\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get a specific segment by ID', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentId', @@ -1267,16 +1243,10 @@ export const get11 = oc /** * Update a specific segment - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post20 = oc .route({ - deprecated: true, - description: - 'Update a specific segment\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Update a specific segment', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentId', @@ -1300,16 +1270,10 @@ export const bySegmentId = { /** * List segments in a document - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get12 = oc .route({ - deprecated: true, - description: - 'List segments in a document\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'List segments in a document', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdSegments', @@ -1326,16 +1290,10 @@ export const get12 = oc /** * Create segments in a document - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post21 = oc .route({ - deprecated: true, - description: - 'Create segments in a document\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Create segments in a document', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdSegments', diff --git a/packages/contracts/generated/api/service/types.gen.ts b/packages/contracts/generated/api/service/types.gen.ts index 4e187e7202..45c4a09cdd 100644 --- a/packages/contracts/generated/api/service/types.gen.ts +++ b/packages/contracts/generated/api/service/types.gen.ts @@ -64,12 +64,35 @@ export type ChildChunkCreatePayload = { content: string } +export type ChildChunkDetailResponse = { + data: ChildChunkResponse +} + export type ChildChunkListQuery = { keyword?: string | null limit?: number page?: number } +export type ChildChunkListResponse = { + data: Array + limit: number + page: number + total: number + total_pages: number +} + +export type ChildChunkResponse = { + content: string + created_at: number + id: string + position: number + segment_id: string + type: string + updated_at: number + word_count: number +} + export type ChildChunkUpdatePayload = { content: string } @@ -668,17 +691,82 @@ export type Rule = { subchunk_segmentation?: Segmentation } +export type SegmentAttachmentResponse = { + extension: string + id: string + mime_type: string | null + name: string + size: number + source_url: string +} + +export type SegmentCreateItemPayload = { + answer?: string | null + attachment_ids?: Array | null + content: string + keywords?: Array | null +} + +export type SegmentCreateListResponse = { + data: Array + doc_form: string +} + export type SegmentCreatePayload = { - segments?: Array<{ - [key: string]: unknown - }> | null + segments: Array +} + +export type SegmentDetailResponse = { + data: SegmentResponse + doc_form: string } export type SegmentListQuery = { keyword?: string | null + limit?: number + page?: number status?: Array } +export type SegmentListResponse = { + data: Array + doc_form: string + has_more: boolean + limit: number + page: number + total: number +} + +export type SegmentResponse = { + answer: string | null + attachments: Array + child_chunks: Array + completed_at: number | null + content: string + created_at: number + created_by: string + disabled_at: number | null + disabled_by: string | null + document_id: string + enabled: boolean + error: string | null + hit_count: number + id: string + index_node_hash: string | null + index_node_id: string | null + indexing_at: number | null + keywords: Array | null + position: number + sign_content: string + status: string + stopped_at: number | null + summary: string | null + tokens: number + updated_at: number + updated_by: string | null + word_count: number +} + export type SegmentUpdateArgs = { answer?: string | null attachment_ids?: Array | null @@ -2145,7 +2233,9 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsData = { document_id: string } query?: { - keyword?: string | null + keyword?: string + limit?: number + page?: number status?: Array } url: '/datasets/{dataset_id}/documents/{document_id}/segments' @@ -2164,9 +2254,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsError = GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsErrors[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsErrors] export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponses = { - 200: { - [key: string]: unknown - } + 200: SegmentListResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse @@ -2198,9 +2286,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsError = PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsErrors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsErrors] export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponses = { - 200: { - [key: string]: unknown - } + 200: SegmentCreateListResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse @@ -2262,9 +2348,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdError = GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdErrors[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdErrors] export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponses = { - 200: { - [key: string]: unknown - } + 200: SegmentDetailResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse @@ -2294,9 +2378,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdError = PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdErrors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdErrors] export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponses = { - 200: { - [key: string]: unknown - } + 200: SegmentDetailResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse @@ -2310,7 +2392,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildC segment_id: string } query?: { - keyword?: string | null + keyword?: string limit?: number page?: number } @@ -2330,9 +2412,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildC = GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksErrors[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksErrors] export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponses = { - 200: { - [key: string]: unknown - } + 200: ChildChunkListResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse @@ -2362,9 +2442,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChild = PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksErrors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksErrors] export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponses = { - 200: { - [key: string]: unknown - } + 200: ChildChunkDetailResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse @@ -2434,9 +2512,7 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponses = { - 200: { - [key: string]: unknown - } + 200: ChildChunkDetailResponse } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse diff --git a/packages/contracts/generated/api/service/zod.gen.ts b/packages/contracts/generated/api/service/zod.gen.ts index ae7c5cbf6c..d27b64e54f 100644 --- a/packages/contracts/generated/api/service/zod.gen.ts +++ b/packages/contracts/generated/api/service/zod.gen.ts @@ -91,6 +91,38 @@ export const zChildChunkListQuery = z.object({ page: z.int().gte(1).optional().default(1), }) +/** + * ChildChunkResponse + */ +export const zChildChunkResponse = z.object({ + content: z.string(), + created_at: z.int(), + id: z.string(), + position: z.int(), + segment_id: z.string(), + type: z.string(), + updated_at: z.int(), + word_count: z.int(), +}) + +/** + * ChildChunkDetailResponse + */ +export const zChildChunkDetailResponse = z.object({ + data: zChildChunkResponse, +}) + +/** + * ChildChunkListResponse + */ +export const zChildChunkListResponse = z.object({ + data: z.array(zChildChunkResponse), + limit: z.int(), + page: z.int(), + total: z.int(), + total_pages: z.int(), +}) + /** * ChildChunkUpdatePayload */ @@ -799,11 +831,33 @@ export const zRetrievalMethod = z.enum([ 'semantic_search', ]) +/** + * SegmentAttachmentResponse + */ +export const zSegmentAttachmentResponse = z.object({ + extension: z.string(), + id: z.string(), + mime_type: z.string().nullable(), + name: z.string(), + size: z.int(), + source_url: z.string(), +}) + +/** + * SegmentCreateItemPayload + */ +export const zSegmentCreateItemPayload = z.object({ + answer: z.string().nullish(), + attachment_ids: z.array(z.string()).nullish(), + content: z.string().min(1), + keywords: z.array(z.string()).nullish(), +}) + /** * SegmentCreatePayload */ export const zSegmentCreatePayload = z.object({ - segments: z.array(z.record(z.string(), z.unknown())).nullish(), + segments: z.array(zSegmentCreateItemPayload).min(1), }) /** @@ -811,9 +865,72 @@ export const zSegmentCreatePayload = z.object({ */ export const zSegmentListQuery = z.object({ keyword: z.string().nullish(), + limit: z.int().gte(1).optional().default(20), + page: z.int().gte(1).optional().default(1), status: z.array(z.string()).optional(), }) +/** + * SegmentResponse + */ +export const zSegmentResponse = z.object({ + answer: z.string().nullable(), + attachments: z.array(zSegmentAttachmentResponse), + child_chunks: z.array(zChildChunkResponse), + completed_at: z.int().nullable(), + content: z.string(), + created_at: z.int(), + created_by: z.string(), + disabled_at: z.int().nullable(), + disabled_by: z.string().nullable(), + document_id: z.string(), + enabled: z.boolean(), + error: z.string().nullable(), + hit_count: z.int(), + id: z.string(), + index_node_hash: z.string().nullable(), + index_node_id: z.string().nullable(), + indexing_at: z.int().nullable(), + keywords: z.array(z.string()).nullable(), + position: z.int(), + sign_content: z.string(), + status: z.string(), + stopped_at: z.int().nullable(), + summary: z.string().nullable(), + tokens: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * SegmentCreateListResponse + */ +export const zSegmentCreateListResponse = z.object({ + data: z.array(zSegmentResponse), + doc_form: z.string(), +}) + +/** + * SegmentDetailResponse + */ +export const zSegmentDetailResponse = z.object({ + data: zSegmentResponse, + doc_form: z.string(), +}) + +/** + * SegmentListResponse + */ +export const zSegmentListResponse = z.object({ + data: z.array(zSegmentResponse), + doc_form: z.string(), + has_more: z.boolean(), + limit: z.int(), + page: z.int(), + total: z.int(), +}) + /** * SegmentUpdateArgs */ @@ -1620,17 +1737,16 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath = z.object }) export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery = z.object({ - keyword: z.string().nullish(), + keyword: z.string().optional(), + limit: z.int().gte(1).optional().default(20), + page: z.int().gte(1).optional().default(1), status: z.array(z.string()).optional(), }) /** * Segments retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse = z.record( - z.string(), - z.unknown(), -) +export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse = zSegmentListResponse export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBody = zSegmentCreatePayload @@ -1642,10 +1758,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath = z.objec /** * Segments created successfully */ -export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse + = zSegmentCreateListResponse export const zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdPath = z.object({ dataset_id: z.string(), @@ -1670,10 +1784,8 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdPath /** * Segment retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse = z.record( - z.string(), - z.unknown(), -) +export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse + = zSegmentDetailResponse export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdBody = zSegmentUpdatePayload @@ -1687,10 +1799,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdPat /** * Segment updated successfully */ -export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse + = zSegmentDetailResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksPath = z.object({ @@ -1701,7 +1811,7 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksQuery = z.object({ - keyword: z.string().nullish(), + keyword: z.string().optional(), limit: z.int().gte(1).optional().default(20), page: z.int().gte(1).optional().default(1), }) @@ -1710,7 +1820,7 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil * Child chunks retrieved successfully */ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse - = z.record(z.string(), z.unknown()) + = zChildChunkListResponse export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksBody = zChildChunkCreatePayload @@ -1726,7 +1836,7 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChi * Child chunk created successfully */ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse - = z.record(z.string(), z.unknown()) + = zChildChunkDetailResponse export const zDeleteDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdPath = z.object({ @@ -1757,7 +1867,7 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdCh * Child chunk updated successfully */ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse - = z.record(z.string(), z.unknown()) + = zChildChunkDetailResponse export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath = z.object({ dataset_id: z.string(),