From 51a8f79d67de30f77f34d22105039aeb5526811f Mon Sep 17 00:00:00 2001 From: Asuka Minato Date: Tue, 12 May 2026 16:02:17 +0900 Subject: [PATCH] chore: DocumentSegment to Typebase (#35635) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../rag/datasource/keyword/jieba/jieba.py | 2 + api/core/rag/datasource/retrieval_service.py | 6 +- api/core/rag/docstore/dataset_docstore.py | 3 +- .../rag/index_processor/index_processor.py | 2 +- api/models/dataset.py | 58 ++++++++++--------- api/services/vector_service.py | 2 + api/tasks/batch_clean_document_task.py | 2 +- .../batch_create_segment_to_index_task.py | 3 +- api/tasks/clean_document_task.py | 2 +- api/tasks/clean_notion_document_task.py | 2 +- api/tasks/disable_segment_from_index_task.py | 4 +- api/tasks/disable_segments_from_index_task.py | 2 +- api/tasks/document_indexing_sync_task.py | 2 +- api/tasks/document_indexing_update_task.py | 2 +- api/tasks/duplicate_document_indexing_task.py | 2 +- api/tasks/remove_document_from_index_task.py | 2 +- api/tasks/retry_document_indexing_task.py | 2 +- .../sync_website_document_indexing_task.py | 2 +- .../test_dataset_service_get_segments.py | 30 +++++----- .../tasks/test_add_document_to_index_task.py | 6 -- .../tasks/test_batch_clean_document_task.py | 6 +- .../tasks/test_clean_dataset_task.py | 13 ----- .../tasks/test_clean_notion_document_task.py | 28 +++------ .../test_deal_dataset_vector_index_task.py | 25 +------- .../test_delete_segment_from_index_task.py | 39 ++++++------- .../test_disable_segment_from_index_task.py | 2 +- .../test_disable_segments_from_index_task.py | 49 ++++++++-------- .../test_duplicate_document_indexing_task.py | 1 - .../test_enable_segments_to_index_task.py | 1 - .../services/test_dataset_service_segment.py | 6 +- 30 files changed, 132 insertions(+), 174 deletions(-) diff --git a/api/core/rag/datasource/keyword/jieba/jieba.py b/api/core/rag/datasource/keyword/jieba/jieba.py index 392af351b6..b3f174bf78 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba.py +++ b/api/core/rag/datasource/keyword/jieba/jieba.py @@ -245,6 +245,7 @@ class Jieba(BaseKeyword): segment = pre_segment_data["segment"] if pre_segment_data["keywords"]: segment.keywords = pre_segment_data["keywords"] + assert segment.index_node_id keyword_table = self._add_text_to_keyword_table( keyword_table or {}, segment.index_node_id, pre_segment_data["keywords"] ) @@ -253,6 +254,7 @@ class Jieba(BaseKeyword): keywords = keyword_table_handler.extract_keywords(segment.content, keyword_number) segment.keywords = list(keywords) + assert segment.index_node_id keyword_table = self._add_text_to_keyword_table( keyword_table or {}, segment.index_node_id, list(keywords) ) diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 7769878e70..8cc2be8feb 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -1,5 +1,6 @@ import concurrent.futures import logging +from collections.abc import Sequence from concurrent.futures import ThreadPoolExecutor from typing import Any, NotRequired, TypedDict @@ -526,7 +527,7 @@ class RetrievalService: index_node_ids = [i for i in index_node_ids if i] segment_ids: list[str] = [] - index_node_segments: list[DocumentSegment] = [] + index_node_segments: Sequence[DocumentSegment] = [] segments: list[DocumentSegment] = [] attachment_map: dict[str, list[AttachmentInfoDict]] = {} child_chunk_map: dict[str, list[ChildChunk]] = {} @@ -568,8 
+569,9 @@ class RetrievalService: DocumentSegment.status == "completed", DocumentSegment.index_node_id.in_(index_node_ids), ) - index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore + index_node_segments = session.execute(document_segment_stmt).scalars().all() for index_node_segment in index_node_segments: + assert index_node_segment.index_node_id doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id] if segment_ids: diff --git a/api/core/rag/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py index 78305a6ac0..c7d52d74cb 100644 --- a/api/core/rag/docstore/dataset_docstore.py +++ b/api/core/rag/docstore/dataset_docstore.py @@ -50,6 +50,7 @@ class DatasetDocumentStore: output = {} for document_segment in document_segments: + assert document_segment.index_node_id doc_id = document_segment.index_node_id output[doc_id] = Document( page_content=document_segment.content, @@ -103,7 +104,7 @@ class DatasetDocumentStore: if not segment_document: max_position += 1 - + assert self._document_id segment_document = DocumentSegment( tenant_id=self._dataset.tenant_id, dataset_id=self._dataset.id, diff --git a/api/core/rag/index_processor/index_processor.py b/api/core/rag/index_processor/index_processor.py index aded5315bd..757134e734 100644 --- a/api/core/rag/index_processor/index_processor.py +++ b/api/core/rag/index_processor/index_processor.py @@ -84,7 +84,7 @@ class IndexProcessor: select(DocumentSegment).where(DocumentSegment.document_id == original_document_id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] indexing_start_at = time.perf_counter() # delete from vector index diff --git a/api/models/dataset.py b/api/models/dataset.py index 65ea39969c..8137ed4ff3 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -8,7 +8,6 @@ import os import pickle import re import time -from collections.abc import Sequence from datetime import datetime from json import JSONDecodeError from typing import Any, ClassVar, TypedDict, cast @@ -831,7 +830,7 @@ class Document(Base): ) -class DocumentSegment(Base): +class DocumentSegment(TypeBase): __tablename__ = "document_segments" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="document_segment_pkey"), @@ -844,35 +843,40 @@ class DocumentSegment(Base): ) # initial fields - id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) - tenant_id = mapped_column(StringUUID, nullable=False) - dataset_id = mapped_column(StringUUID, nullable=False) - document_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, nullable=False, default_factory=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + document_id: Mapped[str] = mapped_column(StringUUID, nullable=False) position: Mapped[int] - content = mapped_column(LongText, nullable=False) - answer = mapped_column(LongText, nullable=True) + content: Mapped[str] = mapped_column(LongText, nullable=False) word_count: Mapped[int] tokens: Mapped[int] - # indexing fields - keywords = mapped_column(sa.JSON, nullable=True) - index_node_id = mapped_column(String(255), nullable=True) - index_node_hash = mapped_column(String(255), nullable=True) - + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) # basic fields + # 
indexing fields + index_node_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None) + index_node_hash: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None) + enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"), default=True) + answer: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) + keywords: Mapped[Any] = mapped_column(sa.JSON, nullable=True, default=None) + disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + disabled_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) + status: Mapped[SegmentStatus] = mapped_column( + EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'"), default=SegmentStatus.WAITING + ) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + error: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) + stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) hit_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) - enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) - disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - disabled_by = mapped_column(StringUUID, nullable=True) - status: Mapped[str] = mapped_column(EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'")) - created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_by = mapped_column(StringUUID, nullable=True) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - error = mapped_column(LongText, nullable=True) - stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) @property def dataset(self): @@ -899,7 +903,7 @@ class DocumentSegment(Base): ) @property - def child_chunks(self) -> Sequence[Any]: + def child_chunks(self): if not self.document: return [] process_rule = self.document.dataset_process_rule @@ -914,7 +918,7 @@ class DocumentSegment(Base): return child_chunks or [] return [] - def get_child_chunks(self) -> Sequence[Any]: + def get_child_chunks(self): if not self.document: return [] process_rule = self.document.dataset_process_rule diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 7e689af35d..49c3b85831 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -111,6 +111,7 @@ class VectorService: "dataset_id": segment.dataset_id, }, ) + assert segment.index_node_id if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY: # update vector index vector = Vector(dataset=dataset) @@ -138,6 +139,7 @@ class VectorService: 
regenerate: bool = False, ): index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor() + assert segment.index_node_id if regenerate: # delete child chunks index_processor.clean(dataset, [segment.index_node_id], with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index 56c371fcc1..5794726716 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -50,7 +50,7 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] segment_ids = [segment.id for segment in segments] # Collect image file IDs from segment content diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index beb23d8354..9f19b03544 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -19,6 +19,7 @@ from graphon.model_runtime.entities.model_entities import ModelType from libs import helper from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment +from models.enums import SegmentStatus from models.model import UploadFile from services.vector_service import VectorService @@ -156,7 +157,7 @@ def batch_create_segment_to_index_task( tokens=tokens, created_by=user_id, indexing_at=naive_utc_now(), - status="completed", + status=SegmentStatus.COMPLETED, completed_at=naive_utc_now(), ) if document_config["doc_form"] == IndexStructureType.QA_INDEX: diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py index c8d0e31c06..869e2b3028 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -53,7 +53,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i binding_ids = [binding.id for binding, _ in attachments_with_bindings] total_attachment_files.extend([attachment_file.key for _, attachment_file in attachments_with_bindings]) - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] segment_contents = [segment.content for segment in segments] except Exception: logger.exception("Cleaned document when document deleted failed") diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index 017d60efac..782d7d0226 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -38,7 +38,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str): for document_id in document_ids: segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - total_index_node_ids.extend([segment.index_node_id for segment in segments]) + total_index_node_ids.extend([segment.index_node_id for segment in segments if segment.index_node_id]) # Wrap vector / keyword index cleanup in try/except so that a transient # failure here (e.g. 
billing API hiccup propagated via FeatureService when diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py index dd1a40844b..d00e143093 100644 --- a/api/tasks/disable_segment_from_index_task.py +++ b/api/tasks/disable_segment_from_index_task.py @@ -9,6 +9,7 @@ from core.db.session_factory import session_factory from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_redis import redis_client from models.dataset import DocumentSegment +from models.enums import SegmentStatus logger = logging.getLogger(__name__) @@ -30,7 +31,7 @@ def disable_segment_from_index_task(segment_id: str): logger.info(click.style(f"Segment not found: {segment_id}", fg="red")) return - if segment.status != "completed": + if segment.status != SegmentStatus.COMPLETED: logger.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red")) return @@ -59,6 +60,7 @@ def disable_segment_from_index_task(segment_id: str): index_type = dataset_document.doc_form index_processor = IndexProcessorFactory(index_type).init_index_processor() + assert segment.index_node_id index_processor.clean(dataset, [segment.index_node_id]) # Disable summary index for this segment diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index 86e96ea3f0..cd91ddd074 100644 --- a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -55,7 +55,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen return try: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] if dataset.is_multimodal: segment_ids = [segment.id for segment in segments] segment_attachment_bindings = session.scalars( diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index 90c80be3a1..842e7dcdb2 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -69,7 +69,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): index_type = document.doc_form segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # Get credentials from datasource provider datasource_provider_service = DatasourceProviderService() diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py index 15f0e0162b..39564bbede 100644 --- a/api/tasks/document_indexing_update_task.py +++ b/api/tasks/document_indexing_update_task.py @@ -45,7 +45,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str): index_type = document.doc_form segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] clean_success = False try: diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index 6bc58bdf9c..71f367c5e7 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -137,7 +137,7 @@ def 
_duplicate_document_indexing_task(dataset_id: str, document_ids: Sequence[st select(DocumentSegment).where(DocumentSegment.document_id == document.id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index 74e8a012cf..2314d32232 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -61,7 +61,7 @@ def remove_document_from_index_task(document_id: str): except Exception as e: logger.warning("Failed to disable summaries for document %s: %s", document.id, str(e)) - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] if index_node_ids: try: index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py index 7cc28d5226..0df5896ce3 100644 --- a/api/tasks/retry_document_indexing_task.py +++ b/api/tasks/retry_document_indexing_task.py @@ -85,7 +85,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_ select(DocumentSegment).where(DocumentSegment.document_id == document_id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/sync_website_document_indexing_task.py b/api/tasks/sync_website_document_indexing_task.py index ab21f63f7e..06eb460311 100644 --- a/api/tasks/sync_website_document_indexing_task.py +++ b/api/tasks/sync_website_document_indexing_task.py @@ -70,7 +70,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py b/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py index 87239b2cb3..bd8f5371b8 100644 --- a/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py +++ b/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py @@ -13,9 +13,9 @@ from uuid import uuid4 from sqlalchemy.orm import Session from core.rag.index_processor.constant.index_type import IndexTechniqueType -from models import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole, TenantStatus from models.dataset import Dataset, DatasetPermissionEnum, Document, DocumentSegment -from models.enums import DataSourceType, DocumentCreatedFrom +from models.enums import DataSourceType, 
DocumentCreatedFrom, SegmentStatus from services.dataset_service import SegmentService @@ -35,13 +35,13 @@ class SegmentServiceTestDataFactory: email=f"{uuid4()}@example.com", name=f"user-{uuid4()}", interface_language="en-US", - status="active", + status=AccountStatus.ACTIVE, ) db_session_with_containers.add(account) db_session_with_containers.commit() if tenant is None: - tenant = Tenant(name=f"tenant-{uuid4()}", status="normal") + tenant = Tenant(name=f"tenant-{uuid4()}", status=TenantStatus.NORMAL) db_session_with_containers.add(tenant) db_session_with_containers.commit() @@ -103,7 +103,7 @@ class SegmentServiceTestDataFactory: created_by: str, position: int = 1, content: str = "Test content", - status: str = "completed", + status: SegmentStatus = SegmentStatus.COMPLETED, word_count: int = 10, tokens: int = 15, ) -> DocumentSegment: @@ -203,7 +203,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -212,7 +212,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -221,7 +221,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=3, - status="waiting", + status=SegmentStatus.WAITING, ) # Act @@ -257,7 +257,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -266,7 +266,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, ) # Act @@ -415,7 +415,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, content="This is important information", ) SegmentServiceTestDataFactory.create_segment( @@ -425,7 +425,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, content="This is also important", ) SegmentServiceTestDataFactory.create_segment( @@ -435,7 +435,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=3, - status="completed", + status=SegmentStatus.COMPLETED, content="This is irrelevant", ) @@ -477,7 +477,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -486,7 +486,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="waiting", + status=SegmentStatus.WAITING, ) # Act diff --git a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py index fcc15aad42..94fd7602f5 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py @@ -128,7 +128,6 @@ class TestAddDocumentToIndexTask: for i in range(3): 
segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -451,7 +450,6 @@ class TestAddDocumentToIndexTask: segments = [] for i in range(3): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -630,7 +628,6 @@ class TestAddDocumentToIndexTask: # Segment 1: Should be processed (enabled=False, status=SegmentStatus.COMPLETED) segment1 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -650,7 +647,6 @@ class TestAddDocumentToIndexTask: # Segment 2: Should be processed (enabled=True, status=SegmentStatus.COMPLETED) # Note: Implementation doesn't filter by enabled status, only by status=SegmentStatus.COMPLETED segment2 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -669,7 +665,6 @@ class TestAddDocumentToIndexTask: # Segment 3: Should NOT be processed (enabled=False, status="processing") segment3 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -688,7 +683,6 @@ class TestAddDocumentToIndexTask: # Segment 4: Should be processed (enabled=False, status=SegmentStatus.COMPLETED) segment4 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py index e29ca7ebab..436c8f11b0 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py @@ -177,7 +177,6 @@ class TestBatchCleanDocumentTask: fake = Faker() segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, document_id=document.id, @@ -290,10 +289,9 @@ class TestBatchCleanDocumentTask: account = self._create_test_account(db_session_with_containers) dataset = self._create_test_dataset(db_session_with_containers, account) document = self._create_test_document(db_session_with_containers, dataset, account) - + assert account.current_tenant # Create segment with simple content (no image references) segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, document_id=document.id, @@ -692,9 +690,9 @@ class TestBatchCleanDocumentTask: # Create multiple segments for the document segments = [] + assert account.current_tenant for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py index 32bc2fc0bd..a31552a09e 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py @@ -220,7 +220,6 @@ class TestCleanDatasetTask: DocumentSegment: Created document segment instance """ segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -232,8 +231,6 @@ class TestCleanDatasetTask: 
status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash", - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) @@ -614,7 +611,6 @@ class TestCleanDatasetTask: """ segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -626,8 +622,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash", - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) @@ -729,8 +723,6 @@ class TestCleanDatasetTask: type=DatasetMetadataType.STRING, created_by=account.id, ) - metadata.id = str(uuid.uuid4()) - metadata.created_at = datetime.now() metadata_items.append(metadata) # Create binding for each metadata item @@ -741,8 +733,6 @@ class TestCleanDatasetTask: document_id=documents[i % len(documents)].id, created_by=account.id, ) - binding.id = str(uuid.uuid4()) - binding.created_at = datetime.now() bindings.append(binding) db_session_with_containers.add_all(metadata_items) @@ -946,7 +936,6 @@ class TestCleanDatasetTask: long_content = "Very long content " * 100 # Long content within reasonable limits segment_content = f"Segment with special chars: {special_content}\n{long_content}" segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -958,8 +947,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash_" + "x" * 50, # Long hash within limits - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py index 1c8d5969e0..ef65b90508 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py @@ -132,11 +132,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() document_ids.append(document.id) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -297,10 +296,9 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create test segment segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -379,12 +377,11 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create segments without index_node_ids segments = [] for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -468,11 +465,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -569,10 +565,9 @@ 
class TestCleanNotionDocumentTask: segment_statuses = [SegmentStatus.WAITING, SegmentStatus.INDEXING, SegmentStatus.COMPLETED, SegmentStatus.ERROR] segments = [] index_node_ids = [] - + assert tenant for i, status in enumerate(segment_statuses): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -665,10 +660,9 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create segment segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -765,12 +759,11 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create multiple segments for each document num_segments_per_doc = 5 for j in range(num_segments_per_doc): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -875,7 +868,6 @@ class TestCleanNotionDocumentTask: # Create segments for each document for j in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -984,11 +976,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -1093,10 +1084,9 @@ class TestCleanNotionDocumentTask: # Create segments with metadata segments = [] index_node_ids = [] - + assert tenant for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py index e4cbb9e589..aba2458d55 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py @@ -90,7 +90,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -150,7 +149,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -202,7 +200,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -253,7 +250,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset with parent-child index dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -305,7 +301,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -371,7 +366,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without documents dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), 
description=fake.text(max_nb_chars=100), @@ -403,7 +397,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -461,7 +454,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without documents dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -494,7 +486,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -546,7 +537,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -592,7 +582,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset with custom index type dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -624,7 +613,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -670,7 +658,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without doc_form (should use default) dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -702,7 +689,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -748,7 +734,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -806,7 +791,6 @@ class TestDealDatasetVectorIndexTask: for i, document in enumerate(documents): for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -832,6 +816,7 @@ class TestDealDatasetVectorIndexTask: updated_document = db_session_with_containers.scalar( select(Document).where(Document.id == document.id).limit(1) ) + assert updated_document assert updated_document.indexing_status == IndexingStatus.COMPLETED # Verify index processor load was called multiple times @@ -853,7 +838,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -905,7 +889,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -952,7 +935,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1024,7 +1006,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for enabled document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=enabled_document.id, @@ -1075,7 +1056,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1147,7 +1127,6 @@ class 
TestDealDatasetVectorIndexTask: # Create segments for active document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=active_document.id, @@ -1198,7 +1177,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1270,7 +1248,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for completed document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=completed_document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py index f4a71040c1..a7edf4f77a 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py @@ -209,26 +209,25 @@ class TestDeleteSegmentFromIndexTask: segments = [] for i in range(count): - segment = DocumentSegment() - segment.id = fake.uuid4() - segment.tenant_id = document.tenant_id - segment.dataset_id = document.dataset_id - segment.document_id = document.id - segment.position = i + 1 - segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}" - segment.answer = f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}" - segment.word_count = fake.random_int(min=10, max=100) - segment.tokens = fake.random_int(min=5, max=50) - segment.keywords = [fake.word() for _ in range(3)] - segment.index_node_id = f"node_{fake.uuid4()}" - segment.index_node_hash = fake.sha256() - segment.hit_count = 0 - segment.enabled = True - segment.status = SegmentStatus.COMPLETED - segment.created_by = account.id - segment.created_at = fake.date_time_this_year() - segment.updated_by = account.id - segment.updated_at = segment.created_at + created_at = fake.date_time_this_year() + segment = DocumentSegment( + tenant_id=document.tenant_id, + dataset_id=document.dataset_id, + document_id=document.id, + position=i + 1, + content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}", + answer=f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}", + word_count=fake.random_int(min=10, max=100), + tokens=fake.random_int(min=5, max=50), + keywords=[fake.word() for _ in range(3)], + index_node_id=f"node_{fake.uuid4()}", + index_node_hash=fake.sha256(), + hit_count=0, + enabled=True, + status=SegmentStatus.COMPLETED, + created_by=account.id, + updated_by=account.id, + ) db_session_with_containers.add(segment) segments.append(segment) diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py index 5bdf7d1389..34e2ce4e80 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py @@ -159,7 +159,7 @@ class TestDisableSegmentFromIndexTask: dataset: Dataset, tenant: Tenant, account: Account, - status: str = "completed", + status: SegmentStatus = SegmentStatus.COMPLETED, enabled: bool = True, ) -> DocumentSegment: """ diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py 
b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py index 6a95bfc425..cb5fb5483c 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py @@ -185,30 +185,31 @@ class TestDisableSegmentsFromIndexTask: segments = [] for i in range(count): - segment = DocumentSegment() - segment.id = fake.uuid4() - segment.tenant_id = dataset.tenant_id - segment.dataset_id = dataset.id - segment.document_id = document.id - segment.position = i + 1 - segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}" - segment.answer = f"Test answer {i + 1}" if i % 2 == 0 else None - segment.word_count = fake.random_int(min=10, max=100) - segment.tokens = fake.random_int(min=5, max=50) - segment.keywords = [fake.word() for _ in range(3)] - segment.index_node_id = f"node_{segment.id}" - segment.index_node_hash = fake.sha256() - segment.hit_count = 0 - segment.enabled = True - segment.disabled_at = None - segment.disabled_by = None - segment.status = SegmentStatus.COMPLETED - segment.created_by = account.id - segment.updated_by = account.id - segment.indexing_at = fake.date_time_this_year() - segment.completed_at = fake.date_time_this_year() - segment.error = None - segment.stopped_at = None + id = fake.uuid4() + segment = DocumentSegment( + tenant_id=dataset.tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=i + 1, + content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}", + answer=f"Test answer {i + 1}" if i % 2 == 0 else None, + word_count=fake.random_int(min=10, max=100), + tokens=fake.random_int(min=5, max=50), + keywords=[fake.word() for _ in range(3)], + index_node_id=f"node_{id}", + index_node_hash=fake.sha256(), + hit_count=0, + enabled=True, + disabled_at=None, + disabled_by=None, + status=SegmentStatus.COMPLETED, + created_by=account.id, + updated_by=account.id, + indexing_at=fake.date_time_this_year(), + completed_at=fake.date_time_this_year(), + error=None, + stopped_at=None, + ) segments.append(segment) diff --git a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py index 12440f3e6b..e1c7e3e09a 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py @@ -175,7 +175,6 @@ class TestDuplicateDocumentIndexingTasks: for document in documents: for i in range(segments_per_doc): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=dataset.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py index e2f35067e3..6d3b90d41c 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py @@ -139,7 +139,6 @@ class TestEnableSegmentsToIndexTask: for i in range(count): text = fake.text(max_nb_chars=200) segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git 
a/api/tests/unit_tests/services/test_dataset_service_segment.py b/api/tests/unit_tests/services/test_dataset_service_segment.py index 6330e53765..1f8586e32f 100644 --- a/api/tests/unit_tests/services/test_dataset_service_segment.py +++ b/api/tests/unit_tests/services/test_dataset_service_segment.py @@ -282,7 +282,6 @@ class TestSegmentServiceQueries: def test_get_segment_by_id_returns_only_document_segment_instances(self): segment = DocumentSegment( - id="segment-1", tenant_id="tenant-1", dataset_id="dataset-1", document_id="doc-1", @@ -292,7 +291,7 @@ class TestSegmentServiceQueries: tokens=2, created_by="user-1", ) - + segment.id = "segment-1" with patch("services.dataset_service.db") as mock_db: mock_db.session.scalar.return_value = segment result = SegmentService.get_segment_by_id("segment-1", "tenant-1") @@ -307,7 +306,6 @@ class TestSegmentServiceQueries: def test_get_segments_by_document_and_dataset_returns_scalars_result(self): segment = DocumentSegment( - id="segment-1", tenant_id="tenant-1", dataset_id="dataset-1", document_id="doc-1", @@ -318,6 +316,7 @@ class TestSegmentServiceQueries: created_by="user-1", ) + segment.id = "segment-1" with patch("services.dataset_service.db") as mock_db: mock_db.session.scalars.return_value.all.return_value = [segment] @@ -461,6 +460,7 @@ class TestSegmentServiceMutations: vector_service.create_segments_vector.side_effect = RuntimeError("vector failed") result = SegmentService.multi_create_segment(segments, document, dataset) + assert result assert len(result) == 2 assert [segment.position for segment in result] == [2, 3]
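
Note on the pattern above: the patch's `TypeBase` definition is not included in the diff, so the following is a minimal, hypothetical sketch of the migration it performs, assuming `TypeBase` combines SQLAlchemy 2.0's MappedAsDataclass with DeclarativeBase. It illustrates the two knock-on effects visible throughout the diff: constructor fields marked `init=False` (why every `id=...`/`created_at=...` keyword argument is deleted from the test factories) and nullable columns typed `str | None` (why call sites now either filter out `None` or `assert segment.index_node_id` before use). Names like `Segment` and `node_ids` are illustrative, not from the codebase.

    # Hypothetical sketch of the Base -> TypeBase migration; the real
    # TypeBase may differ, this assumes MappedAsDataclass + DeclarativeBase.
    import uuid

    from sqlalchemy import String, create_engine
    from sqlalchemy.orm import (
        DeclarativeBase,
        Mapped,
        MappedAsDataclass,
        Session,
        mapped_column,
    )


    class TypeBase(MappedAsDataclass, DeclarativeBase):
        """Dataclass-backed declarative base (assumed shape)."""


    class Segment(TypeBase):
        __tablename__ = "segments"

        # init=False: generated, never accepted by __init__ -- the reason the
        # patch removes explicit `id=str(uuid.uuid4())` from test fixtures.
        id: Mapped[str] = mapped_column(
            String(36), primary_key=True,
            default_factory=lambda: str(uuid.uuid4()), init=False,
        )
        content: Mapped[str] = mapped_column(String(255))
        # Nullable column: dataclass rules require a default once preceding
        # fields have them, hence `default=None` on every optional column.
        index_node_id: Mapped[str | None] = mapped_column(String(255), default=None)


    def node_ids(segments: list[Segment]) -> list[str]:
        # Same narrowing the patch applies in the task modules: drop Nones
        # instead of passing `str | None` where `str` is expected.
        return [s.index_node_id for s in segments if s.index_node_id]


    engine = create_engine("sqlite://")
    TypeBase.metadata.create_all(engine)

    with Session(engine) as session:
        seg = Segment(content="hello")  # no id= accepted; uuid auto-generated
        session.add(seg)
        session.commit()
        seg.index_node_id = "node-1"
        # The other narrowing style used in the patch (jieba.py, vector_service.py):
        assert seg.index_node_id
        print(node_ids([seg]), seg.id)

Under these assumptions, constructing a model with a now-`init=False` field raises `TypeError` at the call site rather than silently overriding a generated value, which is what the test-suite churn in this patch reflects.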