# Patch summary (text before the first "diff --git" header is ignored by patch tools).
#
# api/models/dataset.py
#   Adds a `built_in_field_enabled` Boolean column to `Dataset`; it is declared
#   nullable=False with server_default=db.text("false").
#
# api/services/dataset_service.py (class DocumentService)
#   * Adds `get_document_by_ids(document_ids)`: queries Document rows whose id is
#     in `document_ids` and that are enabled, have indexing_status "completed",
#     and are not archived.
#   * Reflows the query in `get_document_by_dataset_id` over several lines; its
#     filter conditions (dataset_id match, enabled) are unchanged.
#   * Adds `get_working_documents_by_dataset_id(dataset_id)`: same dataset_id
#     match, plus the enabled / "completed" / not-archived conditions used by
#     `get_document_by_ids`.
#
# NOTE(review): the new non-nullable column presumably needs a matching schema
#   migration; none is visible in this patch -- confirm.
# NOTE(review): line breaks and indentation below were reconstructed from a
#   whitespace-collapsed copy of the patch. Blank context lines were inferred
#   from the hunk line counts; continuation-line indentation is a guess --
#   verify against the target files before applying.
diff --git a/api/models/dataset.py b/api/models/dataset.py
index 3d5ee70e30..6109d4a605 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -60,6 +60,7 @@ class Dataset(db.Model):  # type: ignore[name-defined]
     embedding_model_provider = db.Column(db.String(255), nullable=True)
     collection_binding_id = db.Column(StringUUID, nullable=True)
     retrieval_model = db.Column(JSONB, nullable=True)
+    built_in_field_enabled = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
 
     @property
     def dataset_keyword_table(self):
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index 38025b5213..66fa62cd83 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -582,9 +582,30 @@ class DocumentService:
 
         return document
 
+    @staticmethod
+    def get_document_by_ids(document_ids: list[str]) -> list[Document]:
+        documents = db.session.query(Document).filter(Document.id.in_(document_ids),
+                Document.enabled == True,
+                Document.indexing_status == "completed",
+                Document.archived == False,
+                ).all()
+        return documents
+
     @staticmethod
     def get_document_by_dataset_id(dataset_id: str) -> list[Document]:
-        documents = db.session.query(Document).filter(Document.dataset_id == dataset_id, Document.enabled == True).all()
+        documents = db.session.query(Document).filter(Document.dataset_id == dataset_id,
+                Document.enabled == True,
+                ).all()
+
+        return documents
+
+    @staticmethod
+    def get_working_documents_by_dataset_id(dataset_id: str) -> list[Document]:
+        documents = db.session.query(Document).filter(Document.dataset_id == dataset_id,
+                Document.enabled == True,
+                Document.indexing_status == "completed",
+                Document.archived == False,
+                ).all()
 
         return documents
 