diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index e68273afa6..ceaa9ec4fa 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -283,6 +283,15 @@ class DatasetApi(Resource):
             location="json",
             help="Invalid external knowledge api id.",
         )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            required=False,
+            nullable=True,
+            location="json",
+            help="Invalid icon info.",
+        )
         args = parser.parse_args()
         data = request.get_json()
 
diff --git a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
index 1195df1b7f..c63d837106 100644
--- a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
+++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
@@ -1,18 +1,21 @@
 import datetime
 import logging
+import time
 from collections.abc import Mapping
 from typing import Any, cast
 
+from sqlalchemy import func
+
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from core.workflow.entities.node_entities import NodeRunResult
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 from core.workflow.enums import SystemVariableKey
 from core.workflow.nodes.enums import NodeType
 from extensions.ext_database import db
 from models.dataset import Dataset, Document, DocumentSegment
-from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 
 from ..base import BaseNode
 from .entities import KnowledgeIndexNodeData
@@ -111,13 +114,19 @@ class KnowledgeIndexNode(BaseNode[KnowledgeIndexNodeData]):
         document = db.session.query(Document).filter_by(id=document_id.value).first()
         if not document:
             raise KnowledgeIndexNodeError(f"Document {document_id.value} not found.")
-
+        # chunk nodes by chunk size
+        indexing_start_at = time.perf_counter()
         index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
         index_processor.index(dataset, document, chunks)
-
+        indexing_end_at = time.perf_counter()
+        document.indexing_latency = indexing_end_at - indexing_start_at
         # update document status
         document.indexing_status = "completed"
         document.completed_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
+        document.word_count = db.session.query(func.sum(DocumentSegment.word_count)).filter(
+            DocumentSegment.document_id == document.id,
+            DocumentSegment.dataset_id == dataset.id,
+        ).scalar() or 0
         db.session.add(document)
         # update document segment status
         db.session.query(DocumentSegment).filter(
diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py
index 2871b3ec16..aa147331d4 100644
--- a/api/fields/dataset_fields.py
+++ b/api/fields/dataset_fields.py
@@ -88,6 +88,8 @@ dataset_detail_fields = {
     "chunk_structure": fields.String,
     "icon_info": fields.Nested(icon_info_fields),
     "is_published": fields.Boolean,
+    "total_documents": fields.Integer,
+    "total_available_documents": fields.Integer,
 }
 
 dataset_query_detail_fields = {
diff --git a/api/models/dataset.py b/api/models/dataset.py
index d2fdd5e900..85c10c06d7 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -311,6 +311,20 @@ class DatasetProcessRule(Base):
             return json.loads(self.rules) if self.rules else None
         except JSONDecodeError:
             return None
+
+    # NOTE(review): dataset_detail_fields marshals Dataset objects — confirm Dataset also exposes these properties.
+    @property
+    def total_documents(self):
+        return db.session.query(func.count(Document.id)).filter(Document.dataset_id == self.dataset_id).scalar()
+
+    @property
+    def total_available_documents(self):
+        return db.session.query(func.count(Document.id)).filter(
+            Document.dataset_id == self.dataset_id,
+            Document.indexing_status == "completed",
+            Document.enabled == True,
+            Document.archived == False,
+        ).scalar()
 
 
 class Document(Base):
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index 133e3765f7..ab16081afc 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -484,6 +484,9 @@ class DatasetService:
 
             # update Retrieval model
             filtered_data["retrieval_model"] = data["retrieval_model"]
+            # update icon info
+            if data.get("icon_info"):
+                filtered_data["icon_info"] = data.get("icon_info")
 
             db.session.query(Dataset).filter_by(id=dataset_id).update(filtered_data)
             db.session.commit()