mirror of https://github.com/langgenius/dify.git
fix chunk format
This commit is contained in:
parent
3c0adfb48a
commit
c2afb84884
|
|
@ -11,6 +11,7 @@ from core.rag.datasource.vdb.vector_factory import Vector
|
|||
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||
from core.rag.index_processor.constant.index_type import IndexType
|
||||
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
|
||||
from core.rag.models.document import Document, GeneralStructureChunk
|
||||
from core.tools.utils.text_processing_utils import remove_leading_symbols
|
||||
|
|
@ -162,6 +163,9 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
|||
preview = []
|
||||
for content in chunks:
|
||||
preview.append({"content": content})
|
||||
return {"preview": preview, "total_segments": len(chunks)}
|
||||
return {"chunk_structure": IndexType.PARAGRAPH_INDEX,
|
||||
"preview": preview,
|
||||
"total_segments": len(chunks)
|
||||
}
|
||||
else:
|
||||
raise ValueError("Chunks is not a list")
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from core.rag.datasource.vdb.vector_factory import Vector
|
|||
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||
from core.rag.index_processor.constant.index_type import IndexType
|
||||
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
|
||||
from core.rag.models.document import ChildDocument, Document, ParentChildStructureChunk
|
||||
from extensions.ext_database import db
|
||||
|
|
@ -263,4 +264,9 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
|
|||
preview = []
|
||||
for parent_child in parent_childs.parent_child_chunks:
|
||||
preview.append({"content": parent_child.parent_content, "child_chunks": parent_child.child_contents})
|
||||
return {"preview": preview, "total_segments": len(parent_childs.parent_child_chunks)}
|
||||
return {
|
||||
"chunk_structure": IndexType.PARENT_CHILD_INDEX,
|
||||
"parent_mode": parent_childs.parent_mode,
|
||||
"preview": preview,
|
||||
"total_segments": len(parent_childs.parent_child_chunks),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ from core.rag.datasource.vdb.vector_factory import Vector
|
|||
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||
from core.rag.index_processor.constant.index_type import IndexType
|
||||
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
|
||||
from core.rag.models.document import Document, QAStructureChunk
|
||||
from core.tools.utils.text_processing_utils import remove_leading_symbols
|
||||
|
|
@ -194,7 +195,11 @@ class QAIndexProcessor(BaseIndexProcessor):
|
|||
preview = []
|
||||
for qa_chunk in qa_chunks.qa_chunks:
|
||||
preview.append({"question": qa_chunk.question, "answer": qa_chunk.answer})
|
||||
return {"qa_preview": preview, "total_segments": len(qa_chunks.qa_chunks)}
|
||||
return {
|
||||
"chunk_structure": IndexType.QA_INDEX,
|
||||
"qa_preview": preview,
|
||||
"total_segments": len(qa_chunks.qa_chunks),
|
||||
}
|
||||
|
||||
def _format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents, document_language):
|
||||
format_documents = []
|
||||
|
|
|
|||
Loading…
Reference in New Issue