diff --git a/api/.env.example b/api/.env.example index d5d4e4486f..5a13ac69de 100644 --- a/api/.env.example +++ b/api/.env.example @@ -65,7 +65,7 @@ OPENDAL_FS_ROOT=storage # S3 Storage configuration S3_USE_AWS_MANAGED_IAM=false -S3_ENDPOINT=https://your-bucket-name.storage.s3.clooudflare.com +S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com S3_BUCKET_NAME=your-bucket-name S3_ACCESS_KEY=your-access-key S3_SECRET_KEY=your-secret-key @@ -74,7 +74,7 @@ S3_REGION=your-region # Azure Blob Storage configuration AZURE_BLOB_ACCOUNT_NAME=your-account-name AZURE_BLOB_ACCOUNT_KEY=your-account-key -AZURE_BLOB_CONTAINER_NAME=yout-container-name +AZURE_BLOB_CONTAINER_NAME=your-container-name AZURE_BLOB_ACCOUNT_URL=https://.blob.core.windows.net # Aliyun oss Storage configuration @@ -88,7 +88,7 @@ ALIYUN_OSS_REGION=your-region ALIYUN_OSS_PATH=your-path # Google Storage configuration -GOOGLE_STORAGE_BUCKET_NAME=yout-bucket-name +GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string # Tencent COS Storage configuration diff --git a/api/.ruff.toml b/api/.ruff.toml index 26a1b977a9..f30275a943 100644 --- a/api/.ruff.toml +++ b/api/.ruff.toml @@ -67,7 +67,7 @@ ignore = [ "SIM105", # suppressible-exception "SIM107", # return-in-try-except-finally "SIM108", # if-else-block-instead-of-if-exp - "SIM113", # eumerate-for-loop + "SIM113", # enumerate-for-loop "SIM117", # multiple-with-statements "SIM210", # if-expr-with-true-false ] diff --git a/api/commands.py b/api/commands.py index c6e450b3ee..76c8d3e382 100644 --- a/api/commands.py +++ b/api/commands.py @@ -563,8 +563,13 @@ def create_tenant(email: str, language: Optional[str] = None, name: Optional[str new_password = secrets.token_urlsafe(16) # register account - account = RegisterService.register(email=email, name=account_name, password=new_password, language=language) - + account = RegisterService.register( + email=email, + name=account_name, + password=new_password, + language=language, + create_workspace_required=False, + ) TenantService.create_owner_tenant_if_not_exist(account, name) click.echo( @@ -584,7 +589,7 @@ def upgrade_db(): click.echo(click.style("Starting database migration.", fg="green")) # run db migration - import flask_migrate + import flask_migrate # type: ignore flask_migrate.upgrade() diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index fcecb346b0..5865ddcc8b 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -659,7 +659,7 @@ class RagEtlConfig(BaseSettings): UNSTRUCTURED_API_KEY: Optional[str] = Field( description="API key for Unstructured.io service", - default=None, + default="", ) SCARF_NO_ANALYTICS: Optional[str] = Field( diff --git a/api/controllers/console/datasets/data_source.py b/api/controllers/console/datasets/data_source.py index 89a638ae54..30cd93a010 100644 --- a/api/controllers/console/datasets/data_source.py +++ b/api/controllers/console/datasets/data_source.py @@ -232,7 +232,7 @@ class DataSourceNotionApi(Resource): args["doc_form"], args["doc_language"], ) - return response, 200 + return response.model_dump(), 200 class DataSourceNotionDatasetSyncApi(Resource): diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index f3c3736b25..0c0d2e2003 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -464,7 +464,7 @@ class DatasetIndexingEstimateApi(Resource): 
except Exception as e: raise IndexingEstimateError(str(e)) - return response, 200 + return response.model_dump(), 200 class DatasetRelatedAppListApi(Resource): @@ -733,6 +733,18 @@ class DatasetPermissionUserListApi(Resource): }, 200 +class DatasetAutoDisableLogApi(Resource): + @setup_required + @login_required + @account_initialization_required + def get(self, dataset_id): + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) + if dataset is None: + raise NotFound("Dataset not found.") + return DatasetService.get_dataset_auto_disable_logs(dataset_id_str), 200 + + api.add_resource(DatasetListApi, "/datasets") api.add_resource(DatasetApi, "/datasets/<uuid:dataset_id>") api.add_resource(DatasetUseCheckApi, "/datasets/<uuid:dataset_id>/use-check") @@ -747,3 +759,4 @@ api.add_resource(DatasetApiBaseUrlApi, "/datasets/api-base-info") api.add_resource(DatasetRetrievalSettingApi, "/datasets/retrieval-setting") api.add_resource(DatasetRetrievalSettingMockApi, "/datasets/retrieval-setting/<string:vector_type>") api.add_resource(DatasetPermissionUserListApi, "/datasets/<uuid:dataset_id>/permission-part-users") +api.add_resource(DatasetAutoDisableLogApi, "/datasets/<uuid:dataset_id>/auto-disable-logs") diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index c236e1a431..3c132bc3d0 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -52,6 +52,7 @@ from fields.document_fields import ( from libs.login import login_required from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile from services.dataset_service import DatasetService, DocumentService +from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig from tasks.add_document_to_index_task import add_document_to_index_task from tasks.remove_document_from_index_task import remove_document_from_index_task @@ -267,20 +268,22 @@ class DatasetDocumentListApi(Resource): parser.add_argument("duplicate", type=bool, default=True, nullable=False, location="json") parser.add_argument("original_document_id", type=str, required=False, location="json") parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json") + parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json") + parser.add_argument( "doc_language", type=str, default="English", required=False, nullable=False, location="json" ) - parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json") args = parser.parse_args() + knowledge_config = KnowledgeConfig(**args) - if not dataset.indexing_technique and not args["indexing_technique"]: + if not dataset.indexing_technique and not knowledge_config.indexing_technique: raise ValueError("indexing_technique is required.") # validate args - DocumentService.document_create_args_validate(args) + DocumentService.document_create_args_validate(knowledge_config) try: - documents, batch = DocumentService.save_document_with_dataset_id(dataset, args, current_user) + documents, batch = DocumentService.save_document_with_dataset_id(dataset, knowledge_config, current_user) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) except QuotaExceededError: @@ -290,6 +293,25 @@ class DatasetDocumentListApi(Resource): return {"documents": documents, "batch": batch} + @setup_required + @login_required + @account_initialization_required + def delete(self, dataset_id):
dataset_id = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id) + if dataset is None: + raise NotFound("Dataset not found.") + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + + try: + document_ids = request.args.getlist("document_id") + DocumentService.delete_documents(dataset, document_ids) + except services.errors.document.DocumentIndexingError: + raise DocumentIndexingError("Cannot delete document during indexing.") + + return {"result": "success"}, 204 + class DatasetInitApi(Resource): @setup_required @@ -325,9 +347,9 @@ class DatasetInitApi(Resource): # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator if not current_user.is_dataset_editor: raise Forbidden() - - if args["indexing_technique"] == "high_quality": - if args["embedding_model"] is None or args["embedding_model_provider"] is None: + knowledge_config = KnowledgeConfig(**args) + if knowledge_config.indexing_technique == "high_quality": + if knowledge_config.embedding_model is None or knowledge_config.embedding_model_provider is None: raise ValueError("embedding model and embedding model provider are required for high quality indexing.") try: model_manager = ModelManager() @@ -346,11 +368,11 @@ class DatasetInitApi(Resource): raise ProviderNotInitializeError(ex.description) # validate args - DocumentService.document_create_args_validate(args) + DocumentService.document_create_args_validate(knowledge_config) try: dataset, documents, batch = DocumentService.save_document_without_dataset_id( - tenant_id=current_user.current_tenant_id, document_data=args, account=current_user + tenant_id=current_user.current_tenant_id, knowledge_config=knowledge_config, account=current_user ) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) @@ -403,7 +425,7 @@ class DocumentIndexingEstimateApi(DocumentResource): indexing_runner = IndexingRunner() try: - response = indexing_runner.indexing_estimate( + estimate_response = indexing_runner.indexing_estimate( current_user.current_tenant_id, [extract_setting], data_process_rule_dict, @@ -411,6 +433,7 @@ class DocumentIndexingEstimateApi(DocumentResource): "English", dataset_id, ) + return estimate_response.model_dump(), 200 except LLMBadRequestError: raise ProviderNotInitializeError( "No Embedding Model available. Please configure a valid provider " @@ -423,7 +446,7 @@ class DocumentIndexingEstimateApi(DocumentResource): except Exception as e: raise IndexingEstimateError(str(e)) - return response + return response, 200 class DocumentBatchIndexingEstimateApi(DocumentResource): @@ -434,9 +457,8 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): dataset_id = str(dataset_id) batch = str(batch) documents = self.get_batch_documents(dataset_id, batch) - response = {"tokens": 0, "total_price": 0, "currency": "USD", "total_segments": 0, "preview": []} if not documents: - return response + return {"tokens": 0, "total_price": 0, "currency": "USD", "total_segments": 0, "preview": []}, 200 data_process_rule = documents[0].dataset_process_rule data_process_rule_dict = data_process_rule.to_dict() info_list = [] @@ -514,6 +536,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): "English", dataset_id, ) + return response.model_dump(), 200 except LLMBadRequestError: raise ProviderNotInitializeError( "No Embedding Model available. 
Please configure a valid provider " @@ -525,7 +548,6 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): raise ProviderNotInitializeError(ex.description) except Exception as e: raise IndexingEstimateError(str(e)) - return response class DocumentBatchIndexingStatusApi(DocumentResource): @@ -598,7 +620,8 @@ class DocumentDetailApi(DocumentResource): if metadata == "only": response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata} elif metadata == "without": - process_rules = DatasetService.get_process_rules(dataset_id) + dataset_process_rules = DatasetService.get_process_rules(dataset_id) + document_process_rules = document.dataset_process_rule.to_dict() data_source_info = document.data_source_detail_dict response = { "id": document.id, @@ -606,7 +629,8 @@ class DocumentDetailApi(DocumentResource): "data_source_type": document.data_source_type, "data_source_info": data_source_info, "dataset_process_rule_id": document.dataset_process_rule_id, - "dataset_process_rule": process_rules, + "dataset_process_rule": dataset_process_rules, + "document_process_rule": document_process_rules, "name": document.name, "created_from": document.created_from, "created_by": document.created_by, @@ -629,7 +653,8 @@ class DocumentDetailApi(DocumentResource): "doc_language": document.doc_language, } else: - process_rules = DatasetService.get_process_rules(dataset_id) + dataset_process_rules = DatasetService.get_process_rules(dataset_id) + document_process_rules = document.dataset_process_rule.to_dict() data_source_info = document.data_source_detail_dict response = { "id": document.id, @@ -637,7 +662,8 @@ class DocumentDetailApi(DocumentResource): "data_source_type": document.data_source_type, "data_source_info": data_source_info, "dataset_process_rule_id": document.dataset_process_rule_id, - "dataset_process_rule": process_rules, + "dataset_process_rule": dataset_process_rules, + "document_process_rule": document_process_rules, "name": document.name, "created_from": document.created_from, "created_by": document.created_by, @@ -773,9 +799,8 @@ class DocumentStatusApi(DocumentResource): @login_required @account_initialization_required @cloud_edition_billing_resource_check("vector_space") - def patch(self, dataset_id, document_id, action): + def patch(self, dataset_id, action): dataset_id = str(dataset_id) - document_id = str(document_id) dataset = DatasetService.get_dataset(dataset_id) if dataset is None: raise NotFound("Dataset not found.") @@ -790,84 +815,79 @@ class DocumentStatusApi(DocumentResource): # check user's permission DatasetService.check_dataset_permission(dataset, current_user) - document = self.get_document(dataset_id, document_id) + document_ids = request.args.getlist("document_id") + for document_id in document_ids: + document = self.get_document(dataset_id, document_id) - indexing_cache_key = "document_{}_indexing".format(document.id) - cache_result = redis_client.get(indexing_cache_key) - if cache_result is not None: - raise InvalidActionError("Document is being indexed, please try again later") + indexing_cache_key = "document_{}_indexing".format(document.id) + cache_result = redis_client.get(indexing_cache_key) + if cache_result is not None: + raise InvalidActionError(f"Document:{document.name} is being indexed, please try again later") - if action == "enable": - if document.enabled: - raise InvalidActionError("Document already enabled.") + if action == "enable": + if document.enabled: + continue + document.enabled = True + document.disabled_at 
= None + document.disabled_by = None + document.updated_at = datetime.now(UTC).replace(tzinfo=None) + db.session.commit() - document.enabled = True - document.disabled_at = None - document.disabled_by = None - document.updated_at = datetime.now(UTC).replace(tzinfo=None) - db.session.commit() + # Set cache to prevent indexing the same document multiple times + redis_client.setex(indexing_cache_key, 600, 1) - # Set cache to prevent indexing the same document multiple times - redis_client.setex(indexing_cache_key, 600, 1) + add_document_to_index_task.delay(document_id) - add_document_to_index_task.delay(document_id) + elif action == "disable": + if not document.completed_at or document.indexing_status != "completed": + raise InvalidActionError(f"Document: {document.name} is not completed.") + if not document.enabled: + continue - return {"result": "success"}, 200 + document.enabled = False + document.disabled_at = datetime.now(UTC).replace(tzinfo=None) + document.disabled_by = current_user.id + document.updated_at = datetime.now(UTC).replace(tzinfo=None) + db.session.commit() - elif action == "disable": - if not document.completed_at or document.indexing_status != "completed": - raise InvalidActionError("Document is not completed.") - if not document.enabled: - raise InvalidActionError("Document already disabled.") - - document.enabled = False - document.disabled_at = datetime.now(UTC).replace(tzinfo=None) - document.disabled_by = current_user.id - document.updated_at = datetime.now(UTC).replace(tzinfo=None) - db.session.commit() - - # Set cache to prevent indexing the same document multiple times - redis_client.setex(indexing_cache_key, 600, 1) - - remove_document_from_index_task.delay(document_id) - - return {"result": "success"}, 200 - - elif action == "archive": - if document.archived: - raise InvalidActionError("Document already archived.") - - document.archived = True - document.archived_at = datetime.now(UTC).replace(tzinfo=None) - document.archived_by = current_user.id - document.updated_at = datetime.now(UTC).replace(tzinfo=None) - db.session.commit() - - if document.enabled: # Set cache to prevent indexing the same document multiple times redis_client.setex(indexing_cache_key, 600, 1) remove_document_from_index_task.delay(document_id) - return {"result": "success"}, 200 - elif action == "un_archive": - if not document.archived: - raise InvalidActionError("Document is not archived.") + elif action == "archive": + if document.archived: + continue - document.archived = False - document.archived_at = None - document.archived_by = None - document.updated_at = datetime.now(UTC).replace(tzinfo=None) - db.session.commit() + document.archived = True + document.archived_at = datetime.now(UTC).replace(tzinfo=None) + document.archived_by = current_user.id + document.updated_at = datetime.now(UTC).replace(tzinfo=None) + db.session.commit() - # Set cache to prevent indexing the same document multiple times - redis_client.setex(indexing_cache_key, 600, 1) + if document.enabled: + # Set cache to prevent indexing the same document multiple times + redis_client.setex(indexing_cache_key, 600, 1) - add_document_to_index_task.delay(document_id) + remove_document_from_index_task.delay(document_id) - return {"result": "success"}, 200 - else: - raise InvalidActionError() + elif action == "un_archive": + if not document.archived: + continue + document.archived = False + document.archived_at = None + document.archived_by = None + document.updated_at = datetime.now(UTC).replace(tzinfo=None) + 
db.session.commit() + + # Set cache to prevent indexing the same document multiple times + redis_client.setex(indexing_cache_key, 600, 1) + + add_document_to_index_task.delay(document_id) + + else: + raise InvalidActionError() + return {"result": "success"}, 200 class DocumentPauseApi(DocumentResource): @@ -1038,7 +1058,7 @@ api.add_resource( ) api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>") api.add_resource(DocumentMetadataApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/metadata") -api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/status/<string:action>") +api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/status/<string:action>/batch") api.add_resource(DocumentPauseApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause") api.add_resource(DocumentRecoverApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/resume") api.add_resource(DocumentRetryApi, "/datasets/<uuid:dataset_id>/retry") diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 2d5933ca23..96654c09fd 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -1,5 +1,4 @@ import uuid -from datetime import UTC, datetime import pandas as pd from flask import request @@ -10,7 +9,13 @@ from werkzeug.exceptions import Forbidden, NotFound import services from controllers.console import api from controllers.console.app.error import ProviderNotInitializeError -from controllers.console.datasets.error import InvalidActionError, NoFileUploadedError, TooManyFilesError +from controllers.console.datasets.error import ( + ChildChunkDeleteIndexError, + ChildChunkIndexingError, + InvalidActionError, + NoFileUploadedError, + TooManyFilesError, +) from controllers.console.wraps import ( account_initialization_required, cloud_edition_billing_knowledge_limit_check, @@ -20,15 +25,15 @@ from controllers.console.wraps import ( from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType -from extensions.ext_database import db from extensions.ext_redis import redis_client -from fields.segment_fields import segment_fields +from fields.segment_fields import child_chunk_fields, segment_fields from libs.login import login_required -from models import DocumentSegment +from models.dataset import ChildChunk, DocumentSegment from services.dataset_service import DatasetService, DocumentService, SegmentService +from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs +from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError +from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingServiceError from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task -from tasks.disable_segment_from_index_task import disable_segment_from_index_task -from tasks.enable_segment_to_index_task import enable_segment_to_index_task class DatasetDocumentSegmentListApi(Resource): @@ -53,15 +58,16 @@ class DatasetDocumentSegmentListApi(Resource): raise NotFound("Document not found.") parser = reqparse.RequestParser() - parser.add_argument("last_id", type=str, default=None, location="args") parser.add_argument("limit", type=int, default=20, location="args") parser.add_argument("status", type=str, action="append", default=[], location="args") parser.add_argument("hit_count_gte", type=int, default=None, location="args")
parser.add_argument("enabled", type=str, default="all", location="args") parser.add_argument("keyword", type=str, default=None, location="args") + parser.add_argument("page", type=int, default=1, location="args") + args = parser.parse_args() - last_id = args["last_id"] + page = args["page"] limit = min(args["limit"], 100) status_list = args["status"] hit_count_gte = args["hit_count_gte"] @@ -69,14 +75,7 @@ class DatasetDocumentSegmentListApi(Resource): query = DocumentSegment.query.filter( DocumentSegment.document_id == str(document_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ) - - if last_id is not None: - last_segment = db.session.get(DocumentSegment, str(last_id)) - if last_segment: - query = query.filter(DocumentSegment.position > last_segment.position) - else: - return {"data": [], "has_more": False, "limit": limit}, 200 + ).order_by(DocumentSegment.position.asc()) if status_list: query = query.filter(DocumentSegment.status.in_(status_list)) @@ -93,21 +92,44 @@ class DatasetDocumentSegmentListApi(Resource): elif args["enabled"].lower() == "false": query = query.filter(DocumentSegment.enabled == False) - total = query.count() - segments = query.order_by(DocumentSegment.position).limit(limit + 1).all() + segments = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) - has_more = False - if len(segments) > limit: - has_more = True - segments = segments[:-1] - - return { - "data": marshal(segments, segment_fields), - "doc_form": document.doc_form, - "has_more": has_more, + response = { + "data": marshal(segments.items, segment_fields), "limit": limit, - "total": total, - }, 200 + "total": segments.total, + "total_pages": segments.pages, + "page": page, + } + return response, 200 + + @setup_required + @login_required + @account_initialization_required + def delete(self, dataset_id, document_id): + # check dataset + dataset_id = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id) + if not dataset: + raise NotFound("Dataset not found.") + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound("Document not found.") + segment_ids = request.args.getlist("segment_id") + + # The role of the current user in the ta table must be admin or owner + if not current_user.is_editor: + raise Forbidden() + try: + DatasetService.check_dataset_permission(dataset, current_user) + except services.errors.account.NoPermissionError as e: + raise Forbidden(str(e)) + SegmentService.delete_segments(segment_ids, document, dataset) + return {"result": "success"}, 200 class DatasetDocumentSegmentApi(Resource): @@ -115,11 +137,15 @@ class DatasetDocumentSegmentApi(Resource): @login_required @account_initialization_required @cloud_edition_billing_resource_check("vector_space") - def patch(self, dataset_id, segment_id, action): + def patch(self, dataset_id, document_id, action): dataset_id = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id) if not dataset: raise NotFound("Dataset not found.") + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound("Document not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # The role of the current user in the ta table must be admin, owner, or editor @@ -147,59 +173,17 @@ class 
DatasetDocumentSegmentApi(Resource): ) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) + segment_ids = request.args.getlist("segment_id") - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() - - if not segment: - raise NotFound("Segment not found.") - - if segment.status != "completed": - raise NotFound("Segment is not completed, enable or disable function is not allowed") - - document_indexing_cache_key = "document_{}_indexing".format(segment.document_id) + document_indexing_cache_key = "document_{}_indexing".format(document.id) cache_result = redis_client.get(document_indexing_cache_key) if cache_result is not None: raise InvalidActionError("Document is being indexed, please try again later") - - indexing_cache_key = "segment_{}_indexing".format(segment.id) - cache_result = redis_client.get(indexing_cache_key) - if cache_result is not None: - raise InvalidActionError("Segment is being indexed, please try again later") - - if action == "enable": - if segment.enabled: - raise InvalidActionError("Segment is already enabled.") - - segment.enabled = True - segment.disabled_at = None - segment.disabled_by = None - db.session.commit() - - # Set cache to prevent indexing the same segment multiple times - redis_client.setex(indexing_cache_key, 600, 1) - - enable_segment_to_index_task.delay(segment.id) - - return {"result": "success"}, 200 - elif action == "disable": - if not segment.enabled: - raise InvalidActionError("Segment is already disabled.") - - segment.enabled = False - segment.disabled_at = datetime.now(UTC).replace(tzinfo=None) - segment.disabled_by = current_user.id - db.session.commit() - - # Set cache to prevent indexing the same segment multiple times - redis_client.setex(indexing_cache_key, 600, 1) - - disable_segment_from_index_task.delay(segment.id) - - return {"result": "success"}, 200 - else: - raise InvalidActionError() + try: + SegmentService.update_segments_status(segment_ids, action, dataset, document) + except Exception as e: + raise InvalidActionError(str(e)) + return {"result": "success"}, 200 class DatasetDocumentSegmentAddApi(Resource): @@ -307,9 +291,12 @@ class DatasetDocumentSegmentUpdateApi(Resource): parser.add_argument("content", type=str, required=True, nullable=False, location="json") parser.add_argument("answer", type=str, required=False, nullable=True, location="json") parser.add_argument("keywords", type=list, required=False, nullable=True, location="json") + parser.add_argument( + "regenerate_child_chunks", type=bool, required=False, nullable=True, default=False, location="json" + ) args = parser.parse_args() SegmentService.segment_create_args_validate(args, document) - segment = SegmentService.update_segment(args, segment, document, dataset) + segment = SegmentService.update_segment(SegmentUpdateArgs(**args), segment, document, dataset) return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 @setup_required @@ -412,8 +399,248 @@ class DatasetDocumentSegmentBatchImportApi(Resource): return {"job_id": job_id, "job_status": cache_result.decode()}, 200 +class ChildChunkAddApi(Resource): + @setup_required + @login_required + @account_initialization_required + @cloud_edition_billing_resource_check("vector_space") + @cloud_edition_billing_knowledge_limit_check("add_segment") + def post(self, dataset_id, document_id, segment_id): + # check dataset + dataset_id = str(dataset_id) + dataset = 
DatasetService.get_dataset(dataset_id) + if not dataset: + raise NotFound("Dataset not found.") + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound("Document not found.") + # check segment + segment_id = str(segment_id) + segment = DocumentSegment.query.filter( + DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id + ).first() + if not segment: + raise NotFound("Segment not found.") + if not current_user.is_editor: + raise Forbidden() + # check embedding model setting + if dataset.indexing_technique == "high_quality": + try: + model_manager = ModelManager() + model_manager.get_model_instance( + tenant_id=current_user.current_tenant_id, + provider=dataset.embedding_model_provider, + model_type=ModelType.TEXT_EMBEDDING, + model=dataset.embedding_model, + ) + except LLMBadRequestError: + raise ProviderNotInitializeError( + "No Embedding Model available. Please configure a valid provider " + "in the Settings -> Model Provider." + ) + except ProviderTokenNotInitError as ex: + raise ProviderNotInitializeError(ex.description) + try: + DatasetService.check_dataset_permission(dataset, current_user) + except services.errors.account.NoPermissionError as e: + raise Forbidden(str(e)) + # validate args + parser = reqparse.RequestParser() + parser.add_argument("content", type=str, required=True, nullable=False, location="json") + args = parser.parse_args() + try: + child_chunk = SegmentService.create_child_chunk(args.get("content"), segment, document, dataset) + except ChildChunkIndexingServiceError as e: + raise ChildChunkIndexingError(str(e)) + return {"data": marshal(child_chunk, child_chunk_fields)}, 200 + + @setup_required + @login_required + @account_initialization_required + def get(self, dataset_id, document_id, segment_id): + # check dataset + dataset_id = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id) + if not dataset: + raise NotFound("Dataset not found.") + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound("Document not found.") + # check segment + segment_id = str(segment_id) + segment = DocumentSegment.query.filter( + DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id + ).first() + if not segment: + raise NotFound("Segment not found.") + parser = reqparse.RequestParser() + parser.add_argument("limit", type=int, default=20, location="args") + parser.add_argument("keyword", type=str, default=None, location="args") + parser.add_argument("page", type=int, default=1, location="args") + + args = parser.parse_args() + + page = args["page"] + limit = min(args["limit"], 100) + keyword = args["keyword"] + + child_chunks = SegmentService.get_child_chunks(segment_id, document_id, dataset_id, page, limit, keyword) + return { + "data": marshal(child_chunks.items, child_chunk_fields), + "total": child_chunks.total, + "total_pages": child_chunks.pages, + "page": page, + "limit": limit, + }, 200 + + @setup_required + @login_required + @account_initialization_required + @cloud_edition_billing_resource_check("vector_space") + def patch(self, dataset_id, document_id, segment_id): + # check dataset + dataset_id = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id) + if not dataset: + raise 
NotFound("Dataset not found.") + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound("Document not found.") + # check segment + segment_id = str(segment_id) + segment = DocumentSegment.query.filter( + DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id + ).first() + if not segment: + raise NotFound("Segment not found.") + # The role of the current user in the ta table must be admin, owner, or editor + if not current_user.is_editor: + raise Forbidden() + try: + DatasetService.check_dataset_permission(dataset, current_user) + except services.errors.account.NoPermissionError as e: + raise Forbidden(str(e)) + # validate args + parser = reqparse.RequestParser() + parser.add_argument("chunks", type=list, required=True, nullable=False, location="json") + args = parser.parse_args() + try: + chunks = [ChildChunkUpdateArgs(**chunk) for chunk in args.get("chunks")] + child_chunks = SegmentService.update_child_chunks(chunks, segment, document, dataset) + except ChildChunkIndexingServiceError as e: + raise ChildChunkIndexingError(str(e)) + return {"data": marshal(child_chunks, child_chunk_fields)}, 200 + + +class ChildChunkUpdateApi(Resource): + @setup_required + @login_required + @account_initialization_required + def delete(self, dataset_id, document_id, segment_id, child_chunk_id): + # check dataset + dataset_id = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id) + if not dataset: + raise NotFound("Dataset not found.") + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound("Document not found.") + # check segment + segment_id = str(segment_id) + segment = DocumentSegment.query.filter( + DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id + ).first() + if not segment: + raise NotFound("Segment not found.") + # check child chunk + child_chunk_id = str(child_chunk_id) + child_chunk = ChildChunk.query.filter( + ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id + ).first() + if not child_chunk: + raise NotFound("Child chunk not found.") + # The role of the current user in the ta table must be admin or owner + if not current_user.is_editor: + raise Forbidden() + try: + DatasetService.check_dataset_permission(dataset, current_user) + except services.errors.account.NoPermissionError as e: + raise Forbidden(str(e)) + try: + SegmentService.delete_child_chunk(child_chunk, dataset) + except ChildChunkDeleteIndexServiceError as e: + raise ChildChunkDeleteIndexError(str(e)) + return {"result": "success"}, 200 + + @setup_required + @login_required + @account_initialization_required + @cloud_edition_billing_resource_check("vector_space") + def patch(self, dataset_id, document_id, segment_id, child_chunk_id): + # check dataset + dataset_id = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id) + if not dataset: + raise NotFound("Dataset not found.") + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise 
NotFound("Document not found.") + # check segment + segment_id = str(segment_id) + segment = DocumentSegment.query.filter( + DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id + ).first() + if not segment: + raise NotFound("Segment not found.") + # check child chunk + child_chunk_id = str(child_chunk_id) + child_chunk = ChildChunk.query.filter( + ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id + ).first() + if not child_chunk: + raise NotFound("Child chunk not found.") + # The role of the current user in the ta table must be admin or owner + if not current_user.is_editor: + raise Forbidden() + try: + DatasetService.check_dataset_permission(dataset, current_user) + except services.errors.account.NoPermissionError as e: + raise Forbidden(str(e)) + # validate args + parser = reqparse.RequestParser() + parser.add_argument("content", type=str, required=True, nullable=False, location="json") + args = parser.parse_args() + try: + child_chunk = SegmentService.update_child_chunk( + args.get("content"), child_chunk, segment, document, dataset + ) + except ChildChunkIndexingServiceError as e: + raise ChildChunkIndexingError(str(e)) + return {"data": marshal(child_chunk, child_chunk_fields)}, 200 + + api.add_resource(DatasetDocumentSegmentListApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments") -api.add_resource(DatasetDocumentSegmentApi, "/datasets/<uuid:dataset_id>/segments/<uuid:segment_id>/<string:action>") +api.add_resource( + DatasetDocumentSegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment/<string:action>" +) api.add_resource(DatasetDocumentSegmentAddApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment") api.add_resource( DatasetDocumentSegmentUpdateApi, @@ -424,3 +651,11 @@ api.add_resource( "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/batch_import", "/datasets/batch_import_status/<uuid:job_id>", ) +api.add_resource( + ChildChunkAddApi, + "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks", +) +api.add_resource( + ChildChunkUpdateApi, + "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks/<uuid:child_chunk_id>", +) diff --git a/api/controllers/console/datasets/error.py b/api/controllers/console/datasets/error.py index 6a7a3971a8..2f00a84de6 100644 --- a/api/controllers/console/datasets/error.py +++ b/api/controllers/console/datasets/error.py @@ -89,3 +89,15 @@ class IndexingEstimateError(BaseHTTPException): error_code = "indexing_estimate_error" description = "Knowledge indexing estimate failed: {message}" code = 500 + + +class ChildChunkIndexingError(BaseHTTPException): + error_code = "child_chunk_indexing_error" + description = "Create child chunk index failed: {message}" + code = 500 + + +class ChildChunkDeleteIndexError(BaseHTTPException): + error_code = "child_chunk_delete_index_error" + description = "Delete child chunk index failed: {message}" + code = 500 diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index c3488de299..405d5ed607 100644 --- a/api/controllers/console/explore/message.py +++ b/api/controllers/console/explore/message.py @@ -66,10 +66,17 @@ class MessageFeedbackApi(InstalledAppResource): parser = reqparse.RequestParser() parser.add_argument("rating", type=str, choices=["like", "dislike", None], location="json") + parser.add_argument("content", type=str, location="json") args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, current_user, args["rating"], args["content"]) + MessageService.create_feedback( + app_model=app_model, + message_id=message_id, + user=current_user, + rating=args.get("rating"), + content=args.get("content"), + ) except
services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index 522c7509b9..773ea0e0c6 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -108,7 +108,13 @@ class MessageFeedbackApi(Resource): args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, end_user, args["rating"], args["content"]) + MessageService.create_feedback( + app_model=app_model, + message_id=message_id, + user=end_user, + rating=args.get("rating"), + content=args.get("content"), + ) except services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 34afe2837f..ea664b8f1b 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -8,12 +8,16 @@ from werkzeug.exceptions import NotFound import services.dataset_service from controllers.common.errors import FilenameNotExistsError from controllers.service_api import api -from controllers.service_api.app.error import ProviderNotInitializeError +from controllers.service_api.app.error import ( + FileTooLargeError, + NoFileUploadedError, + ProviderNotInitializeError, + TooManyFilesError, + UnsupportedFileTypeError, +) from controllers.service_api.dataset.error import ( ArchivedDocumentImmutableError, DocumentIndexingError, - NoFileUploadedError, - TooManyFilesError, ) from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_resource_check from core.errors.error import ProviderTokenNotInitError @@ -22,6 +26,7 @@ from fields.document_fields import document_fields, document_status_fields from libs.login import current_user from models.dataset import Dataset, Document, DocumentSegment from services.dataset_service import DocumentService +from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig from services.file_service import FileService @@ -67,13 +72,14 @@ class DocumentAddByTextApi(DatasetApiResource): "info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}}, } args["data_source"] = data_source + knowledge_config = KnowledgeConfig(**args) # validate args - DocumentService.document_create_args_validate(args) + DocumentService.document_create_args_validate(knowledge_config) try: documents, batch = DocumentService.save_document_with_dataset_id( dataset=dataset, - document_data=args, + knowledge_config=knowledge_config, account=current_user, dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None, created_from="api", @@ -122,12 +128,13 @@ class DocumentUpdateByTextApi(DatasetApiResource): args["data_source"] = data_source # validate args args["original_document_id"] = str(document_id) - DocumentService.document_create_args_validate(args) + knowledge_config = KnowledgeConfig(**args) + DocumentService.document_create_args_validate(knowledge_config) try: documents, batch = DocumentService.save_document_with_dataset_id( dataset=dataset, - document_data=args, + knowledge_config=knowledge_config, account=current_user, dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None, created_from="api", @@ -186,12 +193,13 @@ class DocumentAddByFileApi(DatasetApiResource): data_source = {"type": "upload_file", "info_list": 
{"file_info_list": {"file_ids": [upload_file.id]}}} args["data_source"] = data_source # validate args - DocumentService.document_create_args_validate(args) + knowledge_config = KnowledgeConfig(**args) + DocumentService.document_create_args_validate(knowledge_config) try: documents, batch = DocumentService.save_document_with_dataset_id( dataset=dataset, - document_data=args, + knowledge_config=knowledge_config, account=dataset.created_by_account, dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None, created_from="api", @@ -234,23 +242,30 @@ class DocumentUpdateByFileApi(DatasetApiResource): if not file.filename: raise FilenameNotExistsError - upload_file = FileService.upload_file( - filename=file.filename, - content=file.read(), - mimetype=file.mimetype, - user=current_user, - source="datasets", - ) + try: + upload_file = FileService.upload_file( + filename=file.filename, + content=file.read(), + mimetype=file.mimetype, + user=current_user, + source="datasets", + ) + except services.errors.file.FileTooLargeError as file_too_large_error: + raise FileTooLargeError(file_too_large_error.description) + except services.errors.file.UnsupportedFileTypeError: + raise UnsupportedFileTypeError() data_source = {"type": "upload_file", "info_list": {"file_info_list": {"file_ids": [upload_file.id]}}} args["data_source"] = data_source # validate args args["original_document_id"] = str(document_id) - DocumentService.document_create_args_validate(args) + + knowledge_config = KnowledgeConfig(**args) + DocumentService.document_create_args_validate(knowledge_config) try: documents, batch = DocumentService.save_document_with_dataset_id( dataset=dataset, - document_data=args, + knowledge_config=knowledge_config, account=dataset.created_by_account, dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None, created_from="api", diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index 34904574a8..1c500f51bf 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -16,6 +16,7 @@ from extensions.ext_database import db from fields.segment_fields import segment_fields from models.dataset import Dataset, DocumentSegment from services.dataset_service import DatasetService, DocumentService, SegmentService +from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateArgs class SegmentApi(DatasetApiResource): @@ -193,7 +194,7 @@ class DatasetSegmentApi(DatasetApiResource): args = parser.parse_args() SegmentService.segment_create_args_validate(args["segment"], document) - segment = SegmentService.update_segment(args["segment"], segment, document, dataset) + segment = SegmentService.update_segment(SegmentUpdateArgs(**args["segment"]), segment, document, dataset) return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index 0f47e64370..2afc11f601 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -105,10 +105,17 @@ class MessageFeedbackApi(WebApiResource): parser = reqparse.RequestParser() parser.add_argument("rating", type=str, choices=["like", "dislike", None], location="json") + parser.add_argument("content", type=str, location="json", default=None) args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, end_user, args["rating"], args["content"]) + 
MessageService.create_feedback( + app_model=app_model, + message_id=message_id, + user=end_user, + rating=args.get("rating"), + content=args.get("content"), + ) except services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index 684f2bc8a3..cb2a361f17 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -393,7 +393,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): try: return generate_task_pipeline.process() except ValueError as e: - if e.args[0] == "I/O operation on closed file.": # ignore this error + if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: logger.exception(f"Failed to process generate task pipeline, conversation_id: {conversation.id}") diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index 2e9b643d8b..ab0f0763f4 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -5,6 +5,9 @@ from collections.abc import Generator, Mapping from threading import Thread from typing import Any, Optional, Union +from sqlalchemy import select +from sqlalchemy.orm import Session + from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom @@ -66,7 +69,6 @@ from models.enums import CreatedByRole from models.workflow import ( Workflow, WorkflowNodeExecution, - WorkflowRun, WorkflowRunStatus, ) @@ -80,8 +82,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc _task_state: WorkflowTaskState _application_generate_entity: AdvancedChatAppGenerateEntity - _workflow: Workflow - _user: Union[Account, EndUser] _workflow_system_variables: dict[SystemVariableKey, Any] _wip_workflow_node_executions: dict[str, WorkflowNodeExecution] _conversation_name_generate_thread: Optional[Thread] = None @@ -97,32 +97,37 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc stream: bool, dialogue_count: int, ) -> None: - """ - Initialize AdvancedChatAppGenerateTaskPipeline. 
- :param application_generate_entity: application generate entity - :param workflow: workflow - :param queue_manager: queue manager - :param conversation: conversation - :param message: message - :param user: user - :param stream: stream - :param dialogue_count: dialogue count - """ - super().__init__(application_generate_entity, queue_manager, user, stream) + super().__init__( + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + stream=stream, + ) - if isinstance(self._user, EndUser): - user_id = self._user.session_id + if isinstance(user, EndUser): + self._user_id = user.id + user_session_id = user.session_id + self._created_by_role = CreatedByRole.END_USER + elif isinstance(user, Account): + self._user_id = user.id + user_session_id = user.id + self._created_by_role = CreatedByRole.ACCOUNT else: - user_id = self._user.id + raise NotImplementedError(f"User type not supported: {type(user)}") + + self._workflow_id = workflow.id + self._workflow_features_dict = workflow.features_dict + + self._conversation_id = conversation.id + self._conversation_mode = conversation.mode + + self._message_id = message.id + self._message_created_at = int(message.created_at.timestamp()) - self._workflow = workflow - self._conversation = conversation - self._message = message self._workflow_system_variables = { SystemVariableKey.QUERY: message.query, SystemVariableKey.FILES: application_generate_entity.files, SystemVariableKey.CONVERSATION_ID: conversation.id, - SystemVariableKey.USER_ID: user_id, + SystemVariableKey.USER_ID: user_session_id, SystemVariableKey.DIALOGUE_COUNT: dialogue_count, SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id, SystemVariableKey.WORKFLOW_ID: workflow.id, @@ -135,19 +140,16 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc self._conversation_name_generate_thread = None self._recorded_files: list[Mapping[str, Any]] = [] + self._workflow_run_id = "" def process(self) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]: """ Process generate task pipeline. 
:return: """ - db.session.refresh(self._workflow) - db.session.refresh(self._user) - db.session.close() - # start generate conversation name thread self._conversation_name_generate_thread = self._generate_conversation_name( - self._conversation, self._application_generate_entity.query + conversation_id=self._conversation_id, query=self._application_generate_entity.query ) generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) @@ -173,12 +175,12 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc return ChatbotAppBlockingResponse( task_id=stream_response.task_id, data=ChatbotAppBlockingResponse.Data( - id=self._message.id, - mode=self._conversation.mode, - conversation_id=self._conversation.id, - message_id=self._message.id, + id=self._message_id, + mode=self._conversation_mode, + conversation_id=self._conversation_id, + message_id=self._message_id, answer=self._task_state.answer, - created_at=int(self._message.created_at.timestamp()), + created_at=self._message_created_at, **extras, ), ) @@ -196,9 +198,9 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc """ for stream_response in generator: yield ChatbotAppStreamResponse( - conversation_id=self._conversation.id, - message_id=self._message.id, - created_at=int(self._message.created_at.timestamp()), + conversation_id=self._conversation_id, + message_id=self._message_id, + created_at=self._message_created_at, stream_response=stream_response, ) @@ -216,7 +218,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc tts_publisher = None task_id = self._application_generate_entity.task_id tenant_id = self._application_generate_entity.app_config.tenant_id - features_dict = self._workflow.features_dict + features_dict = self._workflow_features_dict if ( features_dict.get("text_to_speech") @@ -268,7 +270,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc """ # init fake graph runtime state graph_runtime_state: Optional[GraphRuntimeState] = None - workflow_run: Optional[WorkflowRun] = None for queue_message in self._queue_manager.listen(): event = queue_message.event @@ -276,237 +277,303 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if isinstance(event, QueuePingEvent): yield self._ping_stream_response() elif isinstance(event, QueueErrorEvent): - err = self._handle_error(event, self._message) + with Session(db.engine) as session: + err = self._handle_error(event=event, session=session, message_id=self._message_id) + session.commit() yield self._error_to_stream_response(err) break elif isinstance(event, QueueWorkflowStartedEvent): # override graph runtime state graph_runtime_state = event.graph_runtime_state - # init workflow run - workflow_run = self._handle_workflow_run_start() + with Session(db.engine) as session: + # init workflow run + workflow_run = self._handle_workflow_run_start( + session=session, + workflow_id=self._workflow_id, + user_id=self._user_id, + created_by_role=self._created_by_role, + ) + self._workflow_run_id = workflow_run.id + message = self._get_message(session=session) + if not message: + raise ValueError(f"Message not found: {self._message_id}") + message.workflow_run_id = workflow_run.id + workflow_start_resp = self._workflow_start_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() - self._refetch_message() - 
self._message.workflow_run_id = workflow_run.id - - db.session.commit() - db.session.refresh(self._message) - db.session.close() - - yield self._workflow_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + yield workflow_start_resp elif isinstance( event, QueueNodeRetryEvent, ): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - workflow_node_execution = self._handle_workflow_node_execution_retried( - workflow_run=workflow_run, event=event - ) - response = self._workflow_node_retry_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + workflow_node_execution = self._handle_workflow_node_execution_retried( + session=session, workflow_run=workflow_run, event=event + ) + node_retry_resp = self._workflow_node_retry_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() - if response: - yield response + if node_retry_resp: + yield node_retry_resp elif isinstance(event, QueueNodeStartedEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - workflow_node_execution = self._handle_node_execution_start(workflow_run=workflow_run, event=event) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + workflow_node_execution = self._handle_node_execution_start( + session=session, workflow_run=workflow_run, event=event + ) - response_start = self._workflow_node_start_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) + node_start_resp = self._workflow_node_start_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() - if response_start: - yield response_start + if node_start_resp: + yield node_start_resp elif isinstance(event, QueueNodeSucceededEvent): - workflow_node_execution = self._handle_workflow_node_execution_success(event) - # Record files if it's an answer node or end node if event.node_type in [NodeType.ANSWER, NodeType.END]: self._recorded_files.extend(self._fetch_files_from_node_outputs(event.outputs or {})) - response_finish = self._workflow_node_finish_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) + with Session(db.engine) as session: + workflow_node_execution = self._handle_workflow_node_execution_success(session=session, event=event) - if response_finish: - yield response_finish + node_finish_resp = self._workflow_node_finish_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() + + if node_finish_resp: + yield node_finish_resp elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent): - workflow_node_execution = self._handle_workflow_node_execution_failed(event) + with Session(db.engine) as session: + 
workflow_node_execution = self._handle_workflow_node_execution_failed(session=session, event=event) - response_finish = self._workflow_node_finish_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - - if response_finish: - yield response_finish + node_finish_resp = self._workflow_node_finish_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() + if node_finish_resp: + yield node_finish_resp elif isinstance(event, QueueParallelBranchRunStartedEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - yield self._workflow_parallel_branch_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) - elif isinstance(event, QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent): - if not workflow_run: - raise ValueError("workflow run not initialized.") - - yield self._workflow_parallel_branch_finished_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) - elif isinstance(event, QueueIterationStartEvent): - if not workflow_run: - raise ValueError("workflow run not initialized.") - - yield self._workflow_iteration_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) - elif isinstance(event, QueueIterationNextEvent): - if not workflow_run: - raise ValueError("workflow run not initialized.") - - yield self._workflow_iteration_next_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) - elif isinstance(event, QueueIterationCompletedEvent): - if not workflow_run: - raise ValueError("workflow run not initialized.") - - yield self._workflow_iteration_completed_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) - elif isinstance(event, QueueWorkflowSucceededEvent): - if not workflow_run: - raise ValueError("workflow run not initialized.") - - if not graph_runtime_state: - raise ValueError("workflow run not initialized.") - - workflow_run = self._handle_workflow_run_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - conversation_id=self._conversation.id, - trace_manager=trace_manager, - ) - - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) - - self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE) - elif isinstance(event, QueueWorkflowPartialSuccessEvent): - if not workflow_run: - raise ValueError("workflow run not initialized.") - - if not graph_runtime_state: - raise ValueError("graph runtime state not initialized.") - - workflow_run = self._handle_workflow_run_partial_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - exceptions_count=event.exceptions_count, - conversation_id=None, - trace_manager=trace_manager, - ) - - yield self._workflow_finish_to_stream_response( - 
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) - - self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE) - elif isinstance(event, QueueWorkflowFailedEvent): - if not workflow_run: - raise ValueError("workflow run not initialized.") - - if not graph_runtime_state: - raise ValueError("graph runtime state not initialized.") - - workflow_run = self._handle_workflow_run_failed( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - status=WorkflowRunStatus.FAILED, - error=event.error, - conversation_id=self._conversation.id, - trace_manager=trace_manager, - exceptions_count=event.exceptions_count, - ) - - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) - - err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_run.error}")) - yield self._error_to_stream_response(self._handle_error(err_event, self._message)) - break - elif isinstance(event, QueueStopEvent): - if workflow_run and graph_runtime_state: - workflow_run = self._handle_workflow_run_failed( + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + parallel_start_resp = self._workflow_parallel_branch_start_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, + event=event, + ) + + yield parallel_start_resp + elif isinstance(event, QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + parallel_finish_resp = self._workflow_parallel_branch_finished_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield parallel_finish_resp + elif isinstance(event, QueueIterationStartEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + iter_start_resp = self._workflow_iteration_start_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield iter_start_resp + elif isinstance(event, QueueIterationNextEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + iter_next_resp = self._workflow_iteration_next_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield iter_next_resp + elif isinstance(event, QueueIterationCompletedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + iter_finish_resp = self._workflow_iteration_completed_to_stream_response( + session=session, + 
task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield iter_finish_resp + elif isinstance(event, QueueWorkflowSucceededEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + if not graph_runtime_state: + raise ValueError("workflow run not initialized.") + + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_success( + session=session, + workflow_run_id=self._workflow_run_id, start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, - status=WorkflowRunStatus.STOPPED, - error=event.get_stop_reason(), - conversation_id=self._conversation.id, + outputs=event.outputs, + conversation_id=self._conversation_id, trace_manager=trace_manager, ) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run ) + session.commit() - # Save message - self._save_message(graph_runtime_state=graph_runtime_state) + yield workflow_finish_resp + self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE) + elif isinstance(event, QueueWorkflowPartialSuccessEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + if not graph_runtime_state: + raise ValueError("graph runtime state not initialized.") + + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_partial_success( + session=session, + workflow_run_id=self._workflow_run_id, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + exceptions_count=event.exceptions_count, + conversation_id=None, + trace_manager=trace_manager, + ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + + yield workflow_finish_resp + self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE) + elif isinstance(event, QueueWorkflowFailedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + if not graph_runtime_state: + raise ValueError("graph runtime state not initialized.") + + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_failed( + session=session, + workflow_run_id=self._workflow_run_id, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + status=WorkflowRunStatus.FAILED, + error=event.error, + conversation_id=self._conversation_id, + trace_manager=trace_manager, + exceptions_count=event.exceptions_count, + ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_run.error}")) + err = self._handle_error(event=err_event, session=session, message_id=self._message_id) + session.commit() + + yield workflow_finish_resp + yield self._error_to_stream_response(err) + break + elif isinstance(event, QueueStopEvent): + if self._workflow_run_id and graph_runtime_state: 
+ with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_failed( + session=session, + workflow_run_id=self._workflow_run_id, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + status=WorkflowRunStatus.STOPPED, + error=event.get_stop_reason(), + conversation_id=self._conversation_id, + trace_manager=trace_manager, + ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + ) + # Save message + self._save_message(session=session, graph_runtime_state=graph_runtime_state) + session.commit() + + yield workflow_finish_resp yield self._message_end_to_stream_response() break elif isinstance(event, QueueRetrieverResourcesEvent): self._handle_retriever_resources(event) - self._refetch_message() - - self._message.message_metadata = ( - json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None - ) - - db.session.commit() - db.session.refresh(self._message) - db.session.close() + with Session(db.engine) as session: + message = self._get_message(session=session) + message.message_metadata = ( + json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None + ) + session.commit() elif isinstance(event, QueueAnnotationReplyEvent): self._handle_annotation_reply(event) - self._refetch_message() - - self._message.message_metadata = ( - json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None - ) - - db.session.commit() - db.session.refresh(self._message) - db.session.close() + with Session(db.engine) as session: + message = self._get_message(session=session) + message.message_metadata = ( + json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None + ) + session.commit() elif isinstance(event, QueueTextChunkEvent): delta_text = event.text if delta_text is None: @@ -523,7 +590,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc self._task_state.answer += delta_text yield self._message_to_stream_response( - answer=delta_text, message_id=self._message.id, from_variable_selector=event.from_variable_selector + answer=delta_text, message_id=self._message_id, from_variable_selector=event.from_variable_selector ) elif isinstance(event, QueueMessageReplaceEvent): # published by moderation @@ -538,7 +605,9 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc yield self._message_replace_to_stream_response(answer=output_moderation_answer) # Save message - self._save_message(graph_runtime_state=graph_runtime_state) + with Session(db.engine) as session: + self._save_message(session=session, graph_runtime_state=graph_runtime_state) + session.commit() yield self._message_end_to_stream_response() elif isinstance(event, QueueAgentLogEvent): @@ -553,54 +622,46 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if self._conversation_name_generate_thread: self._conversation_name_generate_thread.join() - def _save_message(self, graph_runtime_state: Optional[GraphRuntimeState] = None) -> None: - self._refetch_message() - - self._message.answer = self._task_state.answer - self._message.provider_response_latency = time.perf_counter() - self._start_at - self._message.message_metadata = ( + def _save_message(self, *, session: Session, graph_runtime_state: 
Optional[GraphRuntimeState] = None) -> None: + message = self._get_message(session=session) + message.answer = self._task_state.answer + message.provider_response_latency = time.perf_counter() - self._start_at + message.message_metadata = ( json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None ) message_files = [ MessageFile( - message_id=self._message.id, + message_id=message.id, type=file["type"], transfer_method=file["transfer_method"], url=file["remote_url"], belongs_to="assistant", upload_file_id=file["related_id"], created_by_role=CreatedByRole.ACCOUNT - if self._message.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} + if message.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else CreatedByRole.END_USER, - created_by=self._message.from_account_id or self._message.from_end_user_id or "", + created_by=message.from_account_id or message.from_end_user_id or "", ) for file in self._recorded_files ] - db.session.add_all(message_files) + session.add_all(message_files) if graph_runtime_state and graph_runtime_state.llm_usage: usage = graph_runtime_state.llm_usage - self._message.message_tokens = usage.prompt_tokens - self._message.message_unit_price = usage.prompt_unit_price - self._message.message_price_unit = usage.prompt_price_unit - self._message.answer_tokens = usage.completion_tokens - self._message.answer_unit_price = usage.completion_unit_price - self._message.answer_price_unit = usage.completion_price_unit - self._message.total_price = usage.total_price - self._message.currency = usage.currency - + message.message_tokens = usage.prompt_tokens + message.message_unit_price = usage.prompt_unit_price + message.message_price_unit = usage.prompt_price_unit + message.answer_tokens = usage.completion_tokens + message.answer_unit_price = usage.completion_unit_price + message.answer_price_unit = usage.completion_price_unit + message.total_price = usage.total_price + message.currency = usage.currency self._task_state.metadata["usage"] = jsonable_encoder(usage) else: self._task_state.metadata["usage"] = jsonable_encoder(LLMUsage.empty_usage()) - - db.session.commit() - message_was_created.send( - self._message, + message, application_generate_entity=self._application_generate_entity, - conversation=self._conversation, - is_first_message=self._application_generate_entity.conversation_id is None, - extras=self._application_generate_entity.extras, ) def _message_end_to_stream_response(self) -> MessageEndStreamResponse: @@ -617,7 +678,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc return MessageEndStreamResponse( task_id=self._application_generate_entity.task_id, - id=self._message.id, + id=self._message_id, files=self._recorded_files, metadata=extras.get("metadata", {}), ) @@ -645,11 +706,9 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc return False - def _refetch_message(self) -> None: - """ - Refetch message. 
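_save_message above now takes its session as a keyword-only argument, mutates the attached Message, adds the MessageFile rows with session.add_all, and leaves the commit to the caller. A hedged, self-contained sketch of that caller-owns-the-transaction convention, with a stand-in Note model rather than the real Message:

    from sqlalchemy import create_engine, select
    from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

    class Base(DeclarativeBase):
        pass

    class Note(Base):  # stand-in for models.model.Message
        __tablename__ = "notes"
        id: Mapped[int] = mapped_column(primary_key=True)
        answer: Mapped[str] = mapped_column(default="")

    def save_note(*, session: Session, note_id: int, answer: str) -> Note:
        # Like _save_message: load the row through the caller's session, mutate it,
        # and do NOT commit here.
        note = session.scalar(select(Note).where(Note.id == note_id))
        if not note:
            raise ValueError(f"Note not found: {note_id}")
        note.answer = answer
        return note

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add(Note(id=1))
        session.commit()

    with Session(engine) as session:
        save_note(session=session, note_id=1, answer="hello")
        session.commit()  # the caller decides when the unit of work is flushed
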
- :return: - """ - message = db.session.query(Message).filter(Message.id == self._message.id).first() - if message: - self._message = message + def _get_message(self, *, session: Session): + stmt = select(Message).where(Message.id == self._message_id) + message = session.scalar(stmt) + if not message: + raise ValueError(f"Message not found: {self._message_id}") + return message diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index c2e35faf89..4e3aa840ce 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -70,14 +70,13 @@ class MessageBasedAppGenerator(BaseAppGenerator): queue_manager=queue_manager, conversation=conversation, message=message, - user=user, stream=stream, ) try: return generate_task_pipeline.process() except ValueError as e: - if e.args[0] == "I/O operation on closed file.": # ignore this error + if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: logger.exception(f"Failed to handle response, conversation_id: {conversation.id}") diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 9a5f90f998..cbfb535848 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -325,7 +325,7 @@ class WorkflowAppGenerator(BaseAppGenerator): try: return generate_task_pipeline.process() except ValueError as e: - if e.args[0] == "I/O operation on closed file.": # ignore this error + if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: logger.exception( diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index af0698d701..df48a83316 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -3,6 +3,8 @@ import time from collections.abc import Generator from typing import Any, Optional, Union +from sqlalchemy.orm import Session + from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager @@ -51,6 +53,7 @@ from core.ops.ops_trace_manager import TraceQueueManager from core.workflow.enums import SystemVariableKey from extensions.ext_database import db from models.account import Account +from models.enums import CreatedByRole from models.model import EndUser from models.workflow import ( Workflow, @@ -69,8 +72,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa WorkflowAppGenerateTaskPipeline is a class that generate stream output and state management for Application. """ - _workflow: Workflow - _user: Union[Account, EndUser] _task_state: WorkflowTaskState _application_generate_entity: WorkflowAppGenerateEntity _workflow_system_variables: dict[SystemVariableKey, Any] @@ -84,44 +85,42 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa user: Union[Account, EndUser], stream: bool, ) -> None: - """ - Initialize GenerateTaskPipeline. 
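The two generator hunks above add a len(e.args) > 0 check before comparing e.args[0], since a ValueError raised without arguments would otherwise turn the comparison into an IndexError. A small reproduction of why the guard matters (GenerateTaskStoppedError is stubbed here purely for illustration):

    class GenerateTaskStoppedError(Exception):
        """Stub of the pipeline's stop signal, for illustration only."""

    def classify(e: ValueError) -> str:
        # Without the length guard, e.args[0] raises IndexError for ValueError().
        if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.":
            raise GenerateTaskStoppedError()
        return "unexpected"

    print(classify(ValueError("boom")))  # -> unexpected
    print(classify(ValueError()))        # -> unexpected, no IndexError
    try:
        classify(ValueError("I/O operation on closed file."))
    except GenerateTaskStoppedError:
        print("stopped")
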
- :param application_generate_entity: application generate entity - :param workflow: workflow - :param queue_manager: queue manager - :param user: user - :param stream: is streamed - """ - super().__init__(application_generate_entity, queue_manager, user, stream) + super().__init__( + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + stream=stream, + ) - if isinstance(self._user, EndUser): - user_id = self._user.session_id + if isinstance(user, EndUser): + self._user_id = user.id + user_session_id = user.session_id + self._created_by_role = CreatedByRole.END_USER + elif isinstance(user, Account): + self._user_id = user.id + user_session_id = user.id + self._created_by_role = CreatedByRole.ACCOUNT else: - user_id = self._user.id + raise ValueError(f"Invalid user type: {type(user)}") + + self._workflow_id = workflow.id + self._workflow_features_dict = workflow.features_dict - self._workflow = workflow self._workflow_system_variables = { SystemVariableKey.FILES: application_generate_entity.files, - SystemVariableKey.USER_ID: user_id, + SystemVariableKey.USER_ID: user_session_id, SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id, SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, } self._task_state = WorkflowTaskState() - self._wip_workflow_node_executions = {} - self._wip_workflow_agent_logs = {} - self.total_tokens: int = 0 + self._workflow_run_id = "" def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]: """ Process generate task pipeline. :return: """ - db.session.refresh(self._workflow) - db.session.refresh(self._user) - db.session.close() - generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) if self._stream: return self._to_stream_response(generator) @@ -188,7 +187,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa tts_publisher = None task_id = self._application_generate_entity.task_id tenant_id = self._application_generate_entity.app_config.tenant_id - features_dict = self._workflow.features_dict + features_dict = self._workflow_features_dict if ( features_dict.get("text_to_speech") @@ -237,7 +236,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa :return: """ graph_runtime_state = None - workflow_run = None for queue_message in self._queue_manager.listen(): event = queue_message.event @@ -245,180 +243,261 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa if isinstance(event, QueuePingEvent): yield self._ping_stream_response() elif isinstance(event, QueueErrorEvent): - err = self._handle_error(event) + err = self._handle_error(event=event) yield self._error_to_stream_response(err) break elif isinstance(event, QueueWorkflowStartedEvent): # override graph runtime state graph_runtime_state = event.graph_runtime_state - # init workflow run - workflow_run = self._handle_workflow_run_start() - yield self._workflow_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + with Session(db.engine) as session: + # init workflow run + workflow_run = self._handle_workflow_run_start( + session=session, + workflow_id=self._workflow_id, + user_id=self._user_id, + created_by_role=self._created_by_role, + ) + self._workflow_run_id = workflow_run.id + start_resp = self._workflow_start_to_stream_response( 
+ session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + + yield start_resp elif isinstance( event, QueueNodeRetryEvent, ): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - workflow_node_execution = self._handle_workflow_node_execution_retried( - workflow_run=workflow_run, event=event - ) - - response = self._workflow_node_retry_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + workflow_node_execution = self._handle_workflow_node_execution_retried( + session=session, workflow_run=workflow_run, event=event + ) + response = self._workflow_node_retry_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() if response: yield response elif isinstance(event, QueueNodeStartedEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - workflow_node_execution = self._handle_node_execution_start(workflow_run=workflow_run, event=event) - - node_start_response = self._workflow_node_start_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + workflow_node_execution = self._handle_node_execution_start( + session=session, workflow_run=workflow_run, event=event + ) + node_start_response = self._workflow_node_start_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() if node_start_response: yield node_start_response elif isinstance(event, QueueNodeSucceededEvent): - workflow_node_execution = self._handle_workflow_node_execution_success(event) - - node_success_response = self._workflow_node_finish_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) + with Session(db.engine) as session: + workflow_node_execution = self._handle_workflow_node_execution_success(session=session, event=event) + node_success_response = self._workflow_node_finish_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() if node_success_response: yield node_success_response elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent): - workflow_node_execution = self._handle_workflow_node_execution_failed(event) + with Session(db.engine) as session: + workflow_node_execution = self._handle_workflow_node_execution_failed( + session=session, + event=event, + ) + node_failed_response = self._workflow_node_finish_to_stream_response( + session=session, + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() - node_failed_response = self._workflow_node_finish_to_stream_response( - event=event, - 
task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) if node_failed_response: yield node_failed_response elif isinstance(event, QueueParallelBranchRunStartedEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - yield self._workflow_parallel_branch_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + parallel_start_resp = self._workflow_parallel_branch_start_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield parallel_start_resp + elif isinstance(event, QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - yield self._workflow_parallel_branch_finished_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + parallel_finish_resp = self._workflow_parallel_branch_finished_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield parallel_finish_resp + elif isinstance(event, QueueIterationStartEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - yield self._workflow_iteration_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + iter_start_resp = self._workflow_iteration_start_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield iter_start_resp + elif isinstance(event, QueueIterationNextEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - yield self._workflow_iteration_next_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + iter_next_resp = self._workflow_iteration_next_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, + ) + + yield iter_next_resp + elif isinstance(event, QueueIterationCompletedEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - yield self._workflow_iteration_completed_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event - ) + with Session(db.engine) as session: + workflow_run = self._get_workflow_run(session=session, workflow_run_id=self._workflow_run_id) + iter_finish_resp = self._workflow_iteration_completed_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + event=event, 
+ ) + + yield iter_finish_resp + elif isinstance(event, QueueWorkflowSucceededEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - conversation_id=None, - trace_manager=trace_manager, - ) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_success( + session=session, + workflow_run_id=self._workflow_run_id, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + conversation_id=None, + trace_manager=trace_manager, + ) - # save workflow app log - self._save_workflow_app_log(workflow_run) + # save workflow app log + self._save_workflow_app_log(session=session, workflow_run=workflow_run) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + ) + session.commit() + + yield workflow_finish_resp elif isinstance(event, QueueWorkflowPartialSuccessEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_partial_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - exceptions_count=event.exceptions_count, - conversation_id=None, - trace_manager=trace_manager, - ) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_partial_success( + session=session, + workflow_run_id=self._workflow_run_id, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + exceptions_count=event.exceptions_count, + conversation_id=None, + trace_manager=trace_manager, + ) - # save workflow app log - self._save_workflow_app_log(workflow_run) + # save workflow app log + self._save_workflow_app_log(session=session, workflow_run=workflow_run) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + + yield workflow_finish_resp elif isinstance(event, QueueWorkflowFailedEvent | QueueStopEvent): - if not workflow_run: + if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_failed( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - status=WorkflowRunStatus.FAILED - if 
isinstance(event, QueueWorkflowFailedEvent) - else WorkflowRunStatus.STOPPED, - error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(), - conversation_id=None, - trace_manager=trace_manager, - exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, - ) - # save workflow app log - self._save_workflow_app_log(workflow_run) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_failed( + session=session, + workflow_run_id=self._workflow_run_id, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + status=WorkflowRunStatus.FAILED + if isinstance(event, QueueWorkflowFailedEvent) + else WorkflowRunStatus.STOPPED, + error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(), + conversation_id=None, + trace_manager=trace_manager, + exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, + ) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + # save workflow app log + self._save_workflow_app_log(session=session, workflow_run=workflow_run) + + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + + yield workflow_finish_resp elif isinstance(event, QueueTextChunkEvent): delta_text = event.text if delta_text is None: @@ -440,7 +519,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa if tts_publisher: tts_publisher.publish(None) - def _save_workflow_app_log(self, workflow_run: WorkflowRun) -> None: + def _save_workflow_app_log(self, *, session: Session, workflow_run: WorkflowRun) -> None: """ Save workflow app log. 
:return: @@ -462,12 +541,10 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa workflow_app_log.workflow_id = workflow_run.workflow_id workflow_app_log.workflow_run_id = workflow_run.id workflow_app_log.created_from = created_from.value - workflow_app_log.created_by_role = "account" if isinstance(self._user, Account) else "end_user" - workflow_app_log.created_by = self._user.id + workflow_app_log.created_by_role = self._created_by_role + workflow_app_log.created_by = self._user_id - db.session.add(workflow_app_log) - db.session.commit() - db.session.close() + session.add(workflow_app_log) def _text_chunk_to_stream_response( self, text: str, from_variable_selector: Optional[list[str]] = None diff --git a/api/core/app/task_pipeline/based_generate_task_pipeline.py b/api/core/app/task_pipeline/based_generate_task_pipeline.py index 03a81353d0..e363a7f642 100644 --- a/api/core/app/task_pipeline/based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/based_generate_task_pipeline.py @@ -1,6 +1,9 @@ import logging import time -from typing import Optional, Union +from typing import Optional + +from sqlalchemy import select +from sqlalchemy.orm import Session from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import ( @@ -17,9 +20,7 @@ from core.app.entities.task_entities import ( from core.errors.error import QuotaExceededError from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError from core.moderation.output_moderation import ModerationRule, OutputModeration -from extensions.ext_database import db -from models.account import Account -from models.model import EndUser, Message +from models.model import Message logger = logging.getLogger(__name__) @@ -36,7 +37,6 @@ class BasedGenerateTaskPipeline: self, application_generate_entity: AppGenerateEntity, queue_manager: AppQueueManager, - user: Union[Account, EndUser], stream: bool, ) -> None: """ @@ -48,18 +48,11 @@ class BasedGenerateTaskPipeline: """ self._application_generate_entity = application_generate_entity self._queue_manager = queue_manager - self._user = user self._start_at = time.perf_counter() self._output_moderation_handler = self._init_output_moderation() self._stream = stream - def _handle_error(self, event: QueueErrorEvent, message: Optional[Message] = None): - """ - Handle error event. 
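Rather than holding on to the Account/EndUser ORM object (which detaches once its session closes), the constructor captures plain values that _save_workflow_app_log later writes directly: the user id, the session id used for system variables, and a CreatedByRole. A sketch of that mapping, with plain dataclasses standing in for the real models:

    from dataclasses import dataclass
    from enum import Enum

    class CreatedByRole(str, Enum):
        ACCOUNT = "account"
        END_USER = "end_user"

    @dataclass
    class Account:  # stand-in for models.account.Account
        id: str

    @dataclass
    class EndUser:  # stand-in for models.model.EndUser
        id: str
        session_id: str

    def capture_user(user: Account | EndUser) -> tuple[str, str, CreatedByRole]:
        # Returns (user_id, user_session_id, created_by_role), mirroring the branching
        # in the pipeline constructors above.
        if isinstance(user, EndUser):
            return user.id, user.session_id, CreatedByRole.END_USER
        if isinstance(user, Account):
            return user.id, user.id, CreatedByRole.ACCOUNT
        raise ValueError(f"Invalid user type: {type(user)}")

    print(capture_user(EndUser(id="u-1", session_id="s-1")))
    print(capture_user(Account(id="a-1")))
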
- :param event: event - :param message: message - :return: - """ + def _handle_error(self, *, event: QueueErrorEvent, session: Session | None = None, message_id: str = ""): logger.debug("error: %s", event.error) e = event.error err: Exception @@ -71,16 +64,17 @@ class BasedGenerateTaskPipeline: else: err = Exception(e.description if getattr(e, "description", None) is not None else str(e)) - if message: - refetch_message = db.session.query(Message).filter(Message.id == message.id).first() + if not message_id or not session: + return err - if refetch_message: - err_desc = self._error_to_desc(err) - refetch_message.status = "error" - refetch_message.error = err_desc - - db.session.commit() + stmt = select(Message).where(Message.id == message_id) + message = session.scalar(stmt) + if not message: + return err + err_desc = self._error_to_desc(err) + message.status = "error" + message.error = err_desc return err def _error_to_desc(self, e: Exception) -> str: diff --git a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py index b9f8e7ca56..c84f8ba3e4 100644 --- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py @@ -5,6 +5,9 @@ from collections.abc import Generator from threading import Thread from typing import Optional, Union, cast +from sqlalchemy import select +from sqlalchemy.orm import Session + from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom @@ -55,8 +58,7 @@ from core.prompt.utils.prompt_message_util import PromptMessageUtil from core.prompt.utils.prompt_template_parser import PromptTemplateParser from events.message_event import message_was_created from extensions.ext_database import db -from models.account import Account -from models.model import AppMode, Conversation, EndUser, Message, MessageAgentThought +from models.model import AppMode, Conversation, Message, MessageAgentThought logger = logging.getLogger(__name__) @@ -77,23 +79,21 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan queue_manager: AppQueueManager, conversation: Conversation, message: Message, - user: Union[Account, EndUser], stream: bool, ) -> None: - """ - Initialize GenerateTaskPipeline. 
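_handle_error above keeps returning the normalized exception in every case, and only marks the message row as errored when the caller supplied both a session and a message id; the caller then commits. A deliberately database-free sketch of that contract, with FakeMessage standing in for the real Message row and a simplified normalization step:

    from typing import Optional

    class FakeMessage:
        # Illustrative stand-in for models.model.Message.
        def __init__(self) -> None:
            self.status = "normal"
            self.error: Optional[str] = None

    def handle_error(error: Exception, *, message: Optional[FakeMessage] = None) -> Exception:
        # Simplified normalization; the real method special-cases InvokeError and quota errors.
        err = error if isinstance(error, ValueError) else Exception(str(error))
        if message is None:
            # No persistence context provided: just hand the error back to the stream.
            return err
        message.status = "error"
        message.error = str(err)
        return err

    msg = FakeMessage()
    print(handle_error(ValueError("bad input"), message=msg), msg.status)  # -> bad input error
    print(handle_error(RuntimeError("boom")))  # returned, nothing to update
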
- :param application_generate_entity: application generate entity - :param queue_manager: queue manager - :param conversation: conversation - :param message: message - :param user: user - :param stream: stream - """ - super().__init__(application_generate_entity, queue_manager, user, stream) + super().__init__( + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + stream=stream, + ) self._model_config = application_generate_entity.model_conf self._app_config = application_generate_entity.app_config - self._conversation = conversation - self._message = message + + self._conversation_id = conversation.id + self._conversation_mode = conversation.mode + + self._message_id = message.id + self._message_created_at = int(message.created_at.timestamp()) self._task_state = EasyUITaskState( llm_result=LLMResult( @@ -113,18 +113,10 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan CompletionAppBlockingResponse, Generator[Union[ChatbotAppStreamResponse, CompletionAppStreamResponse], None, None], ]: - """ - Process generate task pipeline. - :return: - """ - db.session.refresh(self._conversation) - db.session.refresh(self._message) - db.session.close() - if self._application_generate_entity.app_config.app_mode != AppMode.COMPLETION: # start generate conversation name thread self._conversation_name_generate_thread = self._generate_conversation_name( - self._conversation, self._application_generate_entity.query or "" + conversation_id=self._conversation_id, query=self._application_generate_entity.query or "" ) generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) @@ -148,15 +140,15 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan if self._task_state.metadata: extras["metadata"] = self._task_state.metadata response: Union[ChatbotAppBlockingResponse, CompletionAppBlockingResponse] - if self._conversation.mode == AppMode.COMPLETION.value: + if self._conversation_mode == AppMode.COMPLETION.value: response = CompletionAppBlockingResponse( task_id=self._application_generate_entity.task_id, data=CompletionAppBlockingResponse.Data( - id=self._message.id, - mode=self._conversation.mode, - message_id=self._message.id, + id=self._message_id, + mode=self._conversation_mode, + message_id=self._message_id, answer=cast(str, self._task_state.llm_result.message.content), - created_at=int(self._message.created_at.timestamp()), + created_at=self._message_created_at, **extras, ), ) @@ -164,12 +156,12 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan response = ChatbotAppBlockingResponse( task_id=self._application_generate_entity.task_id, data=ChatbotAppBlockingResponse.Data( - id=self._message.id, - mode=self._conversation.mode, - conversation_id=self._conversation.id, - message_id=self._message.id, + id=self._message_id, + mode=self._conversation_mode, + conversation_id=self._conversation_id, + message_id=self._message_id, answer=cast(str, self._task_state.llm_result.message.content), - created_at=int(self._message.created_at.timestamp()), + created_at=self._message_created_at, **extras, ), ) @@ -190,15 +182,15 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan for stream_response in generator: if isinstance(self._application_generate_entity, CompletionAppGenerateEntity): yield CompletionAppStreamResponse( - message_id=self._message.id, - created_at=int(self._message.created_at.timestamp()), + 
message_id=self._message_id, + created_at=self._message_created_at, stream_response=stream_response, ) else: yield ChatbotAppStreamResponse( - conversation_id=self._conversation.id, - message_id=self._message.id, - created_at=int(self._message.created_at.timestamp()), + conversation_id=self._conversation_id, + message_id=self._message_id, + created_at=self._message_created_at, stream_response=stream_response, ) @@ -265,7 +257,9 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan event = message.event if isinstance(event, QueueErrorEvent): - err = self._handle_error(event, self._message) + with Session(db.engine) as session: + err = self._handle_error(event=event, session=session, message_id=self._message_id) + session.commit() yield self._error_to_stream_response(err) break elif isinstance(event, QueueStopEvent | QueueMessageEndEvent): @@ -283,10 +277,12 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan self._task_state.llm_result.message.content = output_moderation_answer yield self._message_replace_to_stream_response(answer=output_moderation_answer) - # Save message - self._save_message(trace_manager) - - yield self._message_end_to_stream_response() + with Session(db.engine) as session: + # Save message + self._save_message(session=session, trace_manager=trace_manager) + session.commit() + message_end_resp = self._message_end_to_stream_response() + yield message_end_resp elif isinstance(event, QueueRetrieverResourcesEvent): self._handle_retriever_resources(event) elif isinstance(event, QueueAnnotationReplyEvent): @@ -320,9 +316,15 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan self._task_state.llm_result.message.content = current_content if isinstance(event, QueueLLMChunkEvent): - yield self._message_to_stream_response(cast(str, delta_text), self._message.id) + yield self._message_to_stream_response( + answer=cast(str, delta_text), + message_id=self._message_id, + ) else: - yield self._agent_message_to_stream_response(cast(str, delta_text), self._message.id) + yield self._agent_message_to_stream_response( + answer=cast(str, delta_text), + message_id=self._message_id, + ) elif isinstance(event, QueueMessageReplaceEvent): yield self._message_replace_to_stream_response(answer=event.text) elif isinstance(event, QueuePingEvent): @@ -334,7 +336,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan if self._conversation_name_generate_thread: self._conversation_name_generate_thread.join() - def _save_message(self, trace_manager: Optional[TraceQueueManager] = None) -> None: + def _save_message(self, *, session: Session, trace_manager: Optional[TraceQueueManager] = None) -> None: """ Save message. 
:return: @@ -342,53 +344,46 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan llm_result = self._task_state.llm_result usage = llm_result.usage - message = db.session.query(Message).filter(Message.id == self._message.id).first() + message_stmt = select(Message).where(Message.id == self._message_id) + message = session.scalar(message_stmt) if not message: - raise Exception(f"Message {self._message.id} not found") - self._message = message - conversation = db.session.query(Conversation).filter(Conversation.id == self._conversation.id).first() + raise ValueError(f"message {self._message_id} not found") + conversation_stmt = select(Conversation).where(Conversation.id == self._conversation_id) + conversation = session.scalar(conversation_stmt) if not conversation: - raise Exception(f"Conversation {self._conversation.id} not found") - self._conversation = conversation + raise ValueError(f"Conversation {self._conversation_id} not found") - self._message.message = PromptMessageUtil.prompt_messages_to_prompt_for_saving( + message.message = PromptMessageUtil.prompt_messages_to_prompt_for_saving( self._model_config.mode, self._task_state.llm_result.prompt_messages ) - self._message.message_tokens = usage.prompt_tokens - self._message.message_unit_price = usage.prompt_unit_price - self._message.message_price_unit = usage.prompt_price_unit - self._message.answer = ( + message.message_tokens = usage.prompt_tokens + message.message_unit_price = usage.prompt_unit_price + message.message_price_unit = usage.prompt_price_unit + message.answer = ( PromptTemplateParser.remove_template_variables(cast(str, llm_result.message.content).strip()) if llm_result.message.content else "" ) - self._message.answer_tokens = usage.completion_tokens - self._message.answer_unit_price = usage.completion_unit_price - self._message.answer_price_unit = usage.completion_price_unit - self._message.provider_response_latency = time.perf_counter() - self._start_at - self._message.total_price = usage.total_price - self._message.currency = usage.currency - self._message.message_metadata = ( + message.answer_tokens = usage.completion_tokens + message.answer_unit_price = usage.completion_unit_price + message.answer_price_unit = usage.completion_price_unit + message.provider_response_latency = time.perf_counter() - self._start_at + message.total_price = usage.total_price + message.currency = usage.currency + message.message_metadata = ( json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None ) - db.session.commit() - if trace_manager: trace_manager.add_trace_task( TraceTask( - TraceTaskName.MESSAGE_TRACE, conversation_id=self._conversation.id, message_id=self._message.id + TraceTaskName.MESSAGE_TRACE, conversation_id=self._conversation_id, message_id=self._message_id ) ) message_was_created.send( - self._message, + message, application_generate_entity=self._application_generate_entity, - conversation=self._conversation, - is_first_message=self._application_generate_entity.app_config.app_mode in {AppMode.AGENT_CHAT, AppMode.CHAT} - and hasattr(self._application_generate_entity, "conversation_id") - and self._application_generate_entity.conversation_id is None, - extras=self._application_generate_entity.extras, ) def _handle_stop(self, event: QueueStopEvent) -> None: @@ -434,7 +429,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan return MessageEndStreamResponse( task_id=self._application_generate_entity.task_id, - 
id=self._message.id, + id=self._message_id, metadata=extras.get("metadata", {}), ) diff --git a/api/core/app/task_pipeline/message_cycle_manage.py b/api/core/app/task_pipeline/message_cycle_manage.py index 007543f6d0..15f2c25c66 100644 --- a/api/core/app/task_pipeline/message_cycle_manage.py +++ b/api/core/app/task_pipeline/message_cycle_manage.py @@ -36,7 +36,7 @@ class MessageCycleManage: ] _task_state: Union[EasyUITaskState, WorkflowTaskState] - def _generate_conversation_name(self, conversation: Conversation, query: str) -> Optional[Thread]: + def _generate_conversation_name(self, *, conversation_id: str, query: str) -> Optional[Thread]: """ Generate conversation name. :param conversation: conversation @@ -56,7 +56,7 @@ class MessageCycleManage: target=self._generate_conversation_name_worker, kwargs={ "flask_app": current_app._get_current_object(), # type: ignore - "conversation_id": conversation.id, + "conversation_id": conversation_id, "query": query, }, ) diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py index 2d958b460d..1a2e67f7e7 100644 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ b/api/core/app/task_pipeline/workflow_cycle_manage.py @@ -5,6 +5,7 @@ from datetime import UTC, datetime from typing import Any, Optional, Union, cast from uuid import uuid4 +from sqlalchemy import func, select from sqlalchemy.orm import Session from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity @@ -47,7 +48,6 @@ from core.workflow.enums import SystemVariableKey from core.workflow.nodes import NodeType from core.workflow.nodes.tool.entities import ToolNodeData from core.workflow.workflow_entry import WorkflowEntry -from extensions.ext_database import db from models.account import Account from models.enums import CreatedByRole, WorkflowRunTriggeredFrom from models.model import EndUser @@ -65,28 +65,33 @@ from .exc import WorkflowNodeExecutionNotFoundError, WorkflowRunNotFoundError class WorkflowCycleManage: _application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity] - _workflow: Workflow - _user: Union[Account, EndUser] _task_state: WorkflowTaskState _workflow_system_variables: dict[SystemVariableKey, Any] - _wip_workflow_node_executions: dict[str, WorkflowNodeExecution] - _wip_workflow_agent_logs: dict[str, list[AgentLogStreamResponse.Data]] - def _handle_workflow_run_start(self) -> WorkflowRun: - max_sequence = ( - db.session.query(db.func.max(WorkflowRun.sequence_number)) - .filter(WorkflowRun.tenant_id == self._workflow.tenant_id) - .filter(WorkflowRun.app_id == self._workflow.app_id) - .scalar() - or 0 + def _handle_workflow_run_start( + self, + *, + session: Session, + workflow_id: str, + user_id: str, + created_by_role: CreatedByRole, + ) -> WorkflowRun: + workflow_stmt = select(Workflow).where(Workflow.id == workflow_id) + workflow = session.scalar(workflow_stmt) + if not workflow: + raise ValueError(f"Workflow not found: {workflow_id}") + + max_sequence_stmt = select(func.max(WorkflowRun.sequence_number)).where( + WorkflowRun.tenant_id == workflow.tenant_id, + WorkflowRun.app_id == workflow.app_id, ) + max_sequence = session.scalar(max_sequence_stmt) or 0 new_sequence_number = max_sequence + 1 inputs = {**self._application_generate_entity.inputs} for key, value in (self._workflow_system_variables or {}).items(): if key.value == "conversation": continue - inputs[f"sys.{key.value}"] = value triggered_from = ( @@ 
-99,34 +104,33 @@ class WorkflowCycleManage: inputs = dict(WorkflowEntry.handle_special_values(inputs) or {}) # init workflow run - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = WorkflowRun() - system_id = self._workflow_system_variables[SystemVariableKey.WORKFLOW_RUN_ID] - workflow_run.id = system_id or str(uuid4()) - workflow_run.tenant_id = self._workflow.tenant_id - workflow_run.app_id = self._workflow.app_id - workflow_run.sequence_number = new_sequence_number - workflow_run.workflow_id = self._workflow.id - workflow_run.type = self._workflow.type - workflow_run.triggered_from = triggered_from.value - workflow_run.version = self._workflow.version - workflow_run.graph = self._workflow.graph - workflow_run.inputs = json.dumps(inputs) - workflow_run.status = WorkflowRunStatus.RUNNING - workflow_run.created_by_role = ( - CreatedByRole.ACCOUNT if isinstance(self._user, Account) else CreatedByRole.END_USER - ) - workflow_run.created_by = self._user.id - workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None) + workflow_run_id = str(self._workflow_system_variables.get(SystemVariableKey.WORKFLOW_RUN_ID, uuid4())) - session.add(workflow_run) - session.commit() + workflow_run = WorkflowRun() + workflow_run.id = workflow_run_id + workflow_run.tenant_id = workflow.tenant_id + workflow_run.app_id = workflow.app_id + workflow_run.sequence_number = new_sequence_number + workflow_run.workflow_id = workflow.id + workflow_run.type = workflow.type + workflow_run.triggered_from = triggered_from.value + workflow_run.version = workflow.version + workflow_run.graph = workflow.graph + workflow_run.inputs = json.dumps(inputs) + workflow_run.status = WorkflowRunStatus.RUNNING + workflow_run.created_by_role = created_by_role + workflow_run.created_by = user_id + workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None) + + session.add(workflow_run) return workflow_run def _handle_workflow_run_success( self, - workflow_run: WorkflowRun, + *, + session: Session, + workflow_run_id: str, start_at: float, total_tokens: int, total_steps: int, @@ -144,7 +148,7 @@ class WorkflowCycleManage: :param conversation_id: conversation id :return: """ - workflow_run = self._refetch_workflow_run(workflow_run.id) + workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) outputs = WorkflowEntry.handle_special_values(outputs) @@ -155,9 +159,6 @@ class WorkflowCycleManage: workflow_run.total_steps = total_steps workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) - db.session.commit() - db.session.refresh(workflow_run) - if trace_manager: trace_manager.add_trace_task( TraceTask( @@ -168,13 +169,13 @@ class WorkflowCycleManage: ) ) - db.session.close() - return workflow_run def _handle_workflow_run_partial_success( self, - workflow_run: WorkflowRun, + *, + session: Session, + workflow_run_id: str, start_at: float, total_tokens: int, total_steps: int, @@ -183,18 +184,7 @@ class WorkflowCycleManage: conversation_id: Optional[str] = None, trace_manager: Optional[TraceQueueManager] = None, ) -> WorkflowRun: - """ - Workflow run success - :param workflow_run: workflow run - :param start_at: start time - :param total_tokens: total tokens - :param total_steps: total steps - :param outputs: outputs - :param conversation_id: conversation id - :return: - """ - workflow_run = self._refetch_workflow_run(workflow_run.id) - + workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) outputs = 
WorkflowEntry.handle_special_values(dict(outputs) if outputs else None) workflow_run.status = WorkflowRunStatus.PARTIAL_SUCCESSED.value @@ -204,8 +194,6 @@ class WorkflowCycleManage: workflow_run.total_steps = total_steps workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) workflow_run.exceptions_count = exceptions_count - db.session.commit() - db.session.refresh(workflow_run) if trace_manager: trace_manager.add_trace_task( @@ -217,13 +205,13 @@ class WorkflowCycleManage: ) ) - db.session.close() - return workflow_run def _handle_workflow_run_failed( self, - workflow_run: WorkflowRun, + *, + session: Session, + workflow_run_id: str, start_at: float, total_tokens: int, total_steps: int, @@ -243,7 +231,7 @@ class WorkflowCycleManage: :param error: error message :return: """ - workflow_run = self._refetch_workflow_run(workflow_run.id) + workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) workflow_run.status = status.value workflow_run.error = error @@ -252,21 +240,18 @@ class WorkflowCycleManage: workflow_run.total_steps = total_steps workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) workflow_run.exceptions_count = exceptions_count - db.session.commit() - running_workflow_node_executions = ( - db.session.query(WorkflowNodeExecution) - .filter( - WorkflowNodeExecution.tenant_id == workflow_run.tenant_id, - WorkflowNodeExecution.app_id == workflow_run.app_id, - WorkflowNodeExecution.workflow_id == workflow_run.workflow_id, - WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value, - WorkflowNodeExecution.workflow_run_id == workflow_run.id, - WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING.value, - ) - .all() + stmt = select(WorkflowNodeExecution).where( + WorkflowNodeExecution.tenant_id == workflow_run.tenant_id, + WorkflowNodeExecution.app_id == workflow_run.app_id, + WorkflowNodeExecution.workflow_id == workflow_run.workflow_id, + WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value, + WorkflowNodeExecution.workflow_run_id == workflow_run.id, + WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING.value, ) + running_workflow_node_executions = session.scalars(stmt).all() + for workflow_node_execution in running_workflow_node_executions: workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value workflow_node_execution.error = error @@ -274,13 +259,6 @@ class WorkflowCycleManage: workflow_node_execution.elapsed_time = ( workflow_node_execution.finished_at - workflow_node_execution.created_at ).total_seconds() - db.session.commit() - - db.session.close() - - # with Session(db.engine, expire_on_commit=False) as session: - # session.add(workflow_run) - # session.refresh(workflow_run) if trace_manager: trace_manager.add_trace_task( @@ -295,79 +273,49 @@ class WorkflowCycleManage: return workflow_run def _handle_node_execution_start( - self, workflow_run: WorkflowRun, event: QueueNodeStartedEvent + self, *, session: Session, workflow_run: WorkflowRun, event: QueueNodeStartedEvent ) -> WorkflowNodeExecution: - # init workflow node execution + workflow_node_execution = WorkflowNodeExecution() + workflow_node_execution.id = event.node_execution_id + workflow_node_execution.tenant_id = workflow_run.tenant_id + workflow_node_execution.app_id = workflow_run.app_id + workflow_node_execution.workflow_id = workflow_run.workflow_id + workflow_node_execution.triggered_from = 
WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value + workflow_node_execution.workflow_run_id = workflow_run.id + workflow_node_execution.predecessor_node_id = event.predecessor_node_id + workflow_node_execution.index = event.node_run_index + workflow_node_execution.node_execution_id = event.node_execution_id + workflow_node_execution.node_id = event.node_id + workflow_node_execution.node_type = event.node_type.value + workflow_node_execution.title = event.node_data.title + workflow_node_execution.status = WorkflowNodeExecutionStatus.RUNNING.value + workflow_node_execution.created_by_role = workflow_run.created_by_role + workflow_node_execution.created_by = workflow_run.created_by + workflow_node_execution.execution_metadata = json.dumps( + { + NodeRunMetadataKey.PARALLEL_MODE_RUN_ID: event.parallel_mode_run_id, + NodeRunMetadataKey.ITERATION_ID: event.in_iteration_id, + } + ) + workflow_node_execution.created_at = datetime.now(UTC).replace(tzinfo=None) - with Session(db.engine, expire_on_commit=False) as session: - workflow_node_execution = WorkflowNodeExecution() - workflow_node_execution.tenant_id = workflow_run.tenant_id - workflow_node_execution.app_id = workflow_run.app_id - workflow_node_execution.workflow_id = workflow_run.workflow_id - workflow_node_execution.triggered_from = WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value - workflow_node_execution.workflow_run_id = workflow_run.id - workflow_node_execution.predecessor_node_id = event.predecessor_node_id - workflow_node_execution.index = event.node_run_index - workflow_node_execution.node_execution_id = event.node_execution_id - workflow_node_execution.node_id = event.node_id - workflow_node_execution.node_type = event.node_type.value - workflow_node_execution.title = event.node_data.title - workflow_node_execution.status = WorkflowNodeExecutionStatus.RUNNING.value - workflow_node_execution.created_by_role = workflow_run.created_by_role - workflow_node_execution.created_by = workflow_run.created_by - workflow_node_execution.execution_metadata = json.dumps( - { - NodeRunMetadataKey.PARALLEL_MODE_RUN_ID: event.parallel_mode_run_id, - NodeRunMetadataKey.ITERATION_ID: event.in_iteration_id, - } - ) - workflow_node_execution.created_at = datetime.now(UTC).replace(tzinfo=None) - - session.add(workflow_node_execution) - session.commit() - session.refresh(workflow_node_execution) - - self._wip_workflow_node_executions[workflow_node_execution.node_execution_id] = workflow_node_execution + session.add(workflow_node_execution) return workflow_node_execution - def _handle_workflow_node_execution_success(self, event: QueueNodeSucceededEvent) -> WorkflowNodeExecution: - """ - Workflow node execution success - :param event: queue node succeeded event - :return: - """ - workflow_node_execution = self._refetch_workflow_node_execution(event.node_execution_id) - + def _handle_workflow_node_execution_success( + self, *, session: Session, event: QueueNodeSucceededEvent + ) -> WorkflowNodeExecution: + workflow_node_execution = self._get_workflow_node_execution( + session=session, node_execution_id=event.node_execution_id + ) inputs = WorkflowEntry.handle_special_values(event.inputs) process_data = WorkflowEntry.handle_special_values(event.process_data) outputs = WorkflowEntry.handle_special_values(event.outputs) execution_metadata_dict = dict(event.execution_metadata or {}) - if self._wip_workflow_agent_logs.get(workflow_node_execution.id): - if not execution_metadata_dict: - execution_metadata_dict = {} - - 
execution_metadata_dict[NodeRunMetadataKey.AGENT_LOG] = self._wip_workflow_agent_logs.get( - workflow_node_execution.id, [] - ) - execution_metadata = json.dumps(jsonable_encoder(execution_metadata_dict)) if execution_metadata_dict else None finished_at = datetime.now(UTC).replace(tzinfo=None) elapsed_time = (finished_at - event.start_at).total_seconds() - db.session.query(WorkflowNodeExecution).filter(WorkflowNodeExecution.id == workflow_node_execution.id).update( - { - WorkflowNodeExecution.status: WorkflowNodeExecutionStatus.SUCCEEDED.value, - WorkflowNodeExecution.inputs: json.dumps(inputs) if inputs else None, - WorkflowNodeExecution.process_data: json.dumps(process_data) if process_data else None, - WorkflowNodeExecution.outputs: json.dumps(outputs) if outputs else None, - WorkflowNodeExecution.execution_metadata: execution_metadata, - WorkflowNodeExecution.finished_at: finished_at, - WorkflowNodeExecution.elapsed_time: elapsed_time, - } - ) - - db.session.commit() - db.session.close() process_data = WorkflowEntry.handle_special_values(event.process_data) workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value @@ -378,54 +326,31 @@ class WorkflowCycleManage: workflow_node_execution.finished_at = finished_at workflow_node_execution.elapsed_time = elapsed_time - self._wip_workflow_node_executions.pop(workflow_node_execution.node_execution_id) - return workflow_node_execution def _handle_workflow_node_execution_failed( - self, event: QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent + self, + *, + session: Session, + event: QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent, ) -> WorkflowNodeExecution: """ Workflow node execution failed :param event: queue node failed event :return: """ - workflow_node_execution = self._refetch_workflow_node_execution(event.node_execution_id) + workflow_node_execution = self._get_workflow_node_execution( + session=session, node_execution_id=event.node_execution_id + ) inputs = WorkflowEntry.handle_special_values(event.inputs) process_data = WorkflowEntry.handle_special_values(event.process_data) outputs = WorkflowEntry.handle_special_values(event.outputs) finished_at = datetime.now(UTC).replace(tzinfo=None) elapsed_time = (finished_at - event.start_at).total_seconds() - execution_metadata_dict = dict(event.execution_metadata or {}) - if self._wip_workflow_agent_logs.get(workflow_node_execution.id): - if not execution_metadata_dict: - execution_metadata_dict = {} - - execution_metadata_dict[NodeRunMetadataKey.AGENT_LOG] = self._wip_workflow_agent_logs.get( - workflow_node_execution.id, [] - ) - - execution_metadata = json.dumps(jsonable_encoder(execution_metadata_dict)) if execution_metadata_dict else None - db.session.query(WorkflowNodeExecution).filter(WorkflowNodeExecution.id == workflow_node_execution.id).update( - { - WorkflowNodeExecution.status: ( - WorkflowNodeExecutionStatus.FAILED.value - if not isinstance(event, QueueNodeExceptionEvent) - else WorkflowNodeExecutionStatus.EXCEPTION.value - ), - WorkflowNodeExecution.error: event.error, - WorkflowNodeExecution.inputs: json.dumps(inputs) if inputs else None, - WorkflowNodeExecution.process_data: json.dumps(process_data) if process_data else None, - WorkflowNodeExecution.outputs: json.dumps(outputs) if outputs else None, - WorkflowNodeExecution.finished_at: finished_at, - WorkflowNodeExecution.elapsed_time: elapsed_time, - WorkflowNodeExecution.execution_metadata: execution_metadata, - } + execution_metadata = ( + 
json.dumps(jsonable_encoder(event.execution_metadata)) if event.execution_metadata else None ) - - db.session.commit() - db.session.close() process_data = WorkflowEntry.handle_special_values(event.process_data) workflow_node_execution.status = ( WorkflowNodeExecutionStatus.FAILED.value @@ -440,12 +365,10 @@ class WorkflowCycleManage: workflow_node_execution.elapsed_time = elapsed_time workflow_node_execution.execution_metadata = execution_metadata - self._wip_workflow_node_executions.pop(workflow_node_execution.node_execution_id) - return workflow_node_execution def _handle_workflow_node_execution_retried( - self, workflow_run: WorkflowRun, event: QueueNodeRetryEvent + self, *, session: Session, workflow_run: WorkflowRun, event: QueueNodeRetryEvent ) -> WorkflowNodeExecution: """ Workflow node execution failed @@ -469,6 +392,7 @@ class WorkflowCycleManage: execution_metadata = json.dumps(merged_metadata) workflow_node_execution = WorkflowNodeExecution() + workflow_node_execution.id = event.node_execution_id workflow_node_execution.tenant_id = workflow_run.tenant_id workflow_node_execution.app_id = workflow_run.app_id workflow_node_execution.workflow_id = workflow_run.workflow_id @@ -491,10 +415,7 @@ class WorkflowCycleManage: workflow_node_execution.execution_metadata = execution_metadata workflow_node_execution.index = event.node_run_index - db.session.add(workflow_node_execution) - db.session.commit() - db.session.refresh(workflow_node_execution) - + session.add(workflow_node_execution) return workflow_node_execution ################################################# @@ -502,14 +423,14 @@ class WorkflowCycleManage: ################################################# def _workflow_start_to_stream_response( - self, task_id: str, workflow_run: WorkflowRun + self, + *, + session: Session, + task_id: str, + workflow_run: WorkflowRun, ) -> WorkflowStartStreamResponse: - """ - Workflow start to stream response. - :param task_id: task id - :param workflow_run: workflow run - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session return WorkflowStartStreamResponse( task_id=task_id, workflow_run_id=workflow_run.id, @@ -523,36 +444,32 @@ class WorkflowCycleManage: ) def _workflow_finish_to_stream_response( - self, task_id: str, workflow_run: WorkflowRun + self, + *, + session: Session, + task_id: str, + workflow_run: WorkflowRun, ) -> WorkflowFinishStreamResponse: - """ - Workflow finish to stream response. - :param task_id: task id - :param workflow_run: workflow run - :return: - """ - # Attach WorkflowRun to an active session so "created_by_role" can be accessed. 
- workflow_run = db.session.merge(workflow_run) - - # Refresh to ensure any expired attributes are fully loaded - db.session.refresh(workflow_run) - created_by = None - if workflow_run.created_by_role == CreatedByRole.ACCOUNT.value: - created_by_account = workflow_run.created_by_account - if created_by_account: + if workflow_run.created_by_role == CreatedByRole.ACCOUNT: + stmt = select(Account).where(Account.id == workflow_run.created_by) + account = session.scalar(stmt) + if account: created_by = { - "id": created_by_account.id, - "name": created_by_account.name, - "email": created_by_account.email, + "id": account.id, + "name": account.name, + "email": account.email, + } + elif workflow_run.created_by_role == CreatedByRole.END_USER: + stmt = select(EndUser).where(EndUser.id == workflow_run.created_by) + end_user = session.scalar(stmt) + if end_user: + created_by = { + "id": end_user.id, + "user": end_user.session_id, } else: - created_by_end_user = workflow_run.created_by_end_user - if created_by_end_user: - created_by = { - "id": created_by_end_user.id, - "user": created_by_end_user.session_id, - } + raise NotImplementedError(f"unknown created_by_role: {workflow_run.created_by_role}") return WorkflowFinishStreamResponse( task_id=task_id, @@ -576,17 +493,20 @@ class WorkflowCycleManage: ) def _workflow_node_start_to_stream_response( - self, event: QueueNodeStartedEvent, task_id: str, workflow_node_execution: WorkflowNodeExecution + self, + *, + session: Session, + event: QueueNodeStartedEvent, + task_id: str, + workflow_node_execution: WorkflowNodeExecution, ) -> Optional[NodeStartStreamResponse]: - """ - Workflow node start to stream response. - :param event: queue node started event - :param task_id: task id - :param workflow_node_execution: workflow node execution - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session + if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}: return None + if not workflow_node_execution.workflow_run_id: + return None response = NodeStartStreamResponse( task_id=task_id, @@ -622,6 +542,8 @@ class WorkflowCycleManage: def _workflow_node_finish_to_stream_response( self, + *, + session: Session, event: QueueNodeSucceededEvent | QueueNodeFailedEvent | QueueNodeInIterationFailedEvent @@ -629,15 +551,14 @@ class WorkflowCycleManage: task_id: str, workflow_node_execution: WorkflowNodeExecution, ) -> Optional[NodeFinishStreamResponse]: - """ - Workflow node finish to stream response. - :param event: queue node succeeded or failed event - :param task_id: task id - :param workflow_node_execution: workflow node execution - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}: return None + if not workflow_node_execution.workflow_run_id: + return None + if not workflow_node_execution.finished_at: + return None return NodeFinishStreamResponse( task_id=task_id, @@ -669,19 +590,20 @@ class WorkflowCycleManage: def _workflow_node_retry_to_stream_response( self, + *, + session: Session, event: QueueNodeRetryEvent, task_id: str, workflow_node_execution: WorkflowNodeExecution, ) -> Optional[Union[NodeRetryStreamResponse, NodeFinishStreamResponse]]: - """ - Workflow node finish to stream response. 
- :param event: queue node succeeded or failed event - :param task_id: task id - :param workflow_node_execution: workflow node execution - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}: return None + if not workflow_node_execution.workflow_run_id: + return None + if not workflow_node_execution.finished_at: + return None return NodeRetryStreamResponse( task_id=task_id, @@ -713,15 +635,10 @@ class WorkflowCycleManage: ) def _workflow_parallel_branch_start_to_stream_response( - self, task_id: str, workflow_run: WorkflowRun, event: QueueParallelBranchRunStartedEvent + self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueParallelBranchRunStartedEvent ) -> ParallelBranchStartStreamResponse: - """ - Workflow parallel branch start to stream response - :param task_id: task id - :param workflow_run: workflow run - :param event: parallel branch run started event - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session return ParallelBranchStartStreamResponse( task_id=task_id, workflow_run_id=workflow_run.id, @@ -737,17 +654,14 @@ class WorkflowCycleManage: def _workflow_parallel_branch_finished_to_stream_response( self, + *, + session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent, ) -> ParallelBranchFinishedStreamResponse: - """ - Workflow parallel branch finished to stream response - :param task_id: task id - :param workflow_run: workflow run - :param event: parallel branch run succeeded or failed event - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session return ParallelBranchFinishedStreamResponse( task_id=task_id, workflow_run_id=workflow_run.id, @@ -764,15 +678,10 @@ class WorkflowCycleManage: ) def _workflow_iteration_start_to_stream_response( - self, task_id: str, workflow_run: WorkflowRun, event: QueueIterationStartEvent + self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationStartEvent ) -> IterationNodeStartStreamResponse: - """ - Workflow iteration start to stream response - :param task_id: task id - :param workflow_run: workflow run - :param event: iteration start event - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session return IterationNodeStartStreamResponse( task_id=task_id, workflow_run_id=workflow_run.id, @@ -791,15 +700,10 @@ class WorkflowCycleManage: ) def _workflow_iteration_next_to_stream_response( - self, task_id: str, workflow_run: WorkflowRun, event: QueueIterationNextEvent + self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationNextEvent ) -> IterationNodeNextStreamResponse: - """ - Workflow iteration next to stream response - :param task_id: task id - :param workflow_run: workflow run - :param event: iteration next event - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session return IterationNodeNextStreamResponse( task_id=task_id, workflow_run_id=workflow_run.id, @@ -820,15 +724,10 @@ class WorkflowCycleManage: ) def _workflow_iteration_completed_to_stream_response( - self, task_id: str, 
workflow_run: WorkflowRun, event: QueueIterationCompletedEvent + self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationCompletedEvent ) -> IterationNodeCompletedStreamResponse: - """ - Workflow iteration completed to stream response - :param task_id: task id - :param workflow_run: workflow run - :param event: iteration completed event - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session return IterationNodeCompletedStreamResponse( task_id=task_id, workflow_run_id=workflow_run.id, @@ -912,27 +811,22 @@ class WorkflowCycleManage: return None - def _refetch_workflow_run(self, workflow_run_id: str) -> WorkflowRun: + def _get_workflow_run(self, *, session: Session, workflow_run_id: str) -> WorkflowRun: """ Refetch workflow run :param workflow_run_id: workflow run id :return: """ - workflow_run = db.session.query(WorkflowRun).filter(WorkflowRun.id == workflow_run_id).first() - + stmt = select(WorkflowRun).where(WorkflowRun.id == workflow_run_id) + workflow_run = session.scalar(stmt) if not workflow_run: raise WorkflowRunNotFoundError(workflow_run_id) return workflow_run - def _refetch_workflow_node_execution(self, node_execution_id: str) -> WorkflowNodeExecution: - """ - Refetch workflow node execution - :param node_execution_id: workflow node execution id - :return: - """ - workflow_node_execution = self._wip_workflow_node_executions.get(node_execution_id) - + def _get_workflow_node_execution(self, session: Session, node_execution_id: str) -> WorkflowNodeExecution: + stmt = select(WorkflowNodeExecution).where(WorkflowNodeExecution.id == node_execution_id) + workflow_node_execution = session.scalar(stmt) if not workflow_node_execution: raise WorkflowNodeExecutionNotFoundError(node_execution_id) @@ -945,41 +839,10 @@ class WorkflowCycleManage: :param event: agent log event :return: """ - node_execution = self._wip_workflow_node_executions.get(event.node_execution_id) - if not node_execution: - raise Exception(f"Workflow node execution not found: {event.node_execution_id}") - - node_execution_id = node_execution.id - original_agent_logs = self._wip_workflow_agent_logs.get(node_execution_id, []) - - # try to find the log with the same id - for log in original_agent_logs: - if log.id == event.id: - # update the log - log.status = event.status - log.error = event.error - log.data = event.data - break - else: - # append the log - original_agent_logs.append( - AgentLogStreamResponse.Data( - id=event.id, - parent_id=event.parent_id, - node_execution_id=node_execution_id, - error=event.error, - status=event.status, - data=event.data, - label=event.label, - ) - ) - - self._wip_workflow_agent_logs[node_execution_id] = original_agent_logs - return AgentLogStreamResponse( task_id=task_id, data=AgentLogStreamResponse.Data( - node_execution_id=node_execution_id, + node_execution_id=event.node_execution_id, id=event.id, parent_id=event.parent_id, label=event.label, diff --git a/api/core/entities/knowledge_entities.py b/api/core/entities/knowledge_entities.py new file mode 100644 index 0000000000..90c9879733 --- /dev/null +++ b/api/core/entities/knowledge_entities.py @@ -0,0 +1,19 @@ +from typing import Optional + +from pydantic import BaseModel + + +class PreviewDetail(BaseModel): + content: str + child_chunks: Optional[list[str]] = None + + +class QAPreviewDetail(BaseModel): + question: str + answer: str + + +class IndexingEstimate(BaseModel): + total_segments: int + preview: 
list[PreviewDetail] + qa_preview: Optional[list[QAPreviewDetail]] = None diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index eed2d7e49a..0261a6309e 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -881,7 +881,7 @@ class ProviderConfiguration(BaseModel): # if llm name not in restricted llm list, remove it restrict_model_names = [rm.model for rm in restrict_models] for model in provider_models: - if model.model_type == ModelType.LLM and m.model not in restrict_model_names: + if model.model_type == ModelType.LLM and model.model not in restrict_model_names: model.status = ModelStatus.NO_PERMISSION elif not quota_configuration.is_valid: model.status = ModelStatus.QUOTA_EXCEEDED diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 685dbc8ed4..da79b1bf03 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -8,34 +8,34 @@ import time import uuid from typing import Any, Optional, cast -from flask import Flask, current_app +from flask import current_app from flask_login import current_user # type: ignore from sqlalchemy.orm.exc import ObjectDeletedError from configs import dify_config +from core.entities.knowledge_entities import IndexingEstimate, PreviewDetail, QAPreviewDetail from core.errors.error import ProviderTokenNotInitError -from core.llm_generator.llm_generator import LLMGenerator from core.model_manager import ModelInstance, ModelManager from core.model_runtime.entities.model_entities import ModelType from core.rag.cleaner.clean_processor import CleanProcessor from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.docstore.dataset_docstore import DatasetDocumentStore from core.rag.extractor.entity.extract_setting import ExtractSetting +from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.index_processor.index_processor_factory import IndexProcessorFactory -from core.rag.models.document import Document +from core.rag.models.document import ChildDocument, Document from core.rag.splitter.fixed_text_splitter import ( EnhanceRecursiveCharacterTextSplitter, FixedRecursiveCharacterTextSplitter, ) from core.rag.splitter.text_splitter import TextSplitter from core.tools.utils.rag_web_reader import get_image_upload_file_ids -from core.tools.utils.text_processing_utils import remove_leading_symbols from extensions.ext_database import db from extensions.ext_redis import redis_client from extensions.ext_storage import storage from libs import helper -from models.dataset import Dataset, DatasetProcessRule, DocumentSegment +from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment from models.dataset import Document as DatasetDocument from models.model import UploadFile from services.feature_service import FeatureService @@ -115,6 +115,9 @@ class IndexingRunner: for document_segment in document_segments: db.session.delete(document_segment) + if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: + # delete child chunks + db.session.query(ChildChunk).filter(ChildChunk.segment_id == document_segment.id).delete() db.session.commit() # get the process rule processing_rule = ( @@ -183,7 +186,22 @@ class IndexingRunner: "dataset_id": document_segment.dataset_id, }, ) - + if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: + child_chunks = document_segment.child_chunks + if 
child_chunks: + child_documents = [] + for child_chunk in child_chunks: + child_document = ChildDocument( + page_content=child_chunk.content, + metadata={ + "doc_id": child_chunk.index_node_id, + "doc_hash": child_chunk.index_node_hash, + "document_id": document_segment.document_id, + "dataset_id": document_segment.dataset_id, + }, + ) + child_documents.append(child_document) + document.children = child_documents documents.append(document) # build index @@ -222,7 +240,7 @@ class IndexingRunner: doc_language: str = "English", dataset_id: Optional[str] = None, indexing_technique: str = "economy", - ) -> dict: + ) -> IndexingEstimate: """ Estimate the indexing for the document. """ @@ -258,31 +276,38 @@ class IndexingRunner: tenant_id=tenant_id, model_type=ModelType.TEXT_EMBEDDING, ) - preview_texts: list[str] = [] + preview_texts = [] # type: ignore + total_segments = 0 index_type = doc_form index_processor = IndexProcessorFactory(index_type).init_index_processor() - all_text_docs = [] for extract_setting in extract_settings: # extract - text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"]) - all_text_docs.extend(text_docs) processing_rule = DatasetProcessRule( mode=tmp_processing_rule["mode"], rules=json.dumps(tmp_processing_rule["rules"]) ) - - # get splitter - splitter = self._get_splitter(processing_rule, embedding_model_instance) - - # split to documents - documents = self._split_to_documents_for_estimate( - text_docs=text_docs, splitter=splitter, processing_rule=processing_rule + text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"]) + documents = index_processor.transform( + text_docs, + embedding_model_instance=embedding_model_instance, + process_rule=processing_rule.to_dict(), + tenant_id=current_user.current_tenant_id, + doc_language=doc_language, + preview=True, ) - total_segments += len(documents) for document in documents: - if len(preview_texts) < 5: - preview_texts.append(document.page_content) + if len(preview_texts) < 10: + if doc_form and doc_form == "qa_model": + preview_detail = QAPreviewDetail( + question=document.page_content, answer=document.metadata.get("answer") or "" + ) + preview_texts.append(preview_detail) + else: + preview_detail = PreviewDetail(content=document.page_content) # type: ignore + if document.children: + preview_detail.child_chunks = [child.page_content for child in document.children] # type: ignore + preview_texts.append(preview_detail) # delete image files and related db records image_upload_file_ids = get_image_upload_file_ids(document.page_content) @@ -299,15 +324,8 @@ class IndexingRunner: db.session.delete(image_file) if doc_form and doc_form == "qa_model": - if len(preview_texts) > 0: - # qa model document - response = LLMGenerator.generate_qa_document( - current_user.current_tenant_id, preview_texts[0], doc_language - ) - document_qa_list = self.format_split_text(response) - - return {"total_segments": total_segments * 20, "qa_preview": document_qa_list, "preview": preview_texts} - return {"total_segments": total_segments, "preview": preview_texts} + return IndexingEstimate(total_segments=total_segments * 20, qa_preview=preview_texts, preview=[]) + return IndexingEstimate(total_segments=total_segments, preview=preview_texts) # type: ignore def _extract( self, index_processor: BaseIndexProcessor, dataset_document: DatasetDocument, process_rule: dict @@ -401,31 +419,26 @@ class IndexingRunner: @staticmethod def _get_splitter( - processing_rule: 
DatasetProcessRule, embedding_model_instance: Optional[ModelInstance] + processing_rule_mode: str, + max_tokens: int, + chunk_overlap: int, + separator: str, + embedding_model_instance: Optional[ModelInstance], ) -> TextSplitter: """ Get the NodeParser object according to the processing rule. """ - character_splitter: TextSplitter - if processing_rule.mode == "custom": + if processing_rule_mode in ["custom", "hierarchical"]: # The user-defined segmentation rule - rules = json.loads(processing_rule.rules) - segmentation = rules["segmentation"] max_segmentation_tokens_length = dify_config.INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH - if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > max_segmentation_tokens_length: + if max_tokens < 50 or max_tokens > max_segmentation_tokens_length: raise ValueError(f"Custom segment length should be between 50 and {max_segmentation_tokens_length}.") - separator = segmentation["separator"] if separator: separator = separator.replace("\\n", "\n") - if segmentation.get("chunk_overlap"): - chunk_overlap = segmentation["chunk_overlap"] - else: - chunk_overlap = 0 - character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder( - chunk_size=segmentation["max_tokens"], + chunk_size=max_tokens, chunk_overlap=chunk_overlap, fixed_separator=separator, separators=["\n\n", "。", ". ", " ", ""], @@ -441,143 +454,7 @@ class IndexingRunner: embedding_model_instance=embedding_model_instance, ) - return character_splitter - - def _step_split( - self, - text_docs: list[Document], - splitter: TextSplitter, - dataset: Dataset, - dataset_document: DatasetDocument, - processing_rule: DatasetProcessRule, - ) -> list[Document]: - """ - Split the text documents into documents and save them to the document segment. - """ - documents = self._split_to_documents( - text_docs=text_docs, - splitter=splitter, - processing_rule=processing_rule, - tenant_id=dataset.tenant_id, - document_form=dataset_document.doc_form, - document_language=dataset_document.doc_language, - ) - - # save node to document segment - doc_store = DatasetDocumentStore( - dataset=dataset, user_id=dataset_document.created_by, document_id=dataset_document.id - ) - - # add document segments - doc_store.add_documents(documents) - - # update document status to indexing - cur_time = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - self._update_document_index_status( - document_id=dataset_document.id, - after_indexing_status="indexing", - extra_update_params={ - DatasetDocument.cleaning_completed_at: cur_time, - DatasetDocument.splitting_completed_at: cur_time, - }, - ) - - # update segment status to indexing - self._update_segments_by_document( - dataset_document_id=dataset_document.id, - update_params={ - DocumentSegment.status: "indexing", - DocumentSegment.indexing_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), - }, - ) - - return documents - - def _split_to_documents( - self, - text_docs: list[Document], - splitter: TextSplitter, - processing_rule: DatasetProcessRule, - tenant_id: str, - document_form: str, - document_language: str, - ) -> list[Document]: - """ - Split the text documents into nodes. 
- """ - all_documents: list[Document] = [] - all_qa_documents: list[Document] = [] - for text_doc in text_docs: - # document clean - document_text = self._document_clean(text_doc.page_content, processing_rule) - text_doc.page_content = document_text - - # parse document to nodes - documents = splitter.split_documents([text_doc]) - split_documents = [] - for document_node in documents: - if document_node.page_content.strip(): - if document_node.metadata is not None: - doc_id = str(uuid.uuid4()) - hash = helper.generate_text_hash(document_node.page_content) - document_node.metadata["doc_id"] = doc_id - document_node.metadata["doc_hash"] = hash - # delete Splitter character - page_content = document_node.page_content - document_node.page_content = remove_leading_symbols(page_content) - - if document_node.page_content: - split_documents.append(document_node) - all_documents.extend(split_documents) - # processing qa document - if document_form == "qa_model": - for i in range(0, len(all_documents), 10): - threads = [] - sub_documents = all_documents[i : i + 10] - for doc in sub_documents: - document_format_thread = threading.Thread( - target=self.format_qa_document, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "tenant_id": tenant_id, - "document_node": doc, - "all_qa_documents": all_qa_documents, - "document_language": document_language, - }, - ) - threads.append(document_format_thread) - document_format_thread.start() - for thread in threads: - thread.join() - return all_qa_documents - return all_documents - - def format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents, document_language): - format_documents = [] - if document_node.page_content is None or not document_node.page_content.strip(): - return - with flask_app.app_context(): - try: - # qa model document - response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content, document_language) - document_qa_list = self.format_split_text(response) - qa_documents = [] - for result in document_qa_list: - qa_document = Document( - page_content=result["question"], metadata=document_node.metadata.model_copy() - ) - if qa_document.metadata is not None: - doc_id = str(uuid.uuid4()) - hash = helper.generate_text_hash(result["question"]) - qa_document.metadata["answer"] = result["answer"] - qa_document.metadata["doc_id"] = doc_id - qa_document.metadata["doc_hash"] = hash - qa_documents.append(qa_document) - format_documents.extend(qa_documents) - except Exception as e: - logging.exception("Failed to format qa document") - - all_qa_documents.extend(format_documents) + return character_splitter # type: ignore def _split_to_documents_for_estimate( self, text_docs: list[Document], splitter: TextSplitter, processing_rule: DatasetProcessRule @@ -624,11 +501,11 @@ class IndexingRunner: return document_text @staticmethod - def format_split_text(text): + def format_split_text(text: str) -> list[QAPreviewDetail]: regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)" matches = re.findall(regex, text, re.UNICODE) - return [{"question": q, "answer": re.sub(r"\n\s*", "\n", a.strip())} for q, a in matches if q and a] + return [QAPreviewDetail(question=q, answer=re.sub(r"\n\s*", "\n", a.strip())) for q, a in matches if q and a] def _load( self, @@ -654,13 +531,14 @@ class IndexingRunner: indexing_start_at = time.perf_counter() tokens = 0 chunk_size = 10 + if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX: + # create keyword index + create_keyword_thread = 
threading.Thread( + target=self._process_keyword_index, + args=(current_app._get_current_object(), dataset.id, dataset_document.id, documents), # type: ignore + ) + create_keyword_thread.start() - # create keyword index - create_keyword_thread = threading.Thread( - target=self._process_keyword_index, - args=(current_app._get_current_object(), dataset.id, dataset_document.id, documents), # type: ignore - ) - create_keyword_thread.start() if dataset.indexing_technique == "high_quality": with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: futures = [] @@ -680,8 +558,8 @@ class IndexingRunner: for future in futures: tokens += future.result() - - create_keyword_thread.join() + if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX: + create_keyword_thread.join() indexing_end_at = time.perf_counter() # update document status to completed @@ -791,28 +669,6 @@ class IndexingRunner: DocumentSegment.query.filter_by(document_id=dataset_document_id).update(update_params) db.session.commit() - @staticmethod - def batch_add_segments(segments: list[DocumentSegment], dataset: Dataset): - """ - Batch add segments index processing - """ - documents = [] - for segment in segments: - document = Document( - page_content=segment.content, - metadata={ - "doc_id": segment.index_node_id, - "doc_hash": segment.index_node_hash, - "document_id": segment.document_id, - "dataset_id": segment.dataset_id, - }, - ) - documents.append(document) - # save vector index - index_type = dataset.doc_form - index_processor = IndexProcessorFactory(index_type).init_index_processor() - index_processor.load(dataset, documents) - def _transform( self, index_processor: BaseIndexProcessor, @@ -854,7 +710,7 @@ class IndexingRunner: ) # add document segments - doc_store.add_documents(documents) + doc_store.add_documents(docs=documents, save_child=dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX) # update document status to indexing cur_time = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index f538eaef5b..691cb8d400 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -9,6 +9,8 @@ from typing import Any, Optional, Union from uuid import UUID, uuid4 from flask import current_app +from sqlalchemy import select +from sqlalchemy.orm import Session from core.helper.encrypter import decrypt_token, encrypt_token, obfuscated_token from core.ops.entities.config_entity import ( @@ -329,15 +331,15 @@ class TraceTask: ): self.trace_type = trace_type self.message_id = message_id - self.workflow_run = workflow_run + self.workflow_run_id = workflow_run.id if workflow_run else None self.conversation_id = conversation_id self.user_id = user_id self.timer = timer - self.kwargs = kwargs self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") - self.app_id = None + self.kwargs = kwargs + def execute(self): return self.preprocess() @@ -345,19 +347,23 @@ class TraceTask: preprocess_map = { TraceTaskName.CONVERSATION_TRACE: lambda: self.conversation_trace(**self.kwargs), TraceTaskName.WORKFLOW_TRACE: lambda: self.workflow_trace( - self.workflow_run, self.conversation_id, self.user_id + workflow_run_id=self.workflow_run_id, conversation_id=self.conversation_id, user_id=self.user_id + ), + TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(message_id=self.message_id), + TraceTaskName.MODERATION_TRACE: lambda: self.moderation_trace( + message_id=self.message_id, timer=self.timer, 
**self.kwargs ), - TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(self.message_id), - TraceTaskName.MODERATION_TRACE: lambda: self.moderation_trace(self.message_id, self.timer, **self.kwargs), TraceTaskName.SUGGESTED_QUESTION_TRACE: lambda: self.suggested_question_trace( - self.message_id, self.timer, **self.kwargs + message_id=self.message_id, timer=self.timer, **self.kwargs ), TraceTaskName.DATASET_RETRIEVAL_TRACE: lambda: self.dataset_retrieval_trace( - self.message_id, self.timer, **self.kwargs + message_id=self.message_id, timer=self.timer, **self.kwargs + ), + TraceTaskName.TOOL_TRACE: lambda: self.tool_trace( + message_id=self.message_id, timer=self.timer, **self.kwargs ), - TraceTaskName.TOOL_TRACE: lambda: self.tool_trace(self.message_id, self.timer, **self.kwargs), TraceTaskName.GENERATE_NAME_TRACE: lambda: self.generate_name_trace( - self.conversation_id, self.timer, **self.kwargs + conversation_id=self.conversation_id, timer=self.timer, **self.kwargs ), } @@ -367,86 +373,100 @@ class TraceTask: def conversation_trace(self, **kwargs): return kwargs - def workflow_trace(self, workflow_run: WorkflowRun | None, conversation_id, user_id): - if not workflow_run: - raise ValueError("Workflow run not found") + def workflow_trace( + self, + *, + workflow_run_id: str | None, + conversation_id: str | None, + user_id: str | None, + ): + if not workflow_run_id: + return {} - db.session.merge(workflow_run) - db.session.refresh(workflow_run) + with Session(db.engine) as session: + workflow_run_stmt = select(WorkflowRun).where(WorkflowRun.id == workflow_run_id) + workflow_run = session.scalars(workflow_run_stmt).first() + if not workflow_run: + raise ValueError("Workflow run not found") - workflow_id = workflow_run.workflow_id - tenant_id = workflow_run.tenant_id - workflow_run_id = workflow_run.id - workflow_run_elapsed_time = workflow_run.elapsed_time - workflow_run_status = workflow_run.status - workflow_run_inputs = workflow_run.inputs_dict - workflow_run_outputs = workflow_run.outputs_dict - workflow_run_version = workflow_run.version - error = workflow_run.error or "" + workflow_id = workflow_run.workflow_id + tenant_id = workflow_run.tenant_id + workflow_run_id = workflow_run.id + workflow_run_elapsed_time = workflow_run.elapsed_time + workflow_run_status = workflow_run.status + workflow_run_inputs = workflow_run.inputs_dict + workflow_run_outputs = workflow_run.outputs_dict + workflow_run_version = workflow_run.version + error = workflow_run.error or "" - total_tokens = workflow_run.total_tokens + total_tokens = workflow_run.total_tokens - file_list = workflow_run_inputs.get("sys.file") or [] - query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" + file_list = workflow_run_inputs.get("sys.file") or [] + query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" - # get workflow_app_log_id - workflow_app_log_data = ( - db.session.query(WorkflowAppLog) - .filter_by(tenant_id=tenant_id, app_id=workflow_run.app_id, workflow_run_id=workflow_run.id) - .first() - ) - workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None - # get message_id - message_data = ( - db.session.query(Message.id) - .filter_by(conversation_id=conversation_id, workflow_run_id=workflow_run_id) - .first() - ) - message_id = str(message_data.id) if message_data else None + # get workflow_app_log_id + workflow_app_log_data_stmt = select(WorkflowAppLog.id).where( + WorkflowAppLog.tenant_id == tenant_id, + 
WorkflowAppLog.app_id == workflow_run.app_id, + WorkflowAppLog.workflow_run_id == workflow_run.id, + ) + workflow_app_log_id = session.scalar(workflow_app_log_data_stmt) + # get message_id + message_id = None + if conversation_id: + message_data_stmt = select(Message.id).where( + Message.conversation_id == conversation_id, + Message.workflow_run_id == workflow_run_id, + ) + message_id = session.scalar(message_data_stmt) - metadata = { - "workflow_id": workflow_id, - "conversation_id": conversation_id, - "workflow_run_id": workflow_run_id, - "tenant_id": tenant_id, - "elapsed_time": workflow_run_elapsed_time, - "status": workflow_run_status, - "version": workflow_run_version, - "total_tokens": total_tokens, - "file_list": file_list, - "triggered_form": workflow_run.triggered_from, - "user_id": user_id, - } - - workflow_trace_info = WorkflowTraceInfo( - workflow_data=workflow_run.to_dict(), - conversation_id=conversation_id, - workflow_id=workflow_id, - tenant_id=tenant_id, - workflow_run_id=workflow_run_id, - workflow_run_elapsed_time=workflow_run_elapsed_time, - workflow_run_status=workflow_run_status, - workflow_run_inputs=workflow_run_inputs, - workflow_run_outputs=workflow_run_outputs, - workflow_run_version=workflow_run_version, - error=error, - total_tokens=total_tokens, - file_list=file_list, - query=query, - metadata=metadata, - workflow_app_log_id=workflow_app_log_id, - message_id=message_id, - start_time=workflow_run.created_at, - end_time=workflow_run.finished_at, - ) + metadata = { + "workflow_id": workflow_id, + "conversation_id": conversation_id, + "workflow_run_id": workflow_run_id, + "tenant_id": tenant_id, + "elapsed_time": workflow_run_elapsed_time, + "status": workflow_run_status, + "version": workflow_run_version, + "total_tokens": total_tokens, + "file_list": file_list, + "triggered_form": workflow_run.triggered_from, + "user_id": user_id, + } + workflow_trace_info = WorkflowTraceInfo( + workflow_data=workflow_run.to_dict(), + conversation_id=conversation_id, + workflow_id=workflow_id, + tenant_id=tenant_id, + workflow_run_id=workflow_run_id, + workflow_run_elapsed_time=workflow_run_elapsed_time, + workflow_run_status=workflow_run_status, + workflow_run_inputs=workflow_run_inputs, + workflow_run_outputs=workflow_run_outputs, + workflow_run_version=workflow_run_version, + error=error, + total_tokens=total_tokens, + file_list=file_list, + query=query, + metadata=metadata, + workflow_app_log_id=workflow_app_log_id, + message_id=message_id, + start_time=workflow_run.created_at, + end_time=workflow_run.finished_at, + ) return workflow_trace_info - def message_trace(self, message_id): + def message_trace(self, message_id: str | None): + if not message_id: + return {} message_data = get_message_data(message_id) if not message_data: return {} - conversation_mode = db.session.query(Conversation.mode).filter_by(id=message_data.conversation_id).first() + conversation_mode_stmt = select(Conversation.mode).where(Conversation.id == message_data.conversation_id) + conversation_mode = db.session.scalars(conversation_mode_stmt).all() + if not conversation_mode or len(conversation_mode) == 0: + return {} conversation_mode = conversation_mode[0] created_at = message_data.created_at inputs = message_data.message diff --git a/api/core/ops/utils.py b/api/core/ops/utils.py index 998eba9ea9..8b06df1930 100644 --- a/api/core/ops/utils.py +++ b/api/core/ops/utils.py @@ -18,7 +18,7 @@ def filter_none_values(data: dict): return new_data -def get_message_data(message_id): +def 
get_message_data(message_id: str): return db.session.query(Message).filter(Message.id == message_id).first() diff --git a/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py b/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py index 8b17e8dc0a..a6214d955b 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py +++ b/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py @@ -1,5 +1,5 @@ import re -from typing import Optional +from typing import Optional, cast class JiebaKeywordTableHandler: @@ -8,18 +8,20 @@ class JiebaKeywordTableHandler: from core.rag.datasource.keyword.jieba.stopwords import STOPWORDS - jieba.analyse.default_tfidf.stop_words = STOPWORDS + jieba.analyse.default_tfidf.stop_words = STOPWORDS # type: ignore def extract_keywords(self, text: str, max_keywords_per_chunk: Optional[int] = 10) -> set[str]: """Extract keywords with JIEBA tfidf.""" - import jieba # type: ignore + import jieba.analyse # type: ignore keywords = jieba.analyse.extract_tags( sentence=text, topK=max_keywords_per_chunk, ) + # jieba.analyse.extract_tags returns list[Any] when withFlag is False by default. + keywords = cast(list[str], keywords) - return set(self._expand_tokens_with_subtokens(keywords)) + return set(self._expand_tokens_with_subtokens(set(keywords))) def _expand_tokens_with_subtokens(self, tokens: set[str]) -> set[str]: """Get subtokens from a list of tokens., filtering for stopwords.""" diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 34343ad60e..3a8200bc7b 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -6,11 +6,14 @@ from flask import Flask, current_app from core.rag.data_post_processor.data_post_processor import DataPostProcessor from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.vdb.vector_factory import Vector +from core.rag.embedding.retrieval import RetrievalSegments +from core.rag.index_processor.constant.index_type import IndexType from core.rag.models.document import Document from core.rag.rerank.rerank_type import RerankMode from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_database import db -from models.dataset import Dataset +from models.dataset import ChildChunk, Dataset, DocumentSegment +from models.dataset import Document as DatasetDocument from services.external_knowledge_service import ExternalDatasetService default_retrieval_model = { @@ -248,3 +251,89 @@ class RetrievalService: @staticmethod def escape_query_for_search(query: str) -> str: return query.replace('"', '\\"') + + @staticmethod + def format_retrieval_documents(documents: list[Document]) -> list[RetrievalSegments]: + records = [] + include_segment_ids = [] + segment_child_map = {} + for document in documents: + document_id = document.metadata.get("document_id") + dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first() + if dataset_document: + if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: + child_index_node_id = document.metadata.get("doc_id") + result = ( + db.session.query(ChildChunk, DocumentSegment) + .join(DocumentSegment, ChildChunk.segment_id == DocumentSegment.id) + .filter( + ChildChunk.index_node_id == child_index_node_id, + DocumentSegment.dataset_id == dataset_document.dataset_id, + DocumentSegment.enabled == True, + DocumentSegment.status == "completed", + ) + .first() 
+ ) + if result: + child_chunk, segment = result + if not segment: + continue + if segment.id not in include_segment_ids: + include_segment_ids.append(segment.id) + child_chunk_detail = { + "id": child_chunk.id, + "content": child_chunk.content, + "position": child_chunk.position, + "score": document.metadata.get("score", 0.0), + } + map_detail = { + "max_score": document.metadata.get("score", 0.0), + "child_chunks": [child_chunk_detail], + } + segment_child_map[segment.id] = map_detail + record = { + "segment": segment, + } + records.append(record) + else: + child_chunk_detail = { + "id": child_chunk.id, + "content": child_chunk.content, + "position": child_chunk.position, + "score": document.metadata.get("score", 0.0), + } + segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail) + segment_child_map[segment.id]["max_score"] = max( + segment_child_map[segment.id]["max_score"], document.metadata.get("score", 0.0) + ) + else: + continue + else: + index_node_id = document.metadata["doc_id"] + + segment = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.dataset_id == dataset_document.dataset_id, + DocumentSegment.enabled == True, + DocumentSegment.status == "completed", + DocumentSegment.index_node_id == index_node_id, + ) + .first() + ) + + if not segment: + continue + include_segment_ids.append(segment.id) + record = { + "segment": segment, + "score": document.metadata.get("score", None), + } + + records.append(record) + for record in records: + if record["segment"].id in segment_child_map: + record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks", None) + record["score"] = segment_child_map[record["segment"].id]["max_score"] + + return [RetrievalSegments(**record) for record in records] diff --git a/api/core/rag/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py index 6d16a9bdc2..398b0daad9 100644 --- a/api/core/rag/docstore/dataset_docstore.py +++ b/api/core/rag/docstore/dataset_docstore.py @@ -7,7 +7,7 @@ from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from core.rag.models.document import Document from extensions.ext_database import db -from models.dataset import Dataset, DocumentSegment +from models.dataset import ChildChunk, Dataset, DocumentSegment class DatasetDocumentStore: @@ -60,7 +60,7 @@ class DatasetDocumentStore: return output - def add_documents(self, docs: Sequence[Document], allow_update: bool = True) -> None: + def add_documents(self, docs: Sequence[Document], allow_update: bool = True, save_child: bool = False) -> None: max_position = ( db.session.query(func.max(DocumentSegment.position)) .filter(DocumentSegment.document_id == self._document_id) @@ -120,13 +120,55 @@ class DatasetDocumentStore: segment_document.answer = doc.metadata.pop("answer", "") db.session.add(segment_document) + db.session.flush() + if save_child: + if doc.children: + for position, child in enumerate(doc.children, start=1): + child_segment = ChildChunk( + tenant_id=self._dataset.tenant_id, + dataset_id=self._dataset.id, + document_id=self._document_id, + segment_id=segment_document.id, + position=position, + index_node_id=child.metadata.get("doc_id"), + index_node_hash=child.metadata.get("doc_hash"), + content=child.page_content, + word_count=len(child.page_content), + type="automatic", + created_by=self._user_id, + ) + db.session.add(child_segment) else: segment_document.content = doc.page_content if doc.metadata.get("answer"): segment_document.answer =
doc.metadata.pop("answer", "") - segment_document.index_node_hash = doc.metadata["doc_hash"] + segment_document.index_node_hash = doc.metadata.get("doc_hash") segment_document.word_count = len(doc.page_content) segment_document.tokens = tokens + if save_child and doc.children: + # delete the existing child chunks + db.session.query(ChildChunk).filter( + ChildChunk.tenant_id == self._dataset.tenant_id, + ChildChunk.dataset_id == self._dataset.id, + ChildChunk.document_id == self._document_id, + ChildChunk.segment_id == segment_document.id, + ).delete() + # add new child chunks + for position, child in enumerate(doc.children, start=1): + child_segment = ChildChunk( + tenant_id=self._dataset.tenant_id, + dataset_id=self._dataset.id, + document_id=self._document_id, + segment_id=segment_document.id, + position=position, + index_node_id=child.metadata.get("doc_id"), + index_node_hash=child.metadata.get("doc_hash"), + content=child.page_content, + word_count=len(child.page_content), + type="automatic", + created_by=self._user_id, + ) + db.session.add(child_segment) db.session.commit() diff --git a/api/core/rag/embedding/retrieval.py b/api/core/rag/embedding/retrieval.py new file mode 100644 index 0000000000..800422d888 --- /dev/null +++ b/api/core/rag/embedding/retrieval.py @@ -0,0 +1,23 @@ +from typing import Optional + +from pydantic import BaseModel + +from models.dataset import DocumentSegment + + +class RetrievalChildChunk(BaseModel): + """Retrieval segments.""" + + id: str + content: str + score: float + position: int + + +class RetrievalSegments(BaseModel): + """Retrieval segments.""" + + model_config = {"arbitrary_types_allowed": True} + segment: DocumentSegment + child_chunks: Optional[list[RetrievalChildChunk]] = None + score: Optional[float] = None diff --git a/api/core/rag/extractor/excel_extractor.py b/api/core/rag/extractor/excel_extractor.py index c444105bb5..a3b35458df 100644 --- a/api/core/rag/extractor/excel_extractor.py +++ b/api/core/rag/extractor/excel_extractor.py @@ -4,7 +4,7 @@ import os from typing import Optional, cast import pandas as pd -from openpyxl import load_workbook +from openpyxl import load_workbook # type: ignore from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py index a473b3dfa7..f9fd7f92a1 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -24,7 +24,6 @@ from core.rag.extractor.unstructured.unstructured_markdown_extractor import Unst from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor -from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor from core.rag.extractor.unstructured.unstructured_xml_extractor import UnstructuredXmlExtractor from core.rag.extractor.word_extractor import WordExtractor from core.rag.models.document import Document @@ -103,12 +102,11 @@ class ExtractProcessor: input_file = Path(file_path) file_extension = input_file.suffix.lower() etl_type = dify_config.ETL_TYPE - unstructured_api_url = dify_config.UNSTRUCTURED_API_URL - unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY - assert unstructured_api_url is not None, "unstructured_api_url is 
required" - assert unstructured_api_key is not None, "unstructured_api_key is required" extractor: Optional[BaseExtractor] = None if etl_type == "Unstructured": + unstructured_api_url = dify_config.UNSTRUCTURED_API_URL + unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY or "" + if file_extension in {".xlsx", ".xls"}: extractor = ExcelExtractor(file_path) elif file_extension == ".pdf": @@ -141,11 +139,7 @@ class ExtractProcessor: extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url, unstructured_api_key) else: # txt - extractor = ( - UnstructuredTextExtractor(file_path, unstructured_api_url) - if is_automatic - else TextExtractor(file_path, autodetect_encoding=True) - ) + extractor = TextExtractor(file_path, autodetect_encoding=True) else: if file_extension in {".xlsx", ".xls"}: extractor = ExcelExtractor(file_path) diff --git a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py index 9647dedfff..f1fa5dde5c 100644 --- a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py @@ -1,5 +1,6 @@ import base64 import logging +from typing import Optional from bs4 import BeautifulSoup # type: ignore @@ -15,7 +16,7 @@ class UnstructuredEmailExtractor(BaseExtractor): file_path: Path to the file to load. """ - def __init__(self, file_path: str, api_url: str, api_key: str): + def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""): """Initialize with file path.""" self._file_path = file_path self._api_url = api_url diff --git a/api/core/rag/extractor/unstructured/unstructured_epub_extractor.py b/api/core/rag/extractor/unstructured/unstructured_epub_extractor.py index 80c29157aa..35ca686f62 100644 --- a/api/core/rag/extractor/unstructured/unstructured_epub_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_epub_extractor.py @@ -19,7 +19,7 @@ class UnstructuredEpubExtractor(BaseExtractor): self, file_path: str, api_url: Optional[str] = None, - api_key: Optional[str] = None, + api_key: str = "", ): """Initialize with file path.""" self._file_path = file_path @@ -30,9 +30,6 @@ class UnstructuredEpubExtractor(BaseExtractor): if self._api_url: from unstructured.partition.api import partition_via_api - if self._api_key is None: - raise ValueError("api_key is required") - elements = partition_via_api(filename=self._file_path, api_url=self._api_url, api_key=self._api_key) else: from unstructured.partition.epub import partition_epub diff --git a/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py b/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py index 4173d4d122..d5418e612a 100644 --- a/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document @@ -24,7 +25,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor): if the specified encoding fails. 
""" - def __init__(self, file_path: str, api_url: str, api_key: str): + def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""): """Initialize with file path.""" self._file_path = file_path self._api_url = api_url diff --git a/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py b/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py index 57affb8d36..d363449c29 100644 --- a/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document @@ -14,7 +15,7 @@ class UnstructuredMsgExtractor(BaseExtractor): file_path: Path to the file to load. """ - def __init__(self, file_path: str, api_url: str, api_key: str): + def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""): """Initialize with file path.""" self._file_path = file_path self._api_url = api_url diff --git a/api/core/rag/extractor/unstructured/unstructured_ppt_extractor.py b/api/core/rag/extractor/unstructured/unstructured_ppt_extractor.py index e504d4bc23..ecc272a2f0 100644 --- a/api/core/rag/extractor/unstructured/unstructured_ppt_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_ppt_extractor.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document @@ -14,7 +15,7 @@ class UnstructuredPPTExtractor(BaseExtractor): file_path: Path to the file to load. """ - def __init__(self, file_path: str, api_url: str, api_key: str): + def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""): """Initialize with file path.""" self._file_path = file_path self._api_url = api_url diff --git a/api/core/rag/extractor/unstructured/unstructured_pptx_extractor.py b/api/core/rag/extractor/unstructured/unstructured_pptx_extractor.py index cefe72b290..e7bf6fd2e6 100644 --- a/api/core/rag/extractor/unstructured/unstructured_pptx_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_pptx_extractor.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document @@ -14,7 +15,7 @@ class UnstructuredPPTXExtractor(BaseExtractor): file_path: Path to the file to load. """ - def __init__(self, file_path: str, api_url: str, api_key: str): + def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""): """Initialize with file path.""" self._file_path = file_path self._api_url = api_url diff --git a/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py b/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py index ef46ab0e70..916cdc3f2b 100644 --- a/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document @@ -14,7 +15,7 @@ class UnstructuredXmlExtractor(BaseExtractor): file_path: Path to the file to load. 
""" - def __init__(self, file_path: str, api_url: str, api_key: str): + def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""): """Initialize with file path.""" self._file_path = file_path self._api_url = api_url diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index c3161bc812..d93de5fef9 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -267,8 +267,10 @@ class WordExtractor(BaseExtractor): if isinstance(element.tag, str) and element.tag.endswith("p"): # paragraph para = paragraphs.pop(0) parsed_paragraph = parse_paragraph(para) - if parsed_paragraph: + if parsed_paragraph.strip(): content.append(parsed_paragraph) + else: + content.append("\n") elif isinstance(element.tag, str) and element.tag.endswith("tbl"): # table table = tables.pop(0) content.append(self._table_to_markdown(table, image_map)) diff --git a/api/core/rag/index_processor/constant/index_type.py b/api/core/rag/index_processor/constant/index_type.py index e42cc44c6f..0845b58e25 100644 --- a/api/core/rag/index_processor/constant/index_type.py +++ b/api/core/rag/index_processor/constant/index_type.py @@ -1,8 +1,7 @@ from enum import Enum -class IndexType(Enum): +class IndexType(str, Enum): PARAGRAPH_INDEX = "text_model" QA_INDEX = "qa_model" - PARENT_CHILD_INDEX = "parent_child_index" - SUMMARY_INDEX = "summary_index" + PARENT_CHILD_INDEX = "hierarchical_model" diff --git a/api/core/rag/index_processor/index_processor_base.py b/api/core/rag/index_processor/index_processor_base.py index 7e5efdc66e..2bcd1c79bb 100644 --- a/api/core/rag/index_processor/index_processor_base.py +++ b/api/core/rag/index_processor/index_processor_base.py @@ -27,10 +27,10 @@ class BaseIndexProcessor(ABC): raise NotImplementedError @abstractmethod - def load(self, dataset: Dataset, documents: list[Document], with_keywords: bool = True): + def load(self, dataset: Dataset, documents: list[Document], with_keywords: bool = True, **kwargs): raise NotImplementedError - def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True): + def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True, **kwargs): raise NotImplementedError @abstractmethod @@ -45,26 +45,29 @@ class BaseIndexProcessor(ABC): ) -> list[Document]: raise NotImplementedError - def _get_splitter(self, processing_rule: dict, embedding_model_instance: Optional[ModelInstance]) -> TextSplitter: + def _get_splitter( + self, + processing_rule_mode: str, + max_tokens: int, + chunk_overlap: int, + separator: str, + embedding_model_instance: Optional[ModelInstance], + ) -> TextSplitter: """ Get the NodeParser object according to the processing rule. 
""" - character_splitter: TextSplitter - if processing_rule["mode"] == "custom": + if processing_rule_mode in ["custom", "hierarchical"]: # The user-defined segmentation rule - rules = processing_rule["rules"] - segmentation = rules["segmentation"] max_segmentation_tokens_length = dify_config.INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH - if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > max_segmentation_tokens_length: + if max_tokens < 50 or max_tokens > max_segmentation_tokens_length: raise ValueError(f"Custom segment length should be between 50 and {max_segmentation_tokens_length}.") - separator = segmentation["separator"] if separator: separator = separator.replace("\\n", "\n") character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder( - chunk_size=segmentation["max_tokens"], - chunk_overlap=segmentation.get("chunk_overlap", 0) or 0, + chunk_size=max_tokens, + chunk_overlap=chunk_overlap, fixed_separator=separator, separators=["\n\n", "。", ". ", " ", ""], embedding_model_instance=embedding_model_instance, @@ -78,4 +81,4 @@ class BaseIndexProcessor(ABC): embedding_model_instance=embedding_model_instance, ) - return character_splitter + return character_splitter # type: ignore diff --git a/api/core/rag/index_processor/index_processor_factory.py b/api/core/rag/index_processor/index_processor_factory.py index c5ba6295f3..c987edf342 100644 --- a/api/core/rag/index_processor/index_processor_factory.py +++ b/api/core/rag/index_processor/index_processor_factory.py @@ -3,6 +3,7 @@ from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor +from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor from core.rag.index_processor.processor.qa_index_processor import QAIndexProcessor @@ -18,9 +19,11 @@ class IndexProcessorFactory: if not self._index_type: raise ValueError("Index type must be specified.") - if self._index_type == IndexType.PARAGRAPH_INDEX.value: + if self._index_type == IndexType.PARAGRAPH_INDEX: return ParagraphIndexProcessor() - elif self._index_type == IndexType.QA_INDEX.value: + elif self._index_type == IndexType.QA_INDEX: return QAIndexProcessor() + elif self._index_type == IndexType.PARENT_CHILD_INDEX: + return ParentChildIndexProcessor() else: raise ValueError(f"Index type {self._index_type} is not supported.") diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py index c66fa54d50..dca84b9041 100644 --- a/api/core/rag/index_processor/processor/paragraph_index_processor.py +++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py @@ -13,21 +13,40 @@ from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.models.document import Document from core.tools.utils.text_processing_utils import remove_leading_symbols from libs import helper -from models.dataset import Dataset +from models.dataset import Dataset, DatasetProcessRule +from services.entities.knowledge_entities.knowledge_entities import Rule class ParagraphIndexProcessor(BaseIndexProcessor): def extract(self, extract_setting: ExtractSetting, **kwargs) -> list[Document]: text_docs = ExtractProcessor.extract( - extract_setting=extract_setting, is_automatic=kwargs.get("process_rule_mode") == "automatic" + extract_setting=extract_setting, 
+ is_automatic=( + kwargs.get("process_rule_mode") == "automatic" or kwargs.get("process_rule_mode") == "hierarchical" + ), ) return text_docs def transform(self, documents: list[Document], **kwargs) -> list[Document]: + process_rule = kwargs.get("process_rule") + if not process_rule: + raise ValueError("No process rule found.") + if process_rule.get("mode") == "automatic": + automatic_rule = DatasetProcessRule.AUTOMATIC_RULES + rules = Rule(**automatic_rule) + else: + if not process_rule.get("rules"): + raise ValueError("No rules found in process rule.") + rules = Rule(**process_rule.get("rules")) # Split the text documents into nodes. + if not rules.segmentation: + raise ValueError("No segmentation found in rules.") splitter = self._get_splitter( - processing_rule=kwargs.get("process_rule", {}), + processing_rule_mode=process_rule.get("mode"), + max_tokens=rules.segmentation.max_tokens, + chunk_overlap=rules.segmentation.chunk_overlap, + separator=rules.segmentation.separator, embedding_model_instance=kwargs.get("embedding_model_instance"), ) all_documents = [] @@ -53,15 +72,19 @@ class ParagraphIndexProcessor(BaseIndexProcessor): all_documents.extend(split_documents) return all_documents - def load(self, dataset: Dataset, documents: list[Document], with_keywords: bool = True): + def load(self, dataset: Dataset, documents: list[Document], with_keywords: bool = True, **kwargs): if dataset.indexing_technique == "high_quality": vector = Vector(dataset) vector.create(documents) if with_keywords: + keywords_list = kwargs.get("keywords_list") keyword = Keyword(dataset) - keyword.create(documents) + if keywords_list and len(keywords_list) > 0: + keyword.add_texts(documents, keywords_list=keywords_list) + else: + keyword.add_texts(documents) - def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True): + def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True, **kwargs): if dataset.indexing_technique == "high_quality": vector = Vector(dataset) if node_ids: diff --git a/api/core/rag/index_processor/processor/parent_child_index_processor.py b/api/core/rag/index_processor/processor/parent_child_index_processor.py new file mode 100644 index 0000000000..e8423e2b77 --- /dev/null +++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py @@ -0,0 +1,195 @@ +"""Paragraph index processor.""" + +import uuid +from typing import Optional + +from core.model_manager import ModelInstance +from core.rag.cleaner.clean_processor import CleanProcessor +from core.rag.datasource.retrieval_service import RetrievalService +from core.rag.datasource.vdb.vector_factory import Vector +from core.rag.extractor.entity.extract_setting import ExtractSetting +from core.rag.extractor.extract_processor import ExtractProcessor +from core.rag.index_processor.index_processor_base import BaseIndexProcessor +from core.rag.models.document import ChildDocument, Document +from extensions.ext_database import db +from libs import helper +from models.dataset import ChildChunk, Dataset, DocumentSegment +from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule + + +class ParentChildIndexProcessor(BaseIndexProcessor): + def extract(self, extract_setting: ExtractSetting, **kwargs) -> list[Document]: + text_docs = ExtractProcessor.extract( + extract_setting=extract_setting, + is_automatic=( + kwargs.get("process_rule_mode") == "automatic" or kwargs.get("process_rule_mode") == "hierarchical" + ), + ) + + return text_docs + + def 
transform(self, documents: list[Document], **kwargs) -> list[Document]: + process_rule = kwargs.get("process_rule") + if not process_rule: + raise ValueError("No process rule found.") + if not process_rule.get("rules"): + raise ValueError("No rules found in process rule.") + rules = Rule(**process_rule.get("rules")) + all_documents = [] # type: ignore + if rules.parent_mode == ParentMode.PARAGRAPH: + # Split the text documents into nodes. + splitter = self._get_splitter( + processing_rule_mode=process_rule.get("mode"), + max_tokens=rules.segmentation.max_tokens, + chunk_overlap=rules.segmentation.chunk_overlap, + separator=rules.segmentation.separator, + embedding_model_instance=kwargs.get("embedding_model_instance"), + ) + for document in documents: + # document clean + document_text = CleanProcessor.clean(document.page_content, process_rule) + document.page_content = document_text + # parse document to nodes + document_nodes = splitter.split_documents([document]) + split_documents = [] + for document_node in document_nodes: + if document_node.page_content.strip(): + doc_id = str(uuid.uuid4()) + hash = helper.generate_text_hash(document_node.page_content) + document_node.metadata["doc_id"] = doc_id + document_node.metadata["doc_hash"] = hash + # delete Splitter character + page_content = document_node.page_content + if page_content.startswith(".") or page_content.startswith("。"): + page_content = page_content[1:].strip() + else: + page_content = page_content + if len(page_content) > 0: + document_node.page_content = page_content + # parse document to child nodes + child_nodes = self._split_child_nodes( + document_node, rules, process_rule.get("mode"), kwargs.get("embedding_model_instance") + ) + document_node.children = child_nodes + split_documents.append(document_node) + all_documents.extend(split_documents) + elif rules.parent_mode == ParentMode.FULL_DOC: + page_content = "\n".join([document.page_content for document in documents]) + document = Document(page_content=page_content, metadata=documents[0].metadata) + # parse document to child nodes + child_nodes = self._split_child_nodes( + document, rules, process_rule.get("mode"), kwargs.get("embedding_model_instance") + ) + document.children = child_nodes + doc_id = str(uuid.uuid4()) + hash = helper.generate_text_hash(document.page_content) + document.metadata["doc_id"] = doc_id + document.metadata["doc_hash"] = hash + all_documents.append(document) + + return all_documents + + def load(self, dataset: Dataset, documents: list[Document], with_keywords: bool = True, **kwargs): + if dataset.indexing_technique == "high_quality": + vector = Vector(dataset) + for document in documents: + child_documents = document.children + if child_documents: + formatted_child_documents = [ + Document(**child_document.model_dump()) for child_document in child_documents + ] + vector.create(formatted_child_documents) + + def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True, **kwargs): + # node_ids is segment's node_ids + if dataset.indexing_technique == "high_quality": + delete_child_chunks = kwargs.get("delete_child_chunks") or False + vector = Vector(dataset) + if node_ids: + child_node_ids = ( + db.session.query(ChildChunk.index_node_id) + .join(DocumentSegment, ChildChunk.segment_id == DocumentSegment.id) + .filter( + DocumentSegment.dataset_id == dataset.id, + DocumentSegment.index_node_id.in_(node_ids), + ChildChunk.dataset_id == dataset.id, + ) + .all() + ) + child_node_ids = [child_node_id[0] for child_node_id in 
child_node_ids] + vector.delete_by_ids(child_node_ids) + if delete_child_chunks: + db.session.query(ChildChunk).filter( + ChildChunk.dataset_id == dataset.id, ChildChunk.index_node_id.in_(child_node_ids) + ).delete() + db.session.commit() + else: + vector.delete() + + if delete_child_chunks: + db.session.query(ChildChunk).filter(ChildChunk.dataset_id == dataset.id).delete() + db.session.commit() + + def retrieve( + self, + retrieval_method: str, + query: str, + dataset: Dataset, + top_k: int, + score_threshold: float, + reranking_model: dict, + ) -> list[Document]: + # Set search parameters. + results = RetrievalService.retrieve( + retrieval_method=retrieval_method, + dataset_id=dataset.id, + query=query, + top_k=top_k, + score_threshold=score_threshold, + reranking_model=reranking_model, + ) + # Organize results. + docs = [] + for result in results: + metadata = result.metadata + metadata["score"] = result.score + if result.score > score_threshold: + doc = Document(page_content=result.page_content, metadata=metadata) + docs.append(doc) + return docs + + def _split_child_nodes( + self, + document_node: Document, + rules: Rule, + process_rule_mode: str, + embedding_model_instance: Optional[ModelInstance], + ) -> list[ChildDocument]: + if not rules.subchunk_segmentation: + raise ValueError("No subchunk segmentation found in rules.") + child_splitter = self._get_splitter( + processing_rule_mode=process_rule_mode, + max_tokens=rules.subchunk_segmentation.max_tokens, + chunk_overlap=rules.subchunk_segmentation.chunk_overlap, + separator=rules.subchunk_segmentation.separator, + embedding_model_instance=embedding_model_instance, + ) + # parse document to child nodes + child_nodes = [] + child_documents = child_splitter.split_documents([document_node]) + for child_document_node in child_documents: + if child_document_node.page_content.strip(): + doc_id = str(uuid.uuid4()) + hash = helper.generate_text_hash(child_document_node.page_content) + child_document = ChildDocument( + page_content=child_document_node.page_content, metadata=document_node.metadata + ) + child_document.metadata["doc_id"] = doc_id + child_document.metadata["doc_hash"] = hash + child_page_content = child_document.page_content + if child_page_content.startswith(".") or child_page_content.startswith("。"): + child_page_content = child_page_content[1:].strip() + if len(child_page_content) > 0: + child_document.page_content = child_page_content + child_nodes.append(child_document) + return child_nodes diff --git a/api/core/rag/index_processor/processor/qa_index_processor.py b/api/core/rag/index_processor/processor/qa_index_processor.py index 20fd16e8f3..58b50a9fcb 100644 --- a/api/core/rag/index_processor/processor/qa_index_processor.py +++ b/api/core/rag/index_processor/processor/qa_index_processor.py @@ -21,18 +21,32 @@ from core.rag.models.document import Document from core.tools.utils.text_processing_utils import remove_leading_symbols from libs import helper from models.dataset import Dataset +from services.entities.knowledge_entities.knowledge_entities import Rule class QAIndexProcessor(BaseIndexProcessor): def extract(self, extract_setting: ExtractSetting, **kwargs) -> list[Document]: text_docs = ExtractProcessor.extract( - extract_setting=extract_setting, is_automatic=kwargs.get("process_rule_mode") == "automatic" + extract_setting=extract_setting, + is_automatic=( + kwargs.get("process_rule_mode") == "automatic" or kwargs.get("process_rule_mode") == "hierarchical" + ), ) return text_docs def transform(self, documents: 
list[Document], **kwargs) -> list[Document]: + preview = kwargs.get("preview") + process_rule = kwargs.get("process_rule") + if not process_rule: + raise ValueError("No process rule found.") + if not process_rule.get("rules"): + raise ValueError("No rules found in process rule.") + rules = Rule(**process_rule.get("rules")) splitter = self._get_splitter( - processing_rule=kwargs.get("process_rule") or {}, + processing_rule_mode=process_rule.get("mode"), + max_tokens=rules.segmentation.max_tokens if rules.segmentation else 0, + chunk_overlap=rules.segmentation.chunk_overlap if rules.segmentation else 0, + separator=rules.segmentation.separator if rules.segmentation else "", embedding_model_instance=kwargs.get("embedding_model_instance"), ) @@ -59,24 +73,33 @@ class QAIndexProcessor(BaseIndexProcessor): document_node.page_content = remove_leading_symbols(page_content) split_documents.append(document_node) all_documents.extend(split_documents) - for i in range(0, len(all_documents), 10): - threads = [] - sub_documents = all_documents[i : i + 10] - for doc in sub_documents: - document_format_thread = threading.Thread( - target=self._format_qa_document, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "tenant_id": kwargs.get("tenant_id"), - "document_node": doc, - "all_qa_documents": all_qa_documents, - "document_language": kwargs.get("doc_language", "English"), - }, - ) - threads.append(document_format_thread) - document_format_thread.start() - for thread in threads: - thread.join() + if preview: + self._format_qa_document( + current_app._get_current_object(), # type: ignore + kwargs.get("tenant_id"), # type: ignore + all_documents[0], + all_qa_documents, + kwargs.get("doc_language", "English"), + ) + else: + for i in range(0, len(all_documents), 10): + threads = [] + sub_documents = all_documents[i : i + 10] + for doc in sub_documents: + document_format_thread = threading.Thread( + target=self._format_qa_document, + kwargs={ + "flask_app": current_app._get_current_object(), # type: ignore + "tenant_id": kwargs.get("tenant_id"), # type: ignore + "document_node": doc, + "all_qa_documents": all_qa_documents, + "document_language": kwargs.get("doc_language", "English"), + }, + ) + threads.append(document_format_thread) + document_format_thread.start() + for thread in threads: + thread.join() return all_qa_documents def format_by_template(self, file: FileStorage, **kwargs) -> list[Document]: @@ -98,12 +121,12 @@ class QAIndexProcessor(BaseIndexProcessor): raise ValueError(str(e)) return text_docs - def load(self, dataset: Dataset, documents: list[Document], with_keywords: bool = True): + def load(self, dataset: Dataset, documents: list[Document], with_keywords: bool = True, **kwargs): if dataset.indexing_technique == "high_quality": vector = Vector(dataset) vector.create(documents) - def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True): + def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True, **kwargs): vector = Vector(dataset) if node_ids: vector.delete_by_ids(node_ids) diff --git a/api/core/rag/models/document.py b/api/core/rag/models/document.py index 1e9aaa24f0..421cdc05df 100644 --- a/api/core/rag/models/document.py +++ b/api/core/rag/models/document.py @@ -2,7 +2,20 @@ from abc import ABC, abstractmethod from collections.abc import Sequence from typing import Any, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel + + +class ChildDocument(BaseModel): + 
"""Class for storing a piece of text and associated metadata.""" + + page_content: str + + vector: Optional[list[float]] = None + + """Arbitrary metadata about the page content (e.g., source, relationships to other + documents, etc.). + """ + metadata: dict = {} class Document(BaseModel): @@ -15,10 +28,12 @@ class Document(BaseModel): """Arbitrary metadata about the page content (e.g., source, relationships to other documents, etc.). """ - metadata: Optional[dict] = Field(default_factory=dict) + metadata: dict = {} provider: Optional[str] = "dify" + children: Optional[list[ChildDocument]] = None + class BaseDocumentTransformer(ABC): """Abstract base class for document transformation systems. diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 8a7172f27c..290d9e6e61 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -164,43 +164,29 @@ class DatasetRetrieval: "content": item.page_content, } retrieval_resource_list.append(source) - document_score_list = {} # deal with dify documents if dify_documents: - for item in dify_documents: - if item.metadata.get("score"): - document_score_list[item.metadata["doc_id"]] = item.metadata["score"] - - index_node_ids = [document.metadata["doc_id"] for document in dify_documents] - segments = DocumentSegment.query.filter( - DocumentSegment.dataset_id.in_(dataset_ids), - DocumentSegment.status == "completed", - DocumentSegment.enabled == True, - DocumentSegment.index_node_id.in_(index_node_ids), - ).all() - - if segments: - index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)} - sorted_segments = sorted( - segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf")) - ) - for segment in sorted_segments: + records = RetrievalService.format_retrieval_documents(dify_documents) + if records: + for record in records: + segment = record.segment if segment.answer: document_context_list.append( DocumentContext( content=f"question:{segment.get_sign_content()} answer:{segment.answer}", - score=document_score_list.get(segment.index_node_id, None), + score=record.score, ) ) else: document_context_list.append( DocumentContext( content=segment.get_sign_content(), - score=document_score_list.get(segment.index_node_id, None), + score=record.score, ) ) if show_retrieve_source: - for segment in sorted_segments: + for record in records: + segment = record.segment dataset = Dataset.query.filter_by(id=segment.dataset_id).first() document = DatasetDocument.query.filter( DatasetDocument.id == segment.document_id, @@ -216,7 +202,7 @@ class DatasetRetrieval: "data_source_type": document.data_source_type, "segment_id": segment.id, "retriever_from": invoke_from.to_source(), - "score": document_score_list.get(segment.index_node_id, 0.0), + "score": record.score or 0.0, } if invoke_from.to_source() == "dev": diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py index 38980f6d75..6941ff8fa2 100644 --- a/api/core/tools/entities/tool_entities.py +++ b/api/core/tools/entities/tool_entities.py @@ -267,6 +267,7 @@ class ToolParameter(PluginParameter): :param options: the options of the parameter """ # convert options to ToolParameterOption + # FIXME fix the type error if options: option_objs = [ PluginParameterOption(value=option, label=I18nObject(en_US=option, zh_Hans=option)) diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py index 
cfa8e6b8b2..702c4384ae 100644 --- a/api/core/tools/tool_engine.py +++ b/api/core/tools/tool_engine.py @@ -139,7 +139,7 @@ class ToolEngine: error_response = f"tool invoke error: {e}" agent_tool_callback.on_tool_error(e) except ToolEngineInvokeError as e: - meta = e.args[0] + meta = e.meta error_response = f"tool invoke error: {meta.error}" agent_tool_callback.on_tool_error(e) return error_response, [], meta diff --git a/api/core/tools/utils/text_processing_utils.py b/api/core/tools/utils/text_processing_utils.py index 6db9dfd0d9..105823f896 100644 --- a/api/core/tools/utils/text_processing_utils.py +++ b/api/core/tools/utils/text_processing_utils.py @@ -12,5 +12,6 @@ def remove_leading_symbols(text: str) -> str: str: The text with leading punctuation or symbols removed. """ # Match Unicode ranges for punctuation and symbols - pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~]+" + # FIXME this pattern is confused quick fix for #11868 maybe refactor it later + pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+" return re.sub(pattern, "", text) diff --git a/api/core/workflow/graph_engine/entities/graph.py b/api/core/workflow/graph_engine/entities/graph.py index b3bcc3b2cc..5c672c985b 100644 --- a/api/core/workflow/graph_engine/entities/graph.py +++ b/api/core/workflow/graph_engine/entities/graph.py @@ -613,10 +613,10 @@ class Graph(BaseModel): for (node_id, node_id2), branch_node_ids in duplicate_end_node_ids.items(): # check which node is after if cls._is_node2_after_node1(node1_id=node_id, node2_id=node_id2, edge_mapping=edge_mapping): - if node_id in merge_branch_node_ids: + if node_id in merge_branch_node_ids and node_id2 in merge_branch_node_ids: del merge_branch_node_ids[node_id2] elif cls._is_node2_after_node1(node1_id=node_id2, node2_id=node_id, edge_mapping=edge_mapping): - if node_id2 in merge_branch_node_ids: + if node_id in merge_branch_node_ids and node_id2 in merge_branch_node_ids: del merge_branch_node_ids[node_id] branches_merge_node_ids: dict[str, str] = {} diff --git a/api/core/workflow/nodes/answer/base_stream_processor.py b/api/core/workflow/nodes/answer/base_stream_processor.py index 8ffb487ec1..f22ea078fb 100644 --- a/api/core/workflow/nodes/answer/base_stream_processor.py +++ b/api/core/workflow/nodes/answer/base_stream_processor.py @@ -48,9 +48,11 @@ class StreamProcessor(ABC): # we remove the node maybe shortcut the answer node, so comment this code for now # there is not effect on the answer node and the workflow, when we have a better solution # we can open this code. 
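The narrowed pattern in remove_leading_symbols above no longer strips leading hyphens, square brackets, braces, or pipes, only plain punctuation and the listed Unicode punctuation blocks. A quick standalone check of that behaviour:

import re

pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+"

print(re.sub(pattern, "", "。Hello"))        # "Hello": leading CJK full stop is removed
print(re.sub(pattern, "", "[1] A heading"))  # "[1] A heading": brackets are kept now
print(re.sub(pattern, "", "- list item"))    # "- list item": leading hyphen is kept now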
Issues: #11542 #9560 #10638 #10564 - - # reachable_node_ids.extend(self._fetch_node_ids_in_reachable_branch(edge.target_node_id)) - continue + ids = self._fetch_node_ids_in_reachable_branch(edge.target_node_id) + if "answer" in ids: + continue + else: + reachable_node_ids.extend(ids) else: unreachable_first_node_ids.append(edge.target_node_id) diff --git a/api/core/workflow/nodes/http_request/exc.py b/api/core/workflow/nodes/http_request/exc.py index a815f277be..46613c9e86 100644 --- a/api/core/workflow/nodes/http_request/exc.py +++ b/api/core/workflow/nodes/http_request/exc.py @@ -20,3 +20,7 @@ class ResponseSizeError(HttpRequestNodeError): class RequestBodyError(HttpRequestNodeError): """Raised when the request body is invalid.""" + + +class InvalidURLError(HttpRequestNodeError): + """Raised when the URL is invalid.""" diff --git a/api/core/workflow/nodes/http_request/executor.py b/api/core/workflow/nodes/http_request/executor.py index cdfdc6e6d5..fadd142e35 100644 --- a/api/core/workflow/nodes/http_request/executor.py +++ b/api/core/workflow/nodes/http_request/executor.py @@ -23,6 +23,7 @@ from .exc import ( FileFetchError, HttpRequestNodeError, InvalidHttpMethodError, + InvalidURLError, RequestBodyError, ResponseSizeError, ) @@ -66,6 +67,12 @@ class Executor: node_data.authorization.config.api_key ).text + # check if node_data.url is a valid URL + if not node_data.url: + raise InvalidURLError("url is required") + if not node_data.url.startswith(("http://", "https://")): + raise InvalidURLError("url should start with http:// or https://") + self.url: str = node_data.url self.method = node_data.method self.auth = node_data.authorization diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index bfd93c074d..0f239af51a 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -11,6 +11,7 @@ from core.entities.model_entities import ModelStatus from core.model_manager import ModelInstance, ModelManager from core.model_runtime.entities.model_entities import ModelFeature, ModelType from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel +from core.rag.datasource.retrieval_service import RetrievalService from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.variables import StringSegment @@ -18,7 +19,7 @@ from core.workflow.entities.node_entities import NodeRunResult from core.workflow.nodes.base import BaseNode from core.workflow.nodes.enums import NodeType from extensions.ext_database import db -from models.dataset import Dataset, Document, DocumentSegment +from models.dataset import Dataset, Document from models.workflow import WorkflowNodeExecutionStatus from .entities import KnowledgeRetrievalNodeData @@ -211,29 +212,12 @@ class KnowledgeRetrievalNode(BaseNode[KnowledgeRetrievalNodeData]): "content": item.page_content, } retrieval_resource_list.append(source) - document_score_list: dict[str, float] = {} # deal with dify documents if dify_documents: - document_score_list = {} - for item in dify_documents: - if item.metadata.get("score"): - document_score_list[item.metadata["doc_id"]] = item.metadata["score"] - - index_node_ids = [document.metadata["doc_id"] for document in dify_documents] - segments = DocumentSegment.query.filter( - 
DocumentSegment.dataset_id.in_(dataset_ids), - DocumentSegment.completed_at.isnot(None), - DocumentSegment.status == "completed", - DocumentSegment.enabled == True, - DocumentSegment.index_node_id.in_(index_node_ids), - ).all() - if segments: - index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)} - sorted_segments = sorted( - segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf")) - ) - - for segment in sorted_segments: + records = RetrievalService.format_retrieval_documents(dify_documents) + if records: + for record in records: + segment = record.segment dataset = Dataset.query.filter_by(id=segment.dataset_id).first() document = Document.query.filter( Document.id == segment.document_id, @@ -251,7 +235,7 @@ class KnowledgeRetrievalNode(BaseNode[KnowledgeRetrievalNodeData]): "document_data_source_type": document.data_source_type, "segment_id": segment.id, "retriever_from": "workflow", - "score": document_score_list.get(segment.index_node_id, None), + "score": record.score or 0.0, "segment_hit_count": segment.hit_count, "segment_word_count": segment.word_count, "segment_position": segment.position, @@ -270,10 +254,8 @@ class KnowledgeRetrievalNode(BaseNode[KnowledgeRetrievalNodeData]): key=lambda x: x["metadata"]["score"] if x["metadata"].get("score") is not None else 0.0, reverse=True, ) - position = 1 - for item in retrieval_resource_list: + for position, item in enumerate(retrieval_resource_list, start=1): item["metadata"]["position"] = position - position += 1 return retrieval_resource_list @classmethod diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py index dc919892e5..d19d6413ed 100644 --- a/api/core/workflow/nodes/tool/tool_node.py +++ b/api/core/workflow/nodes/tool/tool_node.py @@ -5,7 +5,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler -from core.file import File, FileTransferMethod, FileType +from core.file import File, FileTransferMethod from core.plugin.manager.exc import PluginDaemonClientSideError from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter from core.tools.tool_engine import ToolEngine @@ -189,10 +189,12 @@ class ToolNode(BaseNode[ToolNodeData]): conversation_id=None, ) - files: list[File] = [] text = "" + files: list[File] = [] json: list[dict] = [] + agent_logs: list[AgentLog] = [] + variables: dict[str, Any] = {} for message in message_stream: @@ -239,14 +241,16 @@ class ToolNode(BaseNode[ToolNodeData]): tool_file = session.scalar(stmt) if tool_file is None: raise ToolFileError(f"tool file {tool_file_id} not exists") + + mapping = { + "tool_file_id": tool_file_id, + "transfer_method": FileTransferMethod.TOOL_FILE, + } + files.append( - File( + file_factory.build_from_mapping( + mapping=mapping, tenant_id=self.tenant_id, - type=FileType.IMAGE, - transfer_method=FileTransferMethod.TOOL_FILE, - related_id=tool_file_id, - extension=None, - mime_type=message.meta.get("mime_type", "application/octet-stream"), ) ) elif message.type == ToolInvokeMessage.MessageType.TEXT: diff --git a/api/extensions/ext_blueprints.py b/api/extensions/ext_blueprints.py index fcd1547a2f..316be12f5c 100644 --- a/api/extensions/ext_blueprints.py +++ b/api/extensions/ext_blueprints.py @@ -5,7 +5,7 @@ from dify_app import DifyApp def init_app(app: DifyApp): # register blueprint routers - from flask_cors import CORS + from flask_cors 
import CORS # type: ignore from controllers.console import bp as console_app_bp from controllers.files import bp as files_bp diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 30f216ff95..26bd6b3577 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -69,6 +69,7 @@ def init_app(app: DifyApp) -> Celery: "schedule.create_tidb_serverless_task", "schedule.update_tidb_serverless_status_task", "schedule.clean_messages", + "schedule.mail_clean_document_notify_task", ] day = dify_config.CELERY_BEAT_SCHEDULER_TIME beat_schedule = { @@ -92,6 +93,11 @@ def init_app(app: DifyApp) -> Celery: "task": "schedule.clean_messages.clean_messages", "schedule": timedelta(days=day), }, + # every Monday + "mail_clean_document_notify_task": { + "task": "schedule.mail_clean_document_notify_task.mail_clean_document_notify_task", + "schedule": crontab(minute="0", hour="10", day_of_week="1"), + }, } celery_app.conf.update(beat_schedule=beat_schedule, imports=imports) diff --git a/api/factories/file_factory.py b/api/factories/file_factory.py index 99c7195b2c..f7b658a58f 100644 --- a/api/factories/file_factory.py +++ b/api/factories/file_factory.py @@ -1,4 +1,5 @@ import mimetypes +import uuid from collections.abc import Callable, Mapping, Sequence from typing import Any, cast @@ -119,6 +120,11 @@ def _build_from_local_file( upload_file_id = mapping.get("upload_file_id") if not upload_file_id: raise ValueError("Invalid upload file id") + # check if upload_file_id is a valid uuid + try: + uuid.UUID(upload_file_id) + except ValueError: + raise ValueError("Invalid upload file id format") stmt = select(UploadFile).where( UploadFile.id == upload_file_id, UploadFile.tenant_id == tenant_id, diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index a74e6f54fb..bedab5750f 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -73,6 +73,7 @@ dataset_detail_fields = { "embedding_available": fields.Boolean, "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields), "tags": fields.List(fields.Nested(tag_fields)), + "doc_form": fields.String, "external_knowledge_info": fields.Nested(external_knowledge_info_fields), "external_retrieval_model": fields.Nested(external_retrieval_model_fields, allow_null=True), } diff --git a/api/fields/document_fields.py b/api/fields/document_fields.py index 2b2ac6243f..f2250d964a 100644 --- a/api/fields/document_fields.py +++ b/api/fields/document_fields.py @@ -34,6 +34,7 @@ document_with_segments_fields = { "data_source_info": fields.Raw(attribute="data_source_info_dict"), "data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"), "dataset_process_rule_id": fields.String, + "process_rule_dict": fields.Raw(attribute="process_rule_dict"), "name": fields.String, "created_from": fields.String, "created_by": fields.String, diff --git a/api/fields/hit_testing_fields.py b/api/fields/hit_testing_fields.py index aaafcab8ab..b9f7e78c17 100644 --- a/api/fields/hit_testing_fields.py +++ b/api/fields/hit_testing_fields.py @@ -34,8 +34,16 @@ segment_fields = { "document": fields.Nested(document_fields), } +child_chunk_fields = { + "id": fields.String, + "content": fields.String, + "position": fields.Integer, + "score": fields.Float, +} + hit_testing_record_fields = { "segment": fields.Nested(segment_fields), + "child_chunks": fields.List(fields.Nested(child_chunk_fields)), "score": fields.Float, "tsne_position": fields.Raw, } diff --git a/api/fields/segment_fields.py 
b/api/fields/segment_fields.py index 4413af3160..52f89859c9 100644 --- a/api/fields/segment_fields.py +++ b/api/fields/segment_fields.py @@ -2,6 +2,17 @@ from flask_restful import fields # type: ignore from libs.helper import TimestampField +child_chunk_fields = { + "id": fields.String, + "segment_id": fields.String, + "content": fields.String, + "position": fields.Integer, + "word_count": fields.Integer, + "type": fields.String, + "created_at": TimestampField, + "updated_at": TimestampField, +} + segment_fields = { "id": fields.String, "position": fields.Integer, @@ -20,10 +31,13 @@ segment_fields = { "status": fields.String, "created_by": fields.String, "created_at": TimestampField, + "updated_at": TimestampField, + "updated_by": fields.String, "indexing_at": TimestampField, "completed_at": TimestampField, "error": fields.String, "stopped_at": TimestampField, + "child_chunks": fields.List(fields.Nested(child_chunk_fields)), } segment_list_response = { diff --git a/api/migrations/versions/2024_11_22_0701-e19037032219_parent_child_index.py b/api/migrations/versions/2024_11_22_0701-e19037032219_parent_child_index.py new file mode 100644 index 0000000000..9238e5a0a8 --- /dev/null +++ b/api/migrations/versions/2024_11_22_0701-e19037032219_parent_child_index.py @@ -0,0 +1,55 @@ +"""parent-child-index + +Revision ID: e19037032219 +Revises: 01d6889832f7 +Create Date: 2024-11-22 07:01:17.550037 + +""" +from alembic import op +import models as models +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'e19037032219' +down_revision = 'd7999dfa4aae' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('child_chunks', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('segment_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('content', sa.Text(), nullable=False), + sa.Column('word_count', sa.Integer(), nullable=False), + sa.Column('index_node_id', sa.String(length=255), nullable=True), + sa.Column('index_node_hash', sa.String(length=255), nullable=True), + sa.Column('type', sa.String(length=255), server_default=sa.text("'automatic'::character varying"), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('indexing_at', sa.DateTime(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.Column('error', sa.Text(), nullable=True), + sa.PrimaryKeyConstraint('id', name='child_chunk_pkey') + ) + with op.batch_alter_table('child_chunks', schema=None) as batch_op: + batch_op.create_index('child_chunk_dataset_id_idx', ['tenant_id', 'dataset_id', 'document_id', 'segment_id', 'index_node_id'], unique=False) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
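On the API side, each segment now carries its child chunks as a nested list via the child_chunk_fields added above. A tiny flask_restful illustration of how that nesting marshals, using a reduced field set and made-up sample data:

from flask_restful import fields, marshal

child_chunk_fields = {"id": fields.String, "content": fields.String, "position": fields.Integer}
segment_fields = {"id": fields.String, "child_chunks": fields.List(fields.Nested(child_chunk_fields))}

segment = {"id": "seg-1", "child_chunks": [{"id": "c-1", "content": "child text", "position": 1}]}
print(marshal(segment, segment_fields))  # an OrderedDict mirroring segment_fields, child_chunks as nested dicts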
### + with op.batch_alter_table('child_chunks', schema=None) as batch_op: + batch_op.drop_index('child_chunk_dataset_id_idx') + + op.drop_table('child_chunks') + # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_12_25_1137-923752d42eb6_add_auto_disabled_dataset_logs.py b/api/migrations/versions/2024_12_25_1137-923752d42eb6_add_auto_disabled_dataset_logs.py new file mode 100644 index 0000000000..6dadd4e4a8 --- /dev/null +++ b/api/migrations/versions/2024_12_25_1137-923752d42eb6_add_auto_disabled_dataset_logs.py @@ -0,0 +1,47 @@ +"""add_auto_disabled_dataset_logs + +Revision ID: 923752d42eb6 +Revises: e19037032219 +Create Date: 2024-12-25 11:37:55.467101 + +""" +from alembic import op +import models as models +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '923752d42eb6' +down_revision = 'e19037032219' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('dataset_auto_disable_logs', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('notified', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_auto_disable_log_pkey') + ) + with op.batch_alter_table('dataset_auto_disable_logs', schema=None) as batch_op: + batch_op.create_index('dataset_auto_disable_log_created_atx', ['created_at'], unique=False) + batch_op.create_index('dataset_auto_disable_log_dataset_idx', ['dataset_id'], unique=False) + batch_op.create_index('dataset_auto_disable_log_tenant_idx', ['tenant_id'], unique=False) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('dataset_auto_disable_logs', schema=None) as batch_op: + batch_op.drop_index('dataset_auto_disable_log_tenant_idx') + batch_op.drop_index('dataset_auto_disable_log_dataset_idx') + batch_op.drop_index('dataset_auto_disable_log_created_atx') + + op.drop_table('dataset_auto_disable_logs') + # ### end Alembic commands ### diff --git a/api/models/account.py b/api/models/account.py index 4f8ca0530f..941dd54687 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -23,7 +23,7 @@ class Account(UserMixin, Base): __tablename__ = "accounts" __table_args__ = (db.PrimaryKeyConstraint("id", name="account_pkey"), db.Index("account_email_idx", "email")) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) name = db.Column(db.String(255), nullable=False) email = db.Column(db.String(255), nullable=False) password = db.Column(db.String(255), nullable=True) diff --git a/api/models/dataset.py b/api/models/dataset.py index b9b41dcf47..567f7db432 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -17,6 +17,7 @@ from sqlalchemy.dialects.postgresql import JSONB from configs import dify_config from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_storage import storage +from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule from .account import Account from .engine import db @@ -215,7 +216,7 @@ class DatasetProcessRule(db.Model): # type: ignore[name-defined] created_by = db.Column(StringUUID, nullable=False) created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) - MODES = ["automatic", "custom"] + MODES = ["automatic", "custom", "hierarchical"] PRE_PROCESSING_RULES = ["remove_stopwords", "remove_extra_spaces", "remove_urls_emails"] AUTOMATIC_RULES: dict[str, Any] = { "pre_processing_rules": [ @@ -231,8 +232,6 @@ class DatasetProcessRule(db.Model): # type: ignore[name-defined] "dataset_id": self.dataset_id, "mode": self.mode, "rules": self.rules_dict, - "created_by": self.created_by, - "created_at": self.created_at, } @property @@ -396,6 +395,12 @@ class Document(db.Model): # type: ignore[name-defined] .scalar() ) + @property + def process_rule_dict(self): + if self.dataset_process_rule_id: + return self.dataset_process_rule.to_dict() + return None + def to_dict(self): return { "id": self.id, @@ -560,6 +565,24 @@ class DocumentSegment(db.Model): # type: ignore[name-defined] .first() ) + @property + def child_chunks(self): + process_rule = self.document.dataset_process_rule + if process_rule.mode == "hierarchical": + rules = Rule(**process_rule.rules_dict) + if rules.parent_mode and rules.parent_mode != ParentMode.FULL_DOC: + child_chunks = ( + db.session.query(ChildChunk) + .filter(ChildChunk.segment_id == self.id) + .order_by(ChildChunk.position.asc()) + .all() + ) + return child_chunks or [] + else: + return [] + else: + return [] + def get_sign_content(self): signed_urls = [] text = self.content @@ -605,6 +628,47 @@ class DocumentSegment(db.Model): # type: ignore[name-defined] return text +class ChildChunk(db.Model): # type: ignore[name-defined] + __tablename__ = "child_chunks" + __table_args__ = ( + db.PrimaryKeyConstraint("id", name="child_chunk_pkey"), + db.Index("child_chunk_dataset_id_idx", "tenant_id", "dataset_id", "document_id", "segment_id", "index_node_id"), + ) + + # initial fields + id = db.Column(StringUUID, nullable=False, 
server_default=db.text("uuid_generate_v4()")) + tenant_id = db.Column(StringUUID, nullable=False) + dataset_id = db.Column(StringUUID, nullable=False) + document_id = db.Column(StringUUID, nullable=False) + segment_id = db.Column(StringUUID, nullable=False) + position = db.Column(db.Integer, nullable=False) + content = db.Column(db.Text, nullable=False) + word_count = db.Column(db.Integer, nullable=False) + # indexing fields + index_node_id = db.Column(db.String(255), nullable=True) + index_node_hash = db.Column(db.String(255), nullable=True) + type = db.Column(db.String(255), nullable=False, server_default=db.text("'automatic'::character varying")) + created_by = db.Column(StringUUID, nullable=False) + created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)")) + updated_by = db.Column(StringUUID, nullable=True) + updated_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)")) + indexing_at = db.Column(db.DateTime, nullable=True) + completed_at = db.Column(db.DateTime, nullable=True) + error = db.Column(db.Text, nullable=True) + + @property + def dataset(self): + return db.session.query(Dataset).filter(Dataset.id == self.dataset_id).first() + + @property + def document(self): + return db.session.query(Document).filter(Document.id == self.document_id).first() + + @property + def segment(self): + return db.session.query(DocumentSegment).filter(DocumentSegment.id == self.segment_id).first() + + class AppDatasetJoin(db.Model): # type: ignore[name-defined] __tablename__ = "app_dataset_joins" __table_args__ = ( @@ -844,3 +908,20 @@ class ExternalKnowledgeBindings(db.Model): # type: ignore[name-defined] created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = db.Column(StringUUID, nullable=True) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) + + +class DatasetAutoDisableLog(db.Model): # type: ignore[name-defined] + __tablename__ = "dataset_auto_disable_logs" + __table_args__ = ( + db.PrimaryKeyConstraint("id", name="dataset_auto_disable_log_pkey"), + db.Index("dataset_auto_disable_log_tenant_idx", "tenant_id"), + db.Index("dataset_auto_disable_log_dataset_idx", "dataset_id"), + db.Index("dataset_auto_disable_log_created_atx", "created_at"), + ) + + id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) + tenant_id = db.Column(StringUUID, nullable=False) + dataset_id = db.Column(StringUUID, nullable=False) + document_id = db.Column(StringUUID, nullable=False) + notified = db.Column(db.Boolean, nullable=False, server_default=db.text("false")) + created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)")) diff --git a/api/models/model.py b/api/models/model.py index 39b091b5c9..462fbb672e 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -611,13 +611,13 @@ class Conversation(Base): db.Index("conversation_app_from_user_idx", "app_id", "from_source", "from_end_user_id"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) app_id = db.Column(StringUUID, nullable=False) app_model_config_id = db.Column(StringUUID, nullable=True) model_provider = db.Column(db.String(255), nullable=True) override_model_configs = db.Column(db.Text) model_id = db.Column(db.String(255), nullable=True) - mode = db.Column(db.String(255), nullable=False) + mode: Mapped[str] = 
mapped_column(db.String(255)) name = db.Column(db.String(255), nullable=False) summary = db.Column(db.Text) _inputs: Mapped[dict] = mapped_column("inputs", db.JSON) @@ -851,7 +851,7 @@ class Message(Base): Index("message_created_at_idx", "created_at"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) app_id = db.Column(StringUUID, nullable=False) model_provider = db.Column(db.String(255), nullable=True) model_id = db.Column(db.String(255), nullable=True) @@ -878,7 +878,7 @@ class Message(Base): from_source = db.Column(db.String(255), nullable=False) from_end_user_id: Mapped[Optional[str]] = db.Column(StringUUID) from_account_id: Mapped[Optional[str]] = db.Column(StringUUID) - created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column(db.DateTime, server_default=func.current_timestamp()) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) agent_based = db.Column(db.Boolean, nullable=False, server_default=db.text("false")) workflow_run_id = db.Column(StringUUID) @@ -1403,7 +1403,7 @@ class EndUser(Base, UserMixin): external_user_id = db.Column(db.String(255), nullable=True) name = db.Column(db.String(255)) is_anonymous = db.Column(db.Boolean, nullable=False, server_default=db.text("true")) - session_id = db.Column(db.String(255), nullable=False) + session_id: Mapped[str] = mapped_column() created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) diff --git a/api/models/tools.py b/api/models/tools.py index 0fcd87d2b9..e9b52730d9 100644 --- a/api/models/tools.py +++ b/api/models/tools.py @@ -1,6 +1,7 @@ import json from datetime import datetime from typing import Optional +from typing import Any import sqlalchemy as sa from deprecated import deprecated @@ -256,8 +257,8 @@ class ToolConversationVariables(Base): updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) @property - def variables(self) -> dict: - return dict(json.loads(self.variables_str)) + def variables(self) -> Any: + return json.loads(self.variables_str) class ToolFile(Base): diff --git a/api/models/workflow.py b/api/models/workflow.py index 6e2bdf2392..eba9c1b772 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -402,23 +402,23 @@ class WorkflowRun(Base): db.Index("workflow_run_tenant_app_sequence_idx", "tenant_id", "app_id", "sequence_number"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) - tenant_id = db.Column(StringUUID, nullable=False) - app_id = db.Column(StringUUID, nullable=False) - sequence_number = db.Column(db.Integer, nullable=False) - workflow_id = db.Column(StringUUID, nullable=False) - type = db.Column(db.String(255), nullable=False) - triggered_from = db.Column(db.String(255), nullable=False) - version = db.Column(db.String(255), nullable=False) - graph = db.Column(db.Text) - inputs = db.Column(db.Text) - status = db.Column(db.String(255), nullable=False) # running, succeeded, failed, stopped, partial-succeeded + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) + tenant_id: Mapped[str] = mapped_column(StringUUID) + app_id: Mapped[str] = mapped_column(StringUUID) + sequence_number: Mapped[int] = mapped_column() + 
workflow_id: Mapped[str] = mapped_column(StringUUID) + type: Mapped[str] = mapped_column(db.String(255)) + triggered_from: Mapped[str] = mapped_column(db.String(255)) + version: Mapped[str] = mapped_column(db.String(255)) + graph: Mapped[Optional[str]] = mapped_column(db.Text) + inputs: Mapped[Optional[str]] = mapped_column(db.Text) + status: Mapped[str] = mapped_column(db.String(255)) # running, succeeded, failed, stopped, partial-succeeded outputs: Mapped[Optional[str]] = mapped_column(sa.Text, default="{}") - error = db.Column(db.Text) + error: Mapped[Optional[str]] = mapped_column(db.Text) elapsed_time = db.Column(db.Float, nullable=False, server_default=db.text("0")) - total_tokens = db.Column(db.Integer, nullable=False, server_default=db.text("0")) + total_tokens: Mapped[int] = mapped_column(server_default=db.text("0")) total_steps = db.Column(db.Integer, server_default=db.text("0")) - created_by_role = db.Column(db.String(255), nullable=False) # account, end_user + created_by_role: Mapped[str] = mapped_column(db.String(255)) # account, end_user created_by = db.Column(StringUUID, nullable=False) created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) finished_at = db.Column(db.DateTime) @@ -631,29 +631,29 @@ class WorkflowNodeExecution(Base): ), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) - tenant_id = db.Column(StringUUID, nullable=False) - app_id = db.Column(StringUUID, nullable=False) - workflow_id = db.Column(StringUUID, nullable=False) - triggered_from = db.Column(db.String(255), nullable=False) - workflow_run_id = db.Column(StringUUID) - index = db.Column(db.Integer, nullable=False) - predecessor_node_id = db.Column(db.String(255)) - node_execution_id = db.Column(db.String(255), nullable=True) - node_id = db.Column(db.String(255), nullable=False) - node_type = db.Column(db.String(255), nullable=False) - title = db.Column(db.String(255), nullable=False) - inputs = db.Column(db.Text) - process_data = db.Column(db.Text) - outputs = db.Column(db.Text) - status = db.Column(db.String(255), nullable=False) - error = db.Column(db.Text) - elapsed_time = db.Column(db.Float, nullable=False, server_default=db.text("0")) - execution_metadata = db.Column(db.Text) - created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) - created_by_role = db.Column(db.String(255), nullable=False) - created_by = db.Column(StringUUID, nullable=False) - finished_at = db.Column(db.DateTime) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) + tenant_id: Mapped[str] = mapped_column(StringUUID) + app_id: Mapped[str] = mapped_column(StringUUID) + workflow_id: Mapped[str] = mapped_column(StringUUID) + triggered_from: Mapped[str] = mapped_column(db.String(255)) + workflow_run_id: Mapped[Optional[str]] = mapped_column(StringUUID) + index: Mapped[int] = mapped_column(db.Integer) + predecessor_node_id: Mapped[Optional[str]] = mapped_column(db.String(255)) + node_execution_id: Mapped[Optional[str]] = mapped_column(db.String(255)) + node_id: Mapped[str] = mapped_column(db.String(255)) + node_type: Mapped[str] = mapped_column(db.String(255)) + title: Mapped[str] = mapped_column(db.String(255)) + inputs: Mapped[Optional[str]] = mapped_column(db.Text) + process_data: Mapped[Optional[str]] = mapped_column(db.Text) + outputs: Mapped[Optional[str]] = mapped_column(db.Text) + status: Mapped[str] = mapped_column(db.String(255)) + error: Mapped[Optional[str]] = mapped_column(db.Text) 
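(A minimal sketch of the SQLAlchemy 2.0 typed-declaration pattern these hunks migrate to, using a hypothetical standalone model; the class and table names here are illustrative and not part of this patch. Nullability is inferred from the annotation, which is why the explicit nullable= arguments disappear above.)

from datetime import datetime
from typing import Optional

from sqlalchemy import DateTime, String, create_engine, func
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class ExampleRun(Base):
    __tablename__ = "example_runs"

    # Mapped[str] implies NOT NULL and Mapped[Optional[str]] implies NULL allowed,
    # so nullable=False/True no longer needs to be spelled out per column.
    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    status: Mapped[str] = mapped_column(String(255))
    error: Mapped[Optional[str]] = mapped_column(String(255))
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.current_timestamp())


# Quick check that the mapping compiles against an in-memory database.
Base.metadata.create_all(create_engine("sqlite://"))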
+ elapsed_time: Mapped[float] = mapped_column(db.Float, server_default=db.text("0")) + execution_metadata: Mapped[Optional[str]] = mapped_column(db.Text) + created_at: Mapped[datetime] = mapped_column(db.DateTime, server_default=func.current_timestamp()) + created_by_role: Mapped[str] = mapped_column(db.String(255)) + created_by: Mapped[str] = mapped_column(StringUUID) + finished_at: Mapped[Optional[datetime]] = mapped_column(db.DateTime) @property def created_by_account(self): @@ -760,11 +760,11 @@ class WorkflowAppLog(Base): db.Index("workflow_app_log_app_idx", "tenant_id", "app_id"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) - tenant_id = db.Column(StringUUID, nullable=False) - app_id = db.Column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) + tenant_id: Mapped[str] = mapped_column(StringUUID) + app_id: Mapped[str] = mapped_column(StringUUID) workflow_id = db.Column(StringUUID, nullable=False) - workflow_run_id = db.Column(StringUUID, nullable=False) + workflow_run_id: Mapped[str] = mapped_column(StringUUID) created_from = db.Column(db.String(255), nullable=False) created_by_role = db.Column(db.String(255), nullable=False) created_by = db.Column(StringUUID, nullable=False) diff --git a/api/schedule/clean_messages.py b/api/schedule/clean_messages.py index 48bdc872f4..5e4d3ec323 100644 --- a/api/schedule/clean_messages.py +++ b/api/schedule/clean_messages.py @@ -28,7 +28,6 @@ def clean_messages(): plan_sandbox_clean_message_day = datetime.datetime.now() - datetime.timedelta( days=dify_config.PLAN_SANDBOX_CLEAN_MESSAGE_DAY_SETTING ) - page = 1 while True: try: # Main query with join and filter @@ -79,4 +78,4 @@ def clean_messages(): db.session.query(Message).filter(Message.id == message.id).delete() db.session.commit() end_at = time.perf_counter() - click.echo(click.style("Cleaned unused dataset from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style("Cleaned messages from db success latency: {}".format(end_at - start_at), fg="green")) diff --git a/api/schedule/clean_unused_datasets_task.py b/api/schedule/clean_unused_datasets_task.py index f66b3c4797..eb73cc285d 100644 --- a/api/schedule/clean_unused_datasets_task.py +++ b/api/schedule/clean_unused_datasets_task.py @@ -10,7 +10,7 @@ from configs import dify_config from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db from extensions.ext_redis import redis_client -from models.dataset import Dataset, DatasetQuery, Document +from models.dataset import Dataset, DatasetAutoDisableLog, DatasetQuery, Document from services.feature_service import FeatureService @@ -75,6 +75,23 @@ def clean_unused_datasets_task(): ) if not dataset_query or len(dataset_query) == 0: try: + # add auto disable log + documents = ( + db.session.query(Document) + .filter( + Document.dataset_id == dataset.id, + Document.enabled == True, + Document.archived == False, + ) + .all() + ) + for document in documents: + dataset_auto_disable_log = DatasetAutoDisableLog( + tenant_id=dataset.tenant_id, + dataset_id=dataset.id, + document_id=document.id, + ) + db.session.add(dataset_auto_disable_log) # remove index index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor() index_processor.clean(dataset, None) @@ -151,6 +168,23 @@ def clean_unused_datasets_task(): else: plan = plan_cache.decode() if plan == "sandbox": + # add auto disable log + 
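(The sandbox-plan branch below records one DatasetAutoDisableLog row per enabled, non-archived document before the dataset's index is cleaned, so that the new notification task can later email workspace owners. A rough sketch of that recording step in isolation, assuming the models imported at the top of this file; simplified and lifted out of the scheduler loop, without the commit handled elsewhere in the task.)

from extensions.ext_database import db
from models.dataset import Dataset, DatasetAutoDisableLog, Document


def record_auto_disable_logs(dataset: Dataset) -> int:
    # Collect the documents that are about to be disabled along with the dataset.
    documents = (
        db.session.query(Document)
        .filter(
            Document.dataset_id == dataset.id,
            Document.enabled == True,  # noqa: E712
            Document.archived == False,  # noqa: E712
        )
        .all()
    )
    # One log row per document; the notify task groups them by tenant later.
    for document in documents:
        db.session.add(
            DatasetAutoDisableLog(
                tenant_id=dataset.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
            )
        )
    return len(documents)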
documents = ( + db.session.query(Document) + .filter( + Document.dataset_id == dataset.id, + Document.enabled == True, + Document.archived == False, + ) + .all() + ) + for document in documents: + dataset_auto_disable_log = DatasetAutoDisableLog( + tenant_id=dataset.tenant_id, + dataset_id=dataset.id, + document_id=document.id, + ) + db.session.add(dataset_auto_disable_log) # remove index index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor() index_processor.clean(dataset, None) diff --git a/api/schedule/mail_clean_document_notify_task.py b/api/schedule/mail_clean_document_notify_task.py new file mode 100644 index 0000000000..fe6839288d --- /dev/null +++ b/api/schedule/mail_clean_document_notify_task.py @@ -0,0 +1,90 @@ +import logging +import time +from collections import defaultdict + +import click +from flask import render_template # type: ignore + +import app +from configs import dify_config +from extensions.ext_database import db +from extensions.ext_mail import mail +from models.account import Account, Tenant, TenantAccountJoin +from models.dataset import Dataset, DatasetAutoDisableLog +from services.feature_service import FeatureService + + +@app.celery.task(queue="dataset") +def send_document_clean_notify_task(): + """ + Async Send document clean notify mail + + Usage: send_document_clean_notify_task.delay() + """ + if not mail.is_inited(): + return + + logging.info(click.style("Start send document clean notify mail", fg="green")) + start_at = time.perf_counter() + + # send document clean notify mail + try: + dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter(DatasetAutoDisableLog.notified == False).all() + # group by tenant_id + dataset_auto_disable_logs_map: dict[str, list[DatasetAutoDisableLog]] = defaultdict(list) + for dataset_auto_disable_log in dataset_auto_disable_logs: + if dataset_auto_disable_log.tenant_id not in dataset_auto_disable_logs_map: + dataset_auto_disable_logs_map[dataset_auto_disable_log.tenant_id] = [] + dataset_auto_disable_logs_map[dataset_auto_disable_log.tenant_id].append(dataset_auto_disable_log) + url = f"{dify_config.CONSOLE_WEB_URL}/datasets" + for tenant_id, tenant_dataset_auto_disable_logs in dataset_auto_disable_logs_map.items(): + features = FeatureService.get_features(tenant_id) + plan = features.billing.subscription.plan + if plan != "sandbox": + knowledge_details = [] + # check tenant + tenant = Tenant.query.filter(Tenant.id == tenant_id).first() + if not tenant: + continue + # check current owner + current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first() + if not current_owner_join: + continue + account = Account.query.filter(Account.id == current_owner_join.account_id).first() + if not account: + continue + + dataset_auto_dataset_map = {} # type: ignore + for dataset_auto_disable_log in tenant_dataset_auto_disable_logs: + if dataset_auto_disable_log.dataset_id not in dataset_auto_dataset_map: + dataset_auto_dataset_map[dataset_auto_disable_log.dataset_id] = [] + dataset_auto_dataset_map[dataset_auto_disable_log.dataset_id].append( + dataset_auto_disable_log.document_id + ) + + for dataset_id, document_ids in dataset_auto_dataset_map.items(): + dataset = Dataset.query.filter(Dataset.id == dataset_id).first() + if dataset: + document_count = len(document_ids) + knowledge_details.append(rf"Knowledge base {dataset.name}: {document_count} documents") + if knowledge_details: + html_content = render_template( + "clean_document_job_mail_template-US.html", + 
userName=account.email, + knowledge_details=knowledge_details, + url=url, + ) + mail.send( + to=account.email, subject="Dify Knowledge base auto disable notification", html=html_content + ) + + # update notified to True + for dataset_auto_disable_log in tenant_dataset_auto_disable_logs: + dataset_auto_disable_log.notified = True + db.session.commit() + end_at = time.perf_counter() + logging.info( + click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green") + ) + except Exception: + logging.exception("Send document clean notify mail failed") diff --git a/api/services/account_service.py b/api/services/account_service.py index 13b70db580..214fb88995 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -798,6 +798,7 @@ class RegisterService: language: Optional[str] = None, status: Optional[AccountStatus] = None, is_setup: Optional[bool] = False, + create_workspace_required: Optional[bool] = True, ) -> Account: db.session.begin_nested() """Register account""" @@ -815,7 +816,7 @@ class RegisterService: if open_id is not None and provider is not None: AccountService.link_account_integrate(provider, open_id, account) - if FeatureService.get_system_features().is_allow_create_workspace: + if FeatureService.get_system_features().is_allow_create_workspace and create_workspace_required: tenant = TenantService.create_tenant(f"{account.name}'s Workspace") TenantService.create_tenant_member(tenant, account, role="owner") account.current_tenant = tenant diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py index 7793fdc4ff..932d68bea1 100644 --- a/api/services/app_dsl_service.py +++ b/api/services/app_dsl_service.py @@ -4,7 +4,7 @@ from enum import StrEnum from typing import Optional, cast from uuid import uuid4 -import yaml +import yaml # type: ignore from packaging import version from pydantic import BaseModel, Field from sqlalchemy import select @@ -196,6 +196,9 @@ class AppDslService: data["kind"] = "app" imported_version = data.get("version", "0.1.0") + # check if imported_version is a float-like string + if not isinstance(imported_version, str): + raise ValueError(f"Invalid version type, expected str, got {type(imported_version)}") status = _check_version_compatibility(imported_version) # Extract app data @@ -524,7 +527,7 @@ class AppDslService: else: cls._append_model_config_export_data(export_data, app_model) - return yaml.dump(export_data, allow_unicode=True) + return yaml.dump(export_data, allow_unicode=True) # type: ignore @classmethod def _append_workflow_export_data(cls, *, export_data: dict, app_model: App, include_secret: bool) -> None: diff --git a/api/services/audio_service.py b/api/services/audio_service.py index 973110f515..ef52301c0a 100644 --- a/api/services/audio_service.py +++ b/api/services/audio_service.py @@ -1,5 +1,6 @@ import io import logging +import uuid from typing import Optional from werkzeug.datastructures import FileStorage @@ -122,6 +123,10 @@ class AudioService: raise e if message_id: + try: + uuid.UUID(message_id) + except ValueError: + return None message = db.session.query(Message).filter(Message.id == message_id).first() if message is None: return None diff --git a/api/services/billing_service.py b/api/services/billing_service.py index d980186488..ed611a8be4 100644 --- a/api/services/billing_service.py +++ b/api/services/billing_service.py @@ -2,7 +2,7 @@ import os from typing import Optional import httpx -from tenacity import retry, 
retry_if_not_exception_type, stop_before_delay, wait_fixed +from tenacity import retry, retry_if_exception_type, stop_before_delay, wait_fixed from extensions.ext_database import db from models.account import TenantAccountJoin, TenantAccountRole @@ -44,7 +44,7 @@ class BillingService: @retry( wait=wait_fixed(2), stop=stop_before_delay(10), - retry=retry_if_not_exception_type(httpx.RequestError), + retry=retry_if_exception_type(httpx.RequestError), reraise=True, ) def _send_request(cls, method, endpoint, json=None, params=None): diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index ca741f1935..b7ddd14025 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -14,6 +14,7 @@ from configs import dify_config from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType +from core.rag.index_processor.constant.index_type import IndexType from core.rag.retrieval.retrieval_methods import RetrievalMethod from events.dataset_event import dataset_was_deleted from events.document_event import document_was_deleted @@ -23,7 +24,9 @@ from libs import helper from models.account import Account, TenantAccountRole from models.dataset import ( AppDatasetJoin, + ChildChunk, Dataset, + DatasetAutoDisableLog, DatasetCollectionBinding, DatasetPermission, DatasetPermissionEnum, @@ -35,8 +38,15 @@ from models.dataset import ( ) from models.model import UploadFile from models.source import DataSourceOauthBinding -from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateEntity -from services.errors.account import NoPermissionError +from services.entities.knowledge_entities.knowledge_entities import ( + ChildChunkUpdateArgs, + KnowledgeConfig, + RerankingModel, + RetrievalModel, + SegmentUpdateArgs, +) +from services.errors.account import InvalidActionError, NoPermissionError +from services.errors.chunk import ChildChunkDeleteIndexError, ChildChunkIndexingError from services.errors.dataset import DatasetNameDuplicateError from services.errors.document import DocumentIndexingError from services.errors.file import FileNotExistsError @@ -44,13 +54,16 @@ from services.external_knowledge_service import ExternalDatasetService from services.feature_service import FeatureModel, FeatureService from services.tag_service import TagService from services.vector_service import VectorService +from tasks.batch_clean_document_task import batch_clean_document_task from tasks.clean_notion_document_task import clean_notion_document_task from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task from tasks.delete_segment_from_index_task import delete_segment_from_index_task from tasks.disable_segment_from_index_task import disable_segment_from_index_task +from tasks.disable_segments_from_index_task import disable_segments_from_index_task from tasks.document_indexing_task import document_indexing_task from tasks.document_indexing_update_task import document_indexing_update_task from tasks.duplicate_document_indexing_task import duplicate_document_indexing_task +from tasks.enable_segments_to_index_task import enable_segments_to_index_task from tasks.recover_document_indexing_task import recover_document_indexing_task from tasks.retry_document_indexing_task import retry_document_indexing_task from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task @@ -408,6 +421,24 @@ class 
DatasetService: .all() ) + @staticmethod + def get_dataset_auto_disable_logs(dataset_id: str) -> dict: + # get recent 30 days auto disable logs + start_date = datetime.datetime.now() - datetime.timedelta(days=30) + dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter( + DatasetAutoDisableLog.dataset_id == dataset_id, + DatasetAutoDisableLog.created_at >= start_date, + ).all() + if dataset_auto_disable_logs: + return { + "document_ids": [log.document_id for log in dataset_auto_disable_logs], + "count": len(dataset_auto_disable_logs), + } + return { + "document_ids": [], + "count": 0, + } + class DocumentService: DEFAULT_RULES = { @@ -518,12 +549,14 @@ class DocumentService: } @staticmethod - def get_document(dataset_id: str, document_id: str) -> Optional[Document]: - document = ( - db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first() - ) - - return document + def get_document(dataset_id: str, document_id: Optional[str] = None) -> Optional[Document]: + if document_id: + document = ( + db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first() + ) + return document + else: + return None @staticmethod def get_document_by_id(document_id: str) -> Optional[Document]: @@ -588,6 +621,20 @@ class DocumentService: db.session.delete(document) db.session.commit() + @staticmethod + def delete_documents(dataset: Dataset, document_ids: list[str]): + documents = db.session.query(Document).filter(Document.id.in_(document_ids)).all() + file_ids = [ + document.data_source_info_dict["upload_file_id"] + for document in documents + if document.data_source_type == "upload_file" + ] + batch_clean_document_task.delay(document_ids, dataset.id, dataset.doc_form, file_ids) + + for document in documents: + db.session.delete(document) + db.session.commit() + @staticmethod def rename_document(dataset_id: str, document_id: str, name: str) -> Document: dataset = DatasetService.get_dataset(dataset_id) @@ -689,7 +736,7 @@ class DocumentService: @staticmethod def save_document_with_dataset_id( dataset: Dataset, - document_data: dict, + knowledge_config: KnowledgeConfig, account: Account | Any, dataset_process_rule: Optional[DatasetProcessRule] = None, created_from: str = "web", @@ -698,37 +745,35 @@ class DocumentService: features = FeatureService.get_features(current_user.current_tenant_id) if features.billing.enabled: - if "original_document_id" not in document_data or not document_data["original_document_id"]: + if not knowledge_config.original_document_id: count = 0 - if document_data["data_source"]["type"] == "upload_file": - upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"] - count = len(upload_file_list) - elif document_data["data_source"]["type"] == "notion_import": - notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"] - for notion_info in notion_info_list: - count = count + len(notion_info["pages"]) - elif document_data["data_source"]["type"] == "website_crawl": - website_info = document_data["data_source"]["info_list"]["website_info_list"] - count = len(website_info["urls"]) - batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) - if count > batch_upload_limit: - raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.") + if knowledge_config.data_source: + if knowledge_config.data_source.info_list.data_source_type == "upload_file": + upload_file_list = 
knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore + count = len(upload_file_list) + elif knowledge_config.data_source.info_list.data_source_type == "notion_import": + notion_info_list = knowledge_config.data_source.info_list.notion_info_list + for notion_info in notion_info_list: # type: ignore + count = count + len(notion_info.pages) + elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": + website_info = knowledge_config.data_source.info_list.website_info_list + count = len(website_info.urls) # type: ignore + batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) + if count > batch_upload_limit: + raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.") - DocumentService.check_documents_upload_quota(count, features) + DocumentService.check_documents_upload_quota(count, features) # if dataset is empty, update dataset data_source_type if not dataset.data_source_type: - dataset.data_source_type = document_data["data_source"]["type"] + dataset.data_source_type = knowledge_config.data_source.info_list.data_source_type # type: ignore if not dataset.indexing_technique: - if ( - "indexing_technique" not in document_data - or document_data["indexing_technique"] not in Dataset.INDEXING_TECHNIQUE_LIST - ): - raise ValueError("Indexing technique is required") + if knowledge_config.indexing_technique not in Dataset.INDEXING_TECHNIQUE_LIST: + raise ValueError("Indexing technique is invalid") - dataset.indexing_technique = document_data["indexing_technique"] - if document_data["indexing_technique"] == "high_quality": + dataset.indexing_technique = knowledge_config.indexing_technique + if knowledge_config.indexing_technique == "high_quality": model_manager = ModelManager() embedding_model = model_manager.get_default_model_instance( tenant_id=current_user.current_tenant_id, model_type=ModelType.TEXT_EMBEDDING @@ -748,46 +793,47 @@ class DocumentService: "score_threshold_enabled": False, } - dataset.retrieval_model = document_data.get("retrieval_model") or default_retrieval_model + dataset.retrieval_model = knowledge_config.retrieval_model.model_dump() or default_retrieval_model # type: ignore documents = [] - if document_data.get("original_document_id"): - document = DocumentService.update_document_with_dataset_id(dataset, document_data, account) + if knowledge_config.original_document_id: + document = DocumentService.update_document_with_dataset_id(dataset, knowledge_config, account) documents.append(document) batch = document.batch else: batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999)) # save process rule if not dataset_process_rule: - process_rule = document_data["process_rule"] - if process_rule["mode"] == "custom": - dataset_process_rule = DatasetProcessRule( - dataset_id=dataset.id, - mode=process_rule["mode"], - rules=json.dumps(process_rule["rules"]), - created_by=account.id, - ) - elif process_rule["mode"] == "automatic": - dataset_process_rule = DatasetProcessRule( - dataset_id=dataset.id, - mode=process_rule["mode"], - rules=json.dumps(DatasetProcessRule.AUTOMATIC_RULES), - created_by=account.id, - ) - else: - logging.warn( - f"Invalid process rule mode: {process_rule['mode']}, can not find dataset process rule" - ) - return - db.session.add(dataset_process_rule) - db.session.commit() + process_rule = knowledge_config.process_rule + if process_rule: + if process_rule.mode in ("custom", "hierarchical"): + dataset_process_rule = DatasetProcessRule( + dataset_id=dataset.id, + 
mode=process_rule.mode, + rules=process_rule.rules.model_dump_json() if process_rule.rules else None, + created_by=account.id, + ) + elif process_rule.mode == "automatic": + dataset_process_rule = DatasetProcessRule( + dataset_id=dataset.id, + mode=process_rule.mode, + rules=json.dumps(DatasetProcessRule.AUTOMATIC_RULES), + created_by=account.id, + ) + else: + logging.warn( + f"Invalid process rule mode: {process_rule.mode}, can not find dataset process rule" + ) + return + db.session.add(dataset_process_rule) + db.session.commit() lock_name = "add_document_lock_dataset_id_{}".format(dataset.id) with redis_client.lock(lock_name, timeout=600): position = DocumentService.get_documents_position(dataset.id) document_ids = [] duplicate_document_ids = [] - if document_data["data_source"]["type"] == "upload_file": - upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"] + if knowledge_config.data_source.info_list.data_source_type == "upload_file": + upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore for file_id in upload_file_list: file = ( db.session.query(UploadFile) @@ -804,7 +850,7 @@ class DocumentService: "upload_file_id": file_id, } # check duplicate - if document_data.get("duplicate", False): + if knowledge_config.duplicate: document = Document.query.filter_by( dataset_id=dataset.id, tenant_id=current_user.current_tenant_id, @@ -813,11 +859,11 @@ class DocumentService: name=file_name, ).first() if document: - document.dataset_process_rule_id = dataset_process_rule.id - document.updated_at = datetime.datetime.utcnow() + document.dataset_process_rule_id = dataset_process_rule.id # type: ignore + document.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) document.created_from = created_from - document.doc_form = document_data["doc_form"] - document.doc_language = document_data["doc_language"] + document.doc_form = knowledge_config.doc_form + document.doc_language = knowledge_config.doc_language document.data_source_info = json.dumps(data_source_info) document.batch = batch document.indexing_status = "waiting" @@ -827,10 +873,10 @@ class DocumentService: continue document = DocumentService.build_document( dataset, - dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], + dataset_process_rule.id, # type: ignore + knowledge_config.data_source.info_list.data_source_type, + knowledge_config.doc_form, + knowledge_config.doc_language, data_source_info, created_from, position, @@ -843,8 +889,10 @@ class DocumentService: document_ids.append(document.id) documents.append(document) position += 1 - elif document_data["data_source"]["type"] == "notion_import": - notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"] + elif knowledge_config.data_source.info_list.data_source_type == "notion_import": + notion_info_list = knowledge_config.data_source.info_list.notion_info_list + if not notion_info_list: + raise ValueError("No notion info list found.") exist_page_ids = [] exist_document = {} documents = Document.query.filter_by( @@ -859,7 +907,7 @@ class DocumentService: exist_page_ids.append(data_source_info["notion_page_id"]) exist_document[data_source_info["notion_page_id"]] = document.id for notion_info in notion_info_list: - workspace_id = notion_info["workspace_id"] + workspace_id = notion_info.workspace_id data_source_binding = DataSourceOauthBinding.query.filter( db.and_( 
DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, @@ -870,25 +918,25 @@ class DocumentService: ).first() if not data_source_binding: raise ValueError("Data source binding not found.") - for page in notion_info["pages"]: - if page["page_id"] not in exist_page_ids: + for page in notion_info.pages: + if page.page_id not in exist_page_ids: data_source_info = { "notion_workspace_id": workspace_id, - "notion_page_id": page["page_id"], - "notion_page_icon": page["page_icon"], - "type": page["type"], + "notion_page_id": page.page_id, + "notion_page_icon": page.page_icon.model_dump() if page.page_icon else None, + "type": page.type, } document = DocumentService.build_document( dataset, - dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], + dataset_process_rule.id, # type: ignore + knowledge_config.data_source.info_list.data_source_type, + knowledge_config.doc_form, + knowledge_config.doc_language, data_source_info, created_from, position, account, - page["page_name"], + page.page_name, batch, ) db.session.add(document) @@ -897,19 +945,21 @@ class DocumentService: documents.append(document) position += 1 else: - exist_document.pop(page["page_id"]) + exist_document.pop(page.page_id) # delete not selected documents if len(exist_document) > 0: clean_notion_document_task.delay(list(exist_document.values()), dataset.id) - elif document_data["data_source"]["type"] == "website_crawl": - website_info = document_data["data_source"]["info_list"]["website_info_list"] - urls = website_info["urls"] + elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": + website_info = knowledge_config.data_source.info_list.website_info_list + if not website_info: + raise ValueError("No website info list found.") + urls = website_info.urls for url in urls: data_source_info = { "url": url, - "provider": website_info["provider"], - "job_id": website_info["job_id"], - "only_main_content": website_info.get("only_main_content", False), + "provider": website_info.provider, + "job_id": website_info.job_id, + "only_main_content": website_info.only_main_content, "mode": "crawl", } if len(url) > 255: @@ -918,10 +968,10 @@ class DocumentService: document_name = url document = DocumentService.build_document( dataset, - dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], + dataset_process_rule.id, # type: ignore + knowledge_config.data_source.info_list.data_source_type, + knowledge_config.doc_form, + knowledge_config.doc_language, data_source_info, created_from, position, @@ -995,31 +1045,31 @@ class DocumentService: @staticmethod def update_document_with_dataset_id( dataset: Dataset, - document_data: dict, + document_data: KnowledgeConfig, account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None, created_from: str = "web", ): DatasetService.check_dataset_model_setting(dataset) - document = DocumentService.get_document(dataset.id, document_data["original_document_id"]) + document = DocumentService.get_document(dataset.id, document_data.original_document_id) if document is None: raise NotFound("Document not found") if document.display_status != "available": raise ValueError("Document is not available") # save process rule - if document_data.get("process_rule"): - process_rule = document_data["process_rule"] - if process_rule["mode"] == "custom": + if document_data.process_rule: + process_rule = document_data.process_rule + if 
process_rule.mode in {"custom", "hierarchical"}: dataset_process_rule = DatasetProcessRule( dataset_id=dataset.id, - mode=process_rule["mode"], - rules=json.dumps(process_rule["rules"]), + mode=process_rule.mode, + rules=process_rule.rules.model_dump_json() if process_rule.rules else None, created_by=account.id, ) - elif process_rule["mode"] == "automatic": + elif process_rule.mode == "automatic": dataset_process_rule = DatasetProcessRule( dataset_id=dataset.id, - mode=process_rule["mode"], + mode=process_rule.mode, rules=json.dumps(DatasetProcessRule.AUTOMATIC_RULES), created_by=account.id, ) @@ -1028,11 +1078,13 @@ class DocumentService: db.session.commit() document.dataset_process_rule_id = dataset_process_rule.id # update document data source - if document_data.get("data_source"): + if document_data.data_source: file_name = "" data_source_info = {} - if document_data["data_source"]["type"] == "upload_file": - upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"] + if document_data.data_source.info_list.data_source_type == "upload_file": + if not document_data.data_source.info_list.file_info_list: + raise ValueError("No file info list found.") + upload_file_list = document_data.data_source.info_list.file_info_list.file_ids for file_id in upload_file_list: file = ( db.session.query(UploadFile) @@ -1048,10 +1100,12 @@ class DocumentService: data_source_info = { "upload_file_id": file_id, } - elif document_data["data_source"]["type"] == "notion_import": - notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"] + elif document_data.data_source.info_list.data_source_type == "notion_import": + if not document_data.data_source.info_list.notion_info_list: + raise ValueError("No notion info list found.") + notion_info_list = document_data.data_source.info_list.notion_info_list for notion_info in notion_info_list: - workspace_id = notion_info["workspace_id"] + workspace_id = notion_info.workspace_id data_source_binding = DataSourceOauthBinding.query.filter( db.and_( DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, @@ -1062,31 +1116,32 @@ class DocumentService: ).first() if not data_source_binding: raise ValueError("Data source binding not found.") - for page in notion_info["pages"]: + for page in notion_info.pages: data_source_info = { "notion_workspace_id": workspace_id, - "notion_page_id": page["page_id"], - "notion_page_icon": page["page_icon"], - "type": page["type"], + "notion_page_id": page.page_id, + "notion_page_icon": page.page_icon.model_dump() if page.page_icon else None, # type: ignore + "type": page.type, } - elif document_data["data_source"]["type"] == "website_crawl": - website_info = document_data["data_source"]["info_list"]["website_info_list"] - urls = website_info["urls"] - for url in urls: - data_source_info = { - "url": url, - "provider": website_info["provider"], - "job_id": website_info["job_id"], - "only_main_content": website_info.get("only_main_content", False), - "mode": "crawl", - } - document.data_source_type = document_data["data_source"]["type"] + elif document_data.data_source.info_list.data_source_type == "website_crawl": + website_info = document_data.data_source.info_list.website_info_list + if website_info: + urls = website_info.urls + for url in urls: + data_source_info = { + "url": url, + "provider": website_info.provider, + "job_id": website_info.job_id, + "only_main_content": website_info.only_main_content, # type: ignore + "mode": "crawl", + } + document.data_source_type = 
document_data.data_source.info_list.data_source_type document.data_source_info = json.dumps(data_source_info) document.name = file_name # update document name - if document_data.get("name"): - document.name = document_data["name"] + if document_data.name: + document.name = document_data.name # update document to be waiting document.indexing_status = "waiting" document.completed_at = None @@ -1096,7 +1151,7 @@ class DocumentService: document.splitting_completed_at = None document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) document.created_from = created_from - document.doc_form = document_data["doc_form"] + document.doc_form = document_data.doc_form db.session.add(document) db.session.commit() # update document segment @@ -1108,21 +1163,27 @@ class DocumentService: return document @staticmethod - def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account): + def save_document_without_dataset_id(tenant_id: str, knowledge_config: KnowledgeConfig, account: Account): features = FeatureService.get_features(current_user.current_tenant_id) if features.billing.enabled: count = 0 - if document_data["data_source"]["type"] == "upload_file": - upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"] + if knowledge_config.data_source.info_list.data_source_type == "upload_file": + upload_file_list = ( + knowledge_config.data_source.info_list.file_info_list.file_ids + if knowledge_config.data_source.info_list.file_info_list + else [] + ) count = len(upload_file_list) - elif document_data["data_source"]["type"] == "notion_import": - notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"] - for notion_info in notion_info_list: - count = count + len(notion_info["pages"]) - elif document_data["data_source"]["type"] == "website_crawl": - website_info = document_data["data_source"]["info_list"]["website_info_list"] - count = len(website_info["urls"]) + elif knowledge_config.data_source.info_list.data_source_type == "notion_import": + notion_info_list = knowledge_config.data_source.info_list.notion_info_list + if notion_info_list: + for notion_info in notion_info_list: + count = count + len(notion_info.pages) + elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": + website_info = knowledge_config.data_source.info_list.website_info_list + if website_info: + count = len(website_info.urls) batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) if count > batch_upload_limit: raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.") @@ -1131,39 +1192,39 @@ class DocumentService: dataset_collection_binding_id = None retrieval_model = None - if document_data["indexing_technique"] == "high_quality": + if knowledge_config.indexing_technique == "high_quality": dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding( - document_data["embedding_model_provider"], document_data["embedding_model"] + knowledge_config.embedding_model_provider, # type: ignore + knowledge_config.embedding_model, # type: ignore ) dataset_collection_binding_id = dataset_collection_binding.id - if document_data.get("retrieval_model"): - retrieval_model = document_data["retrieval_model"] + if knowledge_config.retrieval_model: + retrieval_model = knowledge_config.retrieval_model else: - default_retrieval_model = { - "search_method": RetrievalMethod.SEMANTIC_SEARCH.value, - "reranking_enable": False, - "reranking_model": 
{"reranking_provider_name": "", "reranking_model_name": ""}, - "top_k": 2, - "score_threshold_enabled": False, - } - retrieval_model = default_retrieval_model + retrieval_model = RetrievalModel( + search_method=RetrievalMethod.SEMANTIC_SEARCH.value, + reranking_enable=False, + reranking_model=RerankingModel(reranking_provider_name="", reranking_model_name=""), + top_k=2, + score_threshold_enabled=False, + ) # save dataset dataset = Dataset( tenant_id=tenant_id, name="", - data_source_type=document_data["data_source"]["type"], - indexing_technique=document_data.get("indexing_technique", "high_quality"), + data_source_type=knowledge_config.data_source.info_list.data_source_type, + indexing_technique=knowledge_config.indexing_technique, created_by=account.id, - embedding_model=document_data.get("embedding_model"), - embedding_model_provider=document_data.get("embedding_model_provider"), + embedding_model=knowledge_config.embedding_model, + embedding_model_provider=knowledge_config.embedding_model_provider, collection_binding_id=dataset_collection_binding_id, - retrieval_model=retrieval_model, + retrieval_model=retrieval_model.model_dump() if retrieval_model else None, ) - db.session.add(dataset) + db.session.add(dataset) # type: ignore db.session.flush() - documents, batch = DocumentService.save_document_with_dataset_id(dataset, document_data, account) + documents, batch = DocumentService.save_document_with_dataset_id(dataset, knowledge_config, account) cut_length = 18 cut_name = documents[0].name[:cut_length] @@ -1174,133 +1235,86 @@ class DocumentService: return dataset, documents, batch @classmethod - def document_create_args_validate(cls, args: dict): - if "original_document_id" not in args or not args["original_document_id"]: - DocumentService.data_source_args_validate(args) - DocumentService.process_rule_args_validate(args) + def document_create_args_validate(cls, knowledge_config: KnowledgeConfig): + if not knowledge_config.data_source and not knowledge_config.process_rule: + raise ValueError("Data source or Process rule is required") else: - if ("data_source" not in args or not args["data_source"]) and ( - "process_rule" not in args or not args["process_rule"] - ): - raise ValueError("Data source or Process rule is required") - else: - if args.get("data_source"): - DocumentService.data_source_args_validate(args) - if args.get("process_rule"): - DocumentService.process_rule_args_validate(args) + if knowledge_config.data_source: + DocumentService.data_source_args_validate(knowledge_config) + if knowledge_config.process_rule: + DocumentService.process_rule_args_validate(knowledge_config) @classmethod - def data_source_args_validate(cls, args: dict): - if "data_source" not in args or not args["data_source"]: + def data_source_args_validate(cls, knowledge_config: KnowledgeConfig): + if not knowledge_config.data_source: raise ValueError("Data source is required") - if not isinstance(args["data_source"], dict): - raise ValueError("Data source is invalid") - - if "type" not in args["data_source"] or not args["data_source"]["type"]: - raise ValueError("Data source type is required") - - if args["data_source"]["type"] not in Document.DATA_SOURCES: + if knowledge_config.data_source.info_list.data_source_type not in Document.DATA_SOURCES: raise ValueError("Data source type is invalid") - if "info_list" not in args["data_source"] or not args["data_source"]["info_list"]: + if not knowledge_config.data_source.info_list: raise ValueError("Data source info is required") - if 
args["data_source"]["type"] == "upload_file": - if ( - "file_info_list" not in args["data_source"]["info_list"] - or not args["data_source"]["info_list"]["file_info_list"] - ): + if knowledge_config.data_source.info_list.data_source_type == "upload_file": + if not knowledge_config.data_source.info_list.file_info_list: raise ValueError("File source info is required") - if args["data_source"]["type"] == "notion_import": - if ( - "notion_info_list" not in args["data_source"]["info_list"] - or not args["data_source"]["info_list"]["notion_info_list"] - ): + if knowledge_config.data_source.info_list.data_source_type == "notion_import": + if not knowledge_config.data_source.info_list.notion_info_list: raise ValueError("Notion source info is required") - if args["data_source"]["type"] == "website_crawl": - if ( - "website_info_list" not in args["data_source"]["info_list"] - or not args["data_source"]["info_list"]["website_info_list"] - ): + if knowledge_config.data_source.info_list.data_source_type == "website_crawl": + if not knowledge_config.data_source.info_list.website_info_list: raise ValueError("Website source info is required") @classmethod - def process_rule_args_validate(cls, args: dict): - if "process_rule" not in args or not args["process_rule"]: + def process_rule_args_validate(cls, knowledge_config: KnowledgeConfig): + if not knowledge_config.process_rule: raise ValueError("Process rule is required") - if not isinstance(args["process_rule"], dict): - raise ValueError("Process rule is invalid") - - if "mode" not in args["process_rule"] or not args["process_rule"]["mode"]: + if not knowledge_config.process_rule.mode: raise ValueError("Process rule mode is required") - if args["process_rule"]["mode"] not in DatasetProcessRule.MODES: + if knowledge_config.process_rule.mode not in DatasetProcessRule.MODES: raise ValueError("Process rule mode is invalid") - if args["process_rule"]["mode"] == "automatic": - args["process_rule"]["rules"] = {} + if knowledge_config.process_rule.mode == "automatic": + knowledge_config.process_rule.rules = None else: - if "rules" not in args["process_rule"] or not args["process_rule"]["rules"]: + if not knowledge_config.process_rule.rules: raise ValueError("Process rule rules is required") - if not isinstance(args["process_rule"]["rules"], dict): - raise ValueError("Process rule rules is invalid") - - if ( - "pre_processing_rules" not in args["process_rule"]["rules"] - or args["process_rule"]["rules"]["pre_processing_rules"] is None - ): + if knowledge_config.process_rule.rules.pre_processing_rules is None: raise ValueError("Process rule pre_processing_rules is required") - if not isinstance(args["process_rule"]["rules"]["pre_processing_rules"], list): - raise ValueError("Process rule pre_processing_rules is invalid") - unique_pre_processing_rule_dicts = {} - for pre_processing_rule in args["process_rule"]["rules"]["pre_processing_rules"]: - if "id" not in pre_processing_rule or not pre_processing_rule["id"]: + for pre_processing_rule in knowledge_config.process_rule.rules.pre_processing_rules: + if not pre_processing_rule.id: raise ValueError("Process rule pre_processing_rules id is required") - if pre_processing_rule["id"] not in DatasetProcessRule.PRE_PROCESSING_RULES: - raise ValueError("Process rule pre_processing_rules id is invalid") - - if "enabled" not in pre_processing_rule or pre_processing_rule["enabled"] is None: - raise ValueError("Process rule pre_processing_rules enabled is required") - - if not isinstance(pre_processing_rule["enabled"], bool): + 
if not isinstance(pre_processing_rule.enabled, bool): raise ValueError("Process rule pre_processing_rules enabled is invalid") - unique_pre_processing_rule_dicts[pre_processing_rule["id"]] = pre_processing_rule + unique_pre_processing_rule_dicts[pre_processing_rule.id] = pre_processing_rule - args["process_rule"]["rules"]["pre_processing_rules"] = list(unique_pre_processing_rule_dicts.values()) + knowledge_config.process_rule.rules.pre_processing_rules = list(unique_pre_processing_rule_dicts.values()) - if ( - "segmentation" not in args["process_rule"]["rules"] - or args["process_rule"]["rules"]["segmentation"] is None - ): + if not knowledge_config.process_rule.rules.segmentation: raise ValueError("Process rule segmentation is required") - if not isinstance(args["process_rule"]["rules"]["segmentation"], dict): - raise ValueError("Process rule segmentation is invalid") - - if ( - "separator" not in args["process_rule"]["rules"]["segmentation"] - or not args["process_rule"]["rules"]["segmentation"]["separator"] - ): + if not knowledge_config.process_rule.rules.segmentation.separator: raise ValueError("Process rule segmentation separator is required") - if not isinstance(args["process_rule"]["rules"]["segmentation"]["separator"], str): + if not isinstance(knowledge_config.process_rule.rules.segmentation.separator, str): raise ValueError("Process rule segmentation separator is invalid") - if ( - "max_tokens" not in args["process_rule"]["rules"]["segmentation"] - or not args["process_rule"]["rules"]["segmentation"]["max_tokens"] + if not ( + knowledge_config.process_rule.mode == "hierarchical" + and knowledge_config.process_rule.rules.parent_mode == "full-doc" ): - raise ValueError("Process rule segmentation max_tokens is required") + if not knowledge_config.process_rule.rules.segmentation.max_tokens: + raise ValueError("Process rule segmentation max_tokens is required") - if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int): - raise ValueError("Process rule segmentation max_tokens is invalid") + if not isinstance(knowledge_config.process_rule.rules.segmentation.max_tokens, int): + raise ValueError("Process rule segmentation max_tokens is invalid") @classmethod def estimate_args_validate(cls, args: dict): @@ -1447,7 +1461,7 @@ class SegmentService: # save vector index try: - VectorService.create_segments_vector([args["keywords"]], [segment_document], dataset) + VectorService.create_segments_vector([args["keywords"]], [segment_document], dataset, document.doc_form) except Exception as e: logging.exception("create segment index failed") segment_document.enabled = False @@ -1528,7 +1542,7 @@ class SegmentService: db.session.add(document) try: # save vector index - VectorService.create_segments_vector(keywords_list, pre_segment_data_list, dataset) + VectorService.create_segments_vector(keywords_list, pre_segment_data_list, dataset, document.doc_form) except Exception as e: logging.exception("create segment index failed") for segment_document in segment_data_list: @@ -1540,14 +1554,13 @@ class SegmentService: return segment_data_list @classmethod - def update_segment(cls, args: dict, segment: DocumentSegment, document: Document, dataset: Dataset): - segment_update_entity = SegmentUpdateEntity(**args) + def update_segment(cls, args: SegmentUpdateArgs, segment: DocumentSegment, document: Document, dataset: Dataset): indexing_cache_key = "segment_{}_indexing".format(segment.id) cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: raise 
ValueError("Segment is indexing, please try again later") - if segment_update_entity.enabled is not None: - action = segment_update_entity.enabled + if args.enabled is not None: + action = args.enabled if segment.enabled != action: if not action: segment.enabled = action @@ -1560,22 +1573,22 @@ class SegmentService: disable_segment_from_index_task.delay(segment.id) return segment if not segment.enabled: - if segment_update_entity.enabled is not None: - if not segment_update_entity.enabled: + if args.enabled is not None: + if not args.enabled: raise ValueError("Can't update disabled segment") else: raise ValueError("Can't update disabled segment") try: word_count_change = segment.word_count - content = segment_update_entity.content + content = args.content or segment.content if segment.content == content: segment.word_count = len(content) if document.doc_form == "qa_model": - segment.answer = segment_update_entity.answer - segment.word_count += len(segment_update_entity.answer or "") + segment.answer = args.answer + segment.word_count += len(args.answer) if args.answer else 0 word_count_change = segment.word_count - word_count_change - if segment_update_entity.keywords: - segment.keywords = segment_update_entity.keywords + if args.keywords: + segment.keywords = args.keywords segment.enabled = True segment.disabled_at = None segment.disabled_by = None @@ -1586,9 +1599,45 @@ class SegmentService: document.word_count = max(0, document.word_count + word_count_change) db.session.add(document) # update segment index task - if segment_update_entity.enabled: - keywords = segment_update_entity.keywords or [] - VectorService.create_segments_vector([keywords], [segment], dataset) + if args.enabled: + VectorService.create_segments_vector( + [args.keywords] if args.keywords else None, + [segment], + dataset, + document.doc_form, + ) + if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks: + # regenerate child chunks + # get embedding model instance + if dataset.indexing_technique == "high_quality": + # check embedding model setting + model_manager = ModelManager() + + if dataset.embedding_model_provider: + embedding_model_instance = model_manager.get_model_instance( + tenant_id=dataset.tenant_id, + provider=dataset.embedding_model_provider, + model_type=ModelType.TEXT_EMBEDDING, + model=dataset.embedding_model, + ) + else: + embedding_model_instance = model_manager.get_default_model_instance( + tenant_id=dataset.tenant_id, + model_type=ModelType.TEXT_EMBEDDING, + ) + else: + raise ValueError("The knowledge base index technique is not high quality!") + # get the process rule + processing_rule = ( + db.session.query(DatasetProcessRule) + .filter(DatasetProcessRule.id == document.dataset_process_rule_id) + .first() + ) + if not processing_rule: + raise ValueError("No processing rule found.") + VectorService.generate_child_chunks( + segment, document, dataset, embedding_model_instance, processing_rule, True + ) else: segment_hash = helper.generate_text_hash(content) tokens = 0 @@ -1619,8 +1668,8 @@ class SegmentService: segment.disabled_at = None segment.disabled_by = None if document.doc_form == "qa_model": - segment.answer = segment_update_entity.answer - segment.word_count += len(segment_update_entity.answer or "") + segment.answer = args.answer + segment.word_count += len(args.answer) if args.answer else 0 word_count_change = segment.word_count - word_count_change # update document word count if word_count_change != 0: @@ -1628,8 +1677,40 @@ class SegmentService: 
db.session.add(document) db.session.add(segment) db.session.commit() - # update segment vector index - VectorService.update_segment_vector(segment_update_entity.keywords, segment, dataset) + if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks: + # get embedding model instance + if dataset.indexing_technique == "high_quality": + # check embedding model setting + model_manager = ModelManager() + + if dataset.embedding_model_provider: + embedding_model_instance = model_manager.get_model_instance( + tenant_id=dataset.tenant_id, + provider=dataset.embedding_model_provider, + model_type=ModelType.TEXT_EMBEDDING, + model=dataset.embedding_model, + ) + else: + embedding_model_instance = model_manager.get_default_model_instance( + tenant_id=dataset.tenant_id, + model_type=ModelType.TEXT_EMBEDDING, + ) + else: + raise ValueError("The knowledge base index technique is not high quality!") + # get the process rule + processing_rule = ( + db.session.query(DatasetProcessRule) + .filter(DatasetProcessRule.id == document.dataset_process_rule_id) + .first() + ) + if not processing_rule: + raise ValueError("No processing rule found.") + VectorService.generate_child_chunks( + segment, document, dataset, embedding_model_instance, processing_rule, True + ) + elif document.doc_form in (IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX): + # update segment vector index + VectorService.update_segment_vector(args.keywords, segment, dataset) except Exception as e: logging.exception("update segment index failed") @@ -1652,13 +1733,265 @@ class SegmentService: if segment.enabled: # send delete segment index task redis_client.setex(indexing_cache_key, 600, 1) - delete_segment_from_index_task.delay(segment.id, segment.index_node_id, dataset.id, document.id) + delete_segment_from_index_task.delay([segment.index_node_id], dataset.id, document.id) db.session.delete(segment) # update document word count document.word_count -= segment.word_count db.session.add(document) db.session.commit() + @classmethod + def delete_segments(cls, segment_ids: list, document: Document, dataset: Dataset): + index_node_ids = ( + DocumentSegment.query.with_entities(DocumentSegment.index_node_id) + .filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset.id, + DocumentSegment.document_id == document.id, + DocumentSegment.tenant_id == current_user.current_tenant_id, + ) + .all() + ) + index_node_ids = [index_node_id[0] for index_node_id in index_node_ids] + + delete_segment_from_index_task.delay(index_node_ids, dataset.id, document.id) + db.session.query(DocumentSegment).filter(DocumentSegment.id.in_(segment_ids)).delete() + db.session.commit() + + @classmethod + def update_segments_status(cls, segment_ids: list, action: str, dataset: Dataset, document: Document): + if action == "enable": + segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset.id, + DocumentSegment.document_id == document.id, + DocumentSegment.enabled == False, + ) + .all() + ) + if not segments: + return + real_deal_segmment_ids = [] + for segment in segments: + indexing_cache_key = "segment_{}_indexing".format(segment.id) + cache_result = redis_client.get(indexing_cache_key) + if cache_result is not None: + continue + segment.enabled = True + segment.disabled_at = None + segment.disabled_by = None + db.session.add(segment) + real_deal_segmment_ids.append(segment.id) + db.session.commit() + + 
enable_segments_to_index_task.delay(real_deal_segmment_ids, dataset.id, document.id) + elif action == "disable": + segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset.id, + DocumentSegment.document_id == document.id, + DocumentSegment.enabled == True, + ) + .all() + ) + if not segments: + return + real_deal_segmment_ids = [] + for segment in segments: + indexing_cache_key = "segment_{}_indexing".format(segment.id) + cache_result = redis_client.get(indexing_cache_key) + if cache_result is not None: + continue + segment.enabled = False + segment.disabled_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + segment.disabled_by = current_user.id + db.session.add(segment) + real_deal_segmment_ids.append(segment.id) + db.session.commit() + + disable_segments_from_index_task.delay(real_deal_segmment_ids, dataset.id, document.id) + else: + raise InvalidActionError() + + @classmethod + def create_child_chunk( + cls, content: str, segment: DocumentSegment, document: Document, dataset: Dataset + ) -> ChildChunk: + lock_name = "add_child_lock_{}".format(segment.id) + with redis_client.lock(lock_name, timeout=20): + index_node_id = str(uuid.uuid4()) + index_node_hash = helper.generate_text_hash(content) + child_chunk_count = ( + db.session.query(ChildChunk) + .filter( + ChildChunk.tenant_id == current_user.current_tenant_id, + ChildChunk.dataset_id == dataset.id, + ChildChunk.document_id == document.id, + ChildChunk.segment_id == segment.id, + ) + .count() + ) + max_position = ( + db.session.query(func.max(ChildChunk.position)) + .filter( + ChildChunk.tenant_id == current_user.current_tenant_id, + ChildChunk.dataset_id == dataset.id, + ChildChunk.document_id == document.id, + ChildChunk.segment_id == segment.id, + ) + .scalar() + ) + child_chunk = ChildChunk( + tenant_id=current_user.current_tenant_id, + dataset_id=dataset.id, + document_id=document.id, + segment_id=segment.id, + position=max_position + 1, + index_node_id=index_node_id, + index_node_hash=index_node_hash, + content=content, + word_count=len(content), + type="customized", + created_by=current_user.id, + ) + db.session.add(child_chunk) + # save vector index + try: + VectorService.create_child_chunk_vector(child_chunk, dataset) + except Exception as e: + logging.exception("create child chunk index failed") + db.session.rollback() + raise ChildChunkIndexingError(str(e)) + db.session.commit() + + return child_chunk + + @classmethod + def update_child_chunks( + cls, + child_chunks_update_args: list[ChildChunkUpdateArgs], + segment: DocumentSegment, + document: Document, + dataset: Dataset, + ) -> list[ChildChunk]: + child_chunks = ( + db.session.query(ChildChunk) + .filter( + ChildChunk.dataset_id == dataset.id, + ChildChunk.document_id == document.id, + ChildChunk.segment_id == segment.id, + ) + .all() + ) + child_chunks_map = {chunk.id: chunk for chunk in child_chunks} + + new_child_chunks, update_child_chunks, delete_child_chunks, new_child_chunks_args = [], [], [], [] + + for child_chunk_update_args in child_chunks_update_args: + if child_chunk_update_args.id: + child_chunk = child_chunks_map.pop(child_chunk_update_args.id, None) + if child_chunk: + if child_chunk.content != child_chunk_update_args.content: + child_chunk.content = child_chunk_update_args.content + child_chunk.word_count = len(child_chunk.content) + child_chunk.updated_by = current_user.id + child_chunk.updated_at = 
datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + child_chunk.type = "customized" + update_child_chunks.append(child_chunk) + else: + new_child_chunks_args.append(child_chunk_update_args) + if child_chunks_map: + delete_child_chunks = list(child_chunks_map.values()) + try: + if update_child_chunks: + db.session.bulk_save_objects(update_child_chunks) + + if delete_child_chunks: + for child_chunk in delete_child_chunks: + db.session.delete(child_chunk) + if new_child_chunks_args: + child_chunk_count = len(child_chunks) + for position, args in enumerate(new_child_chunks_args, start=child_chunk_count + 1): + index_node_id = str(uuid.uuid4()) + index_node_hash = helper.generate_text_hash(args.content) + child_chunk = ChildChunk( + tenant_id=current_user.current_tenant_id, + dataset_id=dataset.id, + document_id=document.id, + segment_id=segment.id, + position=position, + index_node_id=index_node_id, + index_node_hash=index_node_hash, + content=args.content, + word_count=len(args.content), + type="customized", + created_by=current_user.id, + ) + + db.session.add(child_chunk) + db.session.flush() + new_child_chunks.append(child_chunk) + VectorService.update_child_chunk_vector(new_child_chunks, update_child_chunks, delete_child_chunks, dataset) + db.session.commit() + except Exception as e: + logging.exception("update child chunk index failed") + db.session.rollback() + raise ChildChunkIndexingError(str(e)) + return sorted(new_child_chunks + update_child_chunks, key=lambda x: x.position) + + @classmethod + def update_child_chunk( + cls, + content: str, + child_chunk: ChildChunk, + segment: DocumentSegment, + document: Document, + dataset: Dataset, + ) -> ChildChunk: + try: + child_chunk.content = content + child_chunk.word_count = len(content) + child_chunk.updated_by = current_user.id + child_chunk.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + child_chunk.type = "customized" + db.session.add(child_chunk) + VectorService.update_child_chunk_vector([], [child_chunk], [], dataset) + db.session.commit() + except Exception as e: + logging.exception("update child chunk index failed") + db.session.rollback() + raise ChildChunkIndexingError(str(e)) + return child_chunk + + @classmethod + def delete_child_chunk(cls, child_chunk: ChildChunk, dataset: Dataset): + db.session.delete(child_chunk) + try: + VectorService.delete_child_chunk_vector(child_chunk, dataset) + except Exception as e: + logging.exception("delete child chunk index failed") + db.session.rollback() + raise ChildChunkDeleteIndexError(str(e)) + db.session.commit() + + @classmethod + def get_child_chunks( + cls, segment_id: str, document_id: str, dataset_id: str, page: int, limit: int, keyword: Optional[str] = None + ): + query = ChildChunk.query.filter_by( + tenant_id=current_user.current_tenant_id, + dataset_id=dataset_id, + document_id=document_id, + segment_id=segment_id, + ).order_by(ChildChunk.position.asc()) + if keyword: + query = query.where(ChildChunk.content.ilike(f"%{keyword}%")) + return query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) + class DatasetCollectionBindingService: @classmethod diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py index 449b79f339..76d9c28812 100644 --- a/api/services/entities/knowledge_entities/knowledge_entities.py +++ b/api/services/entities/knowledge_entities/knowledge_entities.py @@ -1,4 +1,5 @@ -from typing import Optional +from enum 
import Enum +from typing import Literal, Optional from pydantic import BaseModel @@ -8,3 +9,112 @@ class SegmentUpdateEntity(BaseModel): answer: Optional[str] = None keywords: Optional[list[str]] = None enabled: Optional[bool] = None + + +class ParentMode(str, Enum): + FULL_DOC = "full-doc" + PARAGRAPH = "paragraph" + + +class NotionIcon(BaseModel): + type: str + url: Optional[str] = None + emoji: Optional[str] = None + + +class NotionPage(BaseModel): + page_id: str + page_name: str + page_icon: Optional[NotionIcon] = None + type: str + + +class NotionInfo(BaseModel): + workspace_id: str + pages: list[NotionPage] + + +class WebsiteInfo(BaseModel): + provider: str + job_id: str + urls: list[str] + only_main_content: bool = True + + +class FileInfo(BaseModel): + file_ids: list[str] + + +class InfoList(BaseModel): + data_source_type: Literal["upload_file", "notion_import", "website_crawl"] + notion_info_list: Optional[list[NotionInfo]] = None + file_info_list: Optional[FileInfo] = None + website_info_list: Optional[WebsiteInfo] = None + + +class DataSource(BaseModel): + info_list: InfoList + + +class PreProcessingRule(BaseModel): + id: str + enabled: bool + + +class Segmentation(BaseModel): + separator: str = "\n" + max_tokens: int + chunk_overlap: int = 0 + + +class Rule(BaseModel): + pre_processing_rules: Optional[list[PreProcessingRule]] = None + segmentation: Optional[Segmentation] = None + parent_mode: Optional[Literal["full-doc", "paragraph"]] = None + subchunk_segmentation: Optional[Segmentation] = None + + +class ProcessRule(BaseModel): + mode: Literal["automatic", "custom", "hierarchical"] + rules: Optional[Rule] = None + + +class RerankingModel(BaseModel): + reranking_provider_name: Optional[str] = None + reranking_model_name: Optional[str] = None + + +class RetrievalModel(BaseModel): + search_method: Literal["hybrid_search", "semantic_search", "full_text_search"] + reranking_enable: bool + reranking_model: Optional[RerankingModel] = None + top_k: int + score_threshold_enabled: bool + score_threshold: Optional[float] = None + + +class KnowledgeConfig(BaseModel): + original_document_id: Optional[str] = None + duplicate: bool = True + indexing_technique: Literal["high_quality", "economy"] + data_source: DataSource + process_rule: Optional[ProcessRule] = None + retrieval_model: Optional[RetrievalModel] = None + doc_form: str = "text_model" + doc_language: str = "English" + embedding_model: Optional[str] = None + embedding_model_provider: Optional[str] = None + name: Optional[str] = None + + +class SegmentUpdateArgs(BaseModel): + content: Optional[str] = None + answer: Optional[str] = None + keywords: Optional[list[str]] = None + regenerate_child_chunks: bool = False + enabled: Optional[bool] = None + + +class ChildChunkUpdateArgs(BaseModel): + id: Optional[str] = None + content: str diff --git a/api/services/errors/base.py b/api/services/errors/base.py index 4d39f956b8..35ea28468e 100644 --- a/api/services/errors/base.py +++ b/api/services/errors/base.py @@ -1,6 +1,6 @@ from typing import Optional -class BaseServiceError(Exception): +class BaseServiceError(ValueError): def __init__(self, description: Optional[str] = None): self.description = description diff --git a/api/services/errors/chunk.py b/api/services/errors/chunk.py new file mode 100644 index 0000000000..75bf4d5d5f --- /dev/null +++ b/api/services/errors/chunk.py @@ -0,0 +1,9 @@ +from services.errors.base import BaseServiceError + + +class ChildChunkIndexingError(BaseServiceError): + description = "{message}" + + +class 
ChildChunkDeleteIndexError(BaseServiceError): + description = "{message}" diff --git a/api/services/feature_service.py b/api/services/feature_service.py index 36c79d7045..a42b3020cd 100644 --- a/api/services/feature_service.py +++ b/api/services/feature_service.py @@ -76,7 +76,7 @@ class FeatureService: cls._fulfill_params_from_env(features) - if dify_config.BILLING_ENABLED: + if dify_config.BILLING_ENABLED and tenant_id: cls._fulfill_params_from_billing_api(features, tenant_id) return features diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index 41b4e1ec46..e9176fc1c6 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -7,7 +7,7 @@ from core.rag.models.document import Document from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_database import db from models.account import Account -from models.dataset import Dataset, DatasetQuery, DocumentSegment +from models.dataset import Dataset, DatasetQuery default_retrieval_model = { "search_method": RetrievalMethod.SEMANTIC_SEARCH.value, @@ -69,7 +69,7 @@ class HitTestingService: db.session.add(dataset_query) db.session.commit() - return dict(cls.compact_retrieve_response(dataset, query, all_documents)) + return cls.compact_retrieve_response(query, all_documents) # type: ignore @classmethod def external_retrieve( @@ -106,41 +106,14 @@ class HitTestingService: return dict(cls.compact_external_retrieve_response(dataset, query, all_documents)) @classmethod - def compact_retrieve_response(cls, dataset: Dataset, query: str, documents: list[Document]): - records = [] - - for document in documents: - if document.metadata is None: - continue - - index_node_id = document.metadata["doc_id"] - - segment = ( - db.session.query(DocumentSegment) - .filter( - DocumentSegment.dataset_id == dataset.id, - DocumentSegment.enabled == True, - DocumentSegment.status == "completed", - DocumentSegment.index_node_id == index_node_id, - ) - .first() - ) - - if not segment: - continue - - record = { - "segment": segment, - "score": document.metadata.get("score", None), - } - - records.append(record) + def compact_retrieve_response(cls, query: str, documents: list[Document]): + records = RetrievalService.format_retrieval_documents(documents) return { "query": { "content": query, }, - "records": records, + "records": [record.model_dump() for record in records], } @classmethod diff --git a/api/services/message_service.py b/api/services/message_service.py index c4447a84da..c17122ef64 100644 --- a/api/services/message_service.py +++ b/api/services/message_service.py @@ -152,6 +152,7 @@ class MessageService: @classmethod def create_feedback( cls, + *, app_model: App, message_id: str, user: Optional[Union[Account, EndUser]], diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py index b5565e986d..c4b9db69ec 100644 --- a/api/services/tools/tools_transform_service.py +++ b/api/services/tools/tools_transform_service.py @@ -64,7 +64,10 @@ class ToolTransformService: ) elif isinstance(provider, ToolProviderApiEntity): if provider.plugin_id: - provider.icon = ToolTransformService.get_plugin_icon_url(tenant_id=tenant_id, filename=provider.icon) + if isinstance(provider.icon, str): + provider.icon = ToolTransformService.get_plugin_icon_url( + tenant_id=tenant_id, filename=provider.icon + ) else: provider.icon = ToolTransformService.get_tool_provider_icon_url( provider_type=provider.type.value, provider_name=provider.name, 
icon=provider.icon diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 3c67351335..92422bf29d 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -1,40 +1,70 @@ from typing import Optional +from core.model_manager import ModelInstance, ModelManager +from core.model_runtime.entities.model_entities import ModelType from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.vdb.vector_factory import Vector +from core.rag.index_processor.constant.index_type import IndexType +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import Document -from models.dataset import Dataset, DocumentSegment +from extensions.ext_database import db +from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment +from models.dataset import Document as DatasetDocument +from services.entities.knowledge_entities.knowledge_entities import ParentMode class VectorService: @classmethod def create_segments_vector( - cls, keywords_list: Optional[list[list[str]]], segments: list[DocumentSegment], dataset: Dataset + cls, keywords_list: Optional[list[list[str]]], segments: list[DocumentSegment], dataset: Dataset, doc_form: str ): documents = [] + for segment in segments: - document = Document( - page_content=segment.content, - metadata={ - "doc_id": segment.index_node_id, - "doc_hash": segment.index_node_hash, - "document_id": segment.document_id, - "dataset_id": segment.dataset_id, - }, - ) - documents.append(document) - if dataset.indexing_technique == "high_quality": - # save vector index - vector = Vector(dataset=dataset) - vector.add_texts(documents, duplicate_check=True) + if doc_form == IndexType.PARENT_CHILD_INDEX: + document = DatasetDocument.query.filter_by(id=segment.document_id).first() + # get the process rule + processing_rule = ( + db.session.query(DatasetProcessRule) + .filter(DatasetProcessRule.id == document.dataset_process_rule_id) + .first() + ) + if not processing_rule: + raise ValueError("No processing rule found.") + # get embedding model instance + if dataset.indexing_technique == "high_quality": + # check embedding model setting + model_manager = ModelManager() - # save keyword index - keyword = Keyword(dataset) - - if keywords_list and len(keywords_list) > 0: - keyword.add_texts(documents, keywords_list=keywords_list) - else: - keyword.add_texts(documents) + if dataset.embedding_model_provider: + embedding_model_instance = model_manager.get_model_instance( + tenant_id=dataset.tenant_id, + provider=dataset.embedding_model_provider, + model_type=ModelType.TEXT_EMBEDDING, + model=dataset.embedding_model, + ) + else: + embedding_model_instance = model_manager.get_default_model_instance( + tenant_id=dataset.tenant_id, + model_type=ModelType.TEXT_EMBEDDING, + ) + else: + raise ValueError("The knowledge base index technique is not high quality!") + cls.generate_child_chunks(segment, document, dataset, embedding_model_instance, processing_rule, False) + else: + document = Document( + page_content=segment.content, + metadata={ + "doc_id": segment.index_node_id, + "doc_hash": segment.index_node_hash, + "document_id": segment.document_id, + "dataset_id": segment.dataset_id, + }, + ) + documents.append(document) + if len(documents) > 0: + index_processor = IndexProcessorFactory(doc_form).init_index_processor() + index_processor.load(dataset, documents, with_keywords=True, keywords_list=keywords_list) @classmethod def 
update_segment_vector(cls, keywords: Optional[list[str]], segment: DocumentSegment, dataset: Dataset): @@ -65,3 +95,123 @@ class VectorService: keyword.add_texts([document], keywords_list=[keywords]) else: keyword.add_texts([document]) + + @classmethod + def generate_child_chunks( + cls, + segment: DocumentSegment, + dataset_document: DatasetDocument, + dataset: Dataset, + embedding_model_instance: ModelInstance, + processing_rule: DatasetProcessRule, + regenerate: bool = False, + ): + index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor() + if regenerate: + # delete child chunks + index_processor.clean(dataset, [segment.index_node_id], with_keywords=True, delete_child_chunks=True) + + # generate child chunks + document = Document( + page_content=segment.content, + metadata={ + "doc_id": segment.index_node_id, + "doc_hash": segment.index_node_hash, + "document_id": segment.document_id, + "dataset_id": segment.dataset_id, + }, + ) + # use full doc mode to generate segment's child chunk + processing_rule_dict = processing_rule.to_dict() + processing_rule_dict["rules"]["parent_mode"] = ParentMode.FULL_DOC.value + documents = index_processor.transform( + [document], + embedding_model_instance=embedding_model_instance, + process_rule=processing_rule_dict, + tenant_id=dataset.tenant_id, + doc_language=dataset_document.doc_language, + ) + # save child chunks + if documents and documents[0].children: + index_processor.load(dataset, documents) + + for position, child_chunk in enumerate(documents[0].children, start=1): + child_segment = ChildChunk( + tenant_id=dataset.tenant_id, + dataset_id=dataset.id, + document_id=dataset_document.id, + segment_id=segment.id, + position=position, + index_node_id=child_chunk.metadata["doc_id"], + index_node_hash=child_chunk.metadata["doc_hash"], + content=child_chunk.page_content, + word_count=len(child_chunk.page_content), + type="automatic", + created_by=dataset_document.created_by, + ) + db.session.add(child_segment) + db.session.commit() + + @classmethod + def create_child_chunk_vector(cls, child_segment: ChildChunk, dataset: Dataset): + child_document = Document( + page_content=child_segment.content, + metadata={ + "doc_id": child_segment.index_node_id, + "doc_hash": child_segment.index_node_hash, + "document_id": child_segment.document_id, + "dataset_id": child_segment.dataset_id, + }, + ) + if dataset.indexing_technique == "high_quality": + # save vector index + vector = Vector(dataset=dataset) + vector.add_texts([child_document], duplicate_check=True) + + @classmethod + def update_child_chunk_vector( + cls, + new_child_chunks: list[ChildChunk], + update_child_chunks: list[ChildChunk], + delete_child_chunks: list[ChildChunk], + dataset: Dataset, + ): + documents = [] + delete_node_ids = [] + for new_child_chunk in new_child_chunks: + new_child_document = Document( + page_content=new_child_chunk.content, + metadata={ + "doc_id": new_child_chunk.index_node_id, + "doc_hash": new_child_chunk.index_node_hash, + "document_id": new_child_chunk.document_id, + "dataset_id": new_child_chunk.dataset_id, + }, + ) + documents.append(new_child_document) + for update_child_chunk in update_child_chunks: + child_document = Document( + page_content=update_child_chunk.content, + metadata={ + "doc_id": update_child_chunk.index_node_id, + "doc_hash": update_child_chunk.index_node_hash, + "document_id": update_child_chunk.document_id, + "dataset_id": update_child_chunk.dataset_id, + }, + ) + documents.append(child_document) + 
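# A self-contained sketch of the upsert batching that update_child_chunk_vector performs
# above (FakeVector is a stand-in keyed by index_node_id; the real code goes through the
# dataset's Vector store): new chunks are only added, updated chunks have their old index
# node deleted and are then re-added, and removed chunks are only deleted.

class FakeVector:
    def __init__(self) -> None:
        self.store: dict[str, str] = {}

    def delete_by_ids(self, node_ids: list[str]) -> None:
        for node_id in node_ids:
            self.store.pop(node_id, None)

    def add_texts(self, docs: list[tuple[str, str]]) -> None:  # (index_node_id, content) pairs
        self.store.update(docs)

vector = FakeVector()
vector.add_texts([("n1", "child 1"), ("n2", "child 2"), ("n3", "child 3")])

new_docs = [("n4", "brand new child 4")]       # created chunks: add only
updated_docs = [("n2", "child 2 rewritten")]   # updated chunks: delete old node id, re-add
deleted_ids = ["n3"]                           # removed chunks: delete only

vector.delete_by_ids([node_id for node_id, _ in updated_docs] + deleted_ids)
vector.add_texts(new_docs + updated_docs)
print(sorted(vector.store))  # ['n1', 'n2', 'n4'] -- n2 now holds the rewritten content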
delete_node_ids.append(update_child_chunk.index_node_id) + for delete_child_chunk in delete_child_chunks: + delete_node_ids.append(delete_child_chunk.index_node_id) + if dataset.indexing_technique == "high_quality": + # update vector index + vector = Vector(dataset=dataset) + if delete_node_ids: + vector.delete_by_ids(delete_node_ids) + if documents: + vector.add_texts(documents, duplicate_check=True) + + @classmethod + def delete_child_chunk_vector(cls, child_chunk: ChildChunk, dataset: Dataset): + vector = Vector(dataset=dataset) + vector.delete_by_ids([child_chunk.index_node_id]) diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index 95649106e2..2de3d0ac55 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -3,6 +3,7 @@ import time from collections.abc import Callable, Generator, Sequence from datetime import UTC, datetime from typing import Any, Optional +from uuid import uuid4 from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager @@ -333,6 +334,7 @@ class WorkflowService: error = e.error workflow_node_execution = WorkflowNodeExecution() + workflow_node_execution.id = str(uuid4()) workflow_node_execution.tenant_id = tenant_id workflow_node_execution.triggered_from = WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP.value workflow_node_execution.index = 1 diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index 50bb2b6e63..9a172b2d9d 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -6,12 +6,13 @@ import click from celery import shared_task # type: ignore from werkzeug.exceptions import NotFound +from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_factory import IndexProcessorFactory -from core.rag.models.document import Document +from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from extensions.ext_redis import redis_client +from models.dataset import DatasetAutoDisableLog, DocumentSegment from models.dataset import Document as DatasetDocument -from models.dataset import DocumentSegment @shared_task(queue="dataset") @@ -53,7 +54,22 @@ def add_document_to_index_task(dataset_document_id: str): "dataset_id": segment.dataset_id, }, ) - + if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: + child_chunks = segment.child_chunks + if child_chunks: + child_documents = [] + for child_chunk in child_chunks: + child_document = ChildDocument( + page_content=child_chunk.content, + metadata={ + "doc_id": child_chunk.index_node_id, + "doc_hash": child_chunk.index_node_hash, + "document_id": segment.document_id, + "dataset_id": segment.dataset_id, + }, + ) + child_documents.append(child_document) + document.children = child_documents documents.append(document) dataset = dataset_document.dataset @@ -65,6 +81,12 @@ def add_document_to_index_task(dataset_document_id: str): index_processor = IndexProcessorFactory(index_type).init_index_processor() index_processor.load(dataset, documents) + # delete auto disable log + db.session.query(DatasetAutoDisableLog).filter( + DatasetAutoDisableLog.document_id == dataset_document.id + ).delete() + db.session.commit() + end_at = time.perf_counter() logging.info( click.style( diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py new file mode 
100644 index 0000000000..3bae82a5e3 --- /dev/null +++ b/api/tasks/batch_clean_document_task.py @@ -0,0 +1,76 @@ +import logging +import time + +import click +from celery import shared_task # type: ignore + +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory +from core.tools.utils.web_reader_tool import get_image_upload_file_ids +from extensions.ext_database import db +from extensions.ext_storage import storage +from models.dataset import Dataset, DocumentSegment +from models.model import UploadFile + + +@shared_task(queue="dataset") +def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form: str, file_ids: list[str]): + """ + Clean documents when documents are deleted. + :param document_ids: document ids + :param dataset_id: dataset id + :param doc_form: doc_form + :param file_ids: file ids + + Usage: batch_clean_document_task.delay(document_ids, dataset_id, doc_form, file_ids) + """ + logging.info(click.style("Start batch clean documents when documents deleted", fg="green")) + start_at = time.perf_counter() + + try: + dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() + + if not dataset: + raise Exception("Document has no dataset") + + segments = db.session.query(DocumentSegment).filter(DocumentSegment.document_id.in_(document_ids)).all() + # check if segments exist + if segments: + index_node_ids = [segment.index_node_id for segment in segments] + index_processor = IndexProcessorFactory(doc_form).init_index_processor() + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) + + for segment in segments: + image_upload_file_ids = get_image_upload_file_ids(segment.content) + for upload_file_id in image_upload_file_ids: + image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first() + try: + if image_file and image_file.key: + storage.delete(image_file.key) + except Exception: + logging.exception( + "Delete image_files failed when storage deleted, \ + image_upload_file_id: {}".format(upload_file_id) + ) + db.session.delete(image_file) + db.session.delete(segment) + + db.session.commit() + if file_ids: + files = db.session.query(UploadFile).filter(UploadFile.id.in_(file_ids)).all() + for file in files: + try: + storage.delete(file.key) + except Exception: + logging.exception("Delete file failed when document deleted, file_id: {}".format(file.id)) + db.session.delete(file) + db.session.commit() + + end_at = time.perf_counter() + logging.info( + click.style( + "Cleaned documents when documents deleted latency: {}".format(end_at - start_at), + fg="green", + ) + ) + except Exception: + logging.exception("Cleaned documents when documents deleted failed") diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index ce3d65526c..3238842307 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -7,13 +7,13 @@ import click from celery import shared_task # type: ignore from sqlalchemy import func -from core.indexing_runner import IndexingRunner from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from extensions.ext_database import db from extensions.ext_redis import redis_client from libs import helper from models.dataset import Dataset, Document, DocumentSegment +from services.vector_service import VectorService @shared_task(queue="dataset") @@ -98,8 +98,7 @@
dataset_document.word_count += word_count_change db.session.add(dataset_document) # add index to db - indexing_runner = IndexingRunner() - indexing_runner.batch_add_segments(document_segments, dataset) + VectorService.create_segments_vector(None, document_segments, dataset, dataset_document.doc_form) db.session.commit() redis_client.setex(indexing_cache_key, 600, "completed") end_at = time.perf_counter() diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py index c48eb2e320..4d77f1fb65 100644 --- a/api/tasks/clean_dataset_task.py +++ b/api/tasks/clean_dataset_task.py @@ -62,7 +62,7 @@ def clean_dataset_task( if doc_form is None: raise ValueError("Index type must be specified.") index_processor = IndexProcessorFactory(doc_form).init_index_processor() - index_processor.clean(dataset, None) + index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True) for document in documents: db.session.delete(document) diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py index 05eb9fd625..5a4d7a52b2 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -38,7 +38,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i if segments: index_node_ids = [segment.index_node_id for segment in segments] index_processor = IndexProcessorFactory(doc_form).init_index_processor() - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) for segment in segments: image_upload_file_ids = get_image_upload_file_ids(segment.content) diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index f5d6406d9c..5a6eb00a62 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -37,7 +37,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str): segments = db.session.query(DocumentSegment).filter(DocumentSegment.document_id == document_id).all() index_node_ids = [segment.index_node_id for segment in segments] - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) for segment in segments: db.session.delete(segment) diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py index b025509aeb..0efc924a77 100644 --- a/api/tasks/deal_dataset_vector_index_task.py +++ b/api/tasks/deal_dataset_vector_index_task.py @@ -4,8 +4,9 @@ import time import click from celery import shared_task # type: ignore +from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_factory import IndexProcessorFactory -from core.rag.models.document import Document +from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from models.dataset import Dataset, DocumentSegment from models.dataset import Document as DatasetDocument @@ -105,7 +106,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): db.session.commit() # clean index - index_processor.clean(dataset, None, with_keywords=False) + index_processor.clean(dataset, None, with_keywords=False, delete_child_chunks=False) for dataset_document in dataset_documents: # update from vector index @@ -128,7 +129,22 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): "dataset_id": segment.dataset_id, }, ) - + if dataset_document.doc_form == 
IndexType.PARENT_CHILD_INDEX: + child_chunks = segment.child_chunks + if child_chunks: + child_documents = [] + for child_chunk in child_chunks: + child_document = ChildDocument( + page_content=child_chunk.content, + metadata={ + "doc_id": child_chunk.index_node_id, + "doc_hash": child_chunk.index_node_hash, + "document_id": segment.document_id, + "dataset_id": segment.dataset_id, + }, + ) + child_documents.append(child_document) + document.children = child_documents documents.append(document) # save vector index index_processor.load(dataset, documents, with_keywords=False) diff --git a/api/tasks/delete_segment_from_index_task.py b/api/tasks/delete_segment_from_index_task.py index 45a612c745..3b04143dd9 100644 --- a/api/tasks/delete_segment_from_index_task.py +++ b/api/tasks/delete_segment_from_index_task.py @@ -6,48 +6,38 @@ from celery import shared_task # type: ignore from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db -from extensions.ext_redis import redis_client from models.dataset import Dataset, Document @shared_task(queue="dataset") -def delete_segment_from_index_task(segment_id: str, index_node_id: str, dataset_id: str, document_id: str): +def delete_segment_from_index_task(index_node_ids: list, dataset_id: str, document_id: str): """ Async Remove segment from index - :param segment_id: - :param index_node_id: + :param index_node_ids: :param dataset_id: :param document_id: - Usage: delete_segment_from_index_task.delay(segment_id) + Usage: delete_segment_from_index_task.delay(segment_ids) """ - logging.info(click.style("Start delete segment from index: {}".format(segment_id), fg="green")) + logging.info(click.style("Start delete segment from index", fg="green")) start_at = time.perf_counter() - indexing_cache_key = "segment_{}_delete_indexing".format(segment_id) try: dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() if not dataset: - logging.info(click.style("Segment {} has no dataset, pass.".format(segment_id), fg="cyan")) return dataset_document = db.session.query(Document).filter(Document.id == document_id).first() if not dataset_document: - logging.info(click.style("Segment {} has no document, pass.".format(segment_id), fg="cyan")) return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Segment {} document status is invalid, pass.".format(segment_id), fg="cyan")) return index_type = dataset_document.doc_form index_processor = IndexProcessorFactory(index_type).init_index_processor() - index_processor.clean(dataset, [index_node_id]) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) end_at = time.perf_counter() - logging.info( - click.style("Segment deleted from index: {} latency: {}".format(segment_id, end_at - start_at), fg="green") - ) + logging.info(click.style("Segment deleted from index latency: {}".format(end_at - start_at), fg="green")) except Exception: logging.exception("delete segment from index failed") - finally: - redis_client.delete(indexing_cache_key) diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py new file mode 100644 index 0000000000..67112666e7 --- /dev/null +++ b/api/tasks/disable_segments_from_index_task.py @@ -0,0 +1,76 @@ +import logging +import time + +import click +from celery import shared_task # type: ignore + +from core.rag.index_processor.index_processor_factory 
import IndexProcessorFactory +from extensions.ext_database import db +from extensions.ext_redis import redis_client +from models.dataset import Dataset, DocumentSegment +from models.dataset import Document as DatasetDocument + + +@shared_task(queue="dataset") +def disable_segments_from_index_task(segment_ids: list, dataset_id: str, document_id: str): + """ + Async disable segments from index + :param segment_ids: + + Usage: disable_segments_from_index_task.delay(segment_ids, dataset_id, document_id) + """ + start_at = time.perf_counter() + + dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() + if not dataset: + logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan")) + return + + dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first() + + if not dataset_document: + logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan")) + return + if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": + logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan")) + return + # sync index processor + index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor() + + segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset_id, + DocumentSegment.document_id == document_id, + ) + .all() + ) + + if not segments: + return + + try: + index_node_ids = [segment.index_node_id for segment in segments] + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) + + end_at = time.perf_counter() + logging.info(click.style("Segments removed from index latency: {}".format(end_at - start_at), fg="green")) + except Exception: + # update segment error msg + db.session.query(DocumentSegment).filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset_id, + DocumentSegment.document_id == document_id, + ).update( + { + "disabled_at": None, + "disabled_by": None, + "enabled": True, + } + ) + db.session.commit() + finally: + for segment in segments: + indexing_cache_key = "segment_{}_indexing".format(segment.id) + redis_client.delete(indexing_cache_key) diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index ac4e81f95d..d686698b9a 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -82,7 +82,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): index_node_ids = [segment.index_node_id for segment in segments] # delete from vector index - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) for segment in segments: db.session.delete(segment) diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py index 5f1e9a892f..d8f14830c9 100644 --- a/api/tasks/document_indexing_update_task.py +++ b/api/tasks/document_indexing_update_task.py @@ -47,7 +47,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str): index_node_ids = [segment.index_node_id for segment in segments] # delete from vector index - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) for segment in segments: 
db.session.delete(segment) diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index 6db2620eb6..8e1d2b6b5d 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -51,7 +51,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): if document: document.indexing_status = "error" document.error = str(e) - document.stopped_at = datetime.datetime.utcnow() + document.stopped_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) db.session.add(document) db.session.commit() return @@ -73,14 +73,14 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): index_node_ids = [segment.index_node_id for segment in segments] # delete from vector index - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) for segment in segments: db.session.delete(segment) db.session.commit() document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.utcnow() + document.processing_started_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) documents.append(document) db.session.add(document) db.session.commit() diff --git a/api/tasks/enable_segment_to_index_task.py b/api/tasks/enable_segment_to_index_task.py index 2f6eb7b82a..76522f4720 100644 --- a/api/tasks/enable_segment_to_index_task.py +++ b/api/tasks/enable_segment_to_index_task.py @@ -6,8 +6,9 @@ import click from celery import shared_task # type: ignore from werkzeug.exceptions import NotFound +from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_factory import IndexProcessorFactory -from core.rag.models.document import Document +from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from extensions.ext_redis import redis_client from models.dataset import DocumentSegment @@ -61,6 +62,22 @@ def enable_segment_to_index_task(segment_id: str): return index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor() + if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: + child_chunks = segment.child_chunks + if child_chunks: + child_documents = [] + for child_chunk in child_chunks: + child_document = ChildDocument( + page_content=child_chunk.content, + metadata={ + "doc_id": child_chunk.index_node_id, + "doc_hash": child_chunk.index_node_hash, + "document_id": segment.document_id, + "dataset_id": segment.dataset_id, + }, + ) + child_documents.append(child_document) + document.children = child_documents # save vector index index_processor.load(dataset, [document]) diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py new file mode 100644 index 0000000000..0864e05e25 --- /dev/null +++ b/api/tasks/enable_segments_to_index_task.py @@ -0,0 +1,108 @@ +import datetime +import logging +import time + +import click +from celery import shared_task # type: ignore + +from core.rag.index_processor.constant.index_type import IndexType +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory +from core.rag.models.document import ChildDocument, Document +from extensions.ext_database import db +from extensions.ext_redis import redis_client +from models.dataset import Dataset, DocumentSegment +from models.dataset import Document as DatasetDocument + + 
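# A rough sketch of the document tree these indexing tasks rebuild for the parent-child
# index (ParentDoc/ChildDoc are local stand-ins for core.rag.models.document's Document
# and ChildDocument): every enabled segment becomes a parent document, its child chunks
# hang off .children, and the index processor receives the whole batch in one load() call.

from dataclasses import dataclass, field

@dataclass
class ChildDoc:  # stand-in for ChildDocument
    page_content: str
    metadata: dict

@dataclass
class ParentDoc:  # stand-in for Document
    page_content: str
    metadata: dict
    children: list[ChildDoc] = field(default_factory=list)

segment_rows = [  # pretend ORM rows: (segment content, child chunk contents)
    ("parent segment A", ["child A1", "child A2"]),
    ("parent segment B", ["child B1"]),
]

documents = []
for i, (content, child_contents) in enumerate(segment_rows):
    parent = ParentDoc(page_content=content, metadata={"doc_id": f"seg-{i}"})
    parent.children = [
        ChildDoc(page_content=text, metadata={"doc_id": f"seg-{i}-child-{j}"})
        for j, text in enumerate(child_contents)
    ]
    documents.append(parent)

print(len(documents), sum(len(doc.children) for doc in documents))  # 2 parents, 3 children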
+@shared_task(queue="dataset") +def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_id: str): + """ + Async enable segments to index + :param segment_ids: + + Usage: enable_segments_to_index_task.delay(segment_ids) + """ + start_at = time.perf_counter() + dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() + if not dataset: + logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan")) + return + + dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first() + + if not dataset_document: + logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan")) + return + if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": + logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan")) + return + # sync index processor + index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor() + + segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset_id, + DocumentSegment.document_id == document_id, + ) + .all() + ) + if not segments: + return + + try: + documents = [] + for segment in segments: + document = Document( + page_content=segment.content, + metadata={ + "doc_id": segment.index_node_id, + "doc_hash": segment.index_node_hash, + "document_id": document_id, + "dataset_id": dataset_id, + }, + ) + + if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: + child_chunks = segment.child_chunks + if child_chunks: + child_documents = [] + for child_chunk in child_chunks: + child_document = ChildDocument( + page_content=child_chunk.content, + metadata={ + "doc_id": child_chunk.index_node_id, + "doc_hash": child_chunk.index_node_hash, + "document_id": document_id, + "dataset_id": dataset_id, + }, + ) + child_documents.append(child_document) + document.children = child_documents + documents.append(document) + # save vector index + index_processor.load(dataset, documents) + + end_at = time.perf_counter() + logging.info(click.style("Segments enabled to index latency: {}".format(end_at - start_at), fg="green")) + except Exception as e: + logging.exception("enable segments to index failed") + # update segment error msg + db.session.query(DocumentSegment).filter( + DocumentSegment.id.in_(segment_ids), + DocumentSegment.dataset_id == dataset_id, + DocumentSegment.document_id == document_id, + ).update( + { + "error": str(e), + "status": "error", + "disabled_at": datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None), + "enabled": False, + } + ) + db.session.commit() + finally: + for segment in segments: + indexing_cache_key = "segment_{}_indexing".format(segment.id) + redis_client.delete(indexing_cache_key) diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index 4ba6d1a83e..1d580b3802 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -43,7 +43,7 @@ def remove_document_from_index_task(document_id: str): index_node_ids = [segment.index_node_id for segment in segments] if index_node_ids: try: - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) except Exception: logging.exception(f"clean dataset {dataset.id} from index failed") diff --git 
a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py index 485caa5152..74fd542f6c 100644 --- a/api/tasks/retry_document_indexing_task.py +++ b/api/tasks/retry_document_indexing_task.py @@ -48,7 +48,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): if document: document.indexing_status = "error" document.error = str(e) - document.stopped_at = datetime.datetime.utcnow() + document.stopped_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) db.session.add(document) db.session.commit() redis_client.delete(retry_indexing_cache_key) @@ -69,14 +69,14 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): if segments: index_node_ids = [segment.index_node_id for segment in segments] # delete from vector index - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) - for segment in segments: - db.session.delete(segment) - db.session.commit() + for segment in segments: + db.session.delete(segment) + db.session.commit() document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.utcnow() + document.processing_started_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) db.session.add(document) db.session.commit() @@ -86,7 +86,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): except Exception as ex: document.indexing_status = "error" document.error = str(ex) - document.stopped_at = datetime.datetime.utcnow() + document.stopped_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) db.session.add(document) db.session.commit() logging.info(click.style(str(ex), fg="yellow")) diff --git a/api/tasks/sync_website_document_indexing_task.py b/api/tasks/sync_website_document_indexing_task.py index 5d6b069cf4..8da050d0d1 100644 --- a/api/tasks/sync_website_document_indexing_task.py +++ b/api/tasks/sync_website_document_indexing_task.py @@ -46,7 +46,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): if document: document.indexing_status = "error" document.error = str(e) - document.stopped_at = datetime.datetime.utcnow() + document.stopped_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) db.session.add(document) db.session.commit() redis_client.delete(sync_indexing_cache_key) @@ -65,14 +65,14 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): if segments: index_node_ids = [segment.index_node_id for segment in segments] # delete from vector index - index_processor.clean(dataset, index_node_ids) + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) - for segment in segments: - db.session.delete(segment) - db.session.commit() + for segment in segments: + db.session.delete(segment) + db.session.commit() document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.utcnow() + document.processing_started_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) db.session.add(document) db.session.commit() @@ -82,7 +82,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): except Exception as ex: document.indexing_status = "error" document.error = str(ex) - document.stopped_at = datetime.datetime.utcnow() + document.stopped_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) db.session.add(document) db.session.commit() 
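# The repeated timestamp change in these task files swaps datetime.utcnow(), which is
# deprecated as of Python 3.12, for datetime.now(timezone.utc).replace(tzinfo=None),
# which yields the same naive UTC value the existing columns expect. A quick check:

import datetime

naive_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
print(naive_utc.tzinfo)  # None -> still naive, so existing columns and comparisons behave the same
print(abs((datetime.datetime.utcnow() - naive_utc).total_seconds()) < 1.0)  # True: same wall clock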
logging.info(click.style(str(ex), fg="yellow")) diff --git a/api/templates/clean_document_job_mail_template-US.html b/api/templates/clean_document_job_mail_template-US.html new file mode 100644 index 0000000000..88e78f41c7 --- /dev/null +++ b/api/templates/clean_document_job_mail_template-US.html @@ -0,0 +1,100 @@ + + + + + + Documents Disabled Notification + + + + + + \ No newline at end of file diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 7122f4a6d0..e65ca45858 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -15,15 +15,15 @@ x-shared-env: &shared-api-worker-env LOG_FILE: ${LOG_FILE:-/app/logs/server.log} LOG_FILE_MAX_SIZE: ${LOG_FILE_MAX_SIZE:-20} LOG_FILE_BACKUP_COUNT: ${LOG_FILE_BACKUP_COUNT:-5} - LOG_DATEFORMAT: ${LOG_DATEFORMAT:-"%Y-%m-%d %H:%M:%S"} + LOG_DATEFORMAT: ${LOG_DATEFORMAT:-%Y-%m-%d %H:%M:%S} LOG_TZ: ${LOG_TZ:-UTC} DEBUG: ${DEBUG:-false} FLASK_DEBUG: ${FLASK_DEBUG:-false} SECRET_KEY: ${SECRET_KEY:-sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U} INIT_PASSWORD: ${INIT_PASSWORD:-} DEPLOY_ENV: ${DEPLOY_ENV:-PRODUCTION} - CHECK_UPDATE_URL: ${CHECK_UPDATE_URL:-"https://updates.dify.ai"} - OPENAI_API_BASE: ${OPENAI_API_BASE:-"https://api.openai.com/v1"} + CHECK_UPDATE_URL: ${CHECK_UPDATE_URL:-https://updates.dify.ai} + OPENAI_API_BASE: ${OPENAI_API_BASE:-https://api.openai.com/v1} MIGRATION_ENABLED: ${MIGRATION_ENABLED:-true} FILES_ACCESS_TIMEOUT: ${FILES_ACCESS_TIMEOUT:-300} ACCESS_TOKEN_EXPIRE_MINUTES: ${ACCESS_TOKEN_EXPIRE_MINUTES:-60} @@ -69,7 +69,7 @@ x-shared-env: &shared-api-worker-env REDIS_USE_CLUSTERS: ${REDIS_USE_CLUSTERS:-false} REDIS_CLUSTERS: ${REDIS_CLUSTERS:-} REDIS_CLUSTERS_PASSWORD: ${REDIS_CLUSTERS_PASSWORD:-} - CELERY_BROKER_URL: ${CELERY_BROKER_URL:-"redis://:difyai123456@redis:6379/1"} + CELERY_BROKER_URL: ${CELERY_BROKER_URL:-redis://:difyai123456@redis:6379/1} BROKER_USE_SSL: ${BROKER_USE_SSL:-false} CELERY_USE_SENTINEL: ${CELERY_USE_SENTINEL:-false} CELERY_SENTINEL_MASTER_NAME: ${CELERY_SENTINEL_MASTER_NAME:-} @@ -88,13 +88,13 @@ x-shared-env: &shared-api-worker-env AZURE_BLOB_ACCOUNT_NAME: ${AZURE_BLOB_ACCOUNT_NAME:-difyai} AZURE_BLOB_ACCOUNT_KEY: ${AZURE_BLOB_ACCOUNT_KEY:-difyai} AZURE_BLOB_CONTAINER_NAME: ${AZURE_BLOB_CONTAINER_NAME:-difyai-container} - AZURE_BLOB_ACCOUNT_URL: ${AZURE_BLOB_ACCOUNT_URL:-"https://.blob.core.windows.net"} + AZURE_BLOB_ACCOUNT_URL: ${AZURE_BLOB_ACCOUNT_URL:-https://.blob.core.windows.net} GOOGLE_STORAGE_BUCKET_NAME: ${GOOGLE_STORAGE_BUCKET_NAME:-your-bucket-name} GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64: ${GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64:-your-google-service-account-json-base64-string} ALIYUN_OSS_BUCKET_NAME: ${ALIYUN_OSS_BUCKET_NAME:-your-bucket-name} ALIYUN_OSS_ACCESS_KEY: ${ALIYUN_OSS_ACCESS_KEY:-your-access-key} ALIYUN_OSS_SECRET_KEY: ${ALIYUN_OSS_SECRET_KEY:-your-secret-key} - ALIYUN_OSS_ENDPOINT: ${ALIYUN_OSS_ENDPOINT:-"https://oss-ap-southeast-1-internal.aliyuncs.com"} + ALIYUN_OSS_ENDPOINT: ${ALIYUN_OSS_ENDPOINT:-https://oss-ap-southeast-1-internal.aliyuncs.com} ALIYUN_OSS_REGION: ${ALIYUN_OSS_REGION:-ap-southeast-1} ALIYUN_OSS_AUTH_VERSION: ${ALIYUN_OSS_AUTH_VERSION:-v4} ALIYUN_OSS_PATH: ${ALIYUN_OSS_PATH:-your-path} @@ -103,7 +103,7 @@ x-shared-env: &shared-api-worker-env TENCENT_COS_SECRET_ID: ${TENCENT_COS_SECRET_ID:-your-secret-id} TENCENT_COS_REGION: ${TENCENT_COS_REGION:-your-region} TENCENT_COS_SCHEME: ${TENCENT_COS_SCHEME:-your-scheme} - OCI_ENDPOINT: ${OCI_ENDPOINT:-"https://objectstorage.us-ashburn-1.oraclecloud.com"} + 
OCI_ENDPOINT: ${OCI_ENDPOINT:-https://objectstorage.us-ashburn-1.oraclecloud.com} OCI_BUCKET_NAME: ${OCI_BUCKET_NAME:-your-bucket-name} OCI_ACCESS_KEY: ${OCI_ACCESS_KEY:-your-access-key} OCI_SECRET_KEY: ${OCI_SECRET_KEY:-your-secret-key} @@ -125,14 +125,14 @@ x-shared-env: &shared-api-worker-env SUPABASE_API_KEY: ${SUPABASE_API_KEY:-your-access-key} SUPABASE_URL: ${SUPABASE_URL:-your-server-url} VECTOR_STORE: ${VECTOR_STORE:-weaviate} - WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-"http://weaviate:8080"} + WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-http://weaviate:8080} WEAVIATE_API_KEY: ${WEAVIATE_API_KEY:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih} - QDRANT_URL: ${QDRANT_URL:-"http://qdrant:6333"} + QDRANT_URL: ${QDRANT_URL:-http://qdrant:6333} QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456} QDRANT_CLIENT_TIMEOUT: ${QDRANT_CLIENT_TIMEOUT:-20} QDRANT_GRPC_ENABLED: ${QDRANT_GRPC_ENABLED:-false} QDRANT_GRPC_PORT: ${QDRANT_GRPC_PORT:-6334} - MILVUS_URI: ${MILVUS_URI:-"http://127.0.0.1:19530"} + MILVUS_URI: ${MILVUS_URI:-http://127.0.0.1:19530} MILVUS_TOKEN: ${MILVUS_TOKEN:-} MILVUS_USER: ${MILVUS_USER:-root} MILVUS_PASSWORD: ${MILVUS_PASSWORD:-Milvus} @@ -142,7 +142,7 @@ x-shared-env: &shared-api-worker-env MYSCALE_PASSWORD: ${MYSCALE_PASSWORD:-} MYSCALE_DATABASE: ${MYSCALE_DATABASE:-dify} MYSCALE_FTS_PARAMS: ${MYSCALE_FTS_PARAMS:-} - COUCHBASE_CONNECTION_STRING: ${COUCHBASE_CONNECTION_STRING:-"couchbase://couchbase-server"} + COUCHBASE_CONNECTION_STRING: ${COUCHBASE_CONNECTION_STRING:-couchbase://couchbase-server} COUCHBASE_USER: ${COUCHBASE_USER:-Administrator} COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password} COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings} @@ -176,15 +176,15 @@ x-shared-env: &shared-api-worker-env TIDB_VECTOR_USER: ${TIDB_VECTOR_USER:-} TIDB_VECTOR_PASSWORD: ${TIDB_VECTOR_PASSWORD:-} TIDB_VECTOR_DATABASE: ${TIDB_VECTOR_DATABASE:-dify} - TIDB_ON_QDRANT_URL: ${TIDB_ON_QDRANT_URL:-"http://127.0.0.1"} + TIDB_ON_QDRANT_URL: ${TIDB_ON_QDRANT_URL:-http://127.0.0.1} TIDB_ON_QDRANT_API_KEY: ${TIDB_ON_QDRANT_API_KEY:-dify} TIDB_ON_QDRANT_CLIENT_TIMEOUT: ${TIDB_ON_QDRANT_CLIENT_TIMEOUT:-20} TIDB_ON_QDRANT_GRPC_ENABLED: ${TIDB_ON_QDRANT_GRPC_ENABLED:-false} TIDB_ON_QDRANT_GRPC_PORT: ${TIDB_ON_QDRANT_GRPC_PORT:-6334} TIDB_PUBLIC_KEY: ${TIDB_PUBLIC_KEY:-dify} TIDB_PRIVATE_KEY: ${TIDB_PRIVATE_KEY:-dify} - TIDB_API_URL: ${TIDB_API_URL:-"http://127.0.0.1"} - TIDB_IAM_API_URL: ${TIDB_IAM_API_URL:-"http://127.0.0.1"} + TIDB_API_URL: ${TIDB_API_URL:-http://127.0.0.1} + TIDB_IAM_API_URL: ${TIDB_IAM_API_URL:-http://127.0.0.1} TIDB_REGION: ${TIDB_REGION:-regions/aws-us-east-1} TIDB_PROJECT_ID: ${TIDB_PROJECT_ID:-dify} TIDB_SPEND_LIMIT: ${TIDB_SPEND_LIMIT:-100} @@ -209,7 +209,7 @@ x-shared-env: &shared-api-worker-env OPENSEARCH_USER: ${OPENSEARCH_USER:-admin} OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin} OPENSEARCH_SECURE: ${OPENSEARCH_SECURE:-true} - TENCENT_VECTOR_DB_URL: ${TENCENT_VECTOR_DB_URL:-"http://127.0.0.1"} + TENCENT_VECTOR_DB_URL: ${TENCENT_VECTOR_DB_URL:-http://127.0.0.1} TENCENT_VECTOR_DB_API_KEY: ${TENCENT_VECTOR_DB_API_KEY:-dify} TENCENT_VECTOR_DB_TIMEOUT: ${TENCENT_VECTOR_DB_TIMEOUT:-30} TENCENT_VECTOR_DB_USERNAME: ${TENCENT_VECTOR_DB_USERNAME:-dify} @@ -221,7 +221,7 @@ x-shared-env: &shared-api-worker-env ELASTICSEARCH_USERNAME: ${ELASTICSEARCH_USERNAME:-elastic} ELASTICSEARCH_PASSWORD: ${ELASTICSEARCH_PASSWORD:-elastic} KIBANA_PORT: ${KIBANA_PORT:-5601} - BAIDU_VECTOR_DB_ENDPOINT: ${BAIDU_VECTOR_DB_ENDPOINT:-"http://127.0.0.1:5287"} + BAIDU_VECTOR_DB_ENDPOINT: 
${BAIDU_VECTOR_DB_ENDPOINT:-http://127.0.0.1:5287} BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS: ${BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS:-30000} BAIDU_VECTOR_DB_ACCOUNT: ${BAIDU_VECTOR_DB_ACCOUNT:-root} BAIDU_VECTOR_DB_API_KEY: ${BAIDU_VECTOR_DB_API_KEY:-dify} @@ -235,7 +235,7 @@ x-shared-env: &shared-api-worker-env VIKINGDB_SCHEMA: ${VIKINGDB_SCHEMA:-http} VIKINGDB_CONNECTION_TIMEOUT: ${VIKINGDB_CONNECTION_TIMEOUT:-30} VIKINGDB_SOCKET_TIMEOUT: ${VIKINGDB_SOCKET_TIMEOUT:-30} - LINDORM_URL: ${LINDORM_URL:-"http://lindorm:30070"} + LINDORM_URL: ${LINDORM_URL:-http://lindorm:30070} LINDORM_USERNAME: ${LINDORM_USERNAME:-lindorm} LINDORM_PASSWORD: ${LINDORM_PASSWORD:-lindorm} OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase} @@ -245,7 +245,7 @@ x-shared-env: &shared-api-worker-env OCEANBASE_VECTOR_DATABASE: ${OCEANBASE_VECTOR_DATABASE:-test} OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} - UPSTASH_VECTOR_URL: ${UPSTASH_VECTOR_URL:-"https://xxx-vector.upstash.io"} + UPSTASH_VECTOR_URL: ${UPSTASH_VECTOR_URL:-https://xxx-vector.upstash.io} UPSTASH_VECTOR_TOKEN: ${UPSTASH_VECTOR_TOKEN:-dify} UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15} UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5} @@ -270,7 +270,7 @@ x-shared-env: &shared-api-worker-env NOTION_INTERNAL_SECRET: ${NOTION_INTERNAL_SECRET:-} MAIL_TYPE: ${MAIL_TYPE:-resend} MAIL_DEFAULT_SEND_FROM: ${MAIL_DEFAULT_SEND_FROM:-} - RESEND_API_URL: ${RESEND_API_URL:-"https://api.resend.com"} + RESEND_API_URL: ${RESEND_API_URL:-https://api.resend.com} RESEND_API_KEY: ${RESEND_API_KEY:-your-resend-api-key} SMTP_SERVER: ${SMTP_SERVER:-} SMTP_PORT: ${SMTP_PORT:-465} @@ -281,7 +281,7 @@ x-shared-env: &shared-api-worker-env INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-4000} INVITE_EXPIRY_HOURS: ${INVITE_EXPIRY_HOURS:-72} RESET_PASSWORD_TOKEN_EXPIRY_MINUTES: ${RESET_PASSWORD_TOKEN_EXPIRY_MINUTES:-5} - CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-"http://sandbox:8194"} + CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194} CODE_EXECUTION_API_KEY: ${CODE_EXECUTION_API_KEY:-dify-sandbox} CODE_MAX_NUMBER: ${CODE_MAX_NUMBER:-9223372036854775807} CODE_MIN_NUMBER: ${CODE_MIN_NUMBER:--9223372036854775808} @@ -303,8 +303,8 @@ x-shared-env: &shared-api-worker-env WORKFLOW_FILE_UPLOAD_LIMIT: ${WORKFLOW_FILE_UPLOAD_LIMIT:-10} HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760} HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576} - SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-"http://ssrf_proxy:3128"} - SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-"http://ssrf_proxy:3128"} + SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128} + SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128} TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} PGUSER: ${PGUSER:-${DB_USERNAME}} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-${DB_PASSWORD}} @@ -314,8 +314,8 @@ x-shared-env: &shared-api-worker-env SANDBOX_GIN_MODE: ${SANDBOX_GIN_MODE:-release} SANDBOX_WORKER_TIMEOUT: ${SANDBOX_WORKER_TIMEOUT:-15} SANDBOX_ENABLE_NETWORK: ${SANDBOX_ENABLE_NETWORK:-true} - SANDBOX_HTTP_PROXY: ${SANDBOX_HTTP_PROXY:-"http://ssrf_proxy:3128"} - SANDBOX_HTTPS_PROXY: ${SANDBOX_HTTPS_PROXY:-"http://ssrf_proxy:3128"} + SANDBOX_HTTP_PROXY: ${SANDBOX_HTTP_PROXY:-http://ssrf_proxy:3128} + SANDBOX_HTTPS_PROXY: ${SANDBOX_HTTPS_PROXY:-http://ssrf_proxy:3128} SANDBOX_PORT: 
${SANDBOX_PORT:-8194} WEAVIATE_PERSISTENCE_DATA_PATH: ${WEAVIATE_PERSISTENCE_DATA_PATH:-/var/lib/weaviate} WEAVIATE_QUERY_DEFAULTS_LIMIT: ${WEAVIATE_QUERY_DEFAULTS_LIMIT:-25} @@ -338,8 +338,8 @@ x-shared-env: &shared-api-worker-env ETCD_SNAPSHOT_COUNT: ${ETCD_SNAPSHOT_COUNT:-50000} MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin} MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin} - ETCD_ENDPOINTS: ${ETCD_ENDPOINTS:-"etcd:2379"} - MINIO_ADDRESS: ${MINIO_ADDRESS:-"minio:9000"} + ETCD_ENDPOINTS: ${ETCD_ENDPOINTS:-etcd:2379} + MINIO_ADDRESS: ${MINIO_ADDRESS:-minio:9000} MILVUS_AUTHORIZATION_ENABLED: ${MILVUS_AUTHORIZATION_ENABLED:-true} PGVECTOR_PGUSER: ${PGVECTOR_PGUSER:-postgres} PGVECTOR_POSTGRES_PASSWORD: ${PGVECTOR_POSTGRES_PASSWORD:-difyai123456} @@ -360,7 +360,7 @@ x-shared-env: &shared-api-worker-env NGINX_SSL_PORT: ${NGINX_SSL_PORT:-443} NGINX_SSL_CERT_FILENAME: ${NGINX_SSL_CERT_FILENAME:-dify.crt} NGINX_SSL_CERT_KEY_FILENAME: ${NGINX_SSL_CERT_KEY_FILENAME:-dify.key} - NGINX_SSL_PROTOCOLS: ${NGINX_SSL_PROTOCOLS:-"TLSv1.1 TLSv1.2 TLSv1.3"} + NGINX_SSL_PROTOCOLS: ${NGINX_SSL_PROTOCOLS:-TLSv1.1 TLSv1.2 TLSv1.3} NGINX_WORKER_PROCESSES: ${NGINX_WORKER_PROCESSES:-auto} NGINX_CLIENT_MAX_BODY_SIZE: ${NGINX_CLIENT_MAX_BODY_SIZE:-15M} NGINX_KEEPALIVE_TIMEOUT: ${NGINX_KEEPALIVE_TIMEOUT:-65} @@ -374,7 +374,7 @@ x-shared-env: &shared-api-worker-env SSRF_COREDUMP_DIR: ${SSRF_COREDUMP_DIR:-/var/spool/squid} SSRF_REVERSE_PROXY_PORT: ${SSRF_REVERSE_PROXY_PORT:-8194} SSRF_SANDBOX_HOST: ${SSRF_SANDBOX_HOST:-sandbox} - COMPOSE_PROFILES: ${COMPOSE_PROFILES:-"${VECTOR_STORE:-weaviate}"} + COMPOSE_PROFILES: ${COMPOSE_PROFILES:-${VECTOR_STORE:-weaviate}} EXPOSE_NGINX_PORT: ${EXPOSE_NGINX_PORT:-80} EXPOSE_NGINX_SSL_PORT: ${EXPOSE_NGINX_SSL_PORT:-443} POSITION_TOOL_PINS: ${POSITION_TOOL_PINS:-} diff --git a/docker/generate_docker_compose b/docker/generate_docker_compose index 54b6d55217..dc4460f96c 100755 --- a/docker/generate_docker_compose +++ b/docker/generate_docker_compose @@ -43,7 +43,7 @@ def generate_shared_env_block(env_vars, anchor_name="shared-api-worker-env"): else: # If default value contains special characters, wrap it in quotes if re.search(r"[:\s]", default): - default = f'"{default}"' + default = f"{default}" lines.append(f" {key}: ${{{key}:-{default}}}") return "\n".join(lines) diff --git a/web/app/(commonLayout)/apps/hooks/useAppsQueryState.ts b/web/app/(commonLayout)/apps/hooks/useAppsQueryState.ts index fae5357bfc..7f1f4ba659 100644 --- a/web/app/(commonLayout)/apps/hooks/useAppsQueryState.ts +++ b/web/app/(commonLayout)/apps/hooks/useAppsQueryState.ts @@ -37,7 +37,7 @@ function useAppsQueryState() { const syncSearchParams = useCallback((params: URLSearchParams) => { const search = params.toString() const query = search ? `?${search}` : '' - router.push(`${pathname}${query}`) + router.push(`${pathname}${query}`, { scroll: false }) }, [router, pathname]) // Update the URL search string whenever the query changes. 
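The docker-compose edits above drop the quotes around ${VAR:-default} fallbacks, and the generate_docker_compose tweak stops reintroducing them, because compose substitutes the default after the YAML has been parsed, so quoted defaults leak literal quote characters into the container environment. A toy expander (not compose's actual parser) makes the pitfall visible:

import re

def expand(value: str, env: dict[str, str]) -> str:
    # minimal ${VAR:-default} substitution, just to illustrate the quoting issue
    return re.sub(r"\$\{(\w+):-(.*?)\}", lambda m: env.get(m.group(1), m.group(2)), value)

print(expand('${CHECK_UPDATE_URL:-"https://updates.dify.ai"}', {}))
# "https://updates.dify.ai"  <- the quotes become part of the value the container sees
print(expand('${CHECK_UPDATE_URL:-https://updates.dify.ai}', {}))
# https://updates.dify.ai    <- unquoted default, clean value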
diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx index b416659a6a..a6fb116fa8 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx @@ -7,85 +7,36 @@ import { useTranslation } from 'react-i18next' import { useBoolean } from 'ahooks' import { Cog8ToothIcon, - // CommandLineIcon, - Squares2X2Icon, - // eslint-disable-next-line sort-imports - PuzzlePieceIcon, DocumentTextIcon, PaperClipIcon, - QuestionMarkCircleIcon, } from '@heroicons/react/24/outline' import { Cog8ToothIcon as Cog8ToothSolidIcon, // CommandLineIcon as CommandLineSolidIcon, DocumentTextIcon as DocumentTextSolidIcon, } from '@heroicons/react/24/solid' -import Link from 'next/link' +import { RiApps2AddLine, RiInformation2Line } from '@remixicon/react' import s from './style.module.css' import classNames from '@/utils/classnames' import { fetchDatasetDetail, fetchDatasetRelatedApps } from '@/service/datasets' -import type { RelatedApp, RelatedAppResponse } from '@/models/datasets' +import type { RelatedAppResponse } from '@/models/datasets' import AppSideBar from '@/app/components/app-sidebar' -import Divider from '@/app/components/base/divider' -import AppIcon from '@/app/components/base/app-icon' import Loading from '@/app/components/base/loading' -import FloatPopoverContainer from '@/app/components/base/float-popover-container' import DatasetDetailContext from '@/context/dataset-detail' import { DataSourceType } from '@/models/datasets' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import { LanguagesSupported } from '@/i18n/language' import { useStore } from '@/app/components/app/store' -import { AiText, ChatBot, CuteRobot } from '@/app/components/base/icons/src/vender/solid/communication' -import { Route } from '@/app/components/base/icons/src/vender/solid/mapsAndTravel' import { getLocaleOnClient } from '@/i18n' import { useAppContext } from '@/context/app-context' +import Tooltip from '@/app/components/base/tooltip' +import LinkedAppsPanel from '@/app/components/base/linked-apps-panel' export type IAppDetailLayoutProps = { children: React.ReactNode params: { datasetId: string } } -type ILikedItemProps = { - type?: 'plugin' | 'app' - appStatus?: boolean - detail: RelatedApp - isMobile: boolean -} - -const LikedItem = ({ - type = 'app', - detail, - isMobile, -}: ILikedItemProps) => { - return ( - -
- - {type === 'app' && ( - - {detail.mode === 'advanced-chat' && ( - - )} - {detail.mode === 'agent-chat' && ( - - )} - {detail.mode === 'chat' && ( - - )} - {detail.mode === 'completion' && ( - - )} - {detail.mode === 'workflow' && ( - - )} - - )} -
- {!isMobile &&
{detail?.name || '--'}
} - - ) -} - const TargetIcon = ({ className }: SVGProps) => { return @@ -117,65 +68,80 @@ const BookOpenIcon = ({ className }: SVGProps) => { type IExtraInfoProps = { isMobile: boolean relatedApps?: RelatedAppResponse + expand: boolean } -const ExtraInfo = ({ isMobile, relatedApps }: IExtraInfoProps) => { +const ExtraInfo = ({ isMobile, relatedApps, expand }: IExtraInfoProps) => { const locale = getLocaleOnClient() const [isShowTips, { toggle: toggleTips, set: setShowTips }] = useBoolean(!isMobile) const { t } = useTranslation() + const hasRelatedApps = relatedApps?.data && relatedApps?.data?.length > 0 + const relatedAppsTotal = relatedApps?.data?.length || 0 + useEffect(() => { setShowTips(!isMobile) }, [isMobile, setShowTips]) - return
- - {(relatedApps?.data && relatedApps?.data?.length > 0) && ( + return
+ {hasRelatedApps && ( <> - {!isMobile &&
{relatedApps?.total || '--'} {t('common.datasetMenus.relatedApp')}
} + {!isMobile && ( + + } + > +
+ {relatedAppsTotal || '--'} {t('common.datasetMenus.relatedApp')} + +
+
+ )} + {isMobile &&
- {relatedApps?.total || '--'} + {relatedAppsTotal || '--'}
} - {relatedApps?.data?.map((item, index) => ())} )} - {!relatedApps?.data?.length && ( - - + {!hasRelatedApps && !expand && ( + +
+ +
+
{t('common.datasetMenus.emptyTip')}
+ + + {t('common.datasetMenus.viewDoc')} +
} > -
-
-
- -
-
- -
-
-
{t('common.datasetMenus.emptyTip')}
- - - {t('common.datasetMenus.viewDoc')} - +
+ {t('common.datasetMenus.noRelatedApp')} +
- + )}
} @@ -235,7 +201,7 @@ const DatasetDetailLayout: FC = (props) => { }, [isMobile, setAppSiderbarExpand]) if (!datasetRes && !error) - return + return return (
@@ -246,7 +212,7 @@ const DatasetDetailLayout: FC = (props) => { desc={datasetRes?.description || '--'} isExternal={datasetRes?.provider === 'external'} navigation={navigation} - extraInfo={!isCurrentWorkspaceDatasetOperator ? mode => : undefined} + extraInfo={!isCurrentWorkspaceDatasetOperator ? mode => : undefined} iconType={datasetRes?.data_source_type === DataSourceType.NOTION ? 'notion' : 'dataset'} />} = (props) => { dataset: datasetRes, mutateDatasetRes: () => mutateDatasetRes(), }}> -
{children}
+
{children}
) diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx index df314ddafe..3a65f1d30f 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx @@ -7,10 +7,10 @@ const Settings = async () => { const { t } = await translate(locale, 'dataset-settings') return ( -
+
-
{t('title')}
-
{t('desc')}
+
{t('title')}
+
{t('desc')}
diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css index 0ee64b4fcd..516b124809 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css @@ -1,12 +1,3 @@ -.itemWrapper { - @apply flex items-center w-full h-10 rounded-lg hover:bg-gray-50 cursor-pointer; -} -.appInfo { - @apply truncate text-gray-700 text-sm font-normal; -} -.iconWrapper { - @apply relative w-6 h-6 rounded-lg; -} .statusPoint { @apply flex justify-center items-center absolute -right-0.5 -bottom-0.5 w-2.5 h-2.5 bg-white rounded; } diff --git a/web/app/(commonLayout)/datasets/Container.tsx b/web/app/(commonLayout)/datasets/Container.tsx index a30521d998..a0edb1cd61 100644 --- a/web/app/(commonLayout)/datasets/Container.tsx +++ b/web/app/(commonLayout)/datasets/Container.tsx @@ -17,7 +17,6 @@ import TagManagementModal from '@/app/components/base/tag-management' import TagFilter from '@/app/components/base/tag-management/filter' import Button from '@/app/components/base/button' import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development' -import SearchInput from '@/app/components/base/search-input' // Services import { fetchDatasetApiBaseUrl } from '@/service/datasets' @@ -29,6 +28,7 @@ import { useAppContext } from '@/context/app-context' import { useExternalApiPanel } from '@/context/external-api-panel-context' // eslint-disable-next-line import/order import { useQuery } from '@tanstack/react-query' +import Input from '@/app/components/base/input' const Container = () => { const { t } = useTranslation() @@ -81,17 +81,24 @@ const Container = () => { }, [currentWorkspace, router]) return ( -
-
+
+
setActiveTab(newActiveTab)} options={options} /> {activeTab === 'dataset' && ( -
+
- + handleKeywordsChange(e.target.value)} + onClear={() => handleKeywordsChange('')} + />
+ +
+
+} diff --git a/web/app/components/base/linked-apps-panel/index.tsx b/web/app/components/base/linked-apps-panel/index.tsx new file mode 100644 index 0000000000..4320cb0fc6 --- /dev/null +++ b/web/app/components/base/linked-apps-panel/index.tsx @@ -0,0 +1,62 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import Link from 'next/link' +import { useTranslation } from 'react-i18next' +import { RiArrowRightUpLine } from '@remixicon/react' +import cn from '@/utils/classnames' +import AppIcon from '@/app/components/base/app-icon' +import type { RelatedApp } from '@/models/datasets' + +type ILikedItemProps = { + appStatus?: boolean + detail: RelatedApp + isMobile: boolean +} + +const appTypeMap = { + 'chat': 'Chatbot', + 'completion': 'Completion', + 'agent-chat': 'Agent', + 'advanced-chat': 'Chatflow', + 'workflow': 'Workflow', +} + +const LikedItem = ({ + detail, + isMobile, +}: ILikedItemProps) => { + return ( + +
+
+ +
+ {!isMobile &&
{detail?.name || '--'}
} +
+
{appTypeMap[detail.mode]}
+ + + ) +} + +type Props = { + relatedApps: RelatedApp[] + isMobile: boolean +} + +const LinkedAppsPanel: FC = ({ + relatedApps, + isMobile, +}) => { + const { t } = useTranslation() + return ( +
+
{relatedApps.length || '--'} {t('common.datasetMenus.relatedApp')}
+ {relatedApps.map((item, index) => ( + + ))} +
+ ) +} +export default React.memo(LinkedAppsPanel) diff --git a/web/app/components/base/pagination/index.tsx b/web/app/components/base/pagination/index.tsx index b64c712425..c0cc9f86ec 100644 --- a/web/app/components/base/pagination/index.tsx +++ b/web/app/components/base/pagination/index.tsx @@ -8,7 +8,7 @@ import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import cn from '@/utils/classnames' -type Props = { +export type Props = { className?: string current: number onChange: (cur: number) => void diff --git a/web/app/components/base/param-item/index.tsx b/web/app/components/base/param-item/index.tsx index 49acc81484..68c980ad09 100644 --- a/web/app/components/base/param-item/index.tsx +++ b/web/app/components/base/param-item/index.tsx @@ -1,5 +1,6 @@ 'use client' import type { FC } from 'react' +import { InputNumber } from '../input-number' import Tooltip from '@/app/components/base/tooltip' import Slider from '@/app/components/base/slider' import Switch from '@/app/components/base/switch' @@ -23,39 +24,44 @@ type Props = { const ParamItem: FC = ({ className, id, name, noTooltip, tip, step = 0.1, min = 0, max, value, enable, onChange, hasSwitch, onSwitchChange }) => { return (
-
-
+
+
{hasSwitch && ( { onSwitchChange?.(id, val) }} /> )} - {name} + {name} {!noTooltip && ( {tip}
} /> )} -
-
-
-
- { - const value = parseFloat(e.target.value) - if (value < min || value > max) - return - - onChange(id, value) - }} /> +
+
+ { + onChange(id, value) + }} + className='w-[72px]' + />
-
+
= ({ onChosen = () => { }, chosenConfig, chosenConfigWrapClassName, + className, }) => { return (
-
-
+
+
{icon}
-
{title}
-
{description}
+
{title}
+
{description}
{!noRadio && ( -
+
= ({ )}
{((isChosen && chosenConfig) || noRadio) && ( -
- {chosenConfig} +
+
+
+ {chosenConfig} +
)}
diff --git a/web/app/components/base/retry-button/index.tsx b/web/app/components/base/retry-button/index.tsx deleted file mode 100644 index 689827af7b..0000000000 --- a/web/app/components/base/retry-button/index.tsx +++ /dev/null @@ -1,85 +0,0 @@ -'use client' -import type { FC } from 'react' -import React, { useEffect, useReducer } from 'react' -import { useTranslation } from 'react-i18next' -import useSWR from 'swr' -import s from './style.module.css' -import classNames from '@/utils/classnames' -import Divider from '@/app/components/base/divider' -import { getErrorDocs, retryErrorDocs } from '@/service/datasets' -import type { IndexingStatusResponse } from '@/models/datasets' - -const WarningIcon = () => - - - - -type Props = { - datasetId: string -} -type IIndexState = { - value: string -} -type ActionType = 'retry' | 'success' | 'error' - -type IAction = { - type: ActionType -} -const indexStateReducer = (state: IIndexState, action: IAction) => { - const actionMap = { - retry: 'retry', - success: 'success', - error: 'error', - } - - return { - ...state, - value: actionMap[action.type] || state.value, - } -} - -const RetryButton: FC = ({ datasetId }) => { - const { t } = useTranslation() - const [indexState, dispatch] = useReducer(indexStateReducer, { value: 'success' }) - const { data: errorDocs } = useSWR({ datasetId }, getErrorDocs) - - const onRetryErrorDocs = async () => { - dispatch({ type: 'retry' }) - const document_ids = errorDocs?.data.map((doc: IndexingStatusResponse) => doc.id) || [] - const res = await retryErrorDocs({ datasetId, document_ids }) - if (res.result === 'success') - dispatch({ type: 'success' }) - else - dispatch({ type: 'error' }) - } - - useEffect(() => { - if (errorDocs?.total === 0) - dispatch({ type: 'success' }) - else - dispatch({ type: 'error' }) - }, [errorDocs?.total]) - - if (indexState.value === 'success') - return null - - return ( -
- - - {errorDocs?.total} {t('dataset.docsFailedNotice')} - - - - {t('dataset.retry')} - -
- ) -} -export default RetryButton diff --git a/web/app/components/base/retry-button/style.module.css b/web/app/components/base/retry-button/style.module.css deleted file mode 100644 index 99a0947576..0000000000 --- a/web/app/components/base/retry-button/style.module.css +++ /dev/null @@ -1,4 +0,0 @@ -.retryBtn { - @apply inline-flex justify-center items-center content-center h-9 leading-5 rounded-lg px-4 py-2 text-base; - @apply border-solid border border-gray-200 text-gray-500 hover:bg-white hover:shadow-sm hover:border-gray-300; -} diff --git a/web/app/components/base/simple-pie-chart/index.tsx b/web/app/components/base/simple-pie-chart/index.tsx index 7de539cbb1..4b987ab42d 100644 --- a/web/app/components/base/simple-pie-chart/index.tsx +++ b/web/app/components/base/simple-pie-chart/index.tsx @@ -10,10 +10,11 @@ export type SimplePieChartProps = { fill?: string stroke?: string size?: number + animationDuration?: number className?: string } -const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', size = 12, className }: SimplePieChartProps) => { +const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', size = 12, animationDuration, className }: SimplePieChartProps) => { const option: EChartsOption = useMemo(() => ({ series: [ { @@ -34,7 +35,7 @@ const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', { type: 'pie', radius: '83%', - animationDuration: 600, + animationDuration: animationDuration ?? 600, data: [ { value: percentage, itemStyle: { color: fill } }, { value: 100 - percentage, itemStyle: { color: '#fff' } }, @@ -48,7 +49,7 @@ const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', cursor: 'default', }, ], - }), [stroke, fill, percentage]) + }), [stroke, fill, percentage, animationDuration]) return ( -export const SkeletonContanier: FC = (props) => { +export const SkeletonContainer: FC = (props) => { const { className, children, ...rest } = props return (
@@ -30,11 +30,14 @@ export const SkeletonRectangle: FC = (props) => { ) } -export const SkeletonPoint: FC = () => -
·
- +export const SkeletonPoint: FC = (props) => { + const { className, ...rest } = props + return ( +
·
+ ) +} /** Usage - * + * * * * diff --git a/web/app/components/base/switch/index.tsx b/web/app/components/base/switch/index.tsx index f61c6f46ff..8bf32b1311 100644 --- a/web/app/components/base/switch/index.tsx +++ b/web/app/components/base/switch/index.tsx @@ -64,4 +64,7 @@ const Switch = ({ onChange, size = 'md', defaultValue = false, disabled = false, ) } + +Switch.displayName = 'Switch' + export default React.memo(Switch) diff --git a/web/app/components/base/tag-input/index.tsx b/web/app/components/base/tag-input/index.tsx index b26d0c6438..ec6c1cee34 100644 --- a/web/app/components/base/tag-input/index.tsx +++ b/web/app/components/base/tag-input/index.tsx @@ -3,8 +3,8 @@ import type { ChangeEvent, FC, KeyboardEvent } from 'react' import { } from 'use-context-selector' import { useTranslation } from 'react-i18next' import AutosizeInput from 'react-18-input-autosize' +import { RiAddLine, RiCloseLine } from '@remixicon/react' import cn from '@/utils/classnames' -import { X } from '@/app/components/base/icons/src/vender/line/general' import { useToastContext } from '@/app/components/base/toast' type TagInputProps = { @@ -75,14 +75,14 @@ const TagInput: FC = ({ (items || []).map((item, index) => (
+ className={cn('flex items-center mr-1 mt-1 pl-1.5 pr-1 py-1 system-xs-regular text-text-secondary border border-divider-deep bg-components-badge-white-to-dark rounded-md')} + > {item} { !disableRemove && ( - handleRemove(index)} - /> +
handleRemove(index)}> + +
) }
@@ -90,24 +90,27 @@ const TagInput: FC = ({ } { !disableAdd && ( - setFocused(true)} - onBlur={handleBlur} - value={value} - onChange={(e: ChangeEvent) => { - setValue(e.target.value) - }} - onKeyDown={handleKeyDown} - placeholder={t(placeholder || (isSpecialMode ? 'common.model.params.stop_sequencesPlaceholder' : 'datasetDocuments.segment.addKeyWord'))} - /> +
+ {!isSpecialMode && !focused && } + setFocused(true)} + onBlur={handleBlur} + value={value} + onChange={(e: ChangeEvent) => { + setValue(e.target.value) + }} + onKeyDown={handleKeyDown} + placeholder={t(placeholder || (isSpecialMode ? 'common.model.params.stop_sequencesPlaceholder' : 'datasetDocuments.segment.addKeyWord'))} + /> +
) }
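The tag-input rework above swaps the old X icon for `RiCloseLine` and shows a `RiAddLine` hint beside the field until it gains focus. A rough sketch of that focus-dependent add affordance, using a plain `<input>` instead of `AutosizeInput`; the Enter-to-commit handling is an assumption, since `handleKeyDown`'s body is not part of the hunk:

```tsx
'use client'
import { useState } from 'react'
import { RiAddLine } from '@remixicon/react'

// Simplified stand-in for the TagInput add field; names and classes are illustrative.
const AddTagField = ({ onAdd }: { onAdd: (tag: string) => void }) => {
  const [value, setValue] = useState('')
  const [focused, setFocused] = useState(false)

  return (
    <div className='mt-1 flex items-center gap-1'>
      {/* The plus icon is only rendered while the field is idle, as in the reworked component. */}
      {!focused && <RiAddLine className='h-3 w-3' />}
      <input
        value={value}
        placeholder='Add keyword'
        onFocus={() => setFocused(true)}
        onBlur={() => setFocused(false)}
        onChange={e => setValue(e.target.value)}
        onKeyDown={(e) => {
          // Assumed behavior: commit the keyword on Enter and clear the field.
          if (e.key === 'Enter' && value.trim()) {
            onAdd(value.trim())
            setValue('')
          }
        }}
      />
    </div>
  )
}

export default AddTagField
```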
diff --git a/web/app/components/base/toast/index.tsx b/web/app/components/base/toast/index.tsx index b9a6de9fe5..ba7d8af518 100644 --- a/web/app/components/base/toast/index.tsx +++ b/web/app/components/base/toast/index.tsx @@ -21,6 +21,7 @@ export type IToastProps = { children?: ReactNode onClose?: () => void className?: string + customComponent?: ReactNode } type IToastContext = { notify: (props: IToastProps) => void @@ -35,6 +36,7 @@ const Toast = ({ message, children, className, + customComponent, }: IToastProps) => { const { close } = useToastContext() // sometimes message is react node array. Not handle it. @@ -49,8 +51,7 @@ const Toast = ({ 'top-0', 'right-0', )}> - -
-
{message}
+
+
+
{message}
+ {customComponent} +
{children &&
{children}
}
- +
@@ -117,7 +121,8 @@ Toast.notify = ({ message, duration, className, -}: Pick) => { + customComponent, +}: Pick) => { const defaultDuring = (type === 'success' || type === 'info') ? 3000 : 6000 if (typeof window === 'object') { const holder = document.createElement('div') @@ -133,7 +138,7 @@ Toast.notify = ({ } }, }}> - + , ) document.body.appendChild(holder) diff --git a/web/app/components/base/tooltip/index.tsx b/web/app/components/base/tooltip/index.tsx index 8ec3cd8c7a..65b5a99077 100644 --- a/web/app/components/base/tooltip/index.tsx +++ b/web/app/components/base/tooltip/index.tsx @@ -14,6 +14,7 @@ export type TooltipProps = { popupContent?: React.ReactNode children?: React.ReactNode popupClassName?: string + noDecoration?: boolean offset?: OffsetOptions needsDelay?: boolean asChild?: boolean @@ -27,6 +28,7 @@ const Tooltip: FC = ({ popupContent, children, popupClassName, + noDecoration, offset, asChild = true, needsDelay = false, @@ -96,7 +98,7 @@ const Tooltip: FC = ({ > {popupContent && (
triggerMethod === 'hover' && setHoverPopup()} diff --git a/web/app/components/billing/priority-label/index.tsx b/web/app/components/billing/priority-label/index.tsx index 36338cf4a8..6ecac4a79e 100644 --- a/web/app/components/billing/priority-label/index.tsx +++ b/web/app/components/billing/priority-label/index.tsx @@ -4,6 +4,7 @@ import { DocumentProcessingPriority, Plan, } from '../type' +import cn from '@/utils/classnames' import { useProviderContext } from '@/context/provider-context' import { ZapFast, @@ -11,7 +12,11 @@ import { } from '@/app/components/base/icons/src/vender/solid/general' import Tooltip from '@/app/components/base/tooltip' -const PriorityLabel = () => { +type PriorityLabelProps = { + className?: string +} + +const PriorityLabel = ({ className }: PriorityLabelProps) => { const { t } = useTranslation() const { plan } = useProviderContext() @@ -37,18 +42,18 @@ const PriorityLabel = () => { }
}> - + { plan.type === Plan.professional && ( - + ) } { (plan.type === Plan.team || plan.type === Plan.enterprise) && ( - + ) } {t(`billing.plansCommon.priority.${priority}`)} diff --git a/web/app/components/datasets/chunk.tsx b/web/app/components/datasets/chunk.tsx new file mode 100644 index 0000000000..bf2835dbdb --- /dev/null +++ b/web/app/components/datasets/chunk.tsx @@ -0,0 +1,54 @@ +import type { FC, PropsWithChildren } from 'react' +import { SelectionMod } from '../base/icons/src/public/knowledge' +import type { QA } from '@/models/datasets' + +export type ChunkLabelProps = { + label: string + characterCount: number +} + +export const ChunkLabel: FC = (props) => { + const { label, characterCount } = props + return
+ +

+ {label} + + + · + + + {`${characterCount} characters`} +

+
+} + +export type ChunkContainerProps = ChunkLabelProps & PropsWithChildren + +export const ChunkContainer: FC = (props) => { + const { label, characterCount, children } = props + return
+ +
+ {children} +
+
+} + +export type QAPreviewProps = { + qa: QA +} + +export const QAPreview: FC = (props) => { + const { qa } = props + return
+
+ +

{qa.question}

+
+
+ +

{qa.answer}

+
+
+} diff --git a/web/app/components/datasets/common/chunking-mode-label.tsx b/web/app/components/datasets/common/chunking-mode-label.tsx new file mode 100644 index 0000000000..7c6e924009 --- /dev/null +++ b/web/app/components/datasets/common/chunking-mode-label.tsx @@ -0,0 +1,29 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import Badge from '@/app/components/base/badge' +import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge' + +type Props = { + isGeneralMode: boolean + isQAMode: boolean +} + +const ChunkingModeLabel: FC = ({ + isGeneralMode, + isQAMode, +}) => { + const { t } = useTranslation() + const TypeIcon = isGeneralMode ? GeneralType : ParentChildType + + return ( + +
+ + {isGeneralMode ? `${t('dataset.chunkingMode.general')}${isQAMode ? ' · QA' : ''}` : t('dataset.chunkingMode.parentChild')} +
+
+ ) +} +export default React.memo(ChunkingModeLabel) diff --git a/web/app/components/datasets/common/document-file-icon.tsx b/web/app/components/datasets/common/document-file-icon.tsx new file mode 100644 index 0000000000..5842cbbc7c --- /dev/null +++ b/web/app/components/datasets/common/document-file-icon.tsx @@ -0,0 +1,40 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import FileTypeIcon from '../../base/file-uploader/file-type-icon' +import type { FileAppearanceType } from '@/app/components/base/file-uploader/types' +import { FileAppearanceTypeEnum } from '@/app/components/base/file-uploader/types' + +const extendToFileTypeMap: { [key: string]: FileAppearanceType } = { + pdf: FileAppearanceTypeEnum.pdf, + json: FileAppearanceTypeEnum.document, + html: FileAppearanceTypeEnum.document, + txt: FileAppearanceTypeEnum.document, + markdown: FileAppearanceTypeEnum.markdown, + md: FileAppearanceTypeEnum.markdown, + xlsx: FileAppearanceTypeEnum.excel, + xls: FileAppearanceTypeEnum.excel, + csv: FileAppearanceTypeEnum.excel, + doc: FileAppearanceTypeEnum.word, + docx: FileAppearanceTypeEnum.word, +} + +type Props = { + extension?: string + name?: string + size?: 'sm' | 'lg' | 'md' + className?: string +} + +const DocumentFileIcon: FC = ({ + extension, + name, + size = 'md', + className, +}) => { + const localExtension = extension?.toLowerCase() || name?.split('.')?.pop()?.toLowerCase() + return ( + + ) +} +export default React.memo(DocumentFileIcon) diff --git a/web/app/components/datasets/common/document-picker/document-list.tsx b/web/app/components/datasets/common/document-picker/document-list.tsx new file mode 100644 index 0000000000..3e320d7507 --- /dev/null +++ b/web/app/components/datasets/common/document-picker/document-list.tsx @@ -0,0 +1,42 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import FileIcon from '../document-file-icon' +import cn from '@/utils/classnames' +import type { DocumentItem } from '@/models/datasets' + +type Props = { + className?: string + list: DocumentItem[] + onChange: (value: DocumentItem) => void +} + +const DocumentList: FC = ({ + className, + list, + onChange, +}) => { + const handleChange = useCallback((item: DocumentItem) => { + return () => onChange(item) + }, [onChange]) + + return ( +
+ {list.map((item) => { + const { id, name, extension } = item + return ( +
+ +
{name}
+
+ ) + })} +
+ ) +} + +export default React.memo(DocumentList) diff --git a/web/app/components/datasets/common/document-picker/index.tsx b/web/app/components/datasets/common/document-picker/index.tsx new file mode 100644 index 0000000000..30690fca00 --- /dev/null +++ b/web/app/components/datasets/common/document-picker/index.tsx @@ -0,0 +1,118 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback, useState } from 'react' +import { useBoolean } from 'ahooks' +import { RiArrowDownSLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import FileIcon from '../document-file-icon' +import DocumentList from './document-list' +import type { DocumentItem, ParentMode, SimpleDocumentDetail } from '@/models/datasets' +import { ProcessMode } from '@/models/datasets' +import { + PortalToFollowElem, + PortalToFollowElemContent, + PortalToFollowElemTrigger, +} from '@/app/components/base/portal-to-follow-elem' +import cn from '@/utils/classnames' +import SearchInput from '@/app/components/base/search-input' +import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge' +import { useDocumentList } from '@/service/knowledge/use-document' +import Loading from '@/app/components/base/loading' + +type Props = { + datasetId: string + value: { + name?: string + extension?: string + processMode?: ProcessMode + parentMode?: ParentMode + } + onChange: (value: SimpleDocumentDetail) => void +} + +const DocumentPicker: FC = ({ + datasetId, + value, + onChange, +}) => { + const { t } = useTranslation() + const { + name, + extension, + processMode, + parentMode, + } = value + const [query, setQuery] = useState('') + + const { data } = useDocumentList({ + datasetId, + query: { + keyword: query, + page: 1, + limit: 20, + }, + }) + const documentsList = data?.data + const isParentChild = processMode === ProcessMode.parentChild + const TypeIcon = isParentChild ? ParentChildType : GeneralType + + const [open, { + set: setOpen, + toggle: togglePopup, + }] = useBoolean(false) + const ArrowIcon = RiArrowDownSLine + + const handleChange = useCallback(({ id }: DocumentItem) => { + onChange(documentsList?.find(item => item.id === id) as SimpleDocumentDetail) + setOpen(false) + }, [documentsList, onChange, setOpen]) + + return ( + + +
+ +
+
+ {name || '--'} + +
+
+ + + {isParentChild ? t('dataset.chunkingMode.parentChild') : t('dataset.chunkingMode.general')} + {isParentChild && ` · ${!parentMode ? '--' : parentMode === 'paragraph' ? t('dataset.parentMode.paragraph') : t('dataset.parentMode.fullDoc')}`} + +
+
+
+
+ +
+ + {documentsList + ? ( + ({ + id: d.id, + name: d.name, + extension: d.data_source_detail_dict?.upload_file?.extension || '', + }))} + onChange={handleChange} + /> + ) + : (
+ +
)} +
+ +
+
+ ) +} +export default React.memo(DocumentPicker) diff --git a/web/app/components/datasets/common/document-picker/preview-document-picker.tsx b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx new file mode 100644 index 0000000000..2a35b75471 --- /dev/null +++ b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx @@ -0,0 +1,82 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import { useBoolean } from 'ahooks' +import { RiArrowDownSLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import FileIcon from '../document-file-icon' +import DocumentList from './document-list' +import { + PortalToFollowElem, + PortalToFollowElemContent, + PortalToFollowElemTrigger, +} from '@/app/components/base/portal-to-follow-elem' +import cn from '@/utils/classnames' +import Loading from '@/app/components/base/loading' +import type { DocumentItem } from '@/models/datasets' + +type Props = { + className?: string + value: DocumentItem + files: DocumentItem[] + onChange: (value: DocumentItem) => void +} + +const PreviewDocumentPicker: FC = ({ + className, + value, + files, + onChange, +}) => { + const { t } = useTranslation() + const { name, extension } = value + + const [open, { + set: setOpen, + toggle: togglePopup, + }] = useBoolean(false) + const ArrowIcon = RiArrowDownSLine + + const handleChange = useCallback((item: DocumentItem) => { + onChange(item) + setOpen(false) + }, [onChange, setOpen]) + + return ( + + +
+ +
+
+ {name || '--'} + +
+
+
+
+ +
+ {files?.length > 1 &&
{t('dataset.preprocessDocument', { num: files.length })}
} + {files?.length > 0 + ? ( + + ) + : (
+ +
)} +
+ +
+
+ ) +} +export default React.memo(PreviewDocumentPicker) diff --git a/web/app/components/datasets/common/document-status-with-action/auto-disabled-document.tsx b/web/app/components/datasets/common/document-status-with-action/auto-disabled-document.tsx new file mode 100644 index 0000000000..b687c004e5 --- /dev/null +++ b/web/app/components/datasets/common/document-status-with-action/auto-disabled-document.tsx @@ -0,0 +1,38 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import { useTranslation } from 'react-i18next' +import StatusWithAction from './status-with-action' +import { useAutoDisabledDocuments, useDocumentEnable, useInvalidDisabledDocument } from '@/service/knowledge/use-document' +import Toast from '@/app/components/base/toast' +type Props = { + datasetId: string +} + +const AutoDisabledDocument: FC = ({ + datasetId, +}) => { + const { t } = useTranslation() + const { data, isLoading } = useAutoDisabledDocuments(datasetId) + const invalidDisabledDocument = useInvalidDisabledDocument() + const documentIds = data?.document_ids + const hasDisabledDocument = documentIds && documentIds.length > 0 + const { mutateAsync: enableDocument } = useDocumentEnable() + const handleEnableDocuments = useCallback(async () => { + await enableDocument({ datasetId, documentIds }) + invalidDisabledDocument() + Toast.notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') }) + }, []) + if (!hasDisabledDocument || isLoading) + return null + + return ( + + ) +} +export default React.memo(AutoDisabledDocument) diff --git a/web/app/components/datasets/common/document-status-with-action/index-failed.tsx b/web/app/components/datasets/common/document-status-with-action/index-failed.tsx new file mode 100644 index 0000000000..37311768b9 --- /dev/null +++ b/web/app/components/datasets/common/document-status-with-action/index-failed.tsx @@ -0,0 +1,69 @@ +'use client' +import type { FC } from 'react' +import React, { useEffect, useReducer } from 'react' +import { useTranslation } from 'react-i18next' +import useSWR from 'swr' +import StatusWithAction from './status-with-action' +import { getErrorDocs, retryErrorDocs } from '@/service/datasets' +import type { IndexingStatusResponse } from '@/models/datasets' + +type Props = { + datasetId: string +} +type IIndexState = { + value: string +} +type ActionType = 'retry' | 'success' | 'error' + +type IAction = { + type: ActionType +} +const indexStateReducer = (state: IIndexState, action: IAction) => { + const actionMap = { + retry: 'retry', + success: 'success', + error: 'error', + } + + return { + ...state, + value: actionMap[action.type] || state.value, + } +} + +const RetryButton: FC = ({ datasetId }) => { + const { t } = useTranslation() + const [indexState, dispatch] = useReducer(indexStateReducer, { value: 'success' }) + const { data: errorDocs, isLoading } = useSWR({ datasetId }, getErrorDocs) + + const onRetryErrorDocs = async () => { + dispatch({ type: 'retry' }) + const document_ids = errorDocs?.data.map((doc: IndexingStatusResponse) => doc.id) || [] + const res = await retryErrorDocs({ datasetId, document_ids }) + if (res.result === 'success') + dispatch({ type: 'success' }) + else + dispatch({ type: 'error' }) + } + + useEffect(() => { + if (errorDocs?.total === 0) + dispatch({ type: 'success' }) + else + dispatch({ type: 'error' }) + }, [errorDocs?.total]) + + if (isLoading || indexState.value === 'success') + return null + + return ( + { }} + /> + ) +} +export default RetryButton diff 
--git a/web/app/components/datasets/common/document-status-with-action/status-with-action.tsx b/web/app/components/datasets/common/document-status-with-action/status-with-action.tsx new file mode 100644 index 0000000000..a8da9bf6cc --- /dev/null +++ b/web/app/components/datasets/common/document-status-with-action/status-with-action.tsx @@ -0,0 +1,65 @@ +'use client' +import { RiAlertFill, RiCheckboxCircleFill, RiErrorWarningFill, RiInformation2Fill } from '@remixicon/react' +import type { FC } from 'react' +import React from 'react' +import cn from '@/utils/classnames' +import Divider from '@/app/components/base/divider' + +type Status = 'success' | 'error' | 'warning' | 'info' +type Props = { + type?: Status + description: string + actionText: string + onAction: () => void + disabled?: boolean +} + +const IconMap = { + success: { + Icon: RiCheckboxCircleFill, + color: 'text-text-success', + }, + error: { + Icon: RiErrorWarningFill, + color: 'text-text-destructive', + }, + warning: { + Icon: RiAlertFill, + color: 'text-text-warning-secondary', + }, + info: { + Icon: RiInformation2Fill, + color: 'text-text-accent', + }, +} + +const getIcon = (type: Status) => { + return IconMap[type] +} + +const StatusAction: FC = ({ + type = 'info', + description, + actionText, + onAction, + disabled, +}) => { + const { Icon, color } = getIcon(type) + return ( +
+
+
+ +
{description}
+ +
{actionText}
+
+
+ ) +} +export default React.memo(StatusAction) diff --git a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx index f3da67b92c..9236858ae4 100644 --- a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx @@ -2,10 +2,11 @@ import type { FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' import RetrievalParamConfig from '../retrieval-param-config' +import { OptionCard } from '../../create/step-two/option-card' +import { retrievalIcon } from '../../create/icons' import { RETRIEVE_METHOD } from '@/types/app' -import RadioCard from '@/app/components/base/radio-card' -import { HighPriority } from '@/app/components/base/icons/src/vender/solid/arrows' import type { RetrievalConfig } from '@/types/app' type Props = { @@ -21,19 +22,17 @@ const EconomicalRetrievalMethodConfig: FC = ({ return (
- } + } title={t('dataset.retrieval.invertedIndex.title')} - description={t('dataset.retrieval.invertedIndex.description')} - noRadio - chosenConfig={ - - } - /> + description={t('dataset.retrieval.invertedIndex.description')} isActive + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + +
) } diff --git a/web/app/components/datasets/common/retrieval-method-config/index.tsx b/web/app/components/datasets/common/retrieval-method-config/index.tsx index 20d93568ad..9ab157571b 100644 --- a/web/app/components/datasets/common/retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-config/index.tsx @@ -2,12 +2,13 @@ import type { FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' import RetrievalParamConfig from '../retrieval-param-config' +import { OptionCard } from '../../create/step-two/option-card' +import Effect from '../../create/assets/option-card-effect-purple.svg' +import { retrievalIcon } from '../../create/icons' import type { RetrievalConfig } from '@/types/app' import { RETRIEVE_METHOD } from '@/types/app' -import RadioCard from '@/app/components/base/radio-card' -import { PatternRecognition, Semantic } from '@/app/components/base/icons/src/vender/solid/development' -import { FileSearch02 } from '@/app/components/base/icons/src/vender/solid/files' import { useProviderContext } from '@/context/provider-context' import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' @@ -16,6 +17,7 @@ import { RerankingModeEnum, WeightedScoreEnum, } from '@/models/datasets' +import Badge from '@/app/components/base/badge' type Props = { value: RetrievalConfig @@ -56,67 +58,72 @@ const RetrievalMethodConfig: FC = ({ return (
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.semantic_search.title')} description={t('dataset.retrieval.semantic_search.description')} - isChosen={value.search_method === RETRIEVE_METHOD.semantic} - onChosen={() => onChange({ + isActive={ + value.search_method === RETRIEVE_METHOD.semantic + } + onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.semantic, })} - chosenConfig={ - - } - /> + effectImg={Effect.src} + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + + )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.full_text_search.title')} description={t('dataset.retrieval.full_text_search.description')} - isChosen={value.search_method === RETRIEVE_METHOD.fullText} - onChosen={() => onChange({ + isActive={ + value.search_method === RETRIEVE_METHOD.fullText + } + onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.fullText, })} - chosenConfig={ - - } - /> + effectImg={Effect.src} + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + + )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={
{t('dataset.retrieval.hybrid_search.title')}
-
{t('dataset.retrieval.hybrid_search.recommend')}
+
} - description={t('dataset.retrieval.hybrid_search.description')} - isChosen={value.search_method === RETRIEVE_METHOD.hybrid} - onChosen={() => onChange({ + description={t('dataset.retrieval.hybrid_search.description')} isActive={ + value.search_method === RETRIEVE_METHOD.hybrid + } + onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.hybrid, reranking_enable: true, })} - chosenConfig={ - - } - /> + effectImg={Effect.src} + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + +
)}
) diff --git a/web/app/components/datasets/common/retrieval-method-info/index.tsx b/web/app/components/datasets/common/retrieval-method-info/index.tsx index 7d9b999c53..fc3020d4a9 100644 --- a/web/app/components/datasets/common/retrieval-method-info/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-info/index.tsx @@ -2,12 +2,11 @@ import type { FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' +import { retrievalIcon } from '../../create/icons' import type { RetrievalConfig } from '@/types/app' import { RETRIEVE_METHOD } from '@/types/app' import RadioCard from '@/app/components/base/radio-card' -import { HighPriority } from '@/app/components/base/icons/src/vender/solid/arrows' -import { PatternRecognition, Semantic } from '@/app/components/base/icons/src/vender/solid/development' -import { FileSearch02 } from '@/app/components/base/icons/src/vender/solid/files' type Props = { value: RetrievalConfig @@ -15,11 +14,12 @@ type Props = { export const getIcon = (type: RETRIEVE_METHOD) => { return ({ - [RETRIEVE_METHOD.semantic]: Semantic, - [RETRIEVE_METHOD.fullText]: FileSearch02, - [RETRIEVE_METHOD.hybrid]: PatternRecognition, - [RETRIEVE_METHOD.invertedIndex]: HighPriority, - })[type] || FileSearch02 + [RETRIEVE_METHOD.semantic]: retrievalIcon.vector, + [RETRIEVE_METHOD.fullText]: retrievalIcon.fullText, + [RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid, + [RETRIEVE_METHOD.invertedIndex]: retrievalIcon.vector, + [RETRIEVE_METHOD.keywordSearch]: retrievalIcon.vector, + })[type] || retrievalIcon.vector } const EconomicalRetrievalMethodConfig: FC = ({ @@ -28,11 +28,11 @@ const EconomicalRetrievalMethodConfig: FC = ({ }) => { const { t } = useTranslation() const type = value.search_method - const Icon = getIcon(type) + const icon = return (
} + icon={icon} title={t(`dataset.retrieval.${type}.title`)} description={t(`dataset.retrieval.${type}.description`)} noRadio diff --git a/web/app/components/datasets/common/retrieval-param-config/index.tsx b/web/app/components/datasets/common/retrieval-param-config/index.tsx index 9d48d56a8d..5136ac1659 100644 --- a/web/app/components/datasets/common/retrieval-param-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-param-config/index.tsx @@ -3,6 +3,9 @@ import type { FC } from 'react' import React, { useCallback } from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' +import ProgressIndicator from '../../create/assets/progress-indicator.svg' +import Reranking from '../../create/assets/rerank.svg' import cn from '@/utils/classnames' import TopKItem from '@/app/components/base/param-item/top-k-item' import ScoreThresholdItem from '@/app/components/base/param-item/score-threshold-item' @@ -20,6 +23,7 @@ import { } from '@/models/datasets' import WeightedScore from '@/app/components/app/configuration/dataset-config/params-config/weighted-score' import Toast from '@/app/components/base/toast' +import RadioCard from '@/app/components/base/radio-card' type Props = { type: RETRIEVE_METHOD @@ -116,7 +120,7 @@ const RetrievalParamConfig: FC = ({
{!isEconomical && !isHybridSearch && (
-
+
{canToggleRerankModalEnable && (
= ({
)}
- {t('common.modelProvider.rerankModel.key')} + {t('common.modelProvider.rerankModel.key')} {t('common.modelProvider.rerankModel.tip')}
@@ -163,7 +167,7 @@ const RetrievalParamConfig: FC = ({ )} { !isHybridSearch && ( -
+
= ({ { isHybridSearch && ( <> -
+
{ rerankingModeOptions.map(option => ( -
handleChangeRerankMode(option.value)} - > -
{option.label}
- {option.tips}
} - triggerClassName='ml-0.5 w-3.5 h-3.5' - /> -
+ isChosen={value.reranking_mode === option.value} + onChosen={() => handleChangeRerankMode(option.value)} + icon={} + title={option.label} + description={option.tips} + className='flex-1' + /> )) }
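The two retrieval-method config components above move from `RadioCard` to the step-two `OptionCard`, driving selection through `isActive`/`onSwitched` and rendering `RetrievalParamConfig` as the card body. A hedged sketch of how a single method wires into that API: the `Image` markup, literal strings, and the props passed to `RetrievalParamConfig` stand in for JSX and `t()` calls that are not fully visible in the hunk.

```tsx
import Image from 'next/image'
import { OptionCard } from '../../create/step-two/option-card'
import { retrievalIcon } from '../../create/icons'
import RetrievalParamConfig from '../retrieval-param-config'
import { RETRIEVE_METHOD } from '@/types/app'
import type { RetrievalConfig } from '@/types/app'

type Props = {
  value: RetrievalConfig
  onChange: (value: RetrievalConfig) => void
}

// Illustrative single-option wrapper; the real component maps over all supported methods.
const SemanticSearchOption = ({ value, onChange }: Props) => {
  // Literal strings stand in for the t('dataset.retrieval.semantic_search.*') calls used in the hunk.
  return (
    <OptionCard
      icon={<Image className='h-4 w-4' src={retrievalIcon.vector} alt='' />}
      title='Vector Search'
      description='Embed the query and match it against chunk embeddings'
      isActive={value.search_method === RETRIEVE_METHOD.semantic}
      onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.semantic })}
      activeHeaderClassName='bg-dataset-option-card-purple-gradient'
    >
      <RetrievalParamConfig type={RETRIEVE_METHOD.semantic} value={value} onChange={onChange} />
    </OptionCard>
  )
}

export default SemanticSearchOption
```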
diff --git a/web/app/components/datasets/create/assets/family-mod.svg b/web/app/components/datasets/create/assets/family-mod.svg new file mode 100644 index 0000000000..b1c4e6f566 --- /dev/null +++ b/web/app/components/datasets/create/assets/family-mod.svg @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/file-list-3-fill.svg b/web/app/components/datasets/create/assets/file-list-3-fill.svg new file mode 100644 index 0000000000..a4e6c4da97 --- /dev/null +++ b/web/app/components/datasets/create/assets/file-list-3-fill.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/app/components/datasets/create/assets/gold.svg b/web/app/components/datasets/create/assets/gold.svg new file mode 100644 index 0000000000..b48ac0eae5 --- /dev/null +++ b/web/app/components/datasets/create/assets/gold.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/note-mod.svg b/web/app/components/datasets/create/assets/note-mod.svg new file mode 100644 index 0000000000..b9e81f6bd5 --- /dev/null +++ b/web/app/components/datasets/create/assets/note-mod.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/app/components/datasets/create/assets/option-card-effect-blue.svg b/web/app/components/datasets/create/assets/option-card-effect-blue.svg new file mode 100644 index 0000000000..00a8afad8b --- /dev/null +++ b/web/app/components/datasets/create/assets/option-card-effect-blue.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/assets/option-card-effect-orange.svg b/web/app/components/datasets/create/assets/option-card-effect-orange.svg new file mode 100644 index 0000000000..d833764f0c --- /dev/null +++ b/web/app/components/datasets/create/assets/option-card-effect-orange.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/assets/option-card-effect-purple.svg b/web/app/components/datasets/create/assets/option-card-effect-purple.svg new file mode 100644 index 0000000000..a7857f8e57 --- /dev/null +++ b/web/app/components/datasets/create/assets/option-card-effect-purple.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/assets/pattern-recognition-mod.svg b/web/app/components/datasets/create/assets/pattern-recognition-mod.svg new file mode 100644 index 0000000000..1083e888ed --- /dev/null +++ b/web/app/components/datasets/create/assets/pattern-recognition-mod.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/piggy-bank-mod.svg b/web/app/components/datasets/create/assets/piggy-bank-mod.svg new file mode 100644 index 0000000000..b1120ad9a9 --- /dev/null +++ b/web/app/components/datasets/create/assets/piggy-bank-mod.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/progress-indicator.svg b/web/app/components/datasets/create/assets/progress-indicator.svg new file mode 100644 index 0000000000..3c99713636 --- /dev/null +++ b/web/app/components/datasets/create/assets/progress-indicator.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/web/app/components/datasets/create/assets/rerank.svg b/web/app/components/datasets/create/assets/rerank.svg new file mode 100644 index 0000000000..409b52e6e2 --- /dev/null +++ b/web/app/components/datasets/create/assets/rerank.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git 
a/web/app/components/datasets/create/assets/research-mod.svg b/web/app/components/datasets/create/assets/research-mod.svg new file mode 100644 index 0000000000..1f0bb34233 --- /dev/null +++ b/web/app/components/datasets/create/assets/research-mod.svg @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/selection-mod.svg b/web/app/components/datasets/create/assets/selection-mod.svg new file mode 100644 index 0000000000..2d0dd3b5f7 --- /dev/null +++ b/web/app/components/datasets/create/assets/selection-mod.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/setting-gear-mod.svg b/web/app/components/datasets/create/assets/setting-gear-mod.svg new file mode 100644 index 0000000000..c782caade8 --- /dev/null +++ b/web/app/components/datasets/create/assets/setting-gear-mod.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/embedding-process/index.module.css b/web/app/components/datasets/create/embedding-process/index.module.css index 1ebb006b54..f2ab4d85a2 100644 --- a/web/app/components/datasets/create/embedding-process/index.module.css +++ b/web/app/components/datasets/create/embedding-process/index.module.css @@ -14,24 +14,7 @@ border-radius: 6px; overflow: hidden; } -.sourceItem.error { - background: #FEE4E2; -} -.sourceItem.success { - background: #D1FADF; -} -.progressbar { - position: absolute; - top: 0; - left: 0; - height: 100%; - background-color: #B2CCFF; -} -.sourceItem .info { - display: flex; - align-items: center; - z-index: 1; -} + .sourceItem .info .name { font-weight: 500; font-size: 12px; @@ -55,13 +38,6 @@ color: #05603A; } - -.cost { - @apply flex justify-between items-center text-xs text-gray-700; -} -.embeddingStatus { - @apply flex items-center justify-between text-gray-900 font-medium text-sm mr-2; -} .commonIcon { @apply w-3 h-3 mr-1 inline-block align-middle; } @@ -81,35 +57,33 @@ @apply text-xs font-medium; } -.fileIcon { - @apply w-4 h-4 mr-1 bg-center bg-no-repeat; +.unknownFileIcon { background-image: url(../assets/unknown.svg); - background-size: 16px; } -.fileIcon.csv { +.csv { background-image: url(../assets/csv.svg); } -.fileIcon.docx { +.docx { background-image: url(../assets/docx.svg); } -.fileIcon.xlsx, -.fileIcon.xls { +.xlsx, +.xls { background-image: url(../assets/xlsx.svg); } -.fileIcon.pdf { +.pdf { background-image: url(../assets/pdf.svg); } -.fileIcon.html, -.fileIcon.htm { +.html, +.htm { background-image: url(../assets/html.svg); } -.fileIcon.md, -.fileIcon.markdown { +.md, +.markdown { background-image: url(../assets/md.svg); } -.fileIcon.txt { +.txt { background-image: url(../assets/txt.svg); } -.fileIcon.json { +.json { background-image: url(../assets/json.svg); } diff --git a/web/app/components/datasets/create/embedding-process/index.tsx b/web/app/components/datasets/create/embedding-process/index.tsx index 7786582085..201333ffce 100644 --- a/web/app/components/datasets/create/embedding-process/index.tsx +++ b/web/app/components/datasets/create/embedding-process/index.tsx @@ -6,32 +6,44 @@ import { useTranslation } from 'react-i18next' import { omit } from 'lodash-es' import { ArrowRightIcon } from '@heroicons/react/24/solid' import { + RiCheckboxCircleFill, RiErrorWarningFill, + RiLoader2Fill, + RiTerminalBoxLine, } from '@remixicon/react' -import s from './index.module.css' +import Image from 'next/image' +import { indexMethodIcon, retrievalIcon } from 
'../icons' +import { IndexingType } from '../step-two' +import DocumentFileIcon from '../../common/document-file-icon' import cn from '@/utils/classnames' import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata' import Button from '@/app/components/base/button' import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets' import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets' -import { DataSourceType } from '@/models/datasets' +import { DataSourceType, ProcessMode } from '@/models/datasets' import NotionIcon from '@/app/components/base/notion-icon' import PriorityLabel from '@/app/components/billing/priority-label' import { Plan } from '@/app/components/billing/type' import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general' import UpgradeBtn from '@/app/components/billing/upgrade-btn' import { useProviderContext } from '@/context/provider-context' -import Tooltip from '@/app/components/base/tooltip' import { sleep } from '@/utils' +import { RETRIEVE_METHOD } from '@/types/app' +import Tooltip from '@/app/components/base/tooltip' type Props = { datasetId: string batchId: string documents?: FullDocumentDetail[] indexingType?: string + retrievalMethod?: string } -const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => { +const RuleDetail: FC<{ + sourceData?: ProcessRuleResponse + indexingType?: string + retrievalMethod?: string +}> = ({ sourceData, indexingType, retrievalMethod }) => { const { t } = useTranslation() const segmentationRuleMap = { @@ -51,29 +63,47 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => return t('datasetCreation.stepTwo.removeStopwords') } + const isNumber = (value: unknown) => { + return typeof value === 'number' + } + const getValue = useCallback((field: string) => { let value: string | number | undefined = '-' + const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens) + ? sourceData.rules.segmentation.max_tokens + : value + const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens) + ? sourceData.rules.subchunk_segmentation.max_tokens + : value switch (field) { case 'mode': - value = sourceData?.mode === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string) + value = !sourceData?.mode + ? value + : sourceData.mode === ProcessMode.general + ? (t('datasetDocuments.embedding.custom') as string) + : `${t('datasetDocuments.embedding.hierarchical')} · ${sourceData?.rules?.parent_mode === 'paragraph' + ? t('dataset.parentMode.paragraph') + : t('dataset.parentMode.fullDoc')}` break case 'segmentLength': - value = sourceData?.rules?.segmentation?.max_tokens + value = !sourceData?.mode + ? value + : sourceData.mode === ProcessMode.general + ? maxTokens + : `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}` break default: - value = sourceData?.mode === 'automatic' - ? (t('datasetDocuments.embedding.automatic') as string) - // eslint-disable-next-line array-callback-return - : sourceData?.rules?.pre_processing_rules?.map((rule) => { - if (rule.enabled) - return getRuleName(rule.id) - }).filter(Boolean).join(';') + value = !sourceData?.mode + ? 
value + : sourceData?.rules?.pre_processing_rules?.filter(rule => + rule.enabled).map(rule => getRuleName(rule.id)).join(',') break } return value + // eslint-disable-next-line react-hooks/exhaustive-deps }, [sourceData]) - return
+ return
{Object.keys(segmentationRuleMap).map((field) => { return = ({ sourceData }) => displayedValue={String(getValue(field))} /> })} + + } + /> + + } + />
} -const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], indexingType }) => { +const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => { const { t } = useTranslation() const { enableBilling, plan } = useProviderContext() @@ -127,6 +190,7 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index } useEffect(() => { + setIsStopQuery(false) startQueryStatus() return () => { stopQueryStatus() @@ -146,6 +210,9 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index const navToDocumentList = () => { router.push(`/datasets/${datasetId}/documents`) } + const navToApiDocs = () => { + router.push('/datasets?category=api') + } const isEmbedding = useMemo(() => { return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || '')) @@ -177,13 +244,17 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index return doc?.data_source_info.notion_page_icon } - const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') + const isSourceEmbedding = (detail: IndexingStatusResponse) => + ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') return ( <> -
-
- {isEmbedding && t('datasetDocuments.embedding.processing')} +
+
+ {isEmbedding &&
+ + {t('datasetDocuments.embedding.processing')} +
} {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
@@ -200,69 +271,80 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index
) } -
+
{indexingStatusBatchDetail.map(indexingStatusDetail => (
{isSourceEmbedding(indexingStatusDetail) && ( -
+
)} -
+
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && ( -
+ //
+ )} {getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && ( )} -
{getSourceName(indexingStatusDetail.id)}
- { - enableBilling && ( - - ) - } -
-
+
+
+ {getSourceName(indexingStatusDetail.id)} +
+ { + enableBilling && ( + + ) + } +
{isSourceEmbedding(indexingStatusDetail) && ( -
{`${getSourcePercent(indexingStatusDetail)}%`}
+
{`${getSourcePercent(indexingStatusDetail)}%`}
)} - {indexingStatusDetail.indexing_status === 'error' && indexingStatusDetail.error && ( + {indexingStatusDetail.indexing_status === 'error' && ( - {indexingStatusDetail.error} -
- )} + popupClassName='px-4 py-[14px] max-w-60 text-sm leading-4 text-text-secondary border-[0.5px] border-components-panel-border rounded-xl' + offset={4} + popupContent={indexingStatusDetail.error} > -
- Error - -
+ + + )} - {indexingStatusDetail.indexing_status === 'error' && !indexingStatusDetail.error && ( -
- Error -
- )} {indexingStatusDetail.indexing_status === 'completed' && ( -
100%
+ )}
))}
- -
+
+ +
+
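The embedding-process changes above keep polling the batch indexing status (note the added `setIsStopQuery(false)` before `startQueryStatus`) and now surface the index method and retrieval method in `RuleDetail`. A rough sketch of the polling loop under stated assumptions: the exact `fetchIndexingStatusBatch` signature, response shape, and poll interval are guesses, and the real component tracks more state than this.

```tsx
import { useCallback, useEffect, useRef, useState } from 'react'
import { fetchIndexingStatusBatch } from '@/service/datasets'
import type { IndexingStatusResponse } from '@/models/datasets'
import { sleep } from '@/utils'

// Illustrative polling hook; not the component's actual implementation.
function useIndexingStatusPolling(datasetId: string, batchId: string) {
  const [details, setDetails] = useState<IndexingStatusResponse[]>([])
  const stopped = useRef(false)

  const poll = useCallback(async () => {
    while (!stopped.current) {
      const res = await fetchIndexingStatusBatch({ datasetId, batchId }) // signature assumed
      setDetails(res.data)
      const finished = res.data.every(d =>
        ['completed', 'error', 'paused'].includes(d.indexing_status))
      if (finished)
        break
      await sleep(2500) // interval assumed
    }
  }, [datasetId, batchId])

  useEffect(() => {
    // Mirrors the added setIsStopQuery(false): reset the stop flag before starting a new round.
    stopped.current = false
    poll()
    return () => {
      stopped.current = true
    }
  }, [poll])

  return details
}

export default useIndexingStatusPolling
```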
diff --git a/web/app/components/datasets/create/file-preview/index.module.css b/web/app/components/datasets/create/file-preview/index.module.css index d87522e6d0..929002e1e2 100644 --- a/web/app/components/datasets/create/file-preview/index.module.css +++ b/web/app/components/datasets/create/file-preview/index.module.css @@ -1,6 +1,6 @@ .filePreview { @apply flex flex-col border-l border-gray-200 shrink-0; - width: 528px; + width: 100%; background-color: #fcfcfd; } @@ -48,5 +48,6 @@ } .fileContent { white-space: pre-line; + word-break: break-all; } \ No newline at end of file diff --git a/web/app/components/datasets/create/file-preview/index.tsx b/web/app/components/datasets/create/file-preview/index.tsx index e20af64386..cb1f1d6908 100644 --- a/web/app/components/datasets/create/file-preview/index.tsx +++ b/web/app/components/datasets/create/file-preview/index.tsx @@ -44,7 +44,7 @@ const FilePreview = ({ }, [file]) return ( -
+
{t('datasetCreation.stepOne.filePreview')} @@ -59,7 +59,7 @@ const FilePreview = ({
{loading &&
} {!loading && ( -
{previewContent}
+
{previewContent}
)}
diff --git a/web/app/components/datasets/create/file-uploader/index.module.css b/web/app/components/datasets/create/file-uploader/index.module.css index bf5b7dcaf5..7d29f2ef9c 100644 --- a/web/app/components/datasets/create/file-uploader/index.module.css +++ b/web/app/components/datasets/create/file-uploader/index.module.css @@ -1,68 +1,3 @@ -.fileUploader { - @apply mb-6; -} - -.fileUploader .title { - @apply mb-2; - font-weight: 500; - font-size: 16px; - line-height: 24px; - color: #344054; -} - -.fileUploader .tip { - font-weight: 400; - font-size: 12px; - line-height: 18px; - color: #667085; -} - -.uploader { - @apply relative box-border flex justify-center items-center mb-2 p-3; - flex-direction: column; - max-width: 640px; - min-height: 80px; - background: #F9FAFB; - border: 1px dashed #EAECF0; - border-radius: 12px; - font-weight: 400; - font-size: 14px; - line-height: 20px; - color: #667085; -} - -.uploader.dragging { - background: #F5F8FF; - border: 1px dashed #B2CCFF; -} - -.uploader .draggingCover { - position: absolute; - top: 0; - left: 0; - width: 100%; - height: 100%; -} - -.uploader .uploadIcon { - content: ''; - display: block; - margin-right: 8px; - width: 24px; - height: 24px; - background: center no-repeat url(../assets/upload-cloud-01.svg); - background-size: contain; -} - -.uploader .browse { - @apply pl-1 cursor-pointer; - color: #155eef; -} - -.fileList { - @apply space-y-2; -} - .file { @apply box-border relative flex items-center justify-between; padding: 8px 12px 8px 8px; @@ -193,4 +128,4 @@ .file:hover .actionWrapper .remove { display: block; -} \ No newline at end of file +} diff --git a/web/app/components/datasets/create/file-uploader/index.tsx b/web/app/components/datasets/create/file-uploader/index.tsx index adb4bed0d1..e42a24cfef 100644 --- a/web/app/components/datasets/create/file-uploader/index.tsx +++ b/web/app/components/datasets/create/file-uploader/index.tsx @@ -3,10 +3,12 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import useSWR from 'swr' -import s from './index.module.css' +import { RiDeleteBinLine, RiUploadCloud2Line } from '@remixicon/react' +import DocumentFileIcon from '../../common/document-file-icon' import cn from '@/utils/classnames' import type { CustomFile as File, FileItem } from '@/models/datasets' import { ToastContext } from '@/app/components/base/toast' +import SimplePieChart from '@/app/components/base/simple-pie-chart' import { upload } from '@/service/base' import { fetchFileUploadConfig } from '@/service/common' @@ -14,6 +16,8 @@ import { fetchSupportFileTypes } from '@/service/datasets' import I18n from '@/context/i18n' import { LanguagesSupported } from '@/i18n/language' import { IS_CE_EDITION } from '@/config' +import { useAppContext } from '@/context/app-context' +import { Theme } from '@/types/app' const FILES_NUMBER_LIMIT = 20 @@ -222,6 +226,9 @@ const FileUploader = ({ initialUpload(files.filter(isValid)) }, [isValid, initialUpload]) + const { theme } = useAppContext() + const chartColor = useMemo(() => theme === Theme.dark ? '#5289ff' : '#296dff', [theme]) + useEffect(() => { dropRef.current?.addEventListener('dragenter', handleDragEnter) dropRef.current?.addEventListener('dragover', handleDragOver) @@ -236,12 +243,12 @@ const FileUploader = ({ }, [handleDrop]) return ( -
+
{!hideUpload && ( )} -
{t('datasetCreation.stepOne.uploader.title')}
- {!hideUpload && ( +
{t('datasetCreation.stepOne.uploader.title')}
+ + {!hideUpload && ( +
+
+ -
-
- {t('datasetCreation.stepOne.uploader.button')} - + {supportTypes.length > 0 && ( + + )}
-
{t('datasetCreation.stepOne.uploader.tip', { +
{t('datasetCreation.stepOne.uploader.tip', { size: fileUploadConfig.file_size_limit, supportTypes: supportTypesShowNames, })}
- {dragging &&
} + {dragging &&
}
)} -
+
+ {fileList.map((fileItem, index) => (
fileItem.file?.id && onPreview(fileItem.file)} className={cn( - s.file, - fileItem.progress < 100 && s.uploading, + 'flex items-center h-12 max-w-[640px] bg-components-panel-on-panel-item-bg text-xs leading-3 text-text-tertiary border border-components-panel-border rounded-lg shadow-xs', + // 'border-state-destructive-border bg-state-destructive-hover', )} > - {fileItem.progress < 100 && ( -
- )} -
-
-
{fileItem.file.name}
-
{getFileSize(fileItem.file.size)}
+
+
-
+
+
+
{fileItem.file.name}
+
+
+ {getFileType(fileItem.file)} + · + {getFileSize(fileItem.file.size)} + {/* · + 10k characters */} +
+
+
+ {/* + + */} {(fileItem.progress < 100 && fileItem.progress >= 0) && ( -
{`${fileItem.progress}%`}
- )} - {fileItem.progress === 100 && ( -
{ - e.stopPropagation() - removeFile(fileItem.fileID) - }} /> + //
{`${fileItem.progress}%`}
+ )} + { + e.stopPropagation() + removeFile(fileItem.fileID) + }}> + +
))} diff --git a/web/app/components/datasets/create/icons.ts b/web/app/components/datasets/create/icons.ts new file mode 100644 index 0000000000..80c4b6c944 --- /dev/null +++ b/web/app/components/datasets/create/icons.ts @@ -0,0 +1,16 @@ +import GoldIcon from './assets/gold.svg' +import Piggybank from './assets/piggy-bank-mod.svg' +import Selection from './assets/selection-mod.svg' +import Research from './assets/research-mod.svg' +import PatternRecognition from './assets/pattern-recognition-mod.svg' + +export const indexMethodIcon = { + high_quality: GoldIcon, + economical: Piggybank, +} + +export const retrievalIcon = { + vector: Selection, + fullText: Research, + hybrid: PatternRecognition, +} diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx index 98098445c7..9556b9fad5 100644 --- a/web/app/components/datasets/create/index.tsx +++ b/web/app/components/datasets/create/index.tsx @@ -3,10 +3,10 @@ import React, { useCallback, useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import AppUnavailable from '../../base/app-unavailable' import { ModelTypeEnum } from '../../header/account-setting/model-provider-page/declarations' -import StepsNavBar from './steps-nav-bar' import StepOne from './step-one' import StepTwo from './step-two' import StepThree from './step-three' +import { Topbar } from './top-bar' import { DataSourceType } from '@/models/datasets' import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets' import { fetchDataSource } from '@/service/common' @@ -36,6 +36,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { const [dataSourceType, setDataSourceType] = useState(DataSourceType.FILE) const [step, setStep] = useState(1) const [indexingTypeCache, setIndexTypeCache] = useState('') + const [retrievalMethodCache, setRetrievalMethodCache] = useState('') const [fileList, setFiles] = useState([]) const [result, setResult] = useState() const [hasError, setHasError] = useState(false) @@ -80,6 +81,9 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { const updateResultCache = (res?: createDocumentResponse) => { setResult(res) } + const updateRetrievalMethodCache = (method: string) => { + setRetrievalMethodCache(method) + } const nextStep = useCallback(() => { setStep(step + 1) @@ -118,33 +122,29 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { return return ( -
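Editor's note on the new icons.ts module above: it maps the index methods (high_quality, economical) and retrieval methods (vector, fullText, hybrid) to their SVG assets in one place. A hedged sketch of how such a map can be consumed with next/image; the component names and sizes below are illustrative and not taken from the patch.

import Image from 'next/image'
import { indexMethodIcon, retrievalIcon } from './icons'

// Illustrative helpers: resolve the icon for the configured method and render it at a fixed size.
export const IndexMethodIcon = ({ method }: { method: keyof typeof indexMethodIcon }) => (
  <Image src={indexMethodIcon[method]} alt={method} width={16} height={16} />
)

export const RetrievalMethodIcon = ({ method }: { method: keyof typeof retrievalIcon }) => (
  <Image src={retrievalIcon[method]} alt={method} width={16} height={16} />
)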
-
- -
-
-
- setShowAccountSettingModal({ payload: 'data-source' })} - datasetId={datasetId} - dataSourceType={dataSourceType} - dataSourceTypeDisable={!!detail?.data_source_type} - changeType={setDataSourceType} - files={fileList} - updateFile={updateFile} - updateFileList={updateFileList} - notionPages={notionPages} - updateNotionPages={updateNotionPages} - onStepChange={nextStep} - websitePages={websitePages} - updateWebsitePages={setWebsitePages} - onWebsiteCrawlProviderChange={setWebsiteCrawlProvider} - onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId} - crawlOptions={crawlOptions} - onCrawlOptionsChange={setCrawlOptions} - /> -
+
+ +
+ {step === 1 && setShowAccountSettingModal({ payload: 'data-source' })} + datasetId={datasetId} + dataSourceType={dataSourceType} + dataSourceTypeDisable={!!detail?.data_source_type} + changeType={setDataSourceType} + files={fileList} + updateFile={updateFile} + updateFileList={updateFileList} + notionPages={notionPages} + updateNotionPages={updateNotionPages} + onStepChange={nextStep} + websitePages={websitePages} + updateWebsitePages={setWebsitePages} + onWebsiteCrawlProviderChange={setWebsiteCrawlProvider} + onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId} + crawlOptions={crawlOptions} + onCrawlOptionsChange={setCrawlOptions} + />} {(step === 2 && (!datasetId || (datasetId && !!detail))) && setShowAccountSettingModal({ payload: 'provider' })} @@ -158,6 +158,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { websiteCrawlJobId={websiteCrawlJobId} onStepChange={changeStep} updateIndexingTypeCache={updateIndexingTypeCache} + updateRetrievalMethodCache={updateRetrievalMethodCache} updateResultCache={updateResultCache} crawlOptions={crawlOptions} />} @@ -165,6 +166,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { datasetId={datasetId} datasetName={detail?.name} indexingType={detail?.indexing_technique || indexingTypeCache} + retrievalMethod={detail?.retrieval_model_dict?.search_method || retrievalMethodCache} creationCache={result} />}
diff --git a/web/app/components/datasets/create/notion-page-preview/index.tsx b/web/app/components/datasets/create/notion-page-preview/index.tsx index 8225e56f04..f658f213e8 100644 --- a/web/app/components/datasets/create/notion-page-preview/index.tsx +++ b/web/app/components/datasets/create/notion-page-preview/index.tsx @@ -44,7 +44,7 @@ const NotionPagePreview = ({ }, [currentPage]) return ( -
+
{t('datasetCreation.stepOne.pagePreview')} @@ -64,7 +64,7 @@ const NotionPagePreview = ({
{loading &&
} {!loading && ( -
{previewContent}
+
{previewContent}
)}
diff --git a/web/app/components/datasets/create/step-one/index.module.css b/web/app/components/datasets/create/step-one/index.module.css index 4e3cf67cd6..bb8dd9b895 100644 --- a/web/app/components/datasets/create/step-one/index.module.css +++ b/web/app/components/datasets/create/step-one/index.module.css @@ -2,21 +2,19 @@ position: sticky; top: 0; left: 0; - padding: 42px 64px 12px; + padding: 42px 64px 12px 0; font-weight: 600; font-size: 18px; line-height: 28px; - color: #101828; } .form { position: relative; padding: 12px 64px; - background-color: #fff; } .dataSourceItem { - @apply box-border relative shrink-0 flex items-center mr-3 p-3 h-14 bg-white rounded-xl cursor-pointer; + @apply box-border relative grow shrink-0 flex items-center p-3 h-14 bg-white rounded-xl cursor-pointer; border: 0.5px solid #EAECF0; box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); font-weight: 500; @@ -24,27 +22,32 @@ line-height: 20px; color: #101828; } + .dataSourceItem:hover { background-color: #f5f8ff; border: 0.5px solid #B2CCFF; box-shadow: 0px 12px 16px -4px rgba(16, 24, 40, 0.08), 0px 4px 6px -2px rgba(16, 24, 40, 0.03); } + .dataSourceItem.active { background-color: #f5f8ff; border: 1.5px solid #528BFF; box-shadow: 0px 1px 3px rgba(16, 24, 40, 0.1), 0px 1px 2px rgba(16, 24, 40, 0.06); } + .dataSourceItem.disabled { background-color: #f9fafb; border: 0.5px solid #EAECF0; box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); cursor: default; } + .dataSourceItem.disabled:hover { background-color: #f9fafb; border: 0.5px solid #EAECF0; box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); } + .comingTag { @apply flex justify-center items-center bg-white; position: absolute; @@ -59,6 +62,7 @@ line-height: 18px; color: #444CE7; } + .datasetIcon { @apply flex mr-2 w-8 h-8 rounded-lg bg-center bg-no-repeat; background-color: #F5FAFF; @@ -66,15 +70,18 @@ background-size: 16px; border: 0.5px solid #D1E9FF; } + .dataSourceItem:active .datasetIcon, .dataSourceItem:hover .datasetIcon { background-color: #F5F8FF; border: 0.5px solid #E0EAFF; } + .datasetIcon.notion { background-image: url(../assets/notion.svg); background-size: 20px; } + .datasetIcon.web { background-image: url(../assets/web.svg); } @@ -90,29 +97,12 @@ background-color: #eaecf0; } -.OtherCreationOption { - @apply flex items-center cursor-pointer; - font-weight: 500; - font-size: 13px; - line-height: 18px; - color: #155EEF; -} -.OtherCreationOption::before { - content: ''; - display: block; - margin-right: 4px; - width: 16px; - height: 16px; - background: center no-repeat url(../assets/folder-plus.svg); - background-size: contain; -} - .notionConnectionTip { display: flex; flex-direction: column; align-items: flex-start; padding: 24px; - max-width: 640px; + width: 640px; background: #F9FAFB; border-radius: 16px; } @@ -138,6 +128,7 @@ line-height: 24px; color: #374151; } + .notionConnectionTip .title::after { content: ''; position: absolute; @@ -148,6 +139,7 @@ background: center no-repeat url(../assets/Icon-3-dots.svg); background-size: contain; } + .notionConnectionTip .tip { margin-bottom: 20px; font-style: normal; @@ -155,4 +147,4 @@ font-size: 13px; line-height: 18px; color: #6B7280; -} +} \ No newline at end of file diff --git a/web/app/components/datasets/create/step-one/index.tsx b/web/app/components/datasets/create/step-one/index.tsx index 643932e9ae..2cca003b39 100644 --- a/web/app/components/datasets/create/step-one/index.tsx +++ b/web/app/components/datasets/create/step-one/index.tsx @@ -1,6 +1,7 @@ 'use client' import React, { useMemo, useState } 
from 'react' import { useTranslation } from 'react-i18next' +import { RiArrowRightLine, RiFolder6Line } from '@remixicon/react' import FilePreview from '../file-preview' import FileUploader from '../file-uploader' import NotionPagePreview from '../notion-page-preview' @@ -17,6 +18,7 @@ import { NotionPageSelector } from '@/app/components/base/notion-page-selector' import { useDatasetDetailContext } from '@/context/dataset-detail' import { useProviderContext } from '@/context/provider-context' import VectorSpaceFull from '@/app/components/billing/vector-space-full' +import classNames from '@/utils/classnames' type IStepOneProps = { datasetId?: string @@ -120,143 +122,174 @@ const StepOne = ({ return true if (isShowVectorSpaceFull) return true - return false - }, [files]) + }, [files, isShowVectorSpaceFull]) + return (
-
- { - shouldShowDataSourceTypeList && ( -
{t('datasetCreation.steps.one')}
- ) - } -
- { - shouldShowDataSourceTypeList && ( -
-
{ - if (dataSourceTypeDisable) - return - changeType(DataSourceType.FILE) - hideFilePreview() - hideNotionPagePreview() - }} - > - - {t('datasetCreation.stepOne.dataSourceType.file')} -
-
{ - if (dataSourceTypeDisable) - return - changeType(DataSourceType.NOTION) - hideFilePreview() - hideNotionPagePreview() - }} - > - - {t('datasetCreation.stepOne.dataSourceType.notion')} -
-
changeType(DataSourceType.WEB)} - > - - {t('datasetCreation.stepOne.dataSourceType.web')} -
-
- ) - } - {dataSourceType === DataSourceType.FILE && ( - <> - - {isShowVectorSpaceFull && ( -
- -
- )} - - - )} - {dataSourceType === DataSourceType.NOTION && ( - <> - {!hasConnection && } - {hasConnection && ( - <> -
- page.page_id)} - onSelect={updateNotionPages} - onPreview={updateCurrentPage} - /> +
+
+
+ { + shouldShowDataSourceTypeList && ( +
{t('datasetCreation.steps.one')}
+ ) + } + { + shouldShowDataSourceTypeList && ( +
+
{ + if (dataSourceTypeDisable) + return + changeType(DataSourceType.FILE) + hideFilePreview() + hideNotionPagePreview() + }} + > + + {t('datasetCreation.stepOne.dataSourceType.file')} +
+
{ + if (dataSourceTypeDisable) + return + changeType(DataSourceType.NOTION) + hideFilePreview() + hideNotionPagePreview() + }} + > + + {t('datasetCreation.stepOne.dataSourceType.notion')} +
+
changeType(DataSourceType.WEB)} + > + + {t('datasetCreation.stepOne.dataSourceType.web')}
- {isShowVectorSpaceFull && ( -
- -
- )} - - - )} - - )} - {dataSourceType === DataSourceType.WEB && ( - <> -
- -
- {isShowVectorSpaceFull && ( -
-
- )} - - - )} - {!datasetId && ( - <> -
-
{t('datasetCreation.stepOne.emptyDatasetCreation')}
- - )} + ) + } + {dataSourceType === DataSourceType.FILE && ( + <> + + {isShowVectorSpaceFull && ( +
+ +
+ )} +
+ {/* */} + +
+ + )} + {dataSourceType === DataSourceType.NOTION && ( + <> + {!hasConnection && } + {hasConnection && ( + <> +
+ page.page_id)} + onSelect={updateNotionPages} + onPreview={updateCurrentPage} + /> +
+ {isShowVectorSpaceFull && ( +
+ +
+ )} +
+ {/* */} + +
+ + )} + + )} + {dataSourceType === DataSourceType.WEB && ( + <> +
+ +
+ {isShowVectorSpaceFull && ( +
+ +
+ )} +
+ {/* */} + +
+ + )} + {!datasetId && ( + <> +
+ + + {t('datasetCreation.stepOne.emptyDatasetCreation')} + + + )} +
+
-
- {currentFile && } - {currentNotionPage && } - {currentWebsite && } +
+ {currentFile && } + {currentNotionPage && } + {currentWebsite && } +
) } diff --git a/web/app/components/datasets/create/step-three/index.tsx b/web/app/components/datasets/create/step-three/index.tsx index 804a196ed5..8d979616d1 100644 --- a/web/app/components/datasets/create/step-three/index.tsx +++ b/web/app/components/datasets/create/step-three/index.tsx @@ -1,45 +1,51 @@ 'use client' import React from 'react' import { useTranslation } from 'react-i18next' +import { RiBookOpenLine } from '@remixicon/react' import EmbeddingProcess from '../embedding-process' -import s from './index.module.css' -import cn from '@/utils/classnames' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import type { FullDocumentDetail, createDocumentResponse } from '@/models/datasets' +import AppIcon from '@/app/components/base/app-icon' type StepThreeProps = { datasetId?: string datasetName?: string indexingType?: string + retrievalMethod?: string creationCache?: createDocumentResponse } -const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: StepThreeProps) => { +const StepThree = ({ datasetId, datasetName, indexingType, creationCache, retrievalMethod }: StepThreeProps) => { const { t } = useTranslation() const media = useBreakpoints() const isMobile = media === MediaType.mobile return ( -
-
-
+
+
+
{!datasetId && ( <> -
-
{t('datasetCreation.stepThree.creationTitle')}
-
{t('datasetCreation.stepThree.creationContent')}
-
{t('datasetCreation.stepThree.label')}
-
{datasetName || creationCache?.dataset?.name}
+
+
{t('datasetCreation.stepThree.creationTitle')}
+
{t('datasetCreation.stepThree.creationContent')}
+
+ +
+
{t('datasetCreation.stepThree.label')}
+
{datasetName || creationCache?.dataset?.name}
+
+
-
+
)} {datasetId && ( -
-
{t('datasetCreation.stepThree.additionTitle')}
-
{`${t('datasetCreation.stepThree.additionP1')} ${datasetName || creationCache?.dataset?.name} ${t('datasetCreation.stepThree.additionP2')}`}
+
+
{t('datasetCreation.stepThree.additionTitle')}
+
{`${t('datasetCreation.stepThree.additionP1')} ${datasetName || creationCache?.dataset?.name} ${t('datasetCreation.stepThree.additionP2')}`}
)}
- {!isMobile &&
-
- -
{t('datasetCreation.stepThree.sideTipTitle')}
-
{t('datasetCreation.stepThree.sideTipContent')}
+ {!isMobile && ( +
+
+
+ +
+
{t('datasetCreation.stepThree.sideTipTitle')}
+
{t('datasetCreation.stepThree.sideTipContent')}
+
-
} + )}
) } diff --git a/web/app/components/datasets/create/step-two/index.module.css b/web/app/components/datasets/create/step-two/index.module.css index f89d6d67ea..178cbeba85 100644 --- a/web/app/components/datasets/create/step-two/index.module.css +++ b/web/app/components/datasets/create/step-two/index.module.css @@ -13,18 +13,6 @@ z-index: 10; } -.form { - @apply px-16 pb-8; -} - -.form .label { - @apply pt-6 pb-2 flex items-center; - font-weight: 500; - font-size: 16px; - line-height: 24px; - color: #344054; -} - .segmentationItem { min-height: 68px; } @@ -75,6 +63,10 @@ cursor: pointer; } +.disabled { + cursor: not-allowed !important; +} + .indexItem.disabled:hover { background-color: #fcfcfd; border-color: #f2f4f7; @@ -87,8 +79,7 @@ } .radioItem { - @apply relative mb-2 rounded-xl border border-gray-100 cursor-pointer; - background-color: #fcfcfd; + @apply relative mb-2 rounded-xl border border-components-option-card-option-border cursor-pointer bg-components-option-card-option-bg; } .radioItem.segmentationItem.custom { @@ -146,7 +137,7 @@ } .typeIcon.economical { - background-image: url(../assets/piggy-bank-01.svg); + background-image: url(../assets/piggy-bank-mod.svg); } .radioItem .radio { @@ -247,7 +238,7 @@ } .ruleItem { - @apply flex items-center; + @apply flex items-center py-1.5; } .formFooter { @@ -394,19 +385,6 @@ max-width: 524px; } -.previewHeader { - position: sticky; - top: 0; - left: 0; - padding-top: 42px; - background-color: #fff; - font-weight: 600; - font-size: 18px; - line-height: 28px; - color: #101828; - z-index: 10; -} - /* * `fixed` must under `previewHeader` because of style override would not work */ @@ -432,4 +410,4 @@ font-size: 12px; line-height: 18px; } -} \ No newline at end of file +} diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index f915c68fef..0d7202967a 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -1,65 +1,80 @@ 'use client' -import React, { useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react' +import type { FC, PropsWithChildren } from 'react' +import React, { useCallback, useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' -import { useBoolean } from 'ahooks' -import { XMarkIcon } from '@heroicons/react/20/solid' -import { RocketLaunchIcon } from '@heroicons/react/24/outline' import { - RiCloseLine, + RiAlertFill, + RiArrowLeftLine, + RiSearchEyeLine, } from '@remixicon/react' import Link from 'next/link' -import { groupBy } from 'lodash-es' -import PreviewItem, { PreviewType } from './preview-item' -import LanguageSelect from './language-select' +import Image from 'next/image' +import { useHover } from 'ahooks' +import SettingCog from '../assets/setting-gear-mod.svg' +import OrangeEffect from '../assets/option-card-effect-orange.svg' +import FamilyMod from '../assets/family-mod.svg' +import Note from '../assets/note-mod.svg' +import FileList from '../assets/file-list-3-fill.svg' +import { indexMethodIcon } from '../icons' +import { PreviewContainer } from '../../preview/container' +import { ChunkContainer, QAPreview } from '../../chunk' +import { PreviewHeader } from '../../preview/header' +import { FormattedText } from '../../formatted-text/formatted' +import { PreviewSlice } from '../../formatted-text/flavours/preview-slice' +import PreviewDocumentPicker from 
'../../common/document-picker/preview-document-picker' import s from './index.module.css' import unescape from './unescape' import escape from './escape' +import { OptionCard } from './option-card' +import LanguageSelect from './language-select' +import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' import cn from '@/utils/classnames' -import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' -import { - createDocument, - createFirstDocument, - fetchFileIndexingEstimate as didFetchFileIndexingEstimate, - fetchDefaultProcessRule, -} from '@/service/datasets' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DocumentItem, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' + import Button from '@/app/components/base/button' -import Input from '@/app/components/base/input' -import Loading from '@/app/components/base/loading' import FloatRightContainer from '@/app/components/base/float-right-container' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' import { type RetrievalConfig } from '@/types/app' import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' import Toast from '@/app/components/base/toast' -import { formatNumber } from '@/utils/format' import type { NotionPage } from '@/models/common' import { DataSourceProvider } from '@/models/common' -import { DataSourceType, DocForm } from '@/models/datasets' -import NotionIcon from '@/app/components/base/notion-icon' -import Switch from '@/app/components/base/switch' -import { MessageChatSquare } from '@/app/components/base/icons/src/public/common' +import { ChunkingMode, DataSourceType, RerankingModeEnum } from '@/models/datasets' import { useDatasetDetailContext } from '@/context/dataset-detail' import I18n from '@/context/i18n' -import { IS_CE_EDITION } from '@/config' import { RETRIEVE_METHOD } from '@/types/app' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' -import Tooltip from '@/app/components/base/tooltip' import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { LanguagesSupported } from '@/i18n/language' import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' -import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel' +import Checkbox from '@/app/components/base/checkbox' +import RadioCard from '@/app/components/base/radio-card' +import { IS_CE_EDITION } from '@/config' +import Divider from '@/app/components/base/divider' +import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument, useFetchDefaultProcessRule, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/knowledge/use-create-dataset' +import 
Badge from '@/app/components/base/badge' +import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton' +import Tooltip from '@/app/components/base/tooltip' +import CustomDialog from '@/app/components/base/dialog' +import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem' +import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback' + +const TextLabel: FC = (props) => { + return +} -type ValueOf = T[keyof T] type StepTwoProps = { isSetting?: boolean documentDetail?: FullDocumentDetail isAPIKeySet: boolean onSetting: () => void datasetId?: string - indexingType?: ValueOf + indexingType?: IndexingType + retrievalMethod?: string dataSourceType: DataSourceType files: CustomFile[] notionPages?: NotionPage[] @@ -69,21 +84,48 @@ type StepTwoProps = { websiteCrawlJobId?: string onStepChange?: (delta: number) => void updateIndexingTypeCache?: (type: string) => void + updateRetrievalMethodCache?: (method: string) => void updateResultCache?: (res: createDocumentResponse) => void onSave?: () => void onCancel?: () => void } -enum SegmentType { +export enum SegmentType { AUTO = 'automatic', CUSTOM = 'custom', } -enum IndexingType { +export enum IndexingType { QUALIFIED = 'high_quality', ECONOMICAL = 'economy', } const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' +const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500 +const DEFAULT_OVERLAP = 50 + +type ParentChildConfig = { + chunkForContext: ParentMode + parent: { + delimiter: string + maxLength: number + } + child: { + delimiter: string + maxLength: number + } +} + +const defaultParentChildConfig: ParentChildConfig = { + chunkForContext: 'paragraph', + parent: { + delimiter: '\\n\\n', + maxLength: 500, + }, + child: { + delimiter: '\\n', + maxLength: 200, + }, +} const StepTwo = ({ isSetting, @@ -104,6 +146,7 @@ const StepTwo = ({ updateResultCache, onSave, onCancel, + updateRetrievalMethodCache, }: StepTwoProps) => { const { t } = useTranslation() const { locale } = useContext(I18n) @@ -111,66 +154,166 @@ const StepTwo = ({ const isMobile = media === MediaType.mobile const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext() + + const isInUpload = Boolean(currentDataset) + const isUploadInEmptyDataset = isInUpload && !currentDataset?.doc_form + const isNotUploadInEmptyDataset = !isUploadInEmptyDataset + const isInInit = !isInUpload && !isSetting + const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type - const scrollRef = useRef(null) - const [scrolled, setScrolled] = useState(false) - const previewScrollRef = useRef(null) - const [previewScrolled, setPreviewScrolled] = useState(false) - const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO) + const [segmentationType, setSegmentationType] = useState(SegmentType.CUSTOM) const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) - const setSegmentIdentifier = useCallback((value: string) => { - doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER) + const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => { + doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? 
'' : DEFAULT_SEGMENT_IDENTIFIER)) }, []) - const [maxChunkLength, setMaxChunkLength] = useState(4000) // default chunk length + const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000) - const [overlap, setOverlap] = useState(50) + const [overlap, setOverlap] = useState(DEFAULT_OVERLAP) const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() const hasSetIndexType = !!indexingType - const [indexType, setIndexType] = useState>( + const [indexType, setIndexType] = useState( (indexingType || isAPIKeySet) ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL, ) - const [isLanguageSelectDisabled, setIsLanguageSelectDisabled] = useState(false) - const [docForm, setDocForm] = useState( - (datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT, + + const [previewFile, setPreviewFile] = useState( + (datasetId && documentDetail) + ? documentDetail.file + : files[0], ) + const [previewNotionPage, setPreviewNotionPage] = useState( + (datasetId && documentDetail) + ? documentDetail.notion_page + : notionPages[0], + ) + + const [previewWebsitePage, setPreviewWebsitePage] = useState( + (datasetId && documentDetail) + ? documentDetail.website_page + : websitePages[0], + ) + + // QA Related + const [isLanguageSelectDisabled, _setIsLanguageSelectDisabled] = useState(false) + const [isQAConfirmDialogOpen, setIsQAConfirmDialogOpen] = useState(false) + const [docForm, setDocForm] = useState( + (datasetId && documentDetail) ? documentDetail.doc_form as ChunkingMode : ChunkingMode.text, + ) + const handleChangeDocform = (value: ChunkingMode) => { + if (value === ChunkingMode.qa && indexType === IndexingType.ECONOMICAL) { + setIsQAConfirmDialogOpen(true) + return + } + if (value === ChunkingMode.parentChild && indexType === IndexingType.ECONOMICAL) + setIndexType(IndexingType.QUALIFIED) + setDocForm(value) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + currentEstimateMutation.reset() + } + const [docLanguage, setDocLanguage] = useState( (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'), ) - const [QATipHide, setQATipHide] = useState(false) - const [previewSwitched, setPreviewSwitched] = useState(false) - const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean() - const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState(null) - const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState(null) - const fileIndexingEstimate = (() => { - return segmentationType === SegmentType.AUTO ? 
automaticFileIndexingEstimate : customFileIndexingEstimate - })() - const [isCreating, setIsCreating] = useState(false) + const [parentChildConfig, setParentChildConfig] = useState(defaultParentChildConfig) - const scrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setScrolled(true) + const getIndexing_technique = () => indexingType || indexType + const currentDocForm = currentDataset?.doc_form || docForm - else - setScrolled(false) + const getProcessRule = (): ProcessRule => { + if (currentDocForm === ChunkingMode.parentChild) { + return { + rules: { + pre_processing_rules: rules, + segmentation: { + separator: unescape( + parentChildConfig.parent.delimiter, + ), + max_tokens: parentChildConfig.parent.maxLength, + }, + parent_mode: parentChildConfig.chunkForContext, + subchunk_segmentation: { + separator: unescape(parentChildConfig.child.delimiter), + max_tokens: parentChildConfig.child.maxLength, + }, + }, + mode: 'hierarchical', + } as ProcessRule + } + return { + rules: { + pre_processing_rules: rules, + segmentation: { + separator: unescape(segmentIdentifier), + max_tokens: maxChunkLength, + chunk_overlap: overlap, + }, + }, // api will check this. It will be removed after api refactored. + mode: segmentationType, + } as ProcessRule } - const previewScrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setPreviewScrolled(true) + const fileIndexingEstimateQuery = useFetchFileIndexingEstimateForFile({ + docForm: currentDocForm, + docLanguage, + dataSourceType: DataSourceType.FILE, + files: previewFile + ? [files.find(file => file.name === previewFile.name)!] + : files, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId!, + }) + const notionIndexingEstimateQuery = useFetchFileIndexingEstimateForNotion({ + docForm: currentDocForm, + docLanguage, + dataSourceType: DataSourceType.NOTION, + notionPages: [previewNotionPage], + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) - else - setPreviewScrolled(false) - } - const getFileName = (name: string) => { - const arr = name.split('.') - return arr.slice(0, -1).join('.') - } + const websiteIndexingEstimateQuery = useFetchFileIndexingEstimateForWeb({ + docForm: currentDocForm, + docLanguage, + dataSourceType: DataSourceType.WEB, + websitePages: [previewWebsitePage], + crawlOptions, + websiteCrawlProvider, + websiteCrawlJobId, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) + + const currentEstimateMutation = dataSourceType === DataSourceType.FILE + ? fileIndexingEstimateQuery + : dataSourceType === DataSourceType.NOTION + ? notionIndexingEstimateQuery + : websiteIndexingEstimateQuery + + const fetchEstimate = useCallback(() => { + if (dataSourceType === DataSourceType.FILE) + fileIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.NOTION) + notionIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.WEB) + websiteIndexingEstimateQuery.mutate() + }, [dataSourceType, fileIndexingEstimateQuery, notionIndexingEstimateQuery, websiteIndexingEstimateQuery]) + + const estimate + = dataSourceType === DataSourceType.FILE + ? fileIndexingEstimateQuery.data + : dataSourceType === DataSourceType.NOTION + ? 
notionIndexingEstimateQuery.data + : websiteIndexingEstimateQuery.data const getRuleName = (key: string) => { if (key === 'remove_extra_spaces') @@ -198,128 +341,20 @@ const StepTwo = ({ if (defaultConfig) { setSegmentIdentifier(defaultConfig.segmentation.separator) setMaxChunkLength(defaultConfig.segmentation.max_tokens) - setOverlap(defaultConfig.segmentation.chunk_overlap) + setOverlap(defaultConfig.segmentation.chunk_overlap!) setRules(defaultConfig.pre_processing_rules) } + setParentChildConfig(defaultParentChildConfig) } - const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT, language?: string) => { - // eslint-disable-next-line @typescript-eslint/no-use-before-define - const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm, language)!) - if (segmentationType === SegmentType.CUSTOM) - setCustomFileIndexingEstimate(res) - else - setAutomaticFileIndexingEstimate(res) - } - - const confirmChangeCustomConfig = () => { - if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) { - Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) }) + const updatePreview = () => { + if (segmentationType === SegmentType.CUSTOM && maxChunkLength > 4000) { + Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) return } - setCustomFileIndexingEstimate(null) - setShowPreview() - fetchFileIndexingEstimate() - setPreviewSwitched(false) + fetchEstimate() } - const getIndexing_technique = () => indexingType || indexType - - const getProcessRule = () => { - const processRule: ProcessRule = { - rules: {} as any, // api will check this. It will be removed after api refactored. - mode: segmentationType, - } - if (segmentationType === SegmentType.CUSTOM) { - const ruleObj = { - pre_processing_rules: rules, - segmentation: { - separator: unescape(segmentIdentifier), - max_tokens: maxChunkLength, - chunk_overlap: overlap, - }, - } - processRule.rules = ruleObj - } - return processRule - } - - const getNotionInfo = () => { - const workspacesMap = groupBy(notionPages, 'workspace_id') - const workspaces = Object.keys(workspacesMap).map((workspaceId) => { - return { - workspaceId, - pages: workspacesMap[workspaceId], - } - }) - return workspaces.map((workspace) => { - return { - workspace_id: workspace.workspaceId, - pages: workspace.pages.map((page) => { - const { page_id, page_name, page_icon, type } = page - return { - page_id, - page_name, - page_icon, - type, - } - }), - } - }) as NotionInfo[] - } - - const getWebsiteInfo = () => { - return { - provider: websiteCrawlProvider, - job_id: websiteCrawlJobId, - urls: websitePages.map(page => page.source_url), - only_main_content: crawlOptions?.only_main_content, - } - } - - const getFileIndexingEstimateParams = (docForm: DocForm, language?: string): IndexingEstimateParams | undefined => { - if (dataSourceType === DataSourceType.FILE) { - return { - info_list: { - data_source_type: dataSourceType, - file_info_list: { - file_ids: files.map(file => file.id) as string[], - }, - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.NOTION) { - return { - info_list: { - data_source_type: dataSourceType, - notion_info_list: getNotionInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - 
doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.WEB) { - return { - info_list: { - data_source_type: dataSourceType, - website_info_list: getWebsiteInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - } const { modelList: rerankModelList, defaultModel: rerankDefaultModel, @@ -351,13 +386,14 @@ const StepTwo = ({ if (isSetting) { params = { original_document_id: documentDetail?.id, - doc_form: docForm, + doc_form: currentDocForm, doc_language: docLanguage, process_rule: getProcessRule(), // eslint-disable-next-line @typescript-eslint/no-use-before-define retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page. embedding_model: embeddingModel.model, // Readonly embedding_model_provider: embeddingModel.provider, // Readonly + indexing_technique: getIndexing_technique(), } as CreateDocumentReq } else { // create @@ -377,8 +413,12 @@ const StepTwo = ({ } const postRetrievalConfig = ensureRerankModelSelected({ rerankDefaultModel: rerankDefaultModel!, - // eslint-disable-next-line @typescript-eslint/no-use-before-define - retrievalConfig, + retrievalConfig: { + // eslint-disable-next-line @typescript-eslint/no-use-before-define + ...retrievalConfig, + // eslint-disable-next-line @typescript-eslint/no-use-before-define + reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel, + }, indexMethod: indexMethod as string, }) params = { @@ -390,7 +430,7 @@ const StepTwo = ({ }, indexing_technique: getIndexing_technique(), process_rule: getProcessRule(), - doc_form: docForm, + doc_form: currentDocForm, doc_language: docLanguage, retrieval_model: postRetrievalConfig, @@ -403,29 +443,36 @@ const StepTwo = ({ } } if (dataSourceType === DataSourceType.NOTION) - params.data_source.info_list.notion_info_list = getNotionInfo() + params.data_source.info_list.notion_info_list = getNotionInfo(notionPages) - if (dataSourceType === DataSourceType.WEB) - params.data_source.info_list.website_info_list = getWebsiteInfo() + if (dataSourceType === DataSourceType.WEB) { + params.data_source.info_list.website_info_list = getWebsiteInfo({ + websiteCrawlProvider, + websiteCrawlJobId, + websitePages, + }) + } } return params } - const getRules = async () => { - try { - const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) - const separator = res.rules.segmentation.separator + const fetchDefaultProcessRuleMutation = useFetchDefaultProcessRule({ + onSuccess(data) { + const separator = data.rules.segmentation.separator setSegmentIdentifier(separator) - setMaxChunkLength(res.rules.segmentation.max_tokens) - setLimitMaxChunkLength(res.limits.indexing_max_segmentation_tokens_length) - setOverlap(res.rules.segmentation.chunk_overlap) - setRules(res.rules.pre_processing_rules) - setDefaultConfig(res.rules) - } - catch (err) { - console.log(err) - } - } + setMaxChunkLength(data.rules.segmentation.max_tokens) + setOverlap(data.rules.segmentation.chunk_overlap!) 
+ setRules(data.rules.pre_processing_rules) + setDefaultConfig(data.rules) + setLimitMaxChunkLength(data.limits.indexing_max_segmentation_tokens_length) + }, + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) const getRulesFromDetail = () => { if (documentDetail) { @@ -435,7 +482,7 @@ const StepTwo = ({ const overlap = rules.segmentation.chunk_overlap setSegmentIdentifier(separator) setMaxChunkLength(max) - setOverlap(overlap) + setOverlap(overlap!) setRules(rules.pre_processing_rules) setDefaultConfig(rules) } @@ -443,119 +490,81 @@ const StepTwo = ({ const getDefaultMode = () => { if (documentDetail) + // @ts-expect-error fix after api refactored setSegmentationType(documentDetail.dataset_process_rule.mode) } - const createHandle = async () => { - if (isCreating) - return - setIsCreating(true) - try { - let res - const params = getCreationParams() - if (!params) - return false - - setIsCreating(true) - if (!datasetId) { - res = await createFirstDocument({ - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - } - else { - res = await createDocument({ - datasetId, - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - } - if (mutateDatasetRes) - mutateDatasetRes() - onStepChange && onStepChange(+1) - isSetting && onSave && onSave() - } - catch (err) { + const createFirstDocumentMutation = useCreateFirstDocument({ + onError(error) { Toast.notify({ type: 'error', - message: `${err}`, + message: `${error}`, + }) + }, + }) + const createDocumentMutation = useCreateDocument(datasetId!, { + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) + + const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending + + const createHandle = async () => { + const params = getCreationParams() + if (!params) + return false + + if (!datasetId) { + await createFirstDocumentMutation.mutateAsync( + params, + { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) + }, + }, + ) + } + else { + await createDocumentMutation.mutateAsync(params, { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + }, }) } - finally { - setIsCreating(false) - } - } - - const handleSwitch = (state: boolean) => { - if (state) - setDocForm(DocForm.QA) - else - setDocForm(DocForm.TEXT) - } - - const previewSwitch = async (language?: string) => { - setPreviewSwitched(true) - setIsLanguageSelectDisabled(true) - if (segmentationType === SegmentType.AUTO) - setAutomaticFileIndexingEstimate(null) - else - setCustomFileIndexingEstimate(null) - try { - await fetchFileIndexingEstimate(DocForm.QA, language) - } - finally { - setIsLanguageSelectDisabled(false) - } - } - - const handleSelect = (language: string) => { - setDocLanguage(language) - // Switch language, re-cutter - if (docForm === DocForm.QA && previewSwitched) - previewSwitch(language) + if (mutateDatasetRes) + mutateDatasetRes() + onStepChange && onStepChange(+1) + isSetting && onSave && onSave() } const 
changeToEconomicalType = () => { - if (!hasSetIndexType) { + if (docForm !== ChunkingMode.text) + return + + if (!hasSetIndexType) setIndexType(IndexingType.ECONOMICAL) - setDocForm(DocForm.TEXT) - } } useEffect(() => { // fetch rules if (!isSetting) { - getRules() + fetchDefaultProcessRuleMutation.mutate('/datasets/process-rule') } else { getRulesFromDetail() getDefaultMode() } + // eslint-disable-next-line react-hooks/exhaustive-deps }, []) - useEffect(() => { - scrollRef.current?.addEventListener('scroll', scrollHandle) - return () => { - scrollRef.current?.removeEventListener('scroll', scrollHandle) - } - }, []) - - useLayoutEffect(() => { - if (showPreview) { - previewScrollRef.current?.addEventListener('scroll', previewScrollHandle) - return () => { - previewScrollRef.current?.removeEventListener('scroll', previewScrollHandle) - } - } - }, [showPreview]) - - useEffect(() => { - if (indexingType === IndexingType.ECONOMICAL && docForm === DocForm.QA) - setDocForm(DocForm.TEXT) - }, [indexingType, docForm]) - useEffect(() => { // get indexing type by props if (indexingType) @@ -565,20 +574,6 @@ const StepTwo = ({ setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL) }, [isAPIKeySet, indexingType, datasetId]) - useEffect(() => { - if (segmentationType === SegmentType.AUTO) { - setAutomaticFileIndexingEstimate(null) - !isMobile && setShowPreview() - fetchFileIndexingEstimate() - setPreviewSwitched(false) - } - else { - hidePreview() - setCustomFileIndexingEstimate(null) - setPreviewSwitched(false) - } - }, [segmentationType, indexType]) - const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || { search_method: RETRIEVE_METHOD.semantic, reranking_enable: false, @@ -591,433 +586,589 @@ const StepTwo = ({ score_threshold: 0.5, } as RetrievalConfig) + const economyDomRef = useRef(null) + const isHoveringEconomy = useHover(economyDomRef) + return (
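Editor's note on getProcessRule() above: it now produces two payload shapes for the estimate and create-document calls — the flat general rule (mode 'automatic' or 'custom') and a 'hierarchical' rule that adds parent_mode and subchunk_segmentation, with user-entered delimiters unescaped before being sent. A sketch of the two shapes using the field names and defaults from this hunk; the concrete values are illustrative.

// General (text / Q&A) chunking; mode mirrors segmentationType.
const generalProcessRule = {
  mode: 'custom',
  rules: {
    pre_processing_rules: [{ id: 'remove_extra_spaces', enabled: true }],
    segmentation: { separator: '\n\n', max_tokens: 500, chunk_overlap: 50 },
  },
}

// Parent-child chunking; parent_mode is 'paragraph' or 'full-doc'.
const hierarchicalProcessRule = {
  mode: 'hierarchical',
  rules: {
    pre_processing_rules: [{ id: 'remove_extra_spaces', enabled: true }],
    segmentation: { separator: '\n\n', max_tokens: 500 },
    parent_mode: 'paragraph',
    subchunk_segmentation: { separator: '\n', max_tokens: 200 },
  },
}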
-
-
- {t('datasetCreation.steps.two')} - {(isMobile || !showPreview) && ( - - )} -
-
-
{t('datasetCreation.stepTwo.segmentation')}
-
-
setSegmentationType(SegmentType.AUTO)} - > - - -
-
{t('datasetCreation.stepTwo.auto')}
-
{t('datasetCreation.stepTwo.autoDescription')}
-
-
-
setSegmentationType(SegmentType.CUSTOM)} - > - - -
-
{t('datasetCreation.stepTwo.custom')}
-
{t('datasetCreation.stepTwo.customDescription')}
-
- {segmentationType === SegmentType.CUSTOM && ( -
-
-
-
- {t('datasetCreation.stepTwo.separator')} - - {t('datasetCreation.stepTwo.separatorTip')} -
- } - /> -
- setSegmentIdentifier(e.target.value)} - /> -
-
-
-
-
{t('datasetCreation.stepTwo.maxLength')}
- setMaxChunkLength(parseInt(e.target.value.replace(/^0+/, ''), 10))} - /> -
-
-
-
-
- {t('datasetCreation.stepTwo.overlap')} - - {t('datasetCreation.stepTwo.overlapTip')} -
- } - /> -
- setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} - /> -
-
-
-
-
{t('datasetCreation.stepTwo.rules')}
- {rules.map(rule => ( -
- ruleChangeHandle(rule.id)} className="w-4 h-4 rounded border-gray-300 text-blue-700 focus:ring-blue-700" /> - -
- ))} -
-
-
- - -
-
- )} -
-
-
{t('datasetCreation.stepTwo.indexMode')}
-
-
- {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && ( -
{ - if (isAPIKeySet) - setIndexType(IndexingType.QUALIFIED) - }} - > - - {!hasSetIndexType && } -
-
- {t('datasetCreation.stepTwo.qualified')} - {!hasSetIndexType && {t('datasetCreation.stepTwo.recommend')}} -
-
{t('datasetCreation.stepTwo.qualifiedTip')}
-
- {!isAPIKeySet && ( -
- {t('datasetCreation.stepTwo.warning')}  - {t('datasetCreation.stepTwo.click')} -
- )} -
- )} - - {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && ( -
- - {!hasSetIndexType && } -
-
{t('datasetCreation.stepTwo.economical')}
-
{t('datasetCreation.stepTwo.economicalTip')}
-
-
- )} -
- {hasSetIndexType && indexType === IndexingType.ECONOMICAL && ( -
- {t('datasetCreation.stepTwo.indexSettingTip')} - {t('datasetCreation.stepTwo.datasetSettingLink')} -
- )} - {IS_CE_EDITION && indexType === IndexingType.QUALIFIED && ( -
-
-
- -
-
-
{t('datasetCreation.stepTwo.QATitle')}
-
- {t('datasetCreation.stepTwo.QALanguage')} - -
-
-
- -
-
- {docForm === DocForm.QA && !QATipHide && ( -
- {t('datasetCreation.stepTwo.QATip')} - setQATipHide(true)} /> -
- )} -
- )} - {/* Embedding model */} - {indexType === IndexingType.QUALIFIED && ( -
-
{t('datasetSettings.form.embeddingModel')}
- { - setEmbeddingModel(model) - }} +
+
{t('datasetCreation.stepTwo.segmentation')}
+ {((isInUpload && [ChunkingMode.text, ChunkingMode.qa].includes(currentDataset!.doc_form)) + || isUploadInEmptyDataset + || isInInit) + && } + activeHeaderClassName='bg-dataset-option-card-blue-gradient' + description={t('datasetCreation.stepTwo.generalTip')} + isActive={ + [ChunkingMode.text, ChunkingMode.qa].includes(currentDocForm) + } + onSwitched={() => + handleChangeDocform(ChunkingMode.text) + } + actions={ + <> + + + + } + noHighlight={isInUpload && isNotUploadInEmptyDataset} + > +
+
+ setSegmentIdentifier(e.target.value, true)} + /> + + - {!!datasetId && ( -
- {t('datasetCreation.stepTwo.indexSettingTip')} - {t('datasetCreation.stepTwo.datasetSettingLink')} -
- )}
- )} - {/* Retrieval Method Config */} -
- {!datasetId - ? ( -
-
{t('datasetSettings.form.retrievalSetting.title')}
-
- {t('datasetSettings.form.retrievalSetting.learnMore')} - {t('datasetSettings.form.retrievalSetting.longDescription')} +
+
+
+ {t('datasetCreation.stepTwo.rules')} +
+ +
+
+ {rules.map(rule => ( +
{ + ruleChangeHandle(rule.id) + }}> + +
-
- ) - : ( -
-
{t('datasetSettings.form.retrievalSetting.title')}
-
- )} - -
- { - getIndexing_technique() === IndexingType.QUALIFIED - ? ( - + +
+
{ + if (currentDataset?.doc_form) + return + if (docForm === ChunkingMode.qa) + handleChangeDocform(ChunkingMode.text) + else + handleChangeDocform(ChunkingMode.qa) + }}> + + +
+ - ) - : ( - - ) - } + +
+ {currentDocForm === ChunkingMode.qa && ( +
+ + + {t('datasetCreation.stepTwo.QATip')} + +
+ )} + } +
- -
-
- {dataSourceType === DataSourceType.FILE && ( - <> -
{t('datasetCreation.stepTwo.fileSource')}
-
- - {getFileName(files[0].name || '')} - {files.length > 1 && ( - - {t('datasetCreation.stepTwo.other')} - {files.length - 1} - {t('datasetCreation.stepTwo.fileUnit')} - - )} -
- - )} - {dataSourceType === DataSourceType.NOTION && ( - <> -
{t('datasetCreation.stepTwo.notionSource')}
-
- } + { + ( + (isInUpload && currentDataset!.doc_form === ChunkingMode.parentChild) + || isUploadInEmptyDataset + || isInInit + ) + && } + effectImg={OrangeEffect.src} + activeHeaderClassName='bg-dataset-option-card-orange-gradient' + description={t('datasetCreation.stepTwo.parentChildTip')} + isActive={currentDocForm === ChunkingMode.parentChild} + onSwitched={() => handleChangeDocform(ChunkingMode.parentChild)} + actions={ + <> + + + + } + noHighlight={isInUpload && isNotUploadInEmptyDataset} + > +
+
+
+
+ {t('datasetCreation.stepTwo.parentChunkForContext')} +
+ +
+ } + title={t('datasetCreation.stepTwo.paragraph')} + description={t('datasetCreation.stepTwo.paragraphTip')} + isChosen={parentChildConfig.chunkForContext === 'paragraph'} + onChosen={() => setParentChildConfig( + { + ...parentChildConfig, + chunkForContext: 'paragraph', + }, + )} + chosenConfig={ +
+ setParentChildConfig({ + ...parentChildConfig, + parent: { + ...parentChildConfig.parent, + delimiter: e.target.value ? escape(e.target.value) : '', + }, + })} + /> + setParentChildConfig({ + ...parentChildConfig, + parent: { + ...parentChildConfig.parent, + maxLength: value, + }, + })} /> - {notionPages[0]?.page_name} - {notionPages.length > 1 && ( - - {t('datasetCreation.stepTwo.other')} - {notionPages.length - 1} - {t('datasetCreation.stepTwo.notionUnit')} - - )}
- - )} - {dataSourceType === DataSourceType.WEB && ( - <> -
{t('datasetCreation.stepTwo.websiteSource')}
-
- - {websitePages[0].source_url} - {websitePages.length > 1 && ( - - {t('datasetCreation.stepTwo.other')} - {websitePages.length - 1} - {t('datasetCreation.stepTwo.webpageUnit')} - - )} -
- - )} -
-
-
-
{t('datasetCreation.stepTwo.estimateSegment')}
-
- { - fileIndexingEstimate - ? ( -
{formatNumber(fileIndexingEstimate.total_segments)}
- ) - : ( -
{t('datasetCreation.stepTwo.calculating')}
- ) } + /> + } + title={t('datasetCreation.stepTwo.fullDoc')} + description={t('datasetCreation.stepTwo.fullDocTip')} + onChosen={() => setParentChildConfig( + { + ...parentChildConfig, + chunkForContext: 'full-doc', + }, + )} + isChosen={parentChildConfig.chunkForContext === 'full-doc'} + /> +
+ +
+
+
+ {t('datasetCreation.stepTwo.childChunkForRetrieval')} +
+ +
+
+ setParentChildConfig({ + ...parentChildConfig, + child: { + ...parentChildConfig.child, + delimiter: e.target.value ? escape(e.target.value) : '', + }, + })} + /> + setParentChildConfig({ + ...parentChildConfig, + child: { + ...parentChildConfig.child, + maxLength: value, + }, + })} + /> +
+
+
+
+
+ {t('datasetCreation.stepTwo.rules')} +
+ +
+
+ {rules.map(rule => ( +
{ + ruleChangeHandle(rule.id) + }}> + + +
+ ))}
- {!isSetting - ? ( -
- -
- + } + +
{t('datasetCreation.stepTwo.indexMode')}
+
+ {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && ( + + {t('datasetCreation.stepTwo.qualified')} + + {t('datasetCreation.stepTwo.recommend')} + + + {!hasSetIndexType && } + +
} + description={t('datasetCreation.stepTwo.qualifiedTip')} + icon={} + isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED} + disabled={!isAPIKeySet || hasSetIndexType} + onSwitched={() => { + if (isAPIKeySet) + setIndexType(IndexingType.QUALIFIED) + }} + /> + )} + + {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && ( + <> + setIsQAConfirmDialogOpen(false)} className='w-[432px]'> +
+

+ {t('datasetCreation.stepTwo.qaSwitchHighQualityTipTitle')} +

+

+ {t('datasetCreation.stepTwo.qaSwitchHighQualityTipContent')} +

+
+
+ +
- ) - : ( -
- - -
- )} -
+ + + + } + isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL} + disabled={!isAPIKeySet || hasSetIndexType || docForm !== ChunkingMode.text} + ref={economyDomRef} + onSwitched={() => { + if (isAPIKeySet && docForm === ChunkingMode.text) + setIndexType(IndexingType.ECONOMICAL) + }} + /> + + +
+ { + docForm === ChunkingMode.qa + ? t('datasetCreation.stepTwo.notAvailableForQA') + : t('datasetCreation.stepTwo.notAvailableForParentChild') + } +
+
+
+ )}
-
- - {showPreview &&
-
-
-
-
{t('datasetCreation.stepTwo.previewTitle')}
- {docForm === DocForm.QA && !previewSwitched && ( - - )} -
-
- -
-
- {docForm === DocForm.QA && !previewSwitched && ( -
- {t('datasetCreation.stepTwo.previewSwitchTipStart')} - {t('datasetCreation.stepTwo.previewSwitchTipEnd')} -
- )} -
-
- {previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && ( - <> - {fileIndexingEstimate?.qa_preview.map((item, index) => ( - - ))} - - )} - {(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && ( - <> - {fileIndexingEstimate?.preview.map((item, index) => ( - - ))} - - )} - {previewSwitched && docForm === DocForm.QA && !fileIndexingEstimate?.qa_preview && ( -
- -
- )} - {!previewSwitched && !fileIndexingEstimate?.preview && ( -
- -
- )} -
-
} - {!showPreview && ( -
-
- -
{t('datasetCreation.stepTwo.sideTipTitle')}
-
-

{t('datasetCreation.stepTwo.sideTipP1')}

-

{t('datasetCreation.stepTwo.sideTipP2')}

-

{t('datasetCreation.stepTwo.sideTipP3')}

-

{t('datasetCreation.stepTwo.sideTipP4')}

-
+ {!hasSetIndexType && indexType === IndexingType.QUALIFIED && ( +
+
+
+
+ {t('datasetCreation.stepTwo.highQualityTip')}
)} + {hasSetIndexType && indexType === IndexingType.ECONOMICAL && ( +
+ {t('datasetCreation.stepTwo.indexSettingTip')} + {t('datasetCreation.stepTwo.datasetSettingLink')} +
+ )} + {/* Embedding model */} + {indexType === IndexingType.QUALIFIED && ( +
+
{t('datasetSettings.form.embeddingModel')}
+ { + setEmbeddingModel(model) + }} + /> + {!!datasetId && ( +
+ {t('datasetCreation.stepTwo.indexSettingTip')} + {t('datasetCreation.stepTwo.datasetSettingLink')} +
+ )} +
+ )} + + {/* Retrieval Method Config */} +
+ {!datasetId + ? ( +
+
{t('datasetSettings.form.retrievalSetting.title')}
+
+ {t('datasetSettings.form.retrievalSetting.learnMore')} + {t('datasetSettings.form.retrievalSetting.longDescription')} +
+
+ ) + : ( +
+
{t('datasetSettings.form.retrievalSetting.title')}
+
+ )} + +
+ { + getIndexing_technique() === IndexingType.QUALIFIED + ? ( + + ) + : ( + + ) + } +
+
+ + {!isSetting + ? ( +
+ + +
+ ) + : ( +
+ + +
+ )} +
+ { }} footer={null}> + +
+ {dataSourceType === DataSourceType.FILE + && >} + onChange={(selected) => { + currentEstimateMutation.reset() + setPreviewFile(selected) + currentEstimateMutation.mutate() + }} + // when it is from setting, it just has one file + value={isSetting ? (files[0]! as Required) : previewFile} + /> + } + {dataSourceType === DataSourceType.NOTION + && ({ + id: page.page_id, + name: page.page_name, + extension: 'md', + })) + } + onChange={(selected) => { + currentEstimateMutation.reset() + const selectedPage = notionPages.find(page => page.page_id === selected.id) + setPreviewNotionPage(selectedPage!) + currentEstimateMutation.mutate() + }} + value={{ + id: previewNotionPage?.page_id || '', + name: previewNotionPage?.page_name || '', + extension: 'md', + }} + /> + } + {dataSourceType === DataSourceType.WEB + && ({ + id: page.source_url, + name: page.title, + extension: 'md', + })) + } + onChange={(selected) => { + currentEstimateMutation.reset() + const selectedPage = websitePages.find(page => page.source_url === selected.id) + setPreviewWebsitePage(selectedPage!) + currentEstimateMutation.mutate() + }} + value={ + { + id: previewWebsitePage?.source_url || '', + name: previewWebsitePage?.title || '', + extension: 'md', + } + } + /> + } + { + currentDocForm !== ChunkingMode.qa + && + } +
+ } + className={cn('flex shrink-0 w-1/2 p-4 pr-0 relative h-full', isMobile && 'w-full max-w-[524px]')} + mainClassName='space-y-6' + > + {currentDocForm === ChunkingMode.qa && estimate?.qa_preview && ( + estimate?.qa_preview.map((item, index) => ( + + + + )) + )} + {currentDocForm === ChunkingMode.text && estimate?.preview && ( + estimate?.preview.map((item, index) => ( + + {item.content} + + )) + )} + {currentDocForm === ChunkingMode.parentChild && currentEstimateMutation.data?.preview && ( + estimate?.preview?.map((item, index) => { + const indexForLabel = index + 1 + return ( + + + {item.child_chunks.map((child, index) => { + const indexForLabel = index + 1 + return ( + + ) + })} + + + ) + }) + )} + {currentEstimateMutation.isIdle && ( +
+
+ +

+ {t('datasetCreation.stepTwo.previewChunkTip')} +

+
+
+ )} + {currentEstimateMutation.isPending && ( +
+ {Array.from({ length: 10 }, (_, i) => ( + + + + + + + + + + + ))} +
+ )} +
) diff --git a/web/app/components/datasets/create/step-two/inputs.tsx b/web/app/components/datasets/create/step-two/inputs.tsx new file mode 100644 index 0000000000..4231f6242d --- /dev/null +++ b/web/app/components/datasets/create/step-two/inputs.tsx @@ -0,0 +1,77 @@ +import type { FC, PropsWithChildren, ReactNode } from 'react' +import { useTranslation } from 'react-i18next' +import type { InputProps } from '@/app/components/base/input' +import Input from '@/app/components/base/input' +import Tooltip from '@/app/components/base/tooltip' +import type { InputNumberProps } from '@/app/components/base/input-number' +import { InputNumber } from '@/app/components/base/input-number' + +const TextLabel: FC = (props) => { + return +} + +const FormField: FC> = (props) => { + return
+ {props.label} + {props.children} +
+} + +export const DelimiterInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.separator')} + + {props.tooltip || t('datasetCreation.stepTwo.separatorTip')} +
+ } + /> +
}> + + +} + +export const MaxLengthInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.maxLength')} +
}> + + +} + +export const OverlapInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.overlap')} + + {t('datasetCreation.stepTwo.overlapTip')} +
+ } + /> +
}> + + +} diff --git a/web/app/components/datasets/create/step-two/language-select/index.tsx b/web/app/components/datasets/create/step-two/language-select/index.tsx index 41f3e0abb5..9cbf1a40d1 100644 --- a/web/app/components/datasets/create/step-two/language-select/index.tsx +++ b/web/app/components/datasets/create/step-two/language-select/index.tsx @@ -1,7 +1,7 @@ 'use client' import type { FC } from 'react' import React from 'react' -import { RiArrowDownSLine } from '@remixicon/react' +import { RiArrowDownSLine, RiCheckLine } from '@remixicon/react' import cn from '@/utils/classnames' import Popover from '@/app/components/base/popover' import { languages } from '@/i18n/language' @@ -22,25 +22,40 @@ const LanguageSelect: FC = ({ manualClose trigger='click' disabled={disabled} + popupClassName='z-20' htmlContent={ -
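
A minimal usage sketch (illustrative only, not part of this patch) for the DelimiterInput, MaxLengthInput and OverlapInput fields added in step-two/inputs.tsx above. The import path via the '@/' alias and the assumption that the numeric inputs pass their value directly to onChange (as the step-two screen above appears to do) are editorial guesses, not confirmed by the patch.

import { useState } from 'react'
import { DelimiterInput, MaxLengthInput, OverlapInput } from '@/app/components/datasets/create/step-two/inputs'

const GeneralChunkSettings = () => {
  // The delimiter is stored escaped ('\\n\\n' for a blank line), mirroring the escape() handling in step two
  const [delimiter, setDelimiter] = useState('\\n\\n')
  const [maxLength, setMaxLength] = useState<number | undefined>(1024)
  const [overlap, setOverlap] = useState<number | undefined>(50)

  return (
    <div className='space-y-3'>
      <DelimiterInput value={delimiter} onChange={e => setDelimiter(e.target.value)} />
      <MaxLengthInput value={maxLength} onChange={value => setMaxLength(value)} />
      <OverlapInput value={overlap} onChange={value => setOverlap(value)} />
    </div>
  )
}

export default GeneralChunkSettings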
+
{languages.filter(language => language.supported).map(({ prompt_name }) => (
onSelect(prompt_name)}>{prompt_name} + className='w-full py-2 px-3 inline-flex items-center justify-between hover:bg-state-base-hover rounded-lg cursor-pointer' + onClick={() => onSelect(prompt_name)} + > + {prompt_name} + {(currentLanguage === prompt_name) && }
))}
} btnElement={ -
- {currentLanguage} - +
+ + {currentLanguage} + +
} - btnClassName={open => cn('!border-0 !px-0 !py-0 !bg-inherit !hover:bg-inherit', open ? 'text-blue-600' : 'text-gray-500')} - className='!w-[120px] h-fit !z-20 !translate-x-0 !left-[-16px]' + btnClassName={() => cn( + '!border-0 rounded-md !px-1.5 !py-1 !mx-1 !bg-components-button-tertiary-bg !hover:bg-components-button-tertiary-bg', + disabled ? 'bg-components-button-tertiary-bg-disabled' : '', + )} + className='!w-[140px] h-fit !z-20 !translate-x-0 !left-1' /> ) } diff --git a/web/app/components/datasets/create/step-two/option-card.tsx b/web/app/components/datasets/create/step-two/option-card.tsx new file mode 100644 index 0000000000..d0efdaabb1 --- /dev/null +++ b/web/app/components/datasets/create/step-two/option-card.tsx @@ -0,0 +1,98 @@ +import { type ComponentProps, type FC, type ReactNode, forwardRef } from 'react' +import Image from 'next/image' +import classNames from '@/utils/classnames' + +const TriangleArrow: FC> = props => ( + + + +) + +type OptionCardHeaderProps = { + icon: ReactNode + title: ReactNode + description: string + isActive?: boolean + activeClassName?: string + effectImg?: string +} + +export const OptionCardHeader: FC = (props) => { + const { icon, title, description, isActive, activeClassName, effectImg } = props + return
+
+ {isActive && effectImg && } +
+
+ {icon} +
+
+
+ +
+
{title}
+
{description}
+
+
+} + +type OptionCardProps = { + icon: ReactNode + className?: string + activeHeaderClassName?: string + title: ReactNode + description: string + isActive?: boolean + actions?: ReactNode + effectImg?: string + onSwitched?: () => void + noHighlight?: boolean + disabled?: boolean +} & Omit, 'title' | 'onClick'> + +export const OptionCard: FC = forwardRef((props, ref) => { + const { icon, className, title, description, isActive, children, actions, activeHeaderClassName, style, effectImg, onSwitched, noHighlight, disabled, ...rest } = props + return
{ + if (!isActive && !disabled) + onSwitched?.() + }} + {...rest} + ref={ref} + > + + {/** Body */} + {isActive && (children || actions) &&
+ {children} + {actions &&
+ {actions} +
+ } +
} +
+}) + +OptionCard.displayName = 'OptionCard' diff --git a/web/app/components/datasets/create/stepper/index.tsx b/web/app/components/datasets/create/stepper/index.tsx new file mode 100644 index 0000000000..317c1a76ee --- /dev/null +++ b/web/app/components/datasets/create/stepper/index.tsx @@ -0,0 +1,27 @@ +import { type FC, Fragment } from 'react' +import type { Step } from './step' +import { StepperStep } from './step' + +export type StepperProps = { + steps: Step[] + activeIndex: number +} + +export const Stepper: FC = (props) => { + const { steps, activeIndex } = props + return
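
A usage sketch (illustrative only, outside the patch) of the new OptionCard from option-card.tsx, toggling between two choices the way the indexing-mode picker above does. The import path and the placeholder icons, titles and class names are assumptions; the props used (icon, title, description, isActive, onSwitched, children) are taken from OptionCardProps as defined above.

import { useState } from 'react'
import { OptionCard } from '@/app/components/datasets/create/step-two/option-card'

const ModePicker = () => {
  const [mode, setMode] = useState<'a' | 'b'>('a')
  return (
    <div className='space-y-2'>
      <OptionCard
        icon={<span>A</span>}
        title='Option A'
        description='First choice'
        isActive={mode === 'a'}
        onSwitched={() => setMode('a')}
      >
        {/* The body only renders while the card is active */}
        <div>Extra settings for A</div>
      </OptionCard>
      <OptionCard
        icon={<span>B</span>}
        title='Option B'
        description='Second choice'
        isActive={mode === 'b'}
        onSwitched={() => setMode('b')}
      />
    </div>
  )
}

export default ModePicker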
+ {steps.map((step, index) => { + const isLast = index === steps.length - 1 + return ( + + + {!isLast &&
} + + ) + })} +
+} diff --git a/web/app/components/datasets/create/stepper/step.tsx b/web/app/components/datasets/create/stepper/step.tsx new file mode 100644 index 0000000000..c230de1a6e --- /dev/null +++ b/web/app/components/datasets/create/stepper/step.tsx @@ -0,0 +1,46 @@ +import type { FC } from 'react' +import classNames from '@/utils/classnames' + +export type Step = { + name: string +} + +export type StepperStepProps = Step & { + index: number + activeIndex: number +} + +export const StepperStep: FC = (props) => { + const { name, activeIndex, index } = props + const isActive = index === activeIndex + const isDisabled = activeIndex < index + const label = isActive ? `STEP ${index + 1}` : `${index + 1}` + return
+
+
+ {label} +
+
+
{name}
+
+} diff --git a/web/app/components/datasets/create/top-bar/index.tsx b/web/app/components/datasets/create/top-bar/index.tsx new file mode 100644 index 0000000000..20ba7158db --- /dev/null +++ b/web/app/components/datasets/create/top-bar/index.tsx @@ -0,0 +1,41 @@ +import type { FC } from 'react' +import { RiArrowLeftLine } from '@remixicon/react' +import Link from 'next/link' +import { useTranslation } from 'react-i18next' +import { Stepper, type StepperProps } from '../stepper' +import classNames from '@/utils/classnames' + +export type TopbarProps = Pick & { + className?: string +} + +const STEP_T_MAP: Record = { + 1: 'datasetCreation.steps.one', + 2: 'datasetCreation.steps.two', + 3: 'datasetCreation.steps.three', +} + +export const Topbar: FC = (props) => { + const { className, ...rest } = props + const { t } = useTranslation() + return
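
A usage sketch (illustrative only, outside the patch) for the Stepper added above: it only needs the step names and the active index, per the Step and StepperProps types defined in these new files. The import path and the literal step names are assumptions; in the patch the names come from i18n keys.

import { Stepper } from '@/app/components/datasets/create/stepper'

// activeIndex is zero-based: the active step shows the 'STEP n' label, and steps after it render as disabled
const CreationProgress = ({ current }: { current: number }) => (
  <Stepper
    steps={[
      { name: 'Choose data source' },
      { name: 'Text preprocessing and cleaning' },
      { name: 'Execute and finish' },
    ]}
    activeIndex={current}
  />
)

export default CreationProgress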
+ +
+ +
+

+ {t('datasetCreation.steps.header.creation')} +

+ +
+ ({ + name: t(STEP_T_MAP[i + 1]), + }))} + {...rest} + /> +
+
+} diff --git a/web/app/components/datasets/create/website/base/error-message.tsx b/web/app/components/datasets/create/website/base/error-message.tsx index aa337ec4bf..f061c4624e 100644 --- a/web/app/components/datasets/create/website/base/error-message.tsx +++ b/web/app/components/datasets/create/website/base/error-message.tsx @@ -18,7 +18,7 @@ const ErrorMessage: FC = ({ return (
- +
{title}
{errorMsg && ( diff --git a/web/app/components/datasets/create/website/jina-reader/index.tsx b/web/app/components/datasets/create/website/jina-reader/index.tsx index 51d77d7121..1c133f935c 100644 --- a/web/app/components/datasets/create/website/jina-reader/index.tsx +++ b/web/app/components/datasets/create/website/jina-reader/index.tsx @@ -94,7 +94,6 @@ const JinaReader: FC = ({ const waitForCrawlFinished = useCallback(async (jobId: string) => { try { const res = await checkJinaReaderTaskStatus(jobId) as any - console.log('res', res) if (res.status === 'completed') { return { isError: false, diff --git a/web/app/components/datasets/create/website/preview.tsx b/web/app/components/datasets/create/website/preview.tsx index 65abe83ed7..5180a83442 100644 --- a/web/app/components/datasets/create/website/preview.tsx +++ b/web/app/components/datasets/create/website/preview.tsx @@ -18,7 +18,7 @@ const WebsitePreview = ({ const { t } = useTranslation() return ( -
+
{t('datasetCreation.stepOne.pagePreview')} @@ -32,7 +32,7 @@ const WebsitePreview = ({
{payload.source_url}
-
{payload.markdown}
+
{payload.markdown}
) diff --git a/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx b/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx index 36216aa7c8..6602244a48 100644 --- a/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx +++ b/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx @@ -7,7 +7,7 @@ import { import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import { Download02 as DownloadIcon } from '@/app/components/base/icons/src/vender/solid/general' -import { DocForm } from '@/models/datasets' +import { ChunkingMode } from '@/models/datasets' import I18n from '@/context/i18n' import { LanguagesSupported } from '@/i18n/language' @@ -32,18 +32,18 @@ const CSV_TEMPLATE_CN = [ ['内容 2'], ] -const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => { +const CSVDownload: FC<{ docForm: ChunkingMode }> = ({ docForm }) => { const { t } = useTranslation() const { locale } = useContext(I18n) const { CSVDownloader, Type } = useCSVDownloader() const getTemplate = () => { if (locale === LanguagesSupported[1]) { - if (docForm === DocForm.QA) + if (docForm === ChunkingMode.qa) return CSV_TEMPLATE_QA_CN return CSV_TEMPLATE_CN } - if (docForm === DocForm.QA) + if (docForm === ChunkingMode.qa) return CSV_TEMPLATE_QA_EN return CSV_TEMPLATE_EN } @@ -52,7 +52,7 @@ const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => {
{t('share.generation.csvStructureTitle')}
- {docForm === DocForm.QA && ( + {docForm === ChunkingMode.qa && ( @@ -72,7 +72,7 @@ const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => {
)} - {docForm === DocForm.TEXT && ( + {docForm === ChunkingMode.text && ( @@ -97,7 +97,7 @@ const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => { bom={true} data={getTemplate()} > -
+
{t('datasetDocuments.list.batchModal.template')}
diff --git a/web/app/components/datasets/documents/detail/batch-modal/index.tsx b/web/app/components/datasets/documents/detail/batch-modal/index.tsx index 139a364cb4..c666ba6715 100644 --- a/web/app/components/datasets/documents/detail/batch-modal/index.tsx +++ b/web/app/components/datasets/documents/detail/batch-modal/index.tsx @@ -7,11 +7,11 @@ import CSVUploader from './csv-uploader' import CSVDownloader from './csv-downloader' import Button from '@/app/components/base/button' import Modal from '@/app/components/base/modal' -import type { DocForm } from '@/models/datasets' +import type { ChunkingMode } from '@/models/datasets' export type IBatchModalProps = { isShow: boolean - docForm: DocForm + docForm: ChunkingMode onCancel: () => void onConfirm: (file: File) => void } diff --git a/web/app/components/datasets/documents/detail/completed/InfiniteVirtualList.tsx b/web/app/components/datasets/documents/detail/completed/InfiniteVirtualList.tsx deleted file mode 100644 index 7b510bcf21..0000000000 --- a/web/app/components/datasets/documents/detail/completed/InfiniteVirtualList.tsx +++ /dev/null @@ -1,98 +0,0 @@ -import type { CSSProperties, FC } from 'react' -import React from 'react' -import { FixedSizeList as List } from 'react-window' -import InfiniteLoader from 'react-window-infinite-loader' -import SegmentCard from './SegmentCard' -import s from './style.module.css' -import type { SegmentDetailModel } from '@/models/datasets' - -type IInfiniteVirtualListProps = { - hasNextPage?: boolean // Are there more items to load? (This information comes from the most recent API request.) - isNextPageLoading: boolean // Are we currently loading a page of items? (This may be an in-flight flag in your Redux store for example.) - items: Array // Array of items loaded so far. - loadNextPage: () => Promise // Callback function responsible for loading the next page of items. - onClick: (detail: SegmentDetailModel) => void - onChangeSwitch: (segId: string, enabled: boolean) => Promise - onDelete: (segId: string) => Promise - archived?: boolean - embeddingAvailable: boolean -} - -const InfiniteVirtualList: FC = ({ - hasNextPage, - isNextPageLoading, - items, - loadNextPage, - onClick: onClickCard, - onChangeSwitch, - onDelete, - archived, - embeddingAvailable, -}) => { - // If there are more items to be loaded then add an extra row to hold a loading indicator. - const itemCount = hasNextPage ? items.length + 1 : items.length - - // Only load 1 page of items at a time. - // Pass an empty callback to InfiniteLoader in case it asks us to load more than once. - const loadMoreItems = isNextPageLoading ? () => { } : loadNextPage - - // Every row is loaded except for our loading indicator row. - const isItemLoaded = (index: number) => !hasNextPage || index < items.length - - // Render an item or a loading indicator. - const Item = ({ index, style }: { index: number; style: CSSProperties }) => { - let content - if (!isItemLoaded(index)) { - content = ( - <> - {[1, 2, 3].map(v => ( - - ))} - - ) - } - else { - content = items[index].map(segItem => ( - onClickCard(segItem)} - onChangeSwitch={onChangeSwitch} - onDelete={onDelete} - loading={false} - archived={archived} - embeddingAvailable={embeddingAvailable} - /> - )) - } - - return ( -
- {content} -
- ) - } - - return ( - - {({ onItemsRendered, ref }) => ( - - {Item} - - )} - - ) -} -export default InfiniteVirtualList diff --git a/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx b/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx index 5b76acc936..264d62b68a 100644 --- a/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx +++ b/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx @@ -6,9 +6,9 @@ import { RiDeleteBinLine, } from '@remixicon/react' import { StatusItem } from '../../list' -import { DocumentTitle } from '../index' +import style from '../../style.module.css' import s from './style.module.css' -import { SegmentIndexTag } from './index' +import { SegmentIndexTag } from './common/segment-index-tag' import cn from '@/utils/classnames' import Confirm from '@/app/components/base/confirm' import Switch from '@/app/components/base/switch' @@ -31,6 +31,22 @@ const ProgressBar: FC<{ percent: number; loading: boolean }> = ({ percent, loadi ) } +type DocumentTitleProps = { + extension?: string + name?: string + iconCls?: string + textCls?: string + wrapperCls?: string +} + +const DocumentTitle: FC = ({ extension, name, iconCls, textCls, wrapperCls }) => { + const localExtension = extension?.toLowerCase() || name?.split('.')?.pop()?.toLowerCase() + return
+
+ {name || '--'} +
+} + export type UsageScene = 'doc' | 'hitTesting' type ISegmentCardProps = { diff --git a/web/app/components/datasets/documents/detail/completed/child-segment-detail.tsx b/web/app/components/datasets/documents/detail/completed/child-segment-detail.tsx new file mode 100644 index 0000000000..085bfddc16 --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/child-segment-detail.tsx @@ -0,0 +1,134 @@ +import React, { type FC, useMemo, useState } from 'react' +import { useTranslation } from 'react-i18next' +import { + RiCloseLine, + RiExpandDiagonalLine, +} from '@remixicon/react' +import ActionButtons from './common/action-buttons' +import ChunkContent from './common/chunk-content' +import Dot from './common/dot' +import { SegmentIndexTag } from './common/segment-index-tag' +import { useSegmentListContext } from './index' +import type { ChildChunkDetail, ChunkingMode } from '@/models/datasets' +import { useEventEmitterContextContext } from '@/context/event-emitter' +import { formatNumber } from '@/utils/format' +import classNames from '@/utils/classnames' +import Divider from '@/app/components/base/divider' +import { formatTime } from '@/utils/time' + +type IChildSegmentDetailProps = { + chunkId: string + childChunkInfo?: Partial & { id: string } + onUpdate: (segmentId: string, childChunkId: string, content: string) => void + onCancel: () => void + docForm: ChunkingMode +} + +/** + * Show all the contents of the segment + */ +const ChildSegmentDetail: FC = ({ + chunkId, + childChunkInfo, + onUpdate, + onCancel, + docForm, +}) => { + const { t } = useTranslation() + const [content, setContent] = useState(childChunkInfo?.content || '') + const { eventEmitter } = useEventEmitterContextContext() + const [loading, setLoading] = useState(false) + const fullScreen = useSegmentListContext(s => s.fullScreen) + const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) + + eventEmitter?.useSubscription((v) => { + if (v === 'update-child-segment') + setLoading(true) + if (v === 'update-child-segment-done') + setLoading(false) + }) + + const handleCancel = () => { + onCancel() + setContent(childChunkInfo?.content || '') + } + + const handleSave = () => { + onUpdate(chunkId, childChunkInfo?.id || '', content) + } + + const wordCountText = useMemo(() => { + const count = content.length + return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}` + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [content.length]) + + const EditTimeText = useMemo(() => { + const timeText = formatTime({ + date: (childChunkInfo?.updated_at ?? 0) * 1000, + dateFormat: 'MM/DD/YYYY h:mm:ss', + }) + return `${t('datasetDocuments.segment.editedAt')} ${timeText}` + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [childChunkInfo?.updated_at]) + + return ( +
+
+
+
{t('datasetDocuments.segment.editChildChunk')}
+
+ + + {wordCountText} + + + {EditTimeText} + +
+
+
+ {fullScreen && ( + <> + + + + )} +
+ +
+
+ +
+
+
+
+
+ setContent(content)} + isEditMode={true} + /> +
+
+ {!fullScreen && ( +
+ +
+ )} +
+ ) +} + +export default React.memo(ChildSegmentDetail) diff --git a/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx b/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx new file mode 100644 index 0000000000..1615ea98cf --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx @@ -0,0 +1,195 @@ +import { type FC, useMemo, useState } from 'react' +import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import { EditSlice } from '../../../formatted-text/flavours/edit-slice' +import { useDocumentContext } from '../index' +import { FormattedText } from '../../../formatted-text/formatted' +import Empty from './common/empty' +import FullDocListSkeleton from './skeleton/full-doc-list-skeleton' +import { useSegmentListContext } from './index' +import type { ChildChunkDetail } from '@/models/datasets' +import Input from '@/app/components/base/input' +import classNames from '@/utils/classnames' +import Divider from '@/app/components/base/divider' +import { formatNumber } from '@/utils/format' + +type IChildSegmentCardProps = { + childChunks: ChildChunkDetail[] + parentChunkId: string + handleInputChange?: (value: string) => void + handleAddNewChildChunk?: (parentChunkId: string) => void + enabled: boolean + onDelete?: (segId: string, childChunkId: string) => Promise + onClickSlice?: (childChunk: ChildChunkDetail) => void + total?: number + inputValue?: string + onClearFilter?: () => void + isLoading?: boolean + focused?: boolean +} + +const ChildSegmentList: FC = ({ + childChunks, + parentChunkId, + handleInputChange, + handleAddNewChildChunk, + enabled, + onDelete, + onClickSlice, + total, + inputValue, + onClearFilter, + isLoading, + focused = false, +}) => { + const { t } = useTranslation() + const parentMode = useDocumentContext(s => s.parentMode) + const currChildChunk = useSegmentListContext(s => s.currChildChunk) + + const [collapsed, setCollapsed] = useState(true) + + const toggleCollapse = () => { + setCollapsed(!collapsed) + } + + const isParagraphMode = useMemo(() => { + return parentMode === 'paragraph' + }, [parentMode]) + + const isFullDocMode = useMemo(() => { + return parentMode === 'full-doc' + }, [parentMode]) + + const contentOpacity = useMemo(() => { + return (enabled || focused) ? '' : 'opacity-50 group-hover/card:opacity-100' + }, [enabled, focused]) + + const totalText = useMemo(() => { + const isSearch = inputValue !== '' && isFullDocMode + if (!isSearch) { + const text = isFullDocMode + ? !total + ? '--' + : formatNumber(total) + : formatNumber(childChunks.length) + const count = isFullDocMode + ? text === '--' + ? 0 + : total + : childChunks.length + return `${text} ${t('datasetDocuments.segment.childChunks', { count })}` + } + else { + const text = !total ? '--' : formatNumber(total) + const count = text === '--' ? 0 : total + return `${count} ${t('datasetDocuments.segment.searchResults', { count })}` + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isFullDocMode, total, childChunks.length, inputValue]) + + return ( +
+ {isFullDocMode ? : null} +
+
{ + event.stopPropagation() + toggleCollapse() + }} + > + { + isParagraphMode + ? collapsed + ? ( + + ) + : () + : null + } + {totalText} + · + +
+ {isFullDocMode + ? handleInputChange?.(e.target.value)} + onClear={() => handleInputChange?.('')} + /> + : null} +
+ {isLoading ? : null} + {((isFullDocMode && !isLoading) || !collapsed) + ?
+ {isParagraphMode && ( +
+ +
+ )} + {childChunks.length > 0 + ? + {childChunks.map((childChunk) => { + const edited = childChunk.updated_at !== childChunk.created_at + const focused = currChildChunk?.childChunkInfo?.id === childChunk.id + return onDelete?.(childChunk.segment_id, childChunk.id)} + labelClassName={focused ? 'bg-state-accent-solid text-text-primary-on-surface' : ''} + labelInnerClassName={'text-[10px] font-semibold align-bottom leading-6'} + contentClassName={classNames('!leading-6', focused ? 'bg-state-accent-hover-alt text-text-primary' : '')} + showDivider={false} + onClick={(e) => { + e.stopPropagation() + onClickSlice?.(childChunk) + }} + offsetOptions={({ rects }) => { + return { + mainAxis: isFullDocMode ? -rects.floating.width : 12 - rects.floating.width, + crossAxis: (20 - rects.floating.height) / 2, + } + }} + /> + })} + + : inputValue !== '' + ?
+ +
+ : null + } +
+ : null} +
+ ) +} + +export default ChildSegmentList diff --git a/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx b/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx new file mode 100644 index 0000000000..1238d98a9c --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx @@ -0,0 +1,86 @@ +import React, { type FC, useMemo } from 'react' +import { useTranslation } from 'react-i18next' +import { useKeyPress } from 'ahooks' +import { useDocumentContext } from '../../index' +import Button from '@/app/components/base/button' +import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '@/app/components/workflow/utils' + +type IActionButtonsProps = { + handleCancel: () => void + handleSave: () => void + loading: boolean + actionType?: 'edit' | 'add' + handleRegeneration?: () => void + isChildChunk?: boolean +} + +const ActionButtons: FC = ({ + handleCancel, + handleSave, + loading, + actionType = 'edit', + handleRegeneration, + isChildChunk = false, +}) => { + const { t } = useTranslation() + const mode = useDocumentContext(s => s.mode) + const parentMode = useDocumentContext(s => s.parentMode) + + useKeyPress(['esc'], (e) => { + e.preventDefault() + handleCancel() + }) + + useKeyPress(`${getKeyboardKeyCodeBySystem('ctrl')}.s`, (e) => { + e.preventDefault() + if (loading) + return + handleSave() + } + , { exactMatch: true, useCapture: true }) + + const isParentChildParagraphMode = useMemo(() => { + return mode === 'hierarchical' && parentMode === 'paragraph' + }, [mode, parentMode]) + + return ( +
+ + {(isParentChildParagraphMode && actionType === 'edit' && !isChildChunk) + ? + : null + } + +
+ ) +} + +ActionButtons.displayName = 'ActionButtons' + +export default React.memo(ActionButtons) diff --git a/web/app/components/datasets/documents/detail/completed/common/add-another.tsx b/web/app/components/datasets/documents/detail/completed/common/add-another.tsx new file mode 100644 index 0000000000..444560e55f --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/add-another.tsx @@ -0,0 +1,32 @@ +import React, { type FC } from 'react' +import { useTranslation } from 'react-i18next' +import classNames from '@/utils/classnames' +import Checkbox from '@/app/components/base/checkbox' + +type AddAnotherProps = { + className?: string + isChecked: boolean + onCheck: () => void +} + +const AddAnother: FC = ({ + className, + isChecked, + onCheck, +}) => { + const { t } = useTranslation() + + return ( +
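
A usage sketch (illustrative only, outside the patch) for the new ActionButtons footer: it only needs cancel/save callbacks and a loading flag, and it binds Esc and Ctrl/Cmd+S internally. Note that it reads mode and parentMode from the surrounding document context, so it has to render inside the document detail tree; the import path is an assumption.

import { useState } from 'react'
import ActionButtons from '@/app/components/datasets/documents/detail/completed/common/action-buttons'

const EditFooter = ({ onSave, onClose }: { onSave: () => Promise<void>; onClose: () => void }) => {
  const [saving, setSaving] = useState(false)
  const handleSave = async () => {
    setSaving(true)
    try {
      await onSave()
    }
    finally {
      setSaving(false)
    }
  }
  // Esc cancels and Ctrl/Cmd+S saves via the key bindings inside ActionButtons
  return <ActionButtons handleCancel={onClose} handleSave={handleSave} loading={saving} />
}

export default EditFooter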
+ + {t('datasetDocuments.segment.addAnother')} +
+ ) +} + +export default React.memo(AddAnother) diff --git a/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx b/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx new file mode 100644 index 0000000000..3dd3689b64 --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx @@ -0,0 +1,103 @@ +import React, { type FC } from 'react' +import { RiArchive2Line, RiCheckboxCircleLine, RiCloseCircleLine, RiDeleteBinLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import { useBoolean } from 'ahooks' +import Divider from '@/app/components/base/divider' +import classNames from '@/utils/classnames' +import Confirm from '@/app/components/base/confirm' + +const i18nPrefix = 'dataset.batchAction' +type IBatchActionProps = { + className?: string + selectedIds: string[] + onBatchEnable: () => void + onBatchDisable: () => void + onBatchDelete: () => Promise + onArchive?: () => void + onCancel: () => void +} + +const BatchAction: FC = ({ + className, + selectedIds, + onBatchEnable, + onBatchDisable, + onArchive, + onBatchDelete, + onCancel, +}) => { + const { t } = useTranslation() + const [isShowDeleteConfirm, { + setTrue: showDeleteConfirm, + setFalse: hideDeleteConfirm, + }] = useBoolean(false) + const [isDeleting, { + setTrue: setIsDeleting, + }] = useBoolean(false) + + const handleBatchDelete = async () => { + setIsDeleting() + await onBatchDelete() + hideDeleteConfirm() + } + return ( +
+
+
+ + {selectedIds.length} + + {t(`${i18nPrefix}.selected`)} +
+ +
+ + +
+
+ + +
+ {onArchive && ( +
+ + +
+ )} +
+ + +
+ + + +
+ { + isShowDeleteConfirm && ( + + ) + } +
+ ) +} + +export default React.memo(BatchAction) diff --git a/web/app/components/datasets/documents/detail/completed/common/chunk-content.tsx b/web/app/components/datasets/documents/detail/completed/common/chunk-content.tsx new file mode 100644 index 0000000000..e6403fa12f --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/chunk-content.tsx @@ -0,0 +1,192 @@ +import React, { useEffect, useRef, useState } from 'react' +import type { ComponentProps, FC } from 'react' +import { useTranslation } from 'react-i18next' +import { ChunkingMode } from '@/models/datasets' +import classNames from '@/utils/classnames' + +type IContentProps = ComponentProps<'textarea'> + +const Textarea: FC = React.memo(({ + value, + placeholder, + className, + disabled, + ...rest +}) => { + return ( +
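
A usage sketch (illustrative only, outside the patch) wiring the new BatchAction bar to a selection of segment ids. The import path and the handler bodies are placeholders; the prop names follow IBatchActionProps as defined above, and onBatchDelete must return a promise because BatchAction awaits it before hiding its delete confirm dialog.

import BatchAction from '@/app/components/datasets/documents/detail/completed/common/batch-action'

const SegmentToolbar = ({ selectedIds, onClear }: { selectedIds: string[]; onClear: () => void }) => {
  if (selectedIds.length === 0)
    return null
  return (
    <BatchAction
      selectedIds={selectedIds}
      onBatchEnable={() => { /* call the enable API for selectedIds, then refresh the list */ }}
      onBatchDisable={() => { /* call the disable API for selectedIds */ }}
      onBatchDelete={async () => { /* await the delete request; BatchAction closes its confirm dialog afterwards */ }}
      onCancel={onClear}
    />
  )
}

export default SegmentToolbar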