From 4bb987eca35314ddeda76bc8ceae91b77d2d6976 Mon Sep 17 00:00:00 2001 From: juyua9 Date: Tue, 12 May 2026 13:07:03 +0800 Subject: [PATCH 1/8] fix: validate missing text indexing technique (#35941) --- api/controllers/service_api/dataset/document.py | 2 +- .../controllers/service_api/dataset/test_document.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index cb48fe6715..e68eeeca25 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -136,7 +136,7 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[ if not dataset: raise ValueError("Dataset does not exist.") - if not dataset.indexing_technique and not args["indexing_technique"]: + if not dataset.indexing_technique and not args.get("indexing_technique"): raise ValueError("indexing_technique is required.") embedding_model_provider = payload.embedding_model_provider diff --git a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py index 230c51161f..738238d10a 100644 --- a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py +++ b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py @@ -1057,8 +1057,8 @@ class TestDocumentAddByTextApi: """Test error when both dataset and payload lack indexing_technique. When ``indexing_technique`` is ``None`` in the payload, ``model_dump(exclude_none=True)`` - omits the key. The production code accesses ``args["indexing_technique"]`` which raises - ``KeyError`` before the ``ValueError`` guard can fire. + omits the key. The service API should still raise the same validation error as other + document creation paths instead of leaking a ``KeyError`` from the dumped payload dict. 
""" # Arrange — neutralise billing decorators self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) @@ -1074,7 +1074,7 @@ class TestDocumentAddByTextApi: headers={"Authorization": "Bearer test_token"}, ): api = DocumentAddByTextApi() - with pytest.raises(KeyError): + with pytest.raises(ValueError, match="indexing_technique is required."): api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) From cd90d7ffc15fc1d4fa29c6b7a6870178d7da80d6 Mon Sep 17 00:00:00 2001 From: yyh <92089059+lyzno1@users.noreply.github.com> Date: Tue, 12 May 2026 13:34:19 +0800 Subject: [PATCH 2/8] refactor(web): migrate searchable pickers to combobox (#36066) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- eslint-suppressions.json | 5 - packages/dify-ui/AGENTS.md | 1 + .../__tests__/access-control.spec.tsx | 18 +- .../add-member-or-group-pop.spec.tsx | 32 +- .../add-member-or-group-pop.tsx | 336 +++--- .../__tests__/document-list.spec.tsx | 105 +- .../document-picker/__tests__/index.spec.tsx | 979 ++---------------- .../common/document-picker/document-list.tsx | 60 +- .../datasets/common/document-picker/index.tsx | 255 +++-- .../preview-document-picker.tsx | 27 +- .../detail/__tests__/document-title.spec.tsx | 87 +- .../documents/detail/__tests__/index.spec.tsx | 17 +- .../documents/detail/document-title.tsx | 32 +- .../datasets/documents/detail/index.tsx | 20 +- 14 files changed, 735 insertions(+), 1239 deletions(-) diff --git a/eslint-suppressions.json b/eslint-suppressions.json index 4adca38aa0..46277d3349 100644 --- a/eslint-suppressions.json +++ b/eslint-suppressions.json @@ -246,11 +246,6 @@ "count": 1 } }, - "web/app/components/app/app-access-control/add-member-or-group-pop.tsx": { - "no-restricted-imports": { - "count": 1 - } - }, "web/app/components/app/app-publisher/features-wrapper.tsx": { "ts/no-explicit-any": { "count": 4 diff --git a/packages/dify-ui/AGENTS.md b/packages/dify-ui/AGENTS.md index 9524394214..6eadd200f0 100644 --- a/packages/dify-ui/AGENTS.md +++ b/packages/dify-ui/AGENTS.md @@ -9,6 +9,7 @@ Shared design tokens, the `cn()` utility, CSS-first Tailwind styles, and headles - No imports from `web/`. No dependencies on next / i18next / ky / jotai / zustand. - One component per folder: `src//index.tsx`, optional `index.stories.tsx` and `__tests__/index.spec.tsx`. Add a matching `./` subpath to `package.json#exports`. - Props pattern: `Omit & VariantProps & { /* custom */ }`. +- Use plain `Omit<...>` only for non-union Base UI props. When a prop changes the valid shape of related props (for example `value` / `defaultValue`, `multiple` / `value`, or `clearable` / `onChange`), model that relationship with an explicit discriminated union or a distributive helper instead of flattening the props. - When a component accepts a prop typed from a shared internal module, `export type` it from that component so consumers import it from the component subpath. 
## Overlay Primitive Selection: Tooltip vs PreviewCard vs Popover diff --git a/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx b/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx index a3c63f5a0c..52c2a0dd54 100644 --- a/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx +++ b/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx @@ -254,9 +254,7 @@ describe('AddMemberOrGroupDialog', () => { await user.click(expandButton) expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([baseGroup]) - const memberLabel = screen.getByText(baseMember.name) - const memberCheckbox = memberLabel.parentElement?.previousElementSibling as HTMLElement - fireEvent.click(memberCheckbox) + await user.click(screen.getByRole('option', { name: /Member One/ })) expect(useAccessControlStore.getState().specificMembers).toEqual([baseMember]) }) @@ -277,13 +275,13 @@ describe('AddMemberOrGroupDialog', () => { await user.type(screen.getByPlaceholderText('app.accessControlDialog.operateGroupAndMember.searchPlaceholder'), 'Group') expect(document.querySelector('.spin-animation')).toBeInTheDocument() - const groupCheckbox = screen.getByText(baseGroup.name).closest('div')?.previousElementSibling as HTMLElement - fireEvent.click(groupCheckbox) - fireEvent.click(groupCheckbox) + const groupOption = screen.getByRole('option', { name: /Group One/ }) + fireEvent.click(groupOption) + fireEvent.click(groupOption) - const memberCheckbox = screen.getByText(baseMember.name).parentElement?.previousElementSibling as HTMLElement - fireEvent.click(memberCheckbox) - fireEvent.click(memberCheckbox) + const memberOption = screen.getByRole('option', { name: /Member One/ }) + fireEvent.click(memberOption) + fireEvent.click(memberOption) fireEvent.click(screen.getByText('app.accessControlDialog.operateGroupAndMember.expand')) fireEvent.click(screen.getByText('app.accessControlDialog.operateGroupAndMember.allMembers')) @@ -307,7 +305,7 @@ describe('AddMemberOrGroupDialog', () => { await user.click(screen.getByText('common.operation.add')) - expect(screen.getByText('app.accessControlDialog.operateGroupAndMember.noResult')).toBeInTheDocument() + expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult') }) }) diff --git a/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx b/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx index 725b121d30..d34756e85e 100644 --- a/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx +++ b/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx @@ -1,5 +1,5 @@ import type { AccessControlAccount, AccessControlGroup, Subject } from '@/models/access-control' -import { fireEvent, render, screen } from '@testing-library/react' +import { render, screen } from '@testing-library/react' import userEvent from '@testing-library/user-event' import useAccessControlStore from '@/context/access-control-store' import { SubjectType } from '@/models/access-control' @@ -106,8 +106,7 @@ describe('AddMemberOrGroupDialog', () => { expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([baseGroup]) - const memberCheckbox = screen.getByText(baseMember.name).parentElement?.previousElementSibling as HTMLElement - fireEvent.click(memberCheckbox) + await user.click(screen.getByRole('option', { name: /Member One/ })) 
expect(useAccessControlStore.getState().specificMembers).toEqual([baseMember])
  })

@@ -125,6 +124,31 @@ describe('AddMemberOrGroupDialog', () => {

     await user.click(screen.getByText('common.operation.add'))

-    expect(screen.getByText('app.accessControlDialog.operateGroupAndMember.noResult')).toBeInTheDocument()
+    expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult')
+  })
+
+  it('should keep breadcrumbs visible when the current group has no candidates', async () => {
+    useAccessControlStore.setState({
+      selectedGroupsForBreadcrumb: [baseGroup],
+    })
+    mockUseSearchForWhiteListCandidates.mockReturnValue({
+      isLoading: false,
+      isFetchingNextPage: false,
+      fetchNextPage: vi.fn(),
+      data: { pages: [{ currPage: 1, subjects: [], hasMore: false }] },
+    })
+
+    const user = userEvent.setup()
+    render(<AddMemberOrGroupDialog />)
+
+    await user.click(screen.getByText('common.operation.add'))
+
+    expect(screen.getByRole('button', { name: 'app.accessControlDialog.operateGroupAndMember.allMembers' })).toBeInTheDocument()
+    expect(screen.getByText(baseGroup.name)).toBeInTheDocument()
+    expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult')
+
+    await user.click(screen.getByRole('button', { name: 'app.accessControlDialog.operateGroupAndMember.allMembers' }))
+
+    expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([])
  })
})
diff --git a/web/app/components/app/app-access-control/add-member-or-group-pop.tsx b/web/app/components/app/app-access-control/add-member-or-group-pop.tsx
index 8d9bf19ea3..1e3a992136 100644
--- a/web/app/components/app/app-access-control/add-member-or-group-pop.tsx
+++ b/web/app/components/app/app-access-control/add-member-or-group-pop.tsx
@@ -1,110 +1,207 @@
 'use client'
+import type { ComboboxRootChangeEventDetails } from '@langgenius/dify-ui/combobox'
 import type { AccessControlAccount, AccessControlGroup, Subject, SubjectAccount, SubjectGroup } from '@/models/access-control'
-import { FloatingOverlay } from '@floating-ui/react'
 import { Avatar } from '@langgenius/dify-ui/avatar'
 import { Button } from '@langgenius/dify-ui/button'
 import { cn } from '@langgenius/dify-ui/cn'
-import { Popover, PopoverContent, PopoverTrigger } from '@langgenius/dify-ui/popover'
+import {
+  Combobox,
+  ComboboxContent,
+  ComboboxEmpty,
+  ComboboxInput,
+  ComboboxInputGroup,
+  ComboboxItem,
+  ComboboxItemText,
+  ComboboxList,
+  ComboboxStatus,
+  ComboboxTrigger,
+} from '@langgenius/dify-ui/combobox'
 import { RiAddCircleFill, RiArrowRightSLine, RiOrganizationChart } from '@remixicon/react'
 import { useDebounce } from 'ahooks'
-import { useCallback, useEffect, useRef, useState } from 'react'
+import { useEffect, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useSelector } from '@/context/app-context'
 import { SubjectType } from '@/models/access-control'
 import { useSearchForWhiteListCandidates } from '@/service/access-control'
 import useAccessControlStore from '../../../../context/access-control-store'
-import Checkbox from '../../base/checkbox'
-import Input from '../../base/input'
 import Loading from '../../base/loading'

 export default function AddMemberOrGroupDialog() {
   const { t } = useTranslation()
   const [open, setOpen] = useState(false)
   const [keyword, setKeyword] = useState('')
+  const scrollRootRef = useRef(null)
+  const anchorRef = useRef(null)
+  const specificGroups = useAccessControlStore(s => s.specificGroups)
+  const setSpecificGroups = 
useAccessControlStore(s => s.setSpecificGroups) + const specificMembers = useAccessControlStore(s => s.specificMembers) + const setSpecificMembers = useAccessControlStore(s => s.setSpecificMembers) const selectedGroupsForBreadcrumb = useAccessControlStore(s => s.selectedGroupsForBreadcrumb) const debouncedKeyword = useDebounce(keyword, { wait: 500 }) const lastAvailableGroup = selectedGroupsForBreadcrumb[selectedGroupsForBreadcrumb.length - 1] const { isLoading, isFetchingNextPage, fetchNextPage, data } = useSearchForWhiteListCandidates({ keyword: debouncedKeyword, groupId: lastAvailableGroup?.id, resultsPerPage: 10 }, open) - const handleKeywordChange = (e: React.ChangeEvent) => { - setKeyword(e.target.value) - } + const pages = data?.pages ?? [] + const subjects = pages.flatMap(page => page.subjects ?? []) + const selectedSubjects = [ + ...specificGroups.map(groupToSubject), + ...specificMembers.map(memberToSubject), + ] + const hasResults = pages.length > 0 && subjects.length > 0 + const shouldShowBreadcrumb = hasResults || selectedGroupsForBreadcrumb.length > 0 + const hasMore = pages[pages.length - 1]?.hasMore ?? false - const anchorRef = useRef(null) useEffect(() => { - const hasMore = data?.pages?.[0]?.hasMore ?? false let observer: IntersectionObserver | undefined if (anchorRef.current) { observer = new IntersectionObserver((entries) => { if (entries[0]!.isIntersecting && !isLoading && hasMore) fetchNextPage() - }, { rootMargin: '20px' }) + }, { root: scrollRootRef.current, rootMargin: '20px' }) observer.observe(anchorRef.current) } return () => observer?.disconnect() - }, [isLoading, fetchNextPage, anchorRef, data]) + }, [isLoading, fetchNextPage, hasMore]) + + const handleOpenChange = (nextOpen: boolean) => { + if (!nextOpen) + setKeyword('') + + setOpen(nextOpen) + } + + const handleInputValueChange = (inputValue: string, details: ComboboxRootChangeEventDetails) => { + if (details.reason !== 'item-press') + setKeyword(inputValue) + } + + const handleValueChange = (nextSubjects: Subject[]) => { + const nextGroups: AccessControlGroup[] = [] + const nextMembers: AccessControlAccount[] = [] + + for (const subject of nextSubjects) { + if (subject.subjectType === SubjectType.GROUP) + nextGroups.push((subject as SubjectGroup).groupData) + else + nextMembers.push((subject as SubjectAccount).accountData) + } + + setSpecificGroups(nextGroups) + setSpecificMembers(nextMembers) + } return ( - - - - {t('operation.add', { ns: 'common' })} - - )} - /> - {open && } - + multiple + open={open} + value={selectedSubjects} + inputValue={keyword} + items={subjects} + itemToStringLabel={getSubjectLabel} + itemToStringValue={getSubjectValue} + isItemEqualToValue={isSameSubject} + filter={null} + onOpenChange={handleOpenChange} + onInputValueChange={handleInputValueChange} + onValueChange={handleValueChange} + > + + + -
+
- + +
- { - isLoading - ?
- : (data?.pages?.length ?? 0) > 0 - ? ( - <> -
- -
-
- {renderGroupOrMember(data?.pages ?? [])} + {isLoading + ? ( + + + + ) + : ( + <> + {shouldShowBreadcrumb && ( +
+ +
+ )} + {hasResults + ? ( + <> + + {(subject: Subject) => } + {isFetchingNextPage && } -
-
- - ) - : ( -
- {t('accessControlDialog.operateGroupAndMember.noResult', { ns: 'app' })} -
- ) - } +
+ + ) + : ( + + {t('accessControlDialog.operateGroupAndMember.noResult', { ns: 'app' })} + + )} + + )}
- - + + ) } -type GroupOrMemberData = { subjects: Subject[], currPage: number }[] -function renderGroupOrMember(data: GroupOrMemberData) { - return data?.map((page) => { - return ( -
- {page.subjects?.map((item, index) => { - if (item.subjectType === SubjectType.GROUP) - return - return - })} -
- ) - }) ?? null +function groupToSubject(group: AccessControlGroup): SubjectGroup { + return { + subjectId: group.id, + subjectType: SubjectType.GROUP, + groupData: group, + } +} + +function memberToSubject(member: AccessControlAccount): SubjectAccount { + return { + subjectId: member.id, + subjectType: SubjectType.ACCOUNT, + accountData: member, + } +} + +function getSubjectLabel(subject: Subject) { + if (subject.subjectType === SubjectType.GROUP) + return (subject as SubjectGroup).groupData.name + + return (subject as SubjectAccount).accountData.name +} + +function getSubjectValue(subject: Subject) { + return `${subject.subjectType}:${subject.subjectId}` +} + +function isSameSubject(item: Subject, value: Subject) { + return item.subjectId === value.subjectId && item.subjectType === value.subjectType +} + +function SubjectItem({ subject }: { subject: Subject }) { + if (subject.subjectType === SubjectType.GROUP) + return + + return } function SelectedGroupsBreadCrumb() { @@ -112,13 +209,13 @@ function SelectedGroupsBreadCrumb() { const setSelectedGroupsForBreadcrumb = useAccessControlStore(s => s.setSelectedGroupsForBreadcrumb) const { t } = useTranslation() - const handleBreadCrumbClick = useCallback((index: number) => { + const handleBreadCrumbClick = (index: number) => { const newGroups = selectedGroupsForBreadcrumb.slice(0, index + 1) setSelectedGroupsForBreadcrumb(newGroups) - }, [setSelectedGroupsForBreadcrumb, selectedGroupsForBreadcrumb]) - const handleReset = useCallback(() => { + } + const handleReset = () => { setSelectedGroupsForBreadcrumb([]) - }, [setSelectedGroupsForBreadcrumb]) + } const hasBreadcrumb = selectedGroupsForBreadcrumb.length > 0 return ( @@ -162,104 +259,111 @@ function SelectedGroupsBreadCrumb() { type GroupItemProps = { group: AccessControlGroup + subject: Subject } -function GroupItem({ group }: GroupItemProps) { +function GroupItem({ group, subject }: GroupItemProps) { const { t } = useTranslation() const specificGroups = useAccessControlStore(s => s.specificGroups) - const setSpecificGroups = useAccessControlStore(s => s.setSpecificGroups) const selectedGroupsForBreadcrumb = useAccessControlStore(s => s.selectedGroupsForBreadcrumb) const setSelectedGroupsForBreadcrumb = useAccessControlStore(s => s.setSelectedGroupsForBreadcrumb) const isChecked = specificGroups.some(g => g.id === group.id) - const handleCheckChange = useCallback(() => { - if (!isChecked) { - const newGroups = [...specificGroups, group] - setSpecificGroups(newGroups) - } - else { - const newGroups = specificGroups.filter(g => g.id !== group.id) - setSpecificGroups(newGroups) - } - }, [specificGroups, setSpecificGroups, group, isChecked]) - const handleExpandClick = useCallback(() => { + const handleExpandClick = () => { setSelectedGroupsForBreadcrumb([...selectedGroupsForBreadcrumb, group]) - }, [selectedGroupsForBreadcrumb, setSelectedGroupsForBreadcrumb, group]) + } + return ( - - -
-
-
- +
+ + + +
+
+
-
-

{group.name}

-

{group.groupSize}

-
+ {group.name} + {group.groupSize} + + - +
) } type MemberItemProps = { member: AccessControlAccount + subject: Subject } -function MemberItem({ member }: MemberItemProps) { +function MemberItem({ member, subject }: MemberItemProps) { const currentUser = useSelector(s => s.userProfile) const { t } = useTranslation() const specificMembers = useAccessControlStore(s => s.specificMembers) - const setSpecificMembers = useAccessControlStore(s => s.setSpecificMembers) const isChecked = specificMembers.some(m => m.id === member.id) - const handleCheckChange = useCallback(() => { - if (!isChecked) { - const newMembers = [...specificMembers, member] - setSpecificMembers(newMembers) - } - else { - const newMembers = specificMembers.filter(m => m.id !== member.id) - setSpecificMembers(newMembers) - } - }, [specificMembers, setSpecificMembers, member, isChecked]) return ( - - -
+ + +
-

{member.name}

+ {member.name} {currentUser.email === member.email && ( -

+ ( {t('you', { ns: 'common' })} ) -

+ )} -
-

{member.email}

+ + {member.email}
) } type BaseItemProps = { className?: string + subject: Subject children: React.ReactNode } -function BaseItem({ children, className }: BaseItemProps) { +function BaseItem({ children, className, subject }: BaseItemProps) { return ( -
+ {children} -
+ + ) +} + +function SelectionBox({ checked }: { checked: boolean }) { + return ( +
+ - + + {(document: SimpleDocumentDetail | null) => ( + + )} + + + -
- - {documentsList - ? ( - ({ - id: d.id, - name: d.name, - extension: d.data_source_detail_dict?.upload_file?.extension || '', - }))} - onChange={handleChange} - /> - ) - : ( -
- -
- )} -
- - + + + {data + ? ( + documentsList.length > 0 + ? ( + + ) + : ( + + {t('noData', { ns: 'common' })} + + ) + ) + : ( + + + + )} +
+ ) } -export default React.memo(DocumentPicker) diff --git a/web/app/components/datasets/common/document-picker/preview-document-picker.tsx b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx index 597ceda9a5..fb90bf57f7 100644 --- a/web/app/components/datasets/common/document-picker/preview-document-picker.tsx +++ b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx @@ -14,7 +14,6 @@ import { useCallback } from 'react' import { useTranslation } from 'react-i18next' import Loading from '@/app/components/base/loading' import FileIcon from '../document-file-icon' -import DocumentList from './document-list' type Props = { className?: string @@ -74,7 +73,7 @@ const PreviewDocumentPicker: FC = ({ {files?.length > 1 &&
{t('preprocessDocument', { ns: 'dataset', num: files.length })}
} {files?.length > 0 ? ( - @@ -90,3 +89,27 @@ const PreviewDocumentPicker: FC = ({ ) } export default React.memo(PreviewDocumentPicker) + +function PreviewDocumentList({ + list, + onChange, +}: { + list: DocumentItem[] + onChange: (value: DocumentItem) => void +}) { + return ( +
+ {list.map(item => ( + + ))} +
+ ) +} diff --git a/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx b/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx index 3eb1017b8d..b48575d209 100644 --- a/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx +++ b/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx @@ -1,6 +1,7 @@ +import type { SimpleDocumentDetail } from '@/models/datasets' import { render } from '@testing-library/react' import { beforeEach, describe, expect, it, vi } from 'vitest' -import { ChunkingMode } from '@/models/datasets' +import { ChunkingMode, DataSourceType } from '@/models/datasets' import { DocumentTitle } from '../document-title' @@ -11,13 +12,23 @@ vi.mock('@/next/navigation', () => ({ }), })) -// Mock DocumentPicker vi.mock('../../../common/document-picker', () => ({ - default: ({ datasetId, value, onChange }: { datasetId: string, value: unknown, onChange: (doc: { id: string }) => void }) => ( + DocumentPicker: ({ + datasetId, + value, + parentMode, + onChange, + }: { + datasetId: string + value?: SimpleDocumentDetail | null + parentMode?: string + onChange: (doc: { id: string }) => void + }) => (
onChange({ id: 'new-doc-id' })} > Document Picker @@ -25,6 +36,42 @@ vi.mock('../../../common/document-picker', () => ({ ), })) +const createDocument = (overrides: Partial = {}): SimpleDocumentDetail => ({ + id: 'doc-1', + batch: 'batch-1', + position: 1, + dataset_id: 'dataset-1', + data_source_type: DataSourceType.FILE, + data_source_info: { + upload_file: { + id: 'file-1', + name: 'document.pdf', + size: 1024, + extension: 'pdf', + mime_type: 'application/pdf', + created_by: 'user-1', + created_at: Date.now(), + }, + job_id: 'job-1', + url: '', + }, + dataset_process_rule_id: 'rule-1', + name: 'Document 1', + created_from: 'web', + created_by: 'user-1', + created_at: Date.now(), + indexing_status: 'completed', + display_status: 'enabled', + doc_form: ChunkingMode.text, + doc_language: 'en', + enabled: true, + word_count: 1000, + archived: false, + updated_at: Date.now(), + hit_count: 0, + ...overrides, +}) + describe('DocumentTitle', () => { beforeEach(() => { vi.clearAllMocks() @@ -69,31 +116,26 @@ describe('DocumentTitle', () => { expect(getByTestId('document-picker').getAttribute('data-dataset-id')).toBe('test-dataset-id') }) - it('should pass value props to DocumentPicker', () => { + it('should pass the selected document to DocumentPicker', () => { + const document = createDocument({ id: 'doc-current' }) const { getByTestId } = render( , ) - const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}') - expect(value.name).toBe('test-document') - expect(value.extension).toBe('pdf') - expect(value.chunkingMode).toBe(ChunkingMode.text) - expect(value.parentMode).toBe('paragraph') + expect(getByTestId('document-picker')).toHaveAttribute('data-value-id', 'doc-current') + expect(getByTestId('document-picker')).toHaveAttribute('data-parent-mode', 'paragraph') }) - it('should default parentMode to paragraph when parent_mode is undefined', () => { + it('should pass no parent mode when it is undefined', () => { const { getByTestId } = render( , ) - const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}') - expect(value.parentMode).toBe('paragraph') + expect(getByTestId('document-picker')).toHaveAttribute('data-parent-mode', '') }) it('should apply custom wrapperCls', () => { @@ -119,24 +161,23 @@ describe('DocumentTitle', () => { }) describe('Edge Cases', () => { - it('should handle undefined optional props', () => { + it('should handle an empty document value', () => { const { getByTestId } = render( , ) - const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}') - expect(value.name).toBeUndefined() - expect(value.extension).toBeUndefined() + expect(getByTestId('document-picker')).toHaveAttribute('data-value-id', '') }) it('should maintain structure when rerendered', () => { const { rerender, getByTestId } = render( - , + , ) - rerender() + rerender() expect(getByTestId('document-picker').getAttribute('data-dataset-id')).toBe('dataset-2') + expect(getByTestId('document-picker').getAttribute('data-value-id')).toBe('doc-2') }) }) }) diff --git a/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx b/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx index e717475b38..e8946ce584 100644 --- a/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx +++ b/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx @@ -114,9 +114,20 @@ vi.mock('../batch-modal', () => ({ })) vi.mock('../document-title', () => ({ - DocumentTitle: ({ name, 
extension }: { name?: string, extension?: string }) => ( -
{name}
- ), + DocumentTitle: ({ + document, + }: { + document?: { + name?: string + data_source_detail_dict?: { upload_file?: { extension?: string } } + data_source_info?: { upload_file?: { extension?: string } } + } | null + }) => { + const extension = document?.data_source_detail_dict?.upload_file?.extension + ?? document?.data_source_info?.upload_file?.extension + + return
{document?.name}
+ }, })) vi.mock('../segment-add', () => ({ diff --git a/web/app/components/datasets/documents/detail/document-title.tsx b/web/app/components/datasets/documents/detail/document-title.tsx index d5bf5345ae..0a1cfbf61a 100644 --- a/web/app/components/datasets/documents/detail/document-title.tsx +++ b/web/app/components/datasets/documents/detail/document-title.tsx @@ -1,39 +1,29 @@ -import type { FC } from 'react' -import type { ChunkingMode, ParentMode } from '@/models/datasets' +import type { ParentMode, SimpleDocumentDetail } from '@/models/datasets' import { cn } from '@langgenius/dify-ui/cn' import { useRouter } from '@/next/navigation' -import DocumentPicker from '../../common/document-picker' +import { DocumentPicker } from '../../common/document-picker' type DocumentTitleProps = { datasetId: string - extension?: string - name?: string - chunkingMode?: ChunkingMode - parent_mode?: ParentMode - iconCls?: string - textCls?: string + document?: SimpleDocumentDetail | null + parentMode?: ParentMode wrapperCls?: string } -export const DocumentTitle: FC = ({ +export function DocumentTitle({ datasetId, - extension, - name, - chunkingMode, - parent_mode, + document, + parentMode, wrapperCls, -}) => { +}: DocumentTitleProps) { const router = useRouter() + return (
{ router.push(`/datasets/${datasetId}/documents/${doc.id}`) }} diff --git a/web/app/components/datasets/documents/detail/index.tsx b/web/app/components/datasets/documents/detail/index.tsx index 732d7ffb28..190cf8edf7 100644 --- a/web/app/components/datasets/documents/detail/index.tsx +++ b/web/app/components/datasets/documents/detail/index.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import type { DataSourceInfo, DocumentDisplayStatus, FileItem, FullDocumentDetail, LegacyDataSourceInfo } from '@/models/datasets' +import type { DocumentDisplayStatus, FileItem, FullDocumentDetail } from '@/models/datasets' import type { SegmentImportStatus } from '@/types/dataset' import { cn } from '@langgenius/dify-ui/cn' import { toast } from '@langgenius/dify-ui/toast' @@ -38,10 +38,6 @@ const NON_TERMINAL_DISPLAY_STATUSES = new Set( DisplayStatusList.filter(s => s === 'queuing' || s === 'indexing' || s === 'paused'), ) -const isLegacyDataSourceInfo = (info?: DataSourceInfo): info is LegacyDataSourceInfo => { - return !!info && 'upload_file' in info -} - const DocumentDetail: FC = ({ datasetId, documentId }) => { const router = useRouter() const searchParams = useSearchParams() @@ -123,14 +119,6 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => { const embedding = NON_TERMINAL_DISPLAY_STATUSES.has(documentDetail?.display_status as DocumentDisplayStatus) - const documentUploadFile = useMemo(() => { - if (!documentDetail?.data_source_info) - return undefined - if (isLegacyDataSourceInfo(documentDetail.data_source_info)) - return documentDetail.data_source_info.upload_file - return undefined - }, [documentDetail?.data_source_info]) - const invalidChunkList = useInvalid(useSegmentListKey) const invalidChildChunkList = useInvalid(useChildSegmentListKey) const invalidDocumentList = useInvalidDocumentList(datasetId) @@ -212,11 +200,9 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => {
{embeddingAvailable && documentDetail && !documentDetail.archived && !isFullDocMode && ( From 1a93af5cd0c22c83da98eb2990cdea398aaf1064 Mon Sep 17 00:00:00 2001 From: Deepam Goyal <116721751+Deepam02@users.noreply.github.com> Date: Tue, 12 May 2026 11:04:45 +0530 Subject: [PATCH 3/8] refactor: rewrite estimate_args_validate using Pydantic v2 models (#36036) Signed-off-by: Deepam Goyal Co-authored-by: Asuka Minato Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- api/services/dataset_service.py | 181 +++++++++--------- .../services/test_dataset_service_document.py | 4 +- 2 files changed, 94 insertions(+), 91 deletions(-) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 383474f4f6..4f5a95dcde 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -7,9 +7,10 @@ import time import uuid from collections import Counter from collections.abc import Sequence -from typing import Any, Literal, TypedDict, cast +from typing import Annotated, Any, Literal, TypedDict, cast import sqlalchemy as sa +from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator from redis.exceptions import LockNotOwnedError from sqlalchemy import delete, exists, func, select, update from sqlalchemy.orm import Session, sessionmaker @@ -117,6 +118,86 @@ class AutoDisableLogsDict(TypedDict): count: int +class _EstimatePreProcessingRule(BaseModel): + id: str = Field(min_length=1) + enabled: bool + + @field_validator("id") + @classmethod + def _validate_id(cls, v: str) -> str: + if v not in DatasetProcessRule.PRE_PROCESSING_RULES: + raise ValueError("Process rule pre_processing_rules id is invalid") + return v + + +class _EstimateSegmentation(BaseModel): + separator: str = Field(min_length=1) + max_tokens: int = Field(gt=0) + + +class _EstimateRules(BaseModel): + pre_processing_rules: list[_EstimatePreProcessingRule] + segmentation: _EstimateSegmentation + + @field_validator("pre_processing_rules") + @classmethod + def _deduplicate(cls, v: list[_EstimatePreProcessingRule]) -> list[_EstimatePreProcessingRule]: + seen: dict[str, _EstimatePreProcessingRule] = {} + for rule in v: + seen[rule.id] = rule + return list(seen.values()) + + +class _SummaryIndexSettingDisabled(BaseModel): + enable: Literal[False] = False + + +class _SummaryIndexSettingEnabled(BaseModel): + enable: Literal[True] + model_name: str = Field(min_length=1) + model_provider_name: str = Field(min_length=1) + + +_SummaryIndexSetting = Annotated[ + _SummaryIndexSettingDisabled | _SummaryIndexSettingEnabled, + Field(discriminator="enable"), +] + + +class _AutomaticProcessRule(BaseModel): + model_config = ConfigDict(extra="allow") + + mode: Literal[ProcessRuleMode.AUTOMATIC] + summary_index_setting: _SummaryIndexSetting | None = None + + +class _CustomProcessRule(BaseModel): + model_config = ConfigDict(extra="allow") + + mode: Literal[ProcessRuleMode.CUSTOM] + rules: _EstimateRules + summary_index_setting: _SummaryIndexSetting | None = None + + +class _HierarchicalProcessRule(BaseModel): + model_config = ConfigDict(extra="allow") + + mode: Literal[ProcessRuleMode.HIERARCHICAL] + rules: _EstimateRules + summary_index_setting: _SummaryIndexSetting | None = None + + +_EstimateProcessRule = Annotated[ + _AutomaticProcessRule | _CustomProcessRule | _HierarchicalProcessRule, + Field(discriminator="mode"), +] + + +class _EstimateArgs(BaseModel): + info_list: dict[str, Any] + process_rule: _EstimateProcessRule + + class DatasetService: 
@staticmethod def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False): @@ -2851,94 +2932,16 @@ class DocumentService: @classmethod def estimate_args_validate(cls, args: dict[str, Any]): - if "info_list" not in args or not args["info_list"]: - raise ValueError("Data source info is required") - - if not isinstance(args["info_list"], dict): - raise ValueError("Data info is invalid") - - if "process_rule" not in args or not args["process_rule"]: - raise ValueError("Process rule is required") - - if not isinstance(args["process_rule"], dict): - raise ValueError("Process rule is invalid") - - if "mode" not in args["process_rule"] or not args["process_rule"]["mode"]: - raise ValueError("Process rule mode is required") - - if args["process_rule"]["mode"] not in DatasetProcessRule.MODES: - raise ValueError("Process rule mode is invalid") - - if args["process_rule"]["mode"] == ProcessRuleMode.AUTOMATIC: - args["process_rule"]["rules"] = {} - else: - if "rules" not in args["process_rule"] or not args["process_rule"]["rules"]: - raise ValueError("Process rule rules is required") - - if not isinstance(args["process_rule"]["rules"], dict): - raise ValueError("Process rule rules is invalid") - - if ( - "pre_processing_rules" not in args["process_rule"]["rules"] - or args["process_rule"]["rules"]["pre_processing_rules"] is None - ): - raise ValueError("Process rule pre_processing_rules is required") - - if not isinstance(args["process_rule"]["rules"]["pre_processing_rules"], list): - raise ValueError("Process rule pre_processing_rules is invalid") - - unique_pre_processing_rule_dicts = {} - for pre_processing_rule in args["process_rule"]["rules"]["pre_processing_rules"]: - if "id" not in pre_processing_rule or not pre_processing_rule["id"]: - raise ValueError("Process rule pre_processing_rules id is required") - - if pre_processing_rule["id"] not in DatasetProcessRule.PRE_PROCESSING_RULES: - raise ValueError("Process rule pre_processing_rules id is invalid") - - if "enabled" not in pre_processing_rule or pre_processing_rule["enabled"] is None: - raise ValueError("Process rule pre_processing_rules enabled is required") - - if not isinstance(pre_processing_rule["enabled"], bool): - raise ValueError("Process rule pre_processing_rules enabled is invalid") - - unique_pre_processing_rule_dicts[pre_processing_rule["id"]] = pre_processing_rule - - args["process_rule"]["rules"]["pre_processing_rules"] = list(unique_pre_processing_rule_dicts.values()) - - if ( - "segmentation" not in args["process_rule"]["rules"] - or args["process_rule"]["rules"]["segmentation"] is None - ): - raise ValueError("Process rule segmentation is required") - - if not isinstance(args["process_rule"]["rules"]["segmentation"], dict): - raise ValueError("Process rule segmentation is invalid") - - if ( - "separator" not in args["process_rule"]["rules"]["segmentation"] - or not args["process_rule"]["rules"]["segmentation"]["separator"] - ): - raise ValueError("Process rule segmentation separator is required") - - if not isinstance(args["process_rule"]["rules"]["segmentation"]["separator"], str): - raise ValueError("Process rule segmentation separator is invalid") - - if ( - "max_tokens" not in args["process_rule"]["rules"]["segmentation"] - or not args["process_rule"]["rules"]["segmentation"]["max_tokens"] - ): - raise ValueError("Process rule segmentation max_tokens is required") - - if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int): - raise 
ValueError("Process rule segmentation max_tokens is invalid") - - # valid summary index setting - summary_index_setting = args["process_rule"].get("summary_index_setting") - if summary_index_setting and summary_index_setting.get("enable"): - if "model_name" not in summary_index_setting or not summary_index_setting["model_name"]: - raise ValueError("Summary index model name is required") - if "model_provider_name" not in summary_index_setting or not summary_index_setting["model_provider_name"]: - raise ValueError("Summary index model provider name is required") + try: + validated = _EstimateArgs.model_validate(args) + except ValidationError as e: + first = e.errors()[0] + original = first.get("ctx", {}).get("error") + raise ValueError(str(original) if isinstance(original, ValueError) else first["msg"]) from e + process_rule_dict = validated.process_rule.model_dump(exclude_none=True) + if validated.process_rule.mode == ProcessRuleMode.AUTOMATIC: + process_rule_dict["rules"] = {} + args["process_rule"] = process_rule_dict @staticmethod def batch_update_document_status( diff --git a/api/tests/unit_tests/services/test_dataset_service_document.py b/api/tests/unit_tests/services/test_dataset_service_document.py index 1633194aa8..a78bc7f9d6 100644 --- a/api/tests/unit_tests/services/test_dataset_service_document.py +++ b/api/tests/unit_tests/services/test_dataset_service_document.py @@ -1297,7 +1297,7 @@ class TestDocumentServiceEstimateValidation: """Unit tests for estimate_args_validate branches.""" def test_estimate_args_validate_rejects_missing_info_list(self): - with pytest.raises(ValueError, match="Data source info is required"): + with pytest.raises(ValueError, match="Field required"): DocumentService.estimate_args_validate({}) def test_estimate_args_validate_sets_empty_rules_for_automatic_mode(self): @@ -1357,7 +1357,7 @@ class TestDocumentServiceEstimateValidation: }, } - with pytest.raises(ValueError, match="Summary index model provider name is required"): + with pytest.raises(ValueError, match="Field required"): DocumentService.estimate_args_validate(args) From cbedcd2882ae4f7b2fd597b56647f34dcb87eebd Mon Sep 17 00:00:00 2001 From: -LAN- Date: Tue, 12 May 2026 13:35:24 +0800 Subject: [PATCH 4/8] fix(security): harden self-hosted SECRET_KEY bootstrap (#36049) Co-authored-by: EndlessLucky <66432853+EndlessLucky@users.noreply.github.com> --- api/app_factory.py | 2 +- api/configs/feature/__init__.py | 6 +- api/configs/secret_key.py | 38 ++++++++++ api/core/app/workflow/file_runtime.py | 2 +- .../datasource/datasource_file_manager.py | 14 +++- api/core/tools/signature.py | 19 +++-- api/core/tools/tool_file_manager.py | 14 +++- api/extensions/ext_set_secretkey.py | 11 ++- api/models/dataset.py | 8 +- .../unit_tests/configs/test_dify_config.py | 41 ++++++++++ .../test_datasource_file_manager.py | 33 --------- .../extensions/test_set_secretkey.py | 74 +++++++++++++++++++ api/tests/unit_tests/libs/test_passport.py | 23 +----- docker/.env.example | 3 +- docker/README.md | 2 +- docker/envs/security.env.example | 3 +- 16 files changed, 209 insertions(+), 84 deletions(-) create mode 100644 api/configs/secret_key.py create mode 100644 api/tests/unit_tests/extensions/test_set_secretkey.py diff --git a/api/app_factory.py b/api/app_factory.py index 48e50ceae9..5583071980 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -181,7 +181,6 @@ def initialize_extensions(app: DifyApp): ext_import_modules, ext_orjson, ext_forward_refs, - ext_set_secretkey, ext_compress, ext_code_based_extension, 
ext_database, @@ -189,6 +188,7 @@ def initialize_extensions(app: DifyApp): ext_migrate, ext_redis, ext_storage, + ext_set_secretkey, ext_logstore, # Initialize logstore after storage, before celery ext_celery, ext_login, diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 26b8ea670b..ccb97d96ef 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -23,9 +23,9 @@ class SecurityConfig(BaseSettings): """ SECRET_KEY: str = Field( - description="Secret key for secure session cookie signing." - "Make sure you are changing this key for your deployment with a strong key." - "Generate a strong key using `openssl rand -base64 42` or set via the `SECRET_KEY` environment variable.", + description="Secret key for secure session cookie signing. " + "Leave empty to let Dify generate a persistent key in the storage directory, " + "or set a strong value via the `SECRET_KEY` environment variable.", default="", ) diff --git a/api/configs/secret_key.py b/api/configs/secret_key.py new file mode 100644 index 0000000000..f8c33f6a2c --- /dev/null +++ b/api/configs/secret_key.py @@ -0,0 +1,38 @@ +"""SECRET_KEY persistence helpers for runtime setup.""" + +from __future__ import annotations + +import secrets + +from extensions.ext_storage import storage + +GENERATED_SECRET_KEY_FILENAME = ".dify_secret_key" + + +def resolve_secret_key(secret_key: str) -> str: + """Return an explicit SECRET_KEY or a generated key persisted in storage.""" + if secret_key: + return secret_key + + return _load_or_create_secret_key() + + +def _load_or_create_secret_key() -> str: + try: + persisted_key = storage.load_once(GENERATED_SECRET_KEY_FILENAME).decode("utf-8").strip() + if persisted_key: + return persisted_key + except FileNotFoundError: + pass + + generated_key = secrets.token_urlsafe(48) + + try: + storage.save(GENERATED_SECRET_KEY_FILENAME, f"{generated_key}\n".encode()) + except Exception as exc: + raise ValueError( + f"SECRET_KEY is not set and could not be generated at {GENERATED_SECRET_KEY_FILENAME}. " + "Set SECRET_KEY explicitly or make storage writable." 
+ ) from exc + + return generated_key diff --git a/api/core/app/workflow/file_runtime.py b/api/core/app/workflow/file_runtime.py index 3a6f9d575a..587f700286 100644 --- a/api/core/app/workflow/file_runtime.py +++ b/api/core/app/workflow/file_runtime.py @@ -128,7 +128,7 @@ class DifyWorkflowFileRuntime(WorkflowFileRuntimeProtocol): @staticmethod def _secret_key() -> bytes: - return dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + return dify_config.SECRET_KEY.encode() def _sign_query(self, *, payload: str) -> dict[str, str]: timestamp = str(int(time.time())) diff --git a/api/core/datasource/datasource_file_manager.py b/api/core/datasource/datasource_file_manager.py index 492b507aa9..79b84a28be 100644 --- a/api/core/datasource/datasource_file_manager.py +++ b/api/core/datasource/datasource_file_manager.py @@ -35,8 +35,11 @@ class DatasourceFileManager: timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -47,8 +50,11 @@ class DatasourceFileManager: verify signature """ data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() # verify signature diff --git a/api/core/tools/signature.py b/api/core/tools/signature.py index 3c7b523ff1..ca4756f2a4 100644 --- a/api/core/tools/signature.py +++ b/api/core/tools/signature.py @@ -8,6 +8,10 @@ import urllib.parse from configs import dify_config +def _secret_key() -> bytes: + return dify_config.SECRET_KEY.encode() + + def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True) -> str: """ sign file to get a temporary url for plugin access @@ -19,8 +23,7 @@ def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True) timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -39,8 +42,7 @@ def sign_upload_file_preview_url(upload_file_id: str, extension: str) -> str: timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = 
base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -51,8 +53,7 @@ def verify_tool_file_signature(file_id: str, timestamp: str, nonce: str, sign: s verify signature """ data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() # verify signature @@ -71,8 +72,7 @@ def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str, timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() query = urllib.parse.urlencode( { @@ -92,8 +92,7 @@ def verify_plugin_file_signature( """Verify the signature used by the plugin-facing file upload endpoint.""" data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() if sign != recalculated_encoded_sign: diff --git a/api/core/tools/tool_file_manager.py b/api/core/tools/tool_file_manager.py index c87e8a3ae0..f2552e7cbd 100644 --- a/api/core/tools/tool_file_manager.py +++ b/api/core/tools/tool_file_manager.py @@ -51,8 +51,11 @@ class ToolFileManager: timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -63,8 +66,11 @@ class ToolFileManager: verify signature """ data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() # verify signature diff --git a/api/extensions/ext_set_secretkey.py b/api/extensions/ext_set_secretkey.py index dfb87c0167..ca59a2de4d 100644 --- a/api/extensions/ext_set_secretkey.py +++ b/api/extensions/ext_set_secretkey.py @@ -1,6 +1,13 @@ from configs import dify_config +from configs.secret_key import resolve_secret_key from dify_app import DifyApp -def init_app(app: DifyApp): - app.secret_key = 
dify_config.SECRET_KEY +def init_app(app: DifyApp) -> None: + """Resolve SECRET_KEY after config loading and before session/login setup.""" + secret_key = dify_config.SECRET_KEY + if not secret_key: + secret_key = resolve_secret_key(secret_key) + dify_config.SECRET_KEY = secret_key + app.config["SECRET_KEY"] = secret_key + app.secret_key = secret_key diff --git a/api/models/dataset.py b/api/models/dataset.py index f823e0aa10..65ea39969c 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -945,7 +945,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() @@ -962,7 +962,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() @@ -981,7 +981,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() @@ -1019,7 +1019,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() diff --git a/api/tests/unit_tests/configs/test_dify_config.py b/api/tests/unit_tests/configs/test_dify_config.py index 57dbf453de..919ebbc656 100644 --- a/api/tests/unit_tests/configs/test_dify_config.py +++ b/api/tests/unit_tests/configs/test_dify_config.py @@ -8,6 +8,47 @@ from yarl import URL from configs.app_config import DifyConfig +def _set_basic_config_env(monkeypatch: pytest.MonkeyPatch) -> None: + os.environ.clear() + monkeypatch.setenv("CONSOLE_API_URL", "https://example.com") + monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com") + monkeypatch.setenv("DB_TYPE", "postgresql") + monkeypatch.setenv("DB_USERNAME", "postgres") + monkeypatch.setenv("DB_PASSWORD", "postgres") + monkeypatch.setenv("DB_HOST", "localhost") + monkeypatch.setenv("DB_PORT", "5432") + monkeypatch.setenv("DB_DATABASE", "dify") + + +def test_dify_config_keeps_secret_key_empty_when_missing( + monkeypatch: pytest.MonkeyPatch, + tmp_path, +) -> None: + _set_basic_config_env(monkeypatch) + monkeypatch.delenv("SECRET_KEY", raising=False) + monkeypatch.setenv("OPENDAL_FS_ROOT", str(tmp_path)) + + config = DifyConfig(_env_file=None) + + assert config.SECRET_KEY == "" + assert not hasattr(config, "OPENDAL_FS_ROOT") + assert not (tmp_path / ".dify_secret_key").exists() + + +def 
test_dify_config_preserves_explicit_secret_key( + monkeypatch: pytest.MonkeyPatch, + tmp_path, +) -> None: + _set_basic_config_env(monkeypatch) + monkeypatch.setenv("SECRET_KEY", "explicit") + monkeypatch.setenv("OPENDAL_FS_ROOT", str(tmp_path)) + + config = DifyConfig(_env_file=None) + + assert config.SECRET_KEY == "explicit" + assert not (tmp_path / ".dify_secret_key").exists() + + def test_dify_config(monkeypatch: pytest.MonkeyPatch): # clear system environment variables os.environ.clear() diff --git a/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py index 4f39d38831..cee7d46083 100644 --- a/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py +++ b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py @@ -34,20 +34,6 @@ class TestDatasourceFileManager: assert f"nonce={mock_urandom.return_value.hex()}" in signed_url assert "sign=" in signed_url - @patch("core.datasource.datasource_file_manager.time.time") - @patch("core.datasource.datasource_file_manager.os.urandom") - @patch("core.datasource.datasource_file_manager.dify_config") - def test_sign_file_empty_secret(self, mock_config, mock_urandom, mock_time): - # Setup - mock_config.FILES_URL = "http://localhost:5001" - mock_config.SECRET_KEY = None # Empty secret - mock_time.return_value = 1700000000 - mock_urandom.return_value = b"1234567890abcdef" - - # Execute - signed_url = DatasourceFileManager.sign_file("file_id", ".png") - assert "sign=" in signed_url - @patch("core.datasource.datasource_file_manager.time.time") @patch("core.datasource.datasource_file_manager.dify_config") def test_verify_file(self, mock_config, mock_time): @@ -76,25 +62,6 @@ class TestDatasourceFileManager: mock_time.return_value = 1700000500 # 700 seconds after timestamp (300 is timeout) assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is False - @patch("core.datasource.datasource_file_manager.time.time") - @patch("core.datasource.datasource_file_manager.dify_config") - def test_verify_file_empty_secret(self, mock_config, mock_time): - # Setup - mock_config.SECRET_KEY = "" # Empty string secret - mock_config.FILES_ACCESS_TIMEOUT = 300 - mock_time.return_value = 1700000000 - - datasource_file_id = "file_id_123" - timestamp = "1699999800" - nonce = "some_nonce" - - # Calculate with empty secret - data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" - sign = hmac.new(b"", data_to_sign.encode(), hashlib.sha256).digest() - encoded_sign = base64.urlsafe_b64encode(sign).decode() - - assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is True - @patch("core.datasource.datasource_file_manager.db") @patch("core.datasource.datasource_file_manager.storage") @patch("core.datasource.datasource_file_manager.uuid4") diff --git a/api/tests/unit_tests/extensions/test_set_secretkey.py b/api/tests/unit_tests/extensions/test_set_secretkey.py new file mode 100644 index 0000000000..8a8e4e2b19 --- /dev/null +++ b/api/tests/unit_tests/extensions/test_set_secretkey.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import pytest +from flask import Flask + +from extensions import ext_set_secretkey + + +class InMemoryStorage: + def __init__(self, files: dict[str, bytes] | None = None) -> None: + self.files = files or {} + self.saved_files: list[tuple[str, bytes]] = [] + + def load_once(self, filename: str) -> bytes: + try: + return self.files[filename] + 
except KeyError: + raise FileNotFoundError(filename) + + def save(self, filename: str, data: bytes) -> None: + self.files[filename] = data + self.saved_files.append((filename, data)) + + +def test_init_app_uses_configured_secret_key(monkeypatch: pytest.MonkeyPatch) -> None: + secret_key = "configured-secret-key" + storage = InMemoryStorage() + monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", secret_key) + monkeypatch.setattr("configs.secret_key.storage", storage) + app = Flask(__name__) + app.config["SECRET_KEY"] = secret_key + + ext_set_secretkey.init_app(app) + + assert app.secret_key == secret_key + assert app.config["SECRET_KEY"] == secret_key + assert storage.saved_files == [] + + +def test_init_app_generates_and_persists_secret_key_when_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + storage = InMemoryStorage() + monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", "") + monkeypatch.setattr("configs.secret_key.storage", storage) + app = Flask(__name__) + app.config["SECRET_KEY"] = "" + + ext_set_secretkey.init_app(app) + + persisted_key = storage.files[".dify_secret_key"].decode("utf-8").strip() + assert persisted_key + assert storage.saved_files == [(".dify_secret_key", f"{persisted_key}\n".encode())] + assert persisted_key == ext_set_secretkey.dify_config.SECRET_KEY + assert persisted_key == app.config["SECRET_KEY"] + assert persisted_key == app.secret_key + + +def test_init_app_reuses_persisted_secret_key_when_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + persisted_key = "persisted-secret-key" + storage = InMemoryStorage({".dify_secret_key": f"{persisted_key}\n".encode()}) + monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", "") + monkeypatch.setattr("configs.secret_key.storage", storage) + app = Flask(__name__) + app.config["SECRET_KEY"] = "" + + ext_set_secretkey.init_app(app) + + assert persisted_key == ext_set_secretkey.dify_config.SECRET_KEY + assert persisted_key == app.config["SECRET_KEY"] + assert persisted_key == app.secret_key + assert storage.saved_files == [] diff --git a/api/tests/unit_tests/libs/test_passport.py b/api/tests/unit_tests/libs/test_passport.py index f33484c18d..90b58ae548 100644 --- a/api/tests/unit_tests/libs/test_passport.py +++ b/api/tests/unit_tests/libs/test_passport.py @@ -143,28 +143,13 @@ class TestPassportService: assert str(exc_info.value) == "401 Unauthorized: Token has expired." 
# Configuration tests - def test_should_handle_empty_secret_key(self): - """Test behavior when SECRET_KEY is empty""" + def test_should_use_configured_secret_key_without_policy_validation(self): + """Test that policy decisions are owned by config, not PassportService.""" with patch("libs.passport.dify_config") as mock_config: - mock_config.SECRET_KEY = "" + mock_config.SECRET_KEY = "configured" service = PassportService() - # Empty secret key should still work but is insecure - payload = {"test": "data"} - token = service.issue(payload) - decoded = service.verify(token) - assert decoded == payload - - def test_should_handle_none_secret_key(self): - """Test behavior when SECRET_KEY is None""" - with patch("libs.passport.dify_config") as mock_config: - mock_config.SECRET_KEY = None - service = PassportService() - - payload = {"test": "data"} - # JWT library will raise TypeError when secret is None - with pytest.raises((TypeError, jwt.exceptions.InvalidKeyError)): - service.issue(payload) + assert service.sk == "configured" # Boundary condition tests def test_should_handle_large_payload(self, passport_service): diff --git a/docker/.env.example b/docker/.env.example index 5a012973c0..c708a40c15 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -28,7 +28,8 @@ LANG=C.UTF-8 LC_ALL=C.UTF-8 PYTHONIOENCODING=utf-8 UV_CACHE_DIR=/tmp/.uv-cache -SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U +# Leave empty to auto-generate a persistent key in the storage directory. +SECRET_KEY= INIT_PASSWORD= DEPLOY_ENV=PRODUCTION CHECK_UPDATE_URL=https://updates.dify.ai diff --git a/docker/README.md b/docker/README.md index a2d9b2eeba..26b1dac9ac 100644 --- a/docker/README.md +++ b/docker/README.md @@ -87,7 +87,7 @@ The root `.env.example` file contains the essential startup settings. Optional a 1. **Server Configuration**: - `LOG_LEVEL`, `DEBUG`, `FLASK_DEBUG`: Logging and debug settings. - - `SECRET_KEY`: A key for encrypting session cookies and other sensitive data. + - `SECRET_KEY`: A key for signing sessions, JWTs, and file URLs. Leave it empty to let Dify generate a persistent key in the storage directory, or set a unique value yourself. 1. **Database Configuration**: diff --git a/docker/envs/security.env.example b/docker/envs/security.env.example index 787aef2706..d7556d91e5 100644 --- a/docker/envs/security.env.example +++ b/docker/envs/security.env.example @@ -36,5 +36,6 @@ TIDB_PUBLIC_KEY=dify TIDB_PRIVATE_KEY=dify VIKINGDB_ACCESS_KEY=your-ak VIKINGDB_SECRET_KEY=your-sk -SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U +# Leave empty to auto-generate a persistent key in the storage directory. +SECRET_KEY= INIT_PASSWORD= From 9424bf60b0daf564179446c0b5b189bf8aaf5cc8 Mon Sep 17 00:00:00 2001 From: orbisai0security Date: Tue, 12 May 2026 11:13:37 +0530 Subject: [PATCH 5/8] fix: the /threads and /db-pool-stat endpoints in api... in... 
(#35665) --- api/extensions/ext_app_metrics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/extensions/ext_app_metrics.py b/api/extensions/ext_app_metrics.py index 4a6490b9f0..914baaadaf 100644 --- a/api/extensions/ext_app_metrics.py +++ b/api/extensions/ext_app_metrics.py @@ -5,6 +5,7 @@ import threading from flask import Response from configs import dify_config +from controllers.console.admin import admin_required from dify_app import DifyApp @@ -25,6 +26,7 @@ def init_app(app: DifyApp): ) @app.route("/threads") + @admin_required def threads(): # pyright: ignore[reportUnusedFunction] num_threads = threading.active_count() threads = threading.enumerate() @@ -50,6 +52,7 @@ def init_app(app: DifyApp): } @app.route("/db-pool-stat") + @admin_required def pool_stat(): # pyright: ignore[reportUnusedFunction] from extensions.ext_database import db From bb73776339b1af45be0d1f1a4d39ff5ee916a7e4 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Tue, 12 May 2026 14:56:16 +0800 Subject: [PATCH 6/8] chore(release): bump version to 1.14.1 (#36034) --- api/pyproject.toml | 2 +- api/uv.lock | 2 +- docker/docker-compose-template.yaml | 10 +++++----- docker/docker-compose.yaml | 10 +++++----- web/package.json | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index 604d01594e..40834b806f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dify-api" -version = "1.14.0" +version = "1.14.1" requires-python = "~=3.12.0" dependencies = [ diff --git a/api/uv.lock b/api/uv.lock index 6861abdbdc..634dcc74b8 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1292,7 +1292,7 @@ wheels = [ [[package]] name = "dify-api" -version = "1.14.0" +version = "1.14.1" source = { virtual = "." } dependencies = [ { name = "aliyun-log-python-sdk" }, diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 72c9d4fd90..d9e2fc5bc9 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -220,7 +220,7 @@ services: # API service api: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: api SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -264,7 +264,7 @@ services: # WebSocket service for workflow collaboration. api_websocket: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 profiles: - collaboration environment: @@ -290,7 +290,7 @@ services: # The Celery worker for processing all queues (dataset, workflow, mail, etc.) worker: <<: *shared-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: worker SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -333,7 +333,7 @@ services: # Celery beat for scheduling periodic tasks. worker_beat: <<: *shared-worker-beat-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: beat depends_on: @@ -366,7 +366,7 @@ services: # Frontend web application. 
web: - image: langgenius/dify-web:1.14.0 + image: langgenius/dify-web:1.14.1 restart: always env_file: - path: ./envs/core-services/web.env diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index c1d75e01f4..004140abfb 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -226,7 +226,7 @@ services: # API service api: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: api SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -270,7 +270,7 @@ services: # WebSocket service for workflow collaboration. api_websocket: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 profiles: - collaboration environment: @@ -296,7 +296,7 @@ services: # The Celery worker for processing all queues (dataset, workflow, mail, etc.) worker: <<: *shared-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: worker SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -339,7 +339,7 @@ services: # Celery beat for scheduling periodic tasks. worker_beat: <<: *shared-worker-beat-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: beat depends_on: @@ -372,7 +372,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:1.14.0 + image: langgenius/dify-web:1.14.1 restart: always env_file: - path: ./envs/core-services/web.env diff --git a/web/package.json b/web/package.json index df1ceed01f..be373a1e68 100644 --- a/web/package.json +++ b/web/package.json @@ -1,7 +1,7 @@ { "name": "dify-web", "type": "module", - "version": "1.14.0", + "version": "1.14.1", "private": true, "imports": { "#i18n": { From 51a8f79d67de30f77f34d22105039aeb5526811f Mon Sep 17 00:00:00 2001 From: Asuka Minato Date: Tue, 12 May 2026 16:02:17 +0900 Subject: [PATCH 7/8] chore: DocumentSegment to Typebase (#35635) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../rag/datasource/keyword/jieba/jieba.py | 2 + api/core/rag/datasource/retrieval_service.py | 6 +- api/core/rag/docstore/dataset_docstore.py | 3 +- .../rag/index_processor/index_processor.py | 2 +- api/models/dataset.py | 58 ++++++++++--------- api/services/vector_service.py | 2 + api/tasks/batch_clean_document_task.py | 2 +- .../batch_create_segment_to_index_task.py | 3 +- api/tasks/clean_document_task.py | 2 +- api/tasks/clean_notion_document_task.py | 2 +- api/tasks/disable_segment_from_index_task.py | 4 +- api/tasks/disable_segments_from_index_task.py | 2 +- api/tasks/document_indexing_sync_task.py | 2 +- api/tasks/document_indexing_update_task.py | 2 +- api/tasks/duplicate_document_indexing_task.py | 2 +- api/tasks/remove_document_from_index_task.py | 2 +- api/tasks/retry_document_indexing_task.py | 2 +- .../sync_website_document_indexing_task.py | 2 +- .../test_dataset_service_get_segments.py | 30 +++++----- .../tasks/test_add_document_to_index_task.py | 6 -- .../tasks/test_batch_clean_document_task.py | 6 +- .../tasks/test_clean_dataset_task.py | 13 ----- .../tasks/test_clean_notion_document_task.py | 28 +++------ .../test_deal_dataset_vector_index_task.py | 25 +------- .../test_delete_segment_from_index_task.py | 39 ++++++------- .../test_disable_segment_from_index_task.py | 2 +- .../test_disable_segments_from_index_task.py | 49 ++++++++-------- .../test_duplicate_document_indexing_task.py | 1 - .../test_enable_segments_to_index_task.py | 1 - .../services/test_dataset_service_segment.py | 6 +- 30 files changed, 132 
insertions(+), 174 deletions(-) diff --git a/api/core/rag/datasource/keyword/jieba/jieba.py b/api/core/rag/datasource/keyword/jieba/jieba.py index 392af351b6..b3f174bf78 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba.py +++ b/api/core/rag/datasource/keyword/jieba/jieba.py @@ -245,6 +245,7 @@ class Jieba(BaseKeyword): segment = pre_segment_data["segment"] if pre_segment_data["keywords"]: segment.keywords = pre_segment_data["keywords"] + assert segment.index_node_id keyword_table = self._add_text_to_keyword_table( keyword_table or {}, segment.index_node_id, pre_segment_data["keywords"] ) @@ -253,6 +254,7 @@ class Jieba(BaseKeyword): keywords = keyword_table_handler.extract_keywords(segment.content, keyword_number) segment.keywords = list(keywords) + assert segment.index_node_id keyword_table = self._add_text_to_keyword_table( keyword_table or {}, segment.index_node_id, list(keywords) ) diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 7769878e70..8cc2be8feb 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -1,5 +1,6 @@ import concurrent.futures import logging +from collections.abc import Sequence from concurrent.futures import ThreadPoolExecutor from typing import Any, NotRequired, TypedDict @@ -526,7 +527,7 @@ class RetrievalService: index_node_ids = [i for i in index_node_ids if i] segment_ids: list[str] = [] - index_node_segments: list[DocumentSegment] = [] + index_node_segments: Sequence[DocumentSegment] = [] segments: list[DocumentSegment] = [] attachment_map: dict[str, list[AttachmentInfoDict]] = {} child_chunk_map: dict[str, list[ChildChunk]] = {} @@ -568,8 +569,9 @@ class RetrievalService: DocumentSegment.status == "completed", DocumentSegment.index_node_id.in_(index_node_ids), ) - index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore + index_node_segments = session.execute(document_segment_stmt).scalars().all() for index_node_segment in index_node_segments: + assert index_node_segment.index_node_id doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id] if segment_ids: diff --git a/api/core/rag/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py index 78305a6ac0..c7d52d74cb 100644 --- a/api/core/rag/docstore/dataset_docstore.py +++ b/api/core/rag/docstore/dataset_docstore.py @@ -50,6 +50,7 @@ class DatasetDocumentStore: output = {} for document_segment in document_segments: + assert document_segment.index_node_id doc_id = document_segment.index_node_id output[doc_id] = Document( page_content=document_segment.content, @@ -103,7 +104,7 @@ class DatasetDocumentStore: if not segment_document: max_position += 1 - + assert self._document_id segment_document = DocumentSegment( tenant_id=self._dataset.tenant_id, dataset_id=self._dataset.id, diff --git a/api/core/rag/index_processor/index_processor.py b/api/core/rag/index_processor/index_processor.py index aded5315bd..757134e734 100644 --- a/api/core/rag/index_processor/index_processor.py +++ b/api/core/rag/index_processor/index_processor.py @@ -84,7 +84,7 @@ class IndexProcessor: select(DocumentSegment).where(DocumentSegment.document_id == original_document_id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] indexing_start_at = time.perf_counter() # delete from vector index diff --git 
a/api/models/dataset.py b/api/models/dataset.py index 65ea39969c..8137ed4ff3 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -8,7 +8,6 @@ import os import pickle import re import time -from collections.abc import Sequence from datetime import datetime from json import JSONDecodeError from typing import Any, ClassVar, TypedDict, cast @@ -831,7 +830,7 @@ class Document(Base): ) -class DocumentSegment(Base): +class DocumentSegment(TypeBase): __tablename__ = "document_segments" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="document_segment_pkey"), @@ -844,35 +843,40 @@ class DocumentSegment(Base): ) # initial fields - id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) - tenant_id = mapped_column(StringUUID, nullable=False) - dataset_id = mapped_column(StringUUID, nullable=False) - document_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, nullable=False, default_factory=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + document_id: Mapped[str] = mapped_column(StringUUID, nullable=False) position: Mapped[int] - content = mapped_column(LongText, nullable=False) - answer = mapped_column(LongText, nullable=True) + content: Mapped[str] = mapped_column(LongText, nullable=False) word_count: Mapped[int] tokens: Mapped[int] - # indexing fields - keywords = mapped_column(sa.JSON, nullable=True) - index_node_id = mapped_column(String(255), nullable=True) - index_node_hash = mapped_column(String(255), nullable=True) - + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) # basic fields + # indexing fields + index_node_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None) + index_node_hash: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None) + enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"), default=True) + answer: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) + keywords: Mapped[Any] = mapped_column(sa.JSON, nullable=True, default=None) + disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + disabled_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) + status: Mapped[SegmentStatus] = mapped_column( + EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'"), default=SegmentStatus.WAITING + ) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + error: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) + stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) hit_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) - enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) - disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - disabled_by = 
mapped_column(StringUUID, nullable=True) - status: Mapped[str] = mapped_column(EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'")) - created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_by = mapped_column(StringUUID, nullable=True) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - error = mapped_column(LongText, nullable=True) - stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) @property def dataset(self): @@ -899,7 +903,7 @@ class DocumentSegment(Base): ) @property - def child_chunks(self) -> Sequence[Any]: + def child_chunks(self): if not self.document: return [] process_rule = self.document.dataset_process_rule @@ -914,7 +918,7 @@ class DocumentSegment(Base): return child_chunks or [] return [] - def get_child_chunks(self) -> Sequence[Any]: + def get_child_chunks(self): if not self.document: return [] process_rule = self.document.dataset_process_rule diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 7e689af35d..49c3b85831 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -111,6 +111,7 @@ class VectorService: "dataset_id": segment.dataset_id, }, ) + assert segment.index_node_id if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY: # update vector index vector = Vector(dataset=dataset) @@ -138,6 +139,7 @@ class VectorService: regenerate: bool = False, ): index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor() + assert segment.index_node_id if regenerate: # delete child chunks index_processor.clean(dataset, [segment.index_node_id], with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index 56c371fcc1..5794726716 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -50,7 +50,7 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] segment_ids = [segment.id for segment in segments] # Collect image file IDs from segment content diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index beb23d8354..9f19b03544 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -19,6 +19,7 @@ from graphon.model_runtime.entities.model_entities import ModelType from libs import helper from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment +from models.enums import SegmentStatus from models.model import UploadFile from services.vector_service import VectorService @@ -156,7 +157,7 @@ def batch_create_segment_to_index_task( tokens=tokens, created_by=user_id, indexing_at=naive_utc_now(), - status="completed", + status=SegmentStatus.COMPLETED, completed_at=naive_utc_now(), ) if document_config["doc_form"] == IndexStructureType.QA_INDEX: diff --git a/api/tasks/clean_document_task.py 
b/api/tasks/clean_document_task.py index c8d0e31c06..869e2b3028 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -53,7 +53,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i binding_ids = [binding.id for binding, _ in attachments_with_bindings] total_attachment_files.extend([attachment_file.key for _, attachment_file in attachments_with_bindings]) - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] segment_contents = [segment.content for segment in segments] except Exception: logger.exception("Cleaned document when document deleted failed") diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index 017d60efac..782d7d0226 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -38,7 +38,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str): for document_id in document_ids: segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - total_index_node_ids.extend([segment.index_node_id for segment in segments]) + total_index_node_ids.extend([segment.index_node_id for segment in segments if segment.index_node_id]) # Wrap vector / keyword index cleanup in try/except so that a transient # failure here (e.g. billing API hiccup propagated via FeatureService when diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py index dd1a40844b..d00e143093 100644 --- a/api/tasks/disable_segment_from_index_task.py +++ b/api/tasks/disable_segment_from_index_task.py @@ -9,6 +9,7 @@ from core.db.session_factory import session_factory from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_redis import redis_client from models.dataset import DocumentSegment +from models.enums import SegmentStatus logger = logging.getLogger(__name__) @@ -30,7 +31,7 @@ def disable_segment_from_index_task(segment_id: str): logger.info(click.style(f"Segment not found: {segment_id}", fg="red")) return - if segment.status != "completed": + if segment.status != SegmentStatus.COMPLETED: logger.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red")) return @@ -59,6 +60,7 @@ def disable_segment_from_index_task(segment_id: str): index_type = dataset_document.doc_form index_processor = IndexProcessorFactory(index_type).init_index_processor() + assert segment.index_node_id index_processor.clean(dataset, [segment.index_node_id]) # Disable summary index for this segment diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index 86e96ea3f0..cd91ddd074 100644 --- a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -55,7 +55,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen return try: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] if dataset.is_multimodal: segment_ids = [segment.id for segment in segments] segment_attachment_bindings = session.scalars( diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index 90c80be3a1..842e7dcdb2 100644 --- a/api/tasks/document_indexing_sync_task.py 
+++ b/api/tasks/document_indexing_sync_task.py @@ -69,7 +69,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): index_type = document.doc_form segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # Get credentials from datasource provider datasource_provider_service = DatasourceProviderService() diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py index 15f0e0162b..39564bbede 100644 --- a/api/tasks/document_indexing_update_task.py +++ b/api/tasks/document_indexing_update_task.py @@ -45,7 +45,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str): index_type = document.doc_form segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] clean_success = False try: diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index 6bc58bdf9c..71f367c5e7 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -137,7 +137,7 @@ def _duplicate_document_indexing_task(dataset_id: str, document_ids: Sequence[st select(DocumentSegment).where(DocumentSegment.document_id == document.id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index 74e8a012cf..2314d32232 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -61,7 +61,7 @@ def remove_document_from_index_task(document_id: str): except Exception as e: logger.warning("Failed to disable summaries for document %s: %s", document.id, str(e)) - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] if index_node_ids: try: index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py index 7cc28d5226..0df5896ce3 100644 --- a/api/tasks/retry_document_indexing_task.py +++ b/api/tasks/retry_document_indexing_task.py @@ -85,7 +85,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_ select(DocumentSegment).where(DocumentSegment.document_id == document_id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/sync_website_document_indexing_task.py b/api/tasks/sync_website_document_indexing_task.py index ab21f63f7e..06eb460311 100644 --- a/api/tasks/sync_website_document_indexing_task.py +++ 
b/api/tasks/sync_website_document_indexing_task.py @@ -70,7 +70,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py b/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py index 87239b2cb3..bd8f5371b8 100644 --- a/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py +++ b/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py @@ -13,9 +13,9 @@ from uuid import uuid4 from sqlalchemy.orm import Session from core.rag.index_processor.constant.index_type import IndexTechniqueType -from models import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole, TenantStatus from models.dataset import Dataset, DatasetPermissionEnum, Document, DocumentSegment -from models.enums import DataSourceType, DocumentCreatedFrom +from models.enums import DataSourceType, DocumentCreatedFrom, SegmentStatus from services.dataset_service import SegmentService @@ -35,13 +35,13 @@ class SegmentServiceTestDataFactory: email=f"{uuid4()}@example.com", name=f"user-{uuid4()}", interface_language="en-US", - status="active", + status=AccountStatus.ACTIVE, ) db_session_with_containers.add(account) db_session_with_containers.commit() if tenant is None: - tenant = Tenant(name=f"tenant-{uuid4()}", status="normal") + tenant = Tenant(name=f"tenant-{uuid4()}", status=TenantStatus.NORMAL) db_session_with_containers.add(tenant) db_session_with_containers.commit() @@ -103,7 +103,7 @@ class SegmentServiceTestDataFactory: created_by: str, position: int = 1, content: str = "Test content", - status: str = "completed", + status: SegmentStatus = SegmentStatus.COMPLETED, word_count: int = 10, tokens: int = 15, ) -> DocumentSegment: @@ -203,7 +203,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -212,7 +212,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -221,7 +221,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=3, - status="waiting", + status=SegmentStatus.WAITING, ) # Act @@ -257,7 +257,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -266,7 +266,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, ) # Act @@ -415,7 +415,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - 
status="completed", + status=SegmentStatus.COMPLETED, content="This is important information", ) SegmentServiceTestDataFactory.create_segment( @@ -425,7 +425,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, content="This is also important", ) SegmentServiceTestDataFactory.create_segment( @@ -435,7 +435,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=3, - status="completed", + status=SegmentStatus.COMPLETED, content="This is irrelevant", ) @@ -477,7 +477,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -486,7 +486,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="waiting", + status=SegmentStatus.WAITING, ) # Act diff --git a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py index fcc15aad42..94fd7602f5 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py @@ -128,7 +128,6 @@ class TestAddDocumentToIndexTask: for i in range(3): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -451,7 +450,6 @@ class TestAddDocumentToIndexTask: segments = [] for i in range(3): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -630,7 +628,6 @@ class TestAddDocumentToIndexTask: # Segment 1: Should be processed (enabled=False, status=SegmentStatus.COMPLETED) segment1 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -650,7 +647,6 @@ class TestAddDocumentToIndexTask: # Segment 2: Should be processed (enabled=True, status=SegmentStatus.COMPLETED) # Note: Implementation doesn't filter by enabled status, only by status=SegmentStatus.COMPLETED segment2 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -669,7 +665,6 @@ class TestAddDocumentToIndexTask: # Segment 3: Should NOT be processed (enabled=False, status="processing") segment3 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -688,7 +683,6 @@ class TestAddDocumentToIndexTask: # Segment 4: Should be processed (enabled=False, status=SegmentStatus.COMPLETED) segment4 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py index e29ca7ebab..436c8f11b0 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py @@ -177,7 +177,6 @@ class TestBatchCleanDocumentTask: fake = Faker() segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, 
document_id=document.id, @@ -290,10 +289,9 @@ class TestBatchCleanDocumentTask: account = self._create_test_account(db_session_with_containers) dataset = self._create_test_dataset(db_session_with_containers, account) document = self._create_test_document(db_session_with_containers, dataset, account) - + assert account.current_tenant # Create segment with simple content (no image references) segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, document_id=document.id, @@ -692,9 +690,9 @@ class TestBatchCleanDocumentTask: # Create multiple segments for the document segments = [] + assert account.current_tenant for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py index 32bc2fc0bd..a31552a09e 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py @@ -220,7 +220,6 @@ class TestCleanDatasetTask: DocumentSegment: Created document segment instance """ segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -232,8 +231,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash", - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) @@ -614,7 +611,6 @@ class TestCleanDatasetTask: """ segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -626,8 +622,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash", - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) @@ -729,8 +723,6 @@ class TestCleanDatasetTask: type=DatasetMetadataType.STRING, created_by=account.id, ) - metadata.id = str(uuid.uuid4()) - metadata.created_at = datetime.now() metadata_items.append(metadata) # Create binding for each metadata item @@ -741,8 +733,6 @@ class TestCleanDatasetTask: document_id=documents[i % len(documents)].id, created_by=account.id, ) - binding.id = str(uuid.uuid4()) - binding.created_at = datetime.now() bindings.append(binding) db_session_with_containers.add_all(metadata_items) @@ -946,7 +936,6 @@ class TestCleanDatasetTask: long_content = "Very long content " * 100 # Long content within reasonable limits segment_content = f"Segment with special chars: {special_content}\n{long_content}" segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -958,8 +947,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash_" + "x" * 50, # Long hash within limits - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py index 1c8d5969e0..ef65b90508 100644 --- 
a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py @@ -132,11 +132,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() document_ids.append(document.id) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -297,10 +296,9 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create test segment segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -379,12 +377,11 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create segments without index_node_ids segments = [] for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -468,11 +465,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -569,10 +565,9 @@ class TestCleanNotionDocumentTask: segment_statuses = [SegmentStatus.WAITING, SegmentStatus.INDEXING, SegmentStatus.COMPLETED, SegmentStatus.ERROR] segments = [] index_node_ids = [] - + assert tenant for i, status in enumerate(segment_statuses): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -665,10 +660,9 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create segment segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -765,12 +759,11 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create multiple segments for each document num_segments_per_doc = 5 for j in range(num_segments_per_doc): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -875,7 +868,6 @@ class TestCleanNotionDocumentTask: # Create segments for each document for j in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -984,11 +976,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -1093,10 +1084,9 @@ class TestCleanNotionDocumentTask: # Create segments with metadata segments = [] index_node_ids = [] - + assert tenant for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, diff --git 
a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py index e4cbb9e589..aba2458d55 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py @@ -90,7 +90,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -150,7 +149,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -202,7 +200,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -253,7 +250,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset with parent-child index dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -305,7 +301,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -371,7 +366,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without documents dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -403,7 +397,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -461,7 +454,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without documents dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -494,7 +486,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -546,7 +537,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -592,7 +582,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset with custom index type dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -624,7 +613,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -670,7 +658,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without doc_form (should use default) dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -702,7 +689,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -748,7 +734,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -806,7 +791,6 @@ class 
TestDealDatasetVectorIndexTask: for i, document in enumerate(documents): for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -832,6 +816,7 @@ class TestDealDatasetVectorIndexTask: updated_document = db_session_with_containers.scalar( select(Document).where(Document.id == document.id).limit(1) ) + assert updated_document assert updated_document.indexing_status == IndexingStatus.COMPLETED # Verify index processor load was called multiple times @@ -853,7 +838,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -905,7 +889,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -952,7 +935,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1024,7 +1006,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for enabled document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=enabled_document.id, @@ -1075,7 +1056,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1147,7 +1127,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for active document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=active_document.id, @@ -1198,7 +1177,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1270,7 +1248,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for completed document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=completed_document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py index f4a71040c1..a7edf4f77a 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py @@ -209,26 +209,25 @@ class TestDeleteSegmentFromIndexTask: segments = [] for i in range(count): - segment = DocumentSegment() - segment.id = fake.uuid4() - segment.tenant_id = document.tenant_id - segment.dataset_id = document.dataset_id - segment.document_id = document.id - segment.position = i + 1 - segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}" - segment.answer = f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}" - segment.word_count = fake.random_int(min=10, max=100) - segment.tokens = fake.random_int(min=5, max=50) - segment.keywords = [fake.word() for _ in range(3)] - segment.index_node_id = f"node_{fake.uuid4()}" - segment.index_node_hash = fake.sha256() - segment.hit_count = 0 - segment.enabled = True - segment.status = SegmentStatus.COMPLETED - segment.created_by = account.id - segment.created_at = fake.date_time_this_year() - 
segment.updated_by = account.id - segment.updated_at = segment.created_at + created_at = fake.date_time_this_year() + segment = DocumentSegment( + tenant_id=document.tenant_id, + dataset_id=document.dataset_id, + document_id=document.id, + position=i + 1, + content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}", + answer=f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}", + word_count=fake.random_int(min=10, max=100), + tokens=fake.random_int(min=5, max=50), + keywords=[fake.word() for _ in range(3)], + index_node_id=f"node_{fake.uuid4()}", + index_node_hash=fake.sha256(), + hit_count=0, + enabled=True, + status=SegmentStatus.COMPLETED, + created_by=account.id, + updated_by=account.id, + ) db_session_with_containers.add(segment) segments.append(segment) diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py index 5bdf7d1389..34e2ce4e80 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py @@ -159,7 +159,7 @@ class TestDisableSegmentFromIndexTask: dataset: Dataset, tenant: Tenant, account: Account, - status: str = "completed", + status: SegmentStatus = SegmentStatus.COMPLETED, enabled: bool = True, ) -> DocumentSegment: """ diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py index 6a95bfc425..cb5fb5483c 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py @@ -185,30 +185,31 @@ class TestDisableSegmentsFromIndexTask: segments = [] for i in range(count): - segment = DocumentSegment() - segment.id = fake.uuid4() - segment.tenant_id = dataset.tenant_id - segment.dataset_id = dataset.id - segment.document_id = document.id - segment.position = i + 1 - segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}" - segment.answer = f"Test answer {i + 1}" if i % 2 == 0 else None - segment.word_count = fake.random_int(min=10, max=100) - segment.tokens = fake.random_int(min=5, max=50) - segment.keywords = [fake.word() for _ in range(3)] - segment.index_node_id = f"node_{segment.id}" - segment.index_node_hash = fake.sha256() - segment.hit_count = 0 - segment.enabled = True - segment.disabled_at = None - segment.disabled_by = None - segment.status = SegmentStatus.COMPLETED - segment.created_by = account.id - segment.updated_by = account.id - segment.indexing_at = fake.date_time_this_year() - segment.completed_at = fake.date_time_this_year() - segment.error = None - segment.stopped_at = None + id = fake.uuid4() + segment = DocumentSegment( + tenant_id=dataset.tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=i + 1, + content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}", + answer=f"Test answer {i + 1}" if i % 2 == 0 else None, + word_count=fake.random_int(min=10, max=100), + tokens=fake.random_int(min=5, max=50), + keywords=[fake.word() for _ in range(3)], + index_node_id=f"node_{id}", + index_node_hash=fake.sha256(), + hit_count=0, + enabled=True, + disabled_at=None, + disabled_by=None, + status=SegmentStatus.COMPLETED, + created_by=account.id, + 
updated_by=account.id, + indexing_at=fake.date_time_this_year(), + completed_at=fake.date_time_this_year(), + error=None, + stopped_at=None, + ) segments.append(segment) diff --git a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py index 12440f3e6b..e1c7e3e09a 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py @@ -175,7 +175,6 @@ class TestDuplicateDocumentIndexingTasks: for document in documents: for i in range(segments_per_doc): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=dataset.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py index e2f35067e3..6d3b90d41c 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py @@ -139,7 +139,6 @@ class TestEnableSegmentsToIndexTask: for i in range(count): text = fake.text(max_nb_chars=200) segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/unit_tests/services/test_dataset_service_segment.py b/api/tests/unit_tests/services/test_dataset_service_segment.py index 6330e53765..1f8586e32f 100644 --- a/api/tests/unit_tests/services/test_dataset_service_segment.py +++ b/api/tests/unit_tests/services/test_dataset_service_segment.py @@ -282,7 +282,6 @@ class TestSegmentServiceQueries: def test_get_segment_by_id_returns_only_document_segment_instances(self): segment = DocumentSegment( - id="segment-1", tenant_id="tenant-1", dataset_id="dataset-1", document_id="doc-1", @@ -292,7 +291,7 @@ class TestSegmentServiceQueries: tokens=2, created_by="user-1", ) - + segment.id = "segment-1" with patch("services.dataset_service.db") as mock_db: mock_db.session.scalar.return_value = segment result = SegmentService.get_segment_by_id("segment-1", "tenant-1") @@ -307,7 +306,6 @@ class TestSegmentServiceQueries: def test_get_segments_by_document_and_dataset_returns_scalars_result(self): segment = DocumentSegment( - id="segment-1", tenant_id="tenant-1", dataset_id="dataset-1", document_id="doc-1", @@ -318,6 +316,7 @@ class TestSegmentServiceQueries: created_by="user-1", ) + segment.id = "segment-1" with patch("services.dataset_service.db") as mock_db: mock_db.session.scalars.return_value.all.return_value = [segment] @@ -461,6 +460,7 @@ class TestSegmentServiceMutations: vector_service.create_segments_vector.side_effect = RuntimeError("vector failed") result = SegmentService.multi_create_segment(segments, document, dataset) + assert result assert len(result) == 2 assert [segment.position for segment in result] == [2, 3] From 7aa8f1a0b6595a0803fc898a46c42726dbc25192 Mon Sep 17 00:00:00 2001 From: twwu Date: Tue, 12 May 2026 16:26:15 +0800 Subject: [PATCH 8/8] fix: Fix frontend build error caused by merging main --- pnpm-lock.yaml | 21 +------ .../components/app/app-publisher/index.tsx | 3 +- .../tool-selector/index.tsx | 56 +++---------------- .../workflow/block-selector/tool-picker.tsx | 23 +++----- 4 files changed, 18 insertions(+), 85 deletions(-) diff --git 
index 53cae0b179..40a7e3e935 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1806,12 +1806,6 @@
   '@emnapi/wasi-threads@1.2.1':
     resolution: {integrity: sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==}

-  '@emnapi/runtime@1.9.2':
-    resolution: {integrity: sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==}
-
-  '@emnapi/wasi-threads@1.2.1':
-    resolution: {integrity: sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==}
-
   '@emoji-mart/data@1.2.1':
     resolution: {integrity: sha512-no2pQMWiBy6gpBEiqGeU77/bFejDqUTRY7KX+0+iur13op3bqUsXdnwoZs6Xb1zbv0gAj5VvS1PWoUUckSr5Dw==}
@@ -2770,9 +2764,6 @@
   '@oxc-project/types@0.128.0':
     resolution: {integrity: sha512-huv1Y/LzBJkBVHt3OlC7u0zHBW9qXf1FdD7sGmc1rXc2P1mTwHssYv7jyGx5KAACSCH+9B3Bhn6Z9luHRvf7pQ==}

-  '@oxc-project/types@0.127.0':
-    resolution: {integrity: sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ==}
-
   '@oxc-resolver/binding-android-arm-eabi@11.19.1':
     resolution: {integrity: sha512-aUs47y+xyXHUKlbhqHUjBABjvycq6YSD7bpxSW7vplUmdzAlJ93yXY6ZR0c1o1x5A/QKbENCvs3+NlY8IpIVzg==}
     cpu: [arm]
@@ -9086,16 +9077,6 @@ snapshots:
       tslib: 2.8.1
     optional: true

-  '@emnapi/runtime@1.9.2':
-    dependencies:
-      tslib: 2.8.1
-    optional: true
-
-  '@emnapi/wasi-threads@1.2.1':
-    dependencies:
-      tslib: 2.8.1
-    optional: true
-
   '@emoji-mart/data@1.2.1': {}

   '@es-joy/jsdoccomment@0.84.0':
@@ -16578,4 +16559,4 @@ time:
   vitest-canvas-mock@1.1.4: '2026-03-24T14:42:39.285Z'
   zod@4.4.3: '2026-05-04T07:06:40.819Z'
   zundo@2.3.0: '2024-11-17T16:35:11.372Z'
-  zustand@5.0.13: '2026-05-05T00:04:17.510Z'
+  zustand@5.0.13: '2026-05-05T00:04:17.510Z'
\ No newline at end of file
diff --git a/web/app/components/app/app-publisher/index.tsx b/web/app/components/app/app-publisher/index.tsx
index d437e1de09..96fe9d3e38 100644
--- a/web/app/components/app/app-publisher/index.tsx
+++ b/web/app/components/app/app-publisher/index.tsx
@@ -7,7 +7,6 @@ import type { PublishWorkflowParams } from '@/types/workflow'
 import { Button } from '@langgenius/dify-ui/button'
 import { Popover, PopoverContent, PopoverTrigger } from '@langgenius/dify-ui/popover'
 import { toast } from '@langgenius/dify-ui/toast'
-import { RiStoreLine } from '@remixicon/react'
 import { useSuspenseQuery } from '@tanstack/react-query'
 import { useKeyPress } from 'ahooks'
 import {
@@ -39,7 +38,7 @@ import { appDefaultIconBackground } from '@/config'
 import { useAsyncWindowOpen } from '@/hooks/use-async-window-open'
 import { useFormatTimeFromNow } from '@/hooks/use-format-time-from-now'
 import { AccessMode } from '@/models/access-control'
-import { useAppWhiteListSubjects, useGetUserCanAccessApp } from '@/service/access-control'
+import { useAppWhiteListSubjects, useGetUserCanAccessApp } from '@/service/access-control/use-app-access-control'
 import { fetchAppDetailDirect, publishToCreatorsPlatform } from '@/service/apps'
 import { fetchInstalledAppList } from '@/service/explore'
 import { systemFeaturesQueryOptions } from '@/service/system-features'
diff --git a/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx b/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx
index cda0daf49a..40166ff9ec 100644
--- a/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx
+++ b/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx
@@ -6,14 +6,13 @@ import type { Node } from 'reactflow'
 import type { ToolValue } from '@/app/components/workflow/block-selector/types'
 import type { NodeOutPutVar } from '@/app/components/workflow/types'
 import { cn } from '@langgenius/dify-ui/cn'
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from '@langgenius/dify-ui/popover'
 import * as React from 'react'
 import { useTranslation } from 'react-i18next'
-// eslint-disable-next-line no-restricted-imports -- legacy overlay migration is handled separately from this change
-import {
-  PortalToFollowElem,
-  PortalToFollowElemContent,
-  PortalToFollowElemTrigger,
-} from '@/app/components/base/portal-to-follow-elem'
 import { CollectionType } from '@/app/components/tools/types'
 import Link from '@/next/link'
 import {
@@ -134,9 +133,7 @@ const ToolSelector: FC<Props> = ({
   )
   return (
-    <PortalToFollowElem
@@ -186,43 +183,6 @@
       alignOffset={alignOffset}
       popupClassName="border-none bg-transparent shadow-none"
     >
-      {trigger}
-
-      {/* Default trigger - no value */}
-      {!trigger && !value?.provider_name && (
-      )}
-
-      {/* Default trigger - with value */}
-      {!trigger && value?.provider_name && (
-      )}
-
-
-
@@ ... @@ const ToolSelector: FC<Props> = ({
             onParamsFormChange={handleParamsFormChange}
           />
-      </PortalToFollowElemContent>
-    </PortalToFollowElem>
+      </PopoverContent>
+    </Popover>
   )
 }
diff --git a/web/app/components/workflow/block-selector/tool-picker.tsx b/web/app/components/workflow/block-selector/tool-picker.tsx
index 01ccf6242b..93ee1882db 100644
--- a/web/app/components/workflow/block-selector/tool-picker.tsx
+++ b/web/app/components/workflow/block-selector/tool-picker.tsx
@@ -6,18 +6,17 @@ import type { ToolDefaultValue, ToolValue } from './types'
 import type { CustomCollectionBackend } from '@/app/components/tools/types'
 import type { BlockEnum, OnSelectBlock } from '@/app/components/workflow/types'
 import { cn } from '@langgenius/dify-ui/cn'
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from '@langgenius/dify-ui/popover'
 import { toast } from '@langgenius/dify-ui/toast'
 import { useSuspenseQuery } from '@tanstack/react-query'
 import { useBoolean } from 'ahooks'
 import * as React from 'react'
 import { useMemo, useState } from 'react'
 import { useTranslation } from 'react-i18next'
-// eslint-disable-next-line no-restricted-imports -- legacy overlay migration is handled separately from this change
-import {
-  PortalToFollowElem,
-  PortalToFollowElemContent,
-  PortalToFollowElemTrigger,
-} from '@/app/components/base/portal-to-follow-elem'
 import SearchBox from '@/app/components/plugins/marketplace/search-box'
 import EditCustomToolModal from '@/app/components/tools/edit-custom-collection-modal'
 import AllTools from '@/app/components/workflow/block-selector/all-tools'
@@ -159,9 +158,7 @@ const ToolPicker: FC<Props> = ({
   }

   return (
-    <PortalToFollowElem
@@ -178,10 +175,6 @@
       alignOffset={alignOffset}
       popupClassName="border-none bg-transparent shadow-none"
     >
-      {trigger}
-
-
-
@@ ... @@ const ToolPicker: FC<Props> = ({
         }}
       />
-      </PortalToFollowElemContent>
-    </PortalToFollowElem>
+      </PopoverContent>
+    </Popover>
   )
 }