From 4bb987eca35314ddeda76bc8ceae91b77d2d6976 Mon Sep 17 00:00:00 2001 From: juyua9 Date: Tue, 12 May 2026 13:07:03 +0800 Subject: [PATCH 1/8] fix: validate missing text indexing technique (#35941) --- api/controllers/service_api/dataset/document.py | 2 +- .../controllers/service_api/dataset/test_document.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index cb48fe6715..e68eeeca25 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -136,7 +136,7 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[ if not dataset: raise ValueError("Dataset does not exist.") - if not dataset.indexing_technique and not args["indexing_technique"]: + if not dataset.indexing_technique and not args.get("indexing_technique"): raise ValueError("indexing_technique is required.") embedding_model_provider = payload.embedding_model_provider diff --git a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py index 230c51161f..738238d10a 100644 --- a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py +++ b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py @@ -1057,8 +1057,8 @@ class TestDocumentAddByTextApi: """Test error when both dataset and payload lack indexing_technique. When ``indexing_technique`` is ``None`` in the payload, ``model_dump(exclude_none=True)`` - omits the key. The production code accesses ``args["indexing_technique"]`` which raises - ``KeyError`` before the ``ValueError`` guard can fire. + omits the key. The service API should still raise the same validation error as other + document creation paths instead of leaking a ``KeyError`` from the dumped payload dict. 
""" # Arrange — neutralise billing decorators self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) @@ -1074,7 +1074,7 @@ class TestDocumentAddByTextApi: headers={"Authorization": "Bearer test_token"}, ): api = DocumentAddByTextApi() - with pytest.raises(KeyError): + with pytest.raises(ValueError, match="indexing_technique is required."): api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) From cd90d7ffc15fc1d4fa29c6b7a6870178d7da80d6 Mon Sep 17 00:00:00 2001 From: yyh <92089059+lyzno1@users.noreply.github.com> Date: Tue, 12 May 2026 13:34:19 +0800 Subject: [PATCH 2/8] refactor(web): migrate searchable pickers to combobox (#36066) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- eslint-suppressions.json | 5 - packages/dify-ui/AGENTS.md | 1 + .../__tests__/access-control.spec.tsx | 18 +- .../add-member-or-group-pop.spec.tsx | 32 +- .../add-member-or-group-pop.tsx | 336 +++--- .../__tests__/document-list.spec.tsx | 105 +- .../document-picker/__tests__/index.spec.tsx | 979 ++---------------- .../common/document-picker/document-list.tsx | 60 +- .../datasets/common/document-picker/index.tsx | 255 +++-- .../preview-document-picker.tsx | 27 +- .../detail/__tests__/document-title.spec.tsx | 87 +- .../documents/detail/__tests__/index.spec.tsx | 17 +- .../documents/detail/document-title.tsx | 32 +- .../datasets/documents/detail/index.tsx | 20 +- 14 files changed, 735 insertions(+), 1239 deletions(-) diff --git a/eslint-suppressions.json b/eslint-suppressions.json index 4adca38aa0..46277d3349 100644 --- a/eslint-suppressions.json +++ b/eslint-suppressions.json @@ -246,11 +246,6 @@ "count": 1 } }, - "web/app/components/app/app-access-control/add-member-or-group-pop.tsx": { - "no-restricted-imports": { - "count": 1 - } - }, "web/app/components/app/app-publisher/features-wrapper.tsx": { "ts/no-explicit-any": { "count": 4 diff --git a/packages/dify-ui/AGENTS.md b/packages/dify-ui/AGENTS.md index 9524394214..6eadd200f0 100644 --- a/packages/dify-ui/AGENTS.md +++ b/packages/dify-ui/AGENTS.md @@ -9,6 +9,7 @@ Shared design tokens, the `cn()` utility, CSS-first Tailwind styles, and headles - No imports from `web/`. No dependencies on next / i18next / ky / jotai / zustand. - One component per folder: `src//index.tsx`, optional `index.stories.tsx` and `__tests__/index.spec.tsx`. Add a matching `./` subpath to `package.json#exports`. - Props pattern: `Omit & VariantProps & { /* custom */ }`. +- Use plain `Omit<...>` only for non-union Base UI props. When a prop changes the valid shape of related props (for example `value` / `defaultValue`, `multiple` / `value`, or `clearable` / `onChange`), model that relationship with an explicit discriminated union or a distributive helper instead of flattening the props. - When a component accepts a prop typed from a shared internal module, `export type` it from that component so consumers import it from the component subpath. 
## Overlay Primitive Selection: Tooltip vs PreviewCard vs Popover diff --git a/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx b/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx index a3c63f5a0c..52c2a0dd54 100644 --- a/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx +++ b/web/app/components/app/app-access-control/__tests__/access-control.spec.tsx @@ -254,9 +254,7 @@ describe('AddMemberOrGroupDialog', () => { await user.click(expandButton) expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([baseGroup]) - const memberLabel = screen.getByText(baseMember.name) - const memberCheckbox = memberLabel.parentElement?.previousElementSibling as HTMLElement - fireEvent.click(memberCheckbox) + await user.click(screen.getByRole('option', { name: /Member One/ })) expect(useAccessControlStore.getState().specificMembers).toEqual([baseMember]) }) @@ -277,13 +275,13 @@ describe('AddMemberOrGroupDialog', () => { await user.type(screen.getByPlaceholderText('app.accessControlDialog.operateGroupAndMember.searchPlaceholder'), 'Group') expect(document.querySelector('.spin-animation')).toBeInTheDocument() - const groupCheckbox = screen.getByText(baseGroup.name).closest('div')?.previousElementSibling as HTMLElement - fireEvent.click(groupCheckbox) - fireEvent.click(groupCheckbox) + const groupOption = screen.getByRole('option', { name: /Group One/ }) + fireEvent.click(groupOption) + fireEvent.click(groupOption) - const memberCheckbox = screen.getByText(baseMember.name).parentElement?.previousElementSibling as HTMLElement - fireEvent.click(memberCheckbox) - fireEvent.click(memberCheckbox) + const memberOption = screen.getByRole('option', { name: /Member One/ }) + fireEvent.click(memberOption) + fireEvent.click(memberOption) fireEvent.click(screen.getByText('app.accessControlDialog.operateGroupAndMember.expand')) fireEvent.click(screen.getByText('app.accessControlDialog.operateGroupAndMember.allMembers')) @@ -307,7 +305,7 @@ describe('AddMemberOrGroupDialog', () => { await user.click(screen.getByText('common.operation.add')) - expect(screen.getByText('app.accessControlDialog.operateGroupAndMember.noResult')).toBeInTheDocument() + expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult') }) }) diff --git a/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx b/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx index 725b121d30..d34756e85e 100644 --- a/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx +++ b/web/app/components/app/app-access-control/__tests__/add-member-or-group-pop.spec.tsx @@ -1,5 +1,5 @@ import type { AccessControlAccount, AccessControlGroup, Subject } from '@/models/access-control' -import { fireEvent, render, screen } from '@testing-library/react' +import { render, screen } from '@testing-library/react' import userEvent from '@testing-library/user-event' import useAccessControlStore from '@/context/access-control-store' import { SubjectType } from '@/models/access-control' @@ -106,8 +106,7 @@ describe('AddMemberOrGroupDialog', () => { expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([baseGroup]) - const memberCheckbox = screen.getByText(baseMember.name).parentElement?.previousElementSibling as HTMLElement - fireEvent.click(memberCheckbox) + await user.click(screen.getByRole('option', { name: /Member One/ })) 
expect(useAccessControlStore.getState().specificMembers).toEqual([baseMember])
  })

@@ -125,6 +124,31 @@ describe('AddMemberOrGroupDialog', () => {

     await user.click(screen.getByText('common.operation.add'))

-    expect(screen.getByText('app.accessControlDialog.operateGroupAndMember.noResult')).toBeInTheDocument()
+    expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult')
+  })
+
+  it('should keep breadcrumbs visible when the current group has no candidates', async () => {
+    useAccessControlStore.setState({
+      selectedGroupsForBreadcrumb: [baseGroup],
+    })
+    mockUseSearchForWhiteListCandidates.mockReturnValue({
+      isLoading: false,
+      isFetchingNextPage: false,
+      fetchNextPage: vi.fn(),
+      data: { pages: [{ currPage: 1, subjects: [], hasMore: false }] },
+    })
+
+    const user = userEvent.setup()
+    render(<AddMemberOrGroupDialog />)
+
+    await user.click(screen.getByText('common.operation.add'))
+
+    expect(screen.getByRole('button', { name: 'app.accessControlDialog.operateGroupAndMember.allMembers' })).toBeInTheDocument()
+    expect(screen.getByText(baseGroup.name)).toBeInTheDocument()
+    expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult')
+
+    await user.click(screen.getByRole('button', { name: 'app.accessControlDialog.operateGroupAndMember.allMembers' }))
+
+    expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([])
  })
})
diff --git a/web/app/components/app/app-access-control/add-member-or-group-pop.tsx b/web/app/components/app/app-access-control/add-member-or-group-pop.tsx
index 8d9bf19ea3..1e3a992136 100644
--- a/web/app/components/app/app-access-control/add-member-or-group-pop.tsx
+++ b/web/app/components/app/app-access-control/add-member-or-group-pop.tsx
@@ -1,110 +1,207 @@
 'use client'
+import type { ComboboxRootChangeEventDetails } from '@langgenius/dify-ui/combobox'
 import type { AccessControlAccount, AccessControlGroup, Subject, SubjectAccount, SubjectGroup } from '@/models/access-control'
-import { FloatingOverlay } from '@floating-ui/react'
 import { Avatar } from '@langgenius/dify-ui/avatar'
 import { Button } from '@langgenius/dify-ui/button'
 import { cn } from '@langgenius/dify-ui/cn'
-import { Popover, PopoverContent, PopoverTrigger } from '@langgenius/dify-ui/popover'
+import {
+  Combobox,
+  ComboboxContent,
+  ComboboxEmpty,
+  ComboboxInput,
+  ComboboxInputGroup,
+  ComboboxItem,
+  ComboboxItemText,
+  ComboboxList,
+  ComboboxStatus,
+  ComboboxTrigger,
+} from '@langgenius/dify-ui/combobox'
 import { RiAddCircleFill, RiArrowRightSLine, RiOrganizationChart } from '@remixicon/react'
 import { useDebounce } from 'ahooks'
-import { useCallback, useEffect, useRef, useState } from 'react'
+import { useEffect, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useSelector } from '@/context/app-context'
 import { SubjectType } from '@/models/access-control'
 import { useSearchForWhiteListCandidates } from '@/service/access-control'
 import useAccessControlStore from '../../../../context/access-control-store'
-import Checkbox from '../../base/checkbox'
-import Input from '../../base/input'
 import Loading from '../../base/loading'

 export default function AddMemberOrGroupDialog() {
   const { t } = useTranslation()
   const [open, setOpen] = useState(false)
   const [keyword, setKeyword] = useState('')
+  const scrollRootRef = useRef(null)
+  const anchorRef = useRef(null)
+  const specificGroups = useAccessControlStore(s => s.specificGroups)
+  const setSpecificGroups = 
useAccessControlStore(s => s.setSpecificGroups) + const specificMembers = useAccessControlStore(s => s.specificMembers) + const setSpecificMembers = useAccessControlStore(s => s.setSpecificMembers) const selectedGroupsForBreadcrumb = useAccessControlStore(s => s.selectedGroupsForBreadcrumb) const debouncedKeyword = useDebounce(keyword, { wait: 500 }) const lastAvailableGroup = selectedGroupsForBreadcrumb[selectedGroupsForBreadcrumb.length - 1] const { isLoading, isFetchingNextPage, fetchNextPage, data } = useSearchForWhiteListCandidates({ keyword: debouncedKeyword, groupId: lastAvailableGroup?.id, resultsPerPage: 10 }, open) - const handleKeywordChange = (e: React.ChangeEvent) => { - setKeyword(e.target.value) - } + const pages = data?.pages ?? [] + const subjects = pages.flatMap(page => page.subjects ?? []) + const selectedSubjects = [ + ...specificGroups.map(groupToSubject), + ...specificMembers.map(memberToSubject), + ] + const hasResults = pages.length > 0 && subjects.length > 0 + const shouldShowBreadcrumb = hasResults || selectedGroupsForBreadcrumb.length > 0 + const hasMore = pages[pages.length - 1]?.hasMore ?? false - const anchorRef = useRef(null) useEffect(() => { - const hasMore = data?.pages?.[0]?.hasMore ?? false let observer: IntersectionObserver | undefined if (anchorRef.current) { observer = new IntersectionObserver((entries) => { if (entries[0]!.isIntersecting && !isLoading && hasMore) fetchNextPage() - }, { rootMargin: '20px' }) + }, { root: scrollRootRef.current, rootMargin: '20px' }) observer.observe(anchorRef.current) } return () => observer?.disconnect() - }, [isLoading, fetchNextPage, anchorRef, data]) + }, [isLoading, fetchNextPage, hasMore]) + + const handleOpenChange = (nextOpen: boolean) => { + if (!nextOpen) + setKeyword('') + + setOpen(nextOpen) + } + + const handleInputValueChange = (inputValue: string, details: ComboboxRootChangeEventDetails) => { + if (details.reason !== 'item-press') + setKeyword(inputValue) + } + + const handleValueChange = (nextSubjects: Subject[]) => { + const nextGroups: AccessControlGroup[] = [] + const nextMembers: AccessControlAccount[] = [] + + for (const subject of nextSubjects) { + if (subject.subjectType === SubjectType.GROUP) + nextGroups.push((subject as SubjectGroup).groupData) + else + nextMembers.push((subject as SubjectAccount).accountData) + } + + setSpecificGroups(nextGroups) + setSpecificMembers(nextMembers) + } return ( - - - - {t('operation.add', { ns: 'common' })} - - )} - /> - {open && } - + multiple + open={open} + value={selectedSubjects} + inputValue={keyword} + items={subjects} + itemToStringLabel={getSubjectLabel} + itemToStringValue={getSubjectValue} + isItemEqualToValue={isSameSubject} + filter={null} + onOpenChange={handleOpenChange} + onInputValueChange={handleInputValueChange} + onValueChange={handleValueChange} + > + + + -
+
- + +
- { - isLoading - ?
- : (data?.pages?.length ?? 0) > 0 - ? ( - <> -
- -
-
- {renderGroupOrMember(data?.pages ?? [])} + {isLoading + ? ( + + + + ) + : ( + <> + {shouldShowBreadcrumb && ( +
+ +
+ )} + {hasResults + ? ( + <> + + {(subject: Subject) => } + {isFetchingNextPage && } -
-
- - ) - : ( -
- {t('accessControlDialog.operateGroupAndMember.noResult', { ns: 'app' })} -
- ) - } +
+ + ) + : ( + + {t('accessControlDialog.operateGroupAndMember.noResult', { ns: 'app' })} + + )} + + )}
- - + + ) } -type GroupOrMemberData = { subjects: Subject[], currPage: number }[] -function renderGroupOrMember(data: GroupOrMemberData) { - return data?.map((page) => { - return ( -
- {page.subjects?.map((item, index) => { - if (item.subjectType === SubjectType.GROUP) - return - return - })} -
- ) - }) ?? null +function groupToSubject(group: AccessControlGroup): SubjectGroup { + return { + subjectId: group.id, + subjectType: SubjectType.GROUP, + groupData: group, + } +} + +function memberToSubject(member: AccessControlAccount): SubjectAccount { + return { + subjectId: member.id, + subjectType: SubjectType.ACCOUNT, + accountData: member, + } +} + +function getSubjectLabel(subject: Subject) { + if (subject.subjectType === SubjectType.GROUP) + return (subject as SubjectGroup).groupData.name + + return (subject as SubjectAccount).accountData.name +} + +function getSubjectValue(subject: Subject) { + return `${subject.subjectType}:${subject.subjectId}` +} + +function isSameSubject(item: Subject, value: Subject) { + return item.subjectId === value.subjectId && item.subjectType === value.subjectType +} + +function SubjectItem({ subject }: { subject: Subject }) { + if (subject.subjectType === SubjectType.GROUP) + return + + return } function SelectedGroupsBreadCrumb() { @@ -112,13 +209,13 @@ function SelectedGroupsBreadCrumb() { const setSelectedGroupsForBreadcrumb = useAccessControlStore(s => s.setSelectedGroupsForBreadcrumb) const { t } = useTranslation() - const handleBreadCrumbClick = useCallback((index: number) => { + const handleBreadCrumbClick = (index: number) => { const newGroups = selectedGroupsForBreadcrumb.slice(0, index + 1) setSelectedGroupsForBreadcrumb(newGroups) - }, [setSelectedGroupsForBreadcrumb, selectedGroupsForBreadcrumb]) - const handleReset = useCallback(() => { + } + const handleReset = () => { setSelectedGroupsForBreadcrumb([]) - }, [setSelectedGroupsForBreadcrumb]) + } const hasBreadcrumb = selectedGroupsForBreadcrumb.length > 0 return ( @@ -162,104 +259,111 @@ function SelectedGroupsBreadCrumb() { type GroupItemProps = { group: AccessControlGroup + subject: Subject } -function GroupItem({ group }: GroupItemProps) { +function GroupItem({ group, subject }: GroupItemProps) { const { t } = useTranslation() const specificGroups = useAccessControlStore(s => s.specificGroups) - const setSpecificGroups = useAccessControlStore(s => s.setSpecificGroups) const selectedGroupsForBreadcrumb = useAccessControlStore(s => s.selectedGroupsForBreadcrumb) const setSelectedGroupsForBreadcrumb = useAccessControlStore(s => s.setSelectedGroupsForBreadcrumb) const isChecked = specificGroups.some(g => g.id === group.id) - const handleCheckChange = useCallback(() => { - if (!isChecked) { - const newGroups = [...specificGroups, group] - setSpecificGroups(newGroups) - } - else { - const newGroups = specificGroups.filter(g => g.id !== group.id) - setSpecificGroups(newGroups) - } - }, [specificGroups, setSpecificGroups, group, isChecked]) - const handleExpandClick = useCallback(() => { + const handleExpandClick = () => { setSelectedGroupsForBreadcrumb([...selectedGroupsForBreadcrumb, group]) - }, [selectedGroupsForBreadcrumb, setSelectedGroupsForBreadcrumb, group]) + } + return ( - - -
-
-
- +
+ + + +
+
+
-
-

{group.name}

-

{group.groupSize}

-
+ {group.name} + {group.groupSize} + + - +
) } type MemberItemProps = { member: AccessControlAccount + subject: Subject } -function MemberItem({ member }: MemberItemProps) { +function MemberItem({ member, subject }: MemberItemProps) { const currentUser = useSelector(s => s.userProfile) const { t } = useTranslation() const specificMembers = useAccessControlStore(s => s.specificMembers) - const setSpecificMembers = useAccessControlStore(s => s.setSpecificMembers) const isChecked = specificMembers.some(m => m.id === member.id) - const handleCheckChange = useCallback(() => { - if (!isChecked) { - const newMembers = [...specificMembers, member] - setSpecificMembers(newMembers) - } - else { - const newMembers = specificMembers.filter(m => m.id !== member.id) - setSpecificMembers(newMembers) - } - }, [specificMembers, setSpecificMembers, member, isChecked]) return ( - - -
+ + +
-

{member.name}

+ {member.name} {currentUser.email === member.email && ( -

+ ( {t('you', { ns: 'common' })} ) -

+ )} -
-

{member.email}

+ + {member.email}
) } type BaseItemProps = { className?: string + subject: Subject children: React.ReactNode } -function BaseItem({ children, className }: BaseItemProps) { +function BaseItem({ children, className, subject }: BaseItemProps) { return ( -
+ {children} -
+ + ) +} + +function SelectionBox({ checked }: { checked: boolean }) { + return ( +
+ - + + {(document: SimpleDocumentDetail | null) => ( + + )} + + + -
- - {documentsList - ? ( - ({ - id: d.id, - name: d.name, - extension: d.data_source_detail_dict?.upload_file?.extension || '', - }))} - onChange={handleChange} - /> - ) - : ( -
- -
- )} -
- - + + + {data + ? ( + documentsList.length > 0 + ? ( + + ) + : ( + + {t('noData', { ns: 'common' })} + + ) + ) + : ( + + + + )} +
+ ) } -export default React.memo(DocumentPicker) diff --git a/web/app/components/datasets/common/document-picker/preview-document-picker.tsx b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx index 597ceda9a5..fb90bf57f7 100644 --- a/web/app/components/datasets/common/document-picker/preview-document-picker.tsx +++ b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx @@ -14,7 +14,6 @@ import { useCallback } from 'react' import { useTranslation } from 'react-i18next' import Loading from '@/app/components/base/loading' import FileIcon from '../document-file-icon' -import DocumentList from './document-list' type Props = { className?: string @@ -74,7 +73,7 @@ const PreviewDocumentPicker: FC = ({ {files?.length > 1 &&
{t('preprocessDocument', { ns: 'dataset', num: files.length })}
} {files?.length > 0 ? ( - @@ -90,3 +89,27 @@ const PreviewDocumentPicker: FC = ({ ) } export default React.memo(PreviewDocumentPicker) + +function PreviewDocumentList({ + list, + onChange, +}: { + list: DocumentItem[] + onChange: (value: DocumentItem) => void +}) { + return ( +
+ {list.map(item => ( + + ))} +
+ ) +} diff --git a/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx b/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx index 3eb1017b8d..b48575d209 100644 --- a/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx +++ b/web/app/components/datasets/documents/detail/__tests__/document-title.spec.tsx @@ -1,6 +1,7 @@ +import type { SimpleDocumentDetail } from '@/models/datasets' import { render } from '@testing-library/react' import { beforeEach, describe, expect, it, vi } from 'vitest' -import { ChunkingMode } from '@/models/datasets' +import { ChunkingMode, DataSourceType } from '@/models/datasets' import { DocumentTitle } from '../document-title' @@ -11,13 +12,23 @@ vi.mock('@/next/navigation', () => ({ }), })) -// Mock DocumentPicker vi.mock('../../../common/document-picker', () => ({ - default: ({ datasetId, value, onChange }: { datasetId: string, value: unknown, onChange: (doc: { id: string }) => void }) => ( + DocumentPicker: ({ + datasetId, + value, + parentMode, + onChange, + }: { + datasetId: string + value?: SimpleDocumentDetail | null + parentMode?: string + onChange: (doc: { id: string }) => void + }) => (
onChange({ id: 'new-doc-id' })} > Document Picker @@ -25,6 +36,42 @@ vi.mock('../../../common/document-picker', () => ({ ), })) +const createDocument = (overrides: Partial = {}): SimpleDocumentDetail => ({ + id: 'doc-1', + batch: 'batch-1', + position: 1, + dataset_id: 'dataset-1', + data_source_type: DataSourceType.FILE, + data_source_info: { + upload_file: { + id: 'file-1', + name: 'document.pdf', + size: 1024, + extension: 'pdf', + mime_type: 'application/pdf', + created_by: 'user-1', + created_at: Date.now(), + }, + job_id: 'job-1', + url: '', + }, + dataset_process_rule_id: 'rule-1', + name: 'Document 1', + created_from: 'web', + created_by: 'user-1', + created_at: Date.now(), + indexing_status: 'completed', + display_status: 'enabled', + doc_form: ChunkingMode.text, + doc_language: 'en', + enabled: true, + word_count: 1000, + archived: false, + updated_at: Date.now(), + hit_count: 0, + ...overrides, +}) + describe('DocumentTitle', () => { beforeEach(() => { vi.clearAllMocks() @@ -69,31 +116,26 @@ describe('DocumentTitle', () => { expect(getByTestId('document-picker').getAttribute('data-dataset-id')).toBe('test-dataset-id') }) - it('should pass value props to DocumentPicker', () => { + it('should pass the selected document to DocumentPicker', () => { + const document = createDocument({ id: 'doc-current' }) const { getByTestId } = render( , ) - const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}') - expect(value.name).toBe('test-document') - expect(value.extension).toBe('pdf') - expect(value.chunkingMode).toBe(ChunkingMode.text) - expect(value.parentMode).toBe('paragraph') + expect(getByTestId('document-picker')).toHaveAttribute('data-value-id', 'doc-current') + expect(getByTestId('document-picker')).toHaveAttribute('data-parent-mode', 'paragraph') }) - it('should default parentMode to paragraph when parent_mode is undefined', () => { + it('should pass no parent mode when it is undefined', () => { const { getByTestId } = render( , ) - const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}') - expect(value.parentMode).toBe('paragraph') + expect(getByTestId('document-picker')).toHaveAttribute('data-parent-mode', '') }) it('should apply custom wrapperCls', () => { @@ -119,24 +161,23 @@ describe('DocumentTitle', () => { }) describe('Edge Cases', () => { - it('should handle undefined optional props', () => { + it('should handle an empty document value', () => { const { getByTestId } = render( , ) - const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}') - expect(value.name).toBeUndefined() - expect(value.extension).toBeUndefined() + expect(getByTestId('document-picker')).toHaveAttribute('data-value-id', '') }) it('should maintain structure when rerendered', () => { const { rerender, getByTestId } = render( - , + , ) - rerender() + rerender() expect(getByTestId('document-picker').getAttribute('data-dataset-id')).toBe('dataset-2') + expect(getByTestId('document-picker').getAttribute('data-value-id')).toBe('doc-2') }) }) }) diff --git a/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx b/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx index e717475b38..e8946ce584 100644 --- a/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx +++ b/web/app/components/datasets/documents/detail/__tests__/index.spec.tsx @@ -114,9 +114,20 @@ vi.mock('../batch-modal', () => ({ })) vi.mock('../document-title', () => ({ - DocumentTitle: ({ name, 
extension }: { name?: string, extension?: string }) => ( -
{name}
- ), + DocumentTitle: ({ + document, + }: { + document?: { + name?: string + data_source_detail_dict?: { upload_file?: { extension?: string } } + data_source_info?: { upload_file?: { extension?: string } } + } | null + }) => { + const extension = document?.data_source_detail_dict?.upload_file?.extension + ?? document?.data_source_info?.upload_file?.extension + + return
{document?.name}
+ }, })) vi.mock('../segment-add', () => ({ diff --git a/web/app/components/datasets/documents/detail/document-title.tsx b/web/app/components/datasets/documents/detail/document-title.tsx index d5bf5345ae..0a1cfbf61a 100644 --- a/web/app/components/datasets/documents/detail/document-title.tsx +++ b/web/app/components/datasets/documents/detail/document-title.tsx @@ -1,39 +1,29 @@ -import type { FC } from 'react' -import type { ChunkingMode, ParentMode } from '@/models/datasets' +import type { ParentMode, SimpleDocumentDetail } from '@/models/datasets' import { cn } from '@langgenius/dify-ui/cn' import { useRouter } from '@/next/navigation' -import DocumentPicker from '../../common/document-picker' +import { DocumentPicker } from '../../common/document-picker' type DocumentTitleProps = { datasetId: string - extension?: string - name?: string - chunkingMode?: ChunkingMode - parent_mode?: ParentMode - iconCls?: string - textCls?: string + document?: SimpleDocumentDetail | null + parentMode?: ParentMode wrapperCls?: string } -export const DocumentTitle: FC = ({ +export function DocumentTitle({ datasetId, - extension, - name, - chunkingMode, - parent_mode, + document, + parentMode, wrapperCls, -}) => { +}: DocumentTitleProps) { const router = useRouter() + return (
{ router.push(`/datasets/${datasetId}/documents/${doc.id}`) }} diff --git a/web/app/components/datasets/documents/detail/index.tsx b/web/app/components/datasets/documents/detail/index.tsx index 732d7ffb28..190cf8edf7 100644 --- a/web/app/components/datasets/documents/detail/index.tsx +++ b/web/app/components/datasets/documents/detail/index.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import type { DataSourceInfo, DocumentDisplayStatus, FileItem, FullDocumentDetail, LegacyDataSourceInfo } from '@/models/datasets' +import type { DocumentDisplayStatus, FileItem, FullDocumentDetail } from '@/models/datasets' import type { SegmentImportStatus } from '@/types/dataset' import { cn } from '@langgenius/dify-ui/cn' import { toast } from '@langgenius/dify-ui/toast' @@ -38,10 +38,6 @@ const NON_TERMINAL_DISPLAY_STATUSES = new Set( DisplayStatusList.filter(s => s === 'queuing' || s === 'indexing' || s === 'paused'), ) -const isLegacyDataSourceInfo = (info?: DataSourceInfo): info is LegacyDataSourceInfo => { - return !!info && 'upload_file' in info -} - const DocumentDetail: FC = ({ datasetId, documentId }) => { const router = useRouter() const searchParams = useSearchParams() @@ -123,14 +119,6 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => { const embedding = NON_TERMINAL_DISPLAY_STATUSES.has(documentDetail?.display_status as DocumentDisplayStatus) - const documentUploadFile = useMemo(() => { - if (!documentDetail?.data_source_info) - return undefined - if (isLegacyDataSourceInfo(documentDetail.data_source_info)) - return documentDetail.data_source_info.upload_file - return undefined - }, [documentDetail?.data_source_info]) - const invalidChunkList = useInvalid(useSegmentListKey) const invalidChildChunkList = useInvalid(useChildSegmentListKey) const invalidDocumentList = useInvalidDocumentList(datasetId) @@ -212,11 +200,9 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => {
{embeddingAvailable && documentDetail && !documentDetail.archived && !isFullDocMode && ( From 1a93af5cd0c22c83da98eb2990cdea398aaf1064 Mon Sep 17 00:00:00 2001 From: Deepam Goyal <116721751+Deepam02@users.noreply.github.com> Date: Tue, 12 May 2026 11:04:45 +0530 Subject: [PATCH 3/8] refactor: rewrite estimate_args_validate using Pydantic v2 models (#36036) Signed-off-by: Deepam Goyal Co-authored-by: Asuka Minato Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- api/services/dataset_service.py | 181 +++++++++--------- .../services/test_dataset_service_document.py | 4 +- 2 files changed, 94 insertions(+), 91 deletions(-) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 383474f4f6..4f5a95dcde 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -7,9 +7,10 @@ import time import uuid from collections import Counter from collections.abc import Sequence -from typing import Any, Literal, TypedDict, cast +from typing import Annotated, Any, Literal, TypedDict, cast import sqlalchemy as sa +from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator from redis.exceptions import LockNotOwnedError from sqlalchemy import delete, exists, func, select, update from sqlalchemy.orm import Session, sessionmaker @@ -117,6 +118,86 @@ class AutoDisableLogsDict(TypedDict): count: int +class _EstimatePreProcessingRule(BaseModel): + id: str = Field(min_length=1) + enabled: bool + + @field_validator("id") + @classmethod + def _validate_id(cls, v: str) -> str: + if v not in DatasetProcessRule.PRE_PROCESSING_RULES: + raise ValueError("Process rule pre_processing_rules id is invalid") + return v + + +class _EstimateSegmentation(BaseModel): + separator: str = Field(min_length=1) + max_tokens: int = Field(gt=0) + + +class _EstimateRules(BaseModel): + pre_processing_rules: list[_EstimatePreProcessingRule] + segmentation: _EstimateSegmentation + + @field_validator("pre_processing_rules") + @classmethod + def _deduplicate(cls, v: list[_EstimatePreProcessingRule]) -> list[_EstimatePreProcessingRule]: + seen: dict[str, _EstimatePreProcessingRule] = {} + for rule in v: + seen[rule.id] = rule + return list(seen.values()) + + +class _SummaryIndexSettingDisabled(BaseModel): + enable: Literal[False] = False + + +class _SummaryIndexSettingEnabled(BaseModel): + enable: Literal[True] + model_name: str = Field(min_length=1) + model_provider_name: str = Field(min_length=1) + + +_SummaryIndexSetting = Annotated[ + _SummaryIndexSettingDisabled | _SummaryIndexSettingEnabled, + Field(discriminator="enable"), +] + + +class _AutomaticProcessRule(BaseModel): + model_config = ConfigDict(extra="allow") + + mode: Literal[ProcessRuleMode.AUTOMATIC] + summary_index_setting: _SummaryIndexSetting | None = None + + +class _CustomProcessRule(BaseModel): + model_config = ConfigDict(extra="allow") + + mode: Literal[ProcessRuleMode.CUSTOM] + rules: _EstimateRules + summary_index_setting: _SummaryIndexSetting | None = None + + +class _HierarchicalProcessRule(BaseModel): + model_config = ConfigDict(extra="allow") + + mode: Literal[ProcessRuleMode.HIERARCHICAL] + rules: _EstimateRules + summary_index_setting: _SummaryIndexSetting | None = None + + +_EstimateProcessRule = Annotated[ + _AutomaticProcessRule | _CustomProcessRule | _HierarchicalProcessRule, + Field(discriminator="mode"), +] + + +class _EstimateArgs(BaseModel): + info_list: dict[str, Any] + process_rule: _EstimateProcessRule + + class DatasetService: 
@staticmethod def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False): @@ -2851,94 +2932,16 @@ class DocumentService: @classmethod def estimate_args_validate(cls, args: dict[str, Any]): - if "info_list" not in args or not args["info_list"]: - raise ValueError("Data source info is required") - - if not isinstance(args["info_list"], dict): - raise ValueError("Data info is invalid") - - if "process_rule" not in args or not args["process_rule"]: - raise ValueError("Process rule is required") - - if not isinstance(args["process_rule"], dict): - raise ValueError("Process rule is invalid") - - if "mode" not in args["process_rule"] or not args["process_rule"]["mode"]: - raise ValueError("Process rule mode is required") - - if args["process_rule"]["mode"] not in DatasetProcessRule.MODES: - raise ValueError("Process rule mode is invalid") - - if args["process_rule"]["mode"] == ProcessRuleMode.AUTOMATIC: - args["process_rule"]["rules"] = {} - else: - if "rules" not in args["process_rule"] or not args["process_rule"]["rules"]: - raise ValueError("Process rule rules is required") - - if not isinstance(args["process_rule"]["rules"], dict): - raise ValueError("Process rule rules is invalid") - - if ( - "pre_processing_rules" not in args["process_rule"]["rules"] - or args["process_rule"]["rules"]["pre_processing_rules"] is None - ): - raise ValueError("Process rule pre_processing_rules is required") - - if not isinstance(args["process_rule"]["rules"]["pre_processing_rules"], list): - raise ValueError("Process rule pre_processing_rules is invalid") - - unique_pre_processing_rule_dicts = {} - for pre_processing_rule in args["process_rule"]["rules"]["pre_processing_rules"]: - if "id" not in pre_processing_rule or not pre_processing_rule["id"]: - raise ValueError("Process rule pre_processing_rules id is required") - - if pre_processing_rule["id"] not in DatasetProcessRule.PRE_PROCESSING_RULES: - raise ValueError("Process rule pre_processing_rules id is invalid") - - if "enabled" not in pre_processing_rule or pre_processing_rule["enabled"] is None: - raise ValueError("Process rule pre_processing_rules enabled is required") - - if not isinstance(pre_processing_rule["enabled"], bool): - raise ValueError("Process rule pre_processing_rules enabled is invalid") - - unique_pre_processing_rule_dicts[pre_processing_rule["id"]] = pre_processing_rule - - args["process_rule"]["rules"]["pre_processing_rules"] = list(unique_pre_processing_rule_dicts.values()) - - if ( - "segmentation" not in args["process_rule"]["rules"] - or args["process_rule"]["rules"]["segmentation"] is None - ): - raise ValueError("Process rule segmentation is required") - - if not isinstance(args["process_rule"]["rules"]["segmentation"], dict): - raise ValueError("Process rule segmentation is invalid") - - if ( - "separator" not in args["process_rule"]["rules"]["segmentation"] - or not args["process_rule"]["rules"]["segmentation"]["separator"] - ): - raise ValueError("Process rule segmentation separator is required") - - if not isinstance(args["process_rule"]["rules"]["segmentation"]["separator"], str): - raise ValueError("Process rule segmentation separator is invalid") - - if ( - "max_tokens" not in args["process_rule"]["rules"]["segmentation"] - or not args["process_rule"]["rules"]["segmentation"]["max_tokens"] - ): - raise ValueError("Process rule segmentation max_tokens is required") - - if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int): - raise 
ValueError("Process rule segmentation max_tokens is invalid") - - # valid summary index setting - summary_index_setting = args["process_rule"].get("summary_index_setting") - if summary_index_setting and summary_index_setting.get("enable"): - if "model_name" not in summary_index_setting or not summary_index_setting["model_name"]: - raise ValueError("Summary index model name is required") - if "model_provider_name" not in summary_index_setting or not summary_index_setting["model_provider_name"]: - raise ValueError("Summary index model provider name is required") + try: + validated = _EstimateArgs.model_validate(args) + except ValidationError as e: + first = e.errors()[0] + original = first.get("ctx", {}).get("error") + raise ValueError(str(original) if isinstance(original, ValueError) else first["msg"]) from e + process_rule_dict = validated.process_rule.model_dump(exclude_none=True) + if validated.process_rule.mode == ProcessRuleMode.AUTOMATIC: + process_rule_dict["rules"] = {} + args["process_rule"] = process_rule_dict @staticmethod def batch_update_document_status( diff --git a/api/tests/unit_tests/services/test_dataset_service_document.py b/api/tests/unit_tests/services/test_dataset_service_document.py index 1633194aa8..a78bc7f9d6 100644 --- a/api/tests/unit_tests/services/test_dataset_service_document.py +++ b/api/tests/unit_tests/services/test_dataset_service_document.py @@ -1297,7 +1297,7 @@ class TestDocumentServiceEstimateValidation: """Unit tests for estimate_args_validate branches.""" def test_estimate_args_validate_rejects_missing_info_list(self): - with pytest.raises(ValueError, match="Data source info is required"): + with pytest.raises(ValueError, match="Field required"): DocumentService.estimate_args_validate({}) def test_estimate_args_validate_sets_empty_rules_for_automatic_mode(self): @@ -1357,7 +1357,7 @@ class TestDocumentServiceEstimateValidation: }, } - with pytest.raises(ValueError, match="Summary index model provider name is required"): + with pytest.raises(ValueError, match="Field required"): DocumentService.estimate_args_validate(args) From cbedcd2882ae4f7b2fd597b56647f34dcb87eebd Mon Sep 17 00:00:00 2001 From: -LAN- Date: Tue, 12 May 2026 13:35:24 +0800 Subject: [PATCH 4/8] fix(security): harden self-hosted SECRET_KEY bootstrap (#36049) Co-authored-by: EndlessLucky <66432853+EndlessLucky@users.noreply.github.com> --- api/app_factory.py | 2 +- api/configs/feature/__init__.py | 6 +- api/configs/secret_key.py | 38 ++++++++++ api/core/app/workflow/file_runtime.py | 2 +- .../datasource/datasource_file_manager.py | 14 +++- api/core/tools/signature.py | 19 +++-- api/core/tools/tool_file_manager.py | 14 +++- api/extensions/ext_set_secretkey.py | 11 ++- api/models/dataset.py | 8 +- .../unit_tests/configs/test_dify_config.py | 41 ++++++++++ .../test_datasource_file_manager.py | 33 --------- .../extensions/test_set_secretkey.py | 74 +++++++++++++++++++ api/tests/unit_tests/libs/test_passport.py | 23 +----- docker/.env.example | 3 +- docker/README.md | 2 +- docker/envs/security.env.example | 3 +- 16 files changed, 209 insertions(+), 84 deletions(-) create mode 100644 api/configs/secret_key.py create mode 100644 api/tests/unit_tests/extensions/test_set_secretkey.py diff --git a/api/app_factory.py b/api/app_factory.py index 48e50ceae9..5583071980 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -181,7 +181,6 @@ def initialize_extensions(app: DifyApp): ext_import_modules, ext_orjson, ext_forward_refs, - ext_set_secretkey, ext_compress, ext_code_based_extension, 
ext_database, @@ -189,6 +188,7 @@ def initialize_extensions(app: DifyApp): ext_migrate, ext_redis, ext_storage, + ext_set_secretkey, ext_logstore, # Initialize logstore after storage, before celery ext_celery, ext_login, diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 26b8ea670b..ccb97d96ef 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -23,9 +23,9 @@ class SecurityConfig(BaseSettings): """ SECRET_KEY: str = Field( - description="Secret key for secure session cookie signing." - "Make sure you are changing this key for your deployment with a strong key." - "Generate a strong key using `openssl rand -base64 42` or set via the `SECRET_KEY` environment variable.", + description="Secret key for secure session cookie signing. " + "Leave empty to let Dify generate a persistent key in the storage directory, " + "or set a strong value via the `SECRET_KEY` environment variable.", default="", ) diff --git a/api/configs/secret_key.py b/api/configs/secret_key.py new file mode 100644 index 0000000000..f8c33f6a2c --- /dev/null +++ b/api/configs/secret_key.py @@ -0,0 +1,38 @@ +"""SECRET_KEY persistence helpers for runtime setup.""" + +from __future__ import annotations + +import secrets + +from extensions.ext_storage import storage + +GENERATED_SECRET_KEY_FILENAME = ".dify_secret_key" + + +def resolve_secret_key(secret_key: str) -> str: + """Return an explicit SECRET_KEY or a generated key persisted in storage.""" + if secret_key: + return secret_key + + return _load_or_create_secret_key() + + +def _load_or_create_secret_key() -> str: + try: + persisted_key = storage.load_once(GENERATED_SECRET_KEY_FILENAME).decode("utf-8").strip() + if persisted_key: + return persisted_key + except FileNotFoundError: + pass + + generated_key = secrets.token_urlsafe(48) + + try: + storage.save(GENERATED_SECRET_KEY_FILENAME, f"{generated_key}\n".encode()) + except Exception as exc: + raise ValueError( + f"SECRET_KEY is not set and could not be generated at {GENERATED_SECRET_KEY_FILENAME}. " + "Set SECRET_KEY explicitly or make storage writable." 
+ ) from exc + + return generated_key diff --git a/api/core/app/workflow/file_runtime.py b/api/core/app/workflow/file_runtime.py index 3a6f9d575a..587f700286 100644 --- a/api/core/app/workflow/file_runtime.py +++ b/api/core/app/workflow/file_runtime.py @@ -128,7 +128,7 @@ class DifyWorkflowFileRuntime(WorkflowFileRuntimeProtocol): @staticmethod def _secret_key() -> bytes: - return dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + return dify_config.SECRET_KEY.encode() def _sign_query(self, *, payload: str) -> dict[str, str]: timestamp = str(int(time.time())) diff --git a/api/core/datasource/datasource_file_manager.py b/api/core/datasource/datasource_file_manager.py index 492b507aa9..79b84a28be 100644 --- a/api/core/datasource/datasource_file_manager.py +++ b/api/core/datasource/datasource_file_manager.py @@ -35,8 +35,11 @@ class DatasourceFileManager: timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -47,8 +50,11 @@ class DatasourceFileManager: verify signature """ data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() # verify signature diff --git a/api/core/tools/signature.py b/api/core/tools/signature.py index 3c7b523ff1..ca4756f2a4 100644 --- a/api/core/tools/signature.py +++ b/api/core/tools/signature.py @@ -8,6 +8,10 @@ import urllib.parse from configs import dify_config +def _secret_key() -> bytes: + return dify_config.SECRET_KEY.encode() + + def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True) -> str: """ sign file to get a temporary url for plugin access @@ -19,8 +23,7 @@ def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True) timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -39,8 +42,7 @@ def sign_upload_file_preview_url(upload_file_id: str, extension: str) -> str: timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = 
base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -51,8 +53,7 @@ def verify_tool_file_signature(file_id: str, timestamp: str, nonce: str, sign: s verify signature """ data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() # verify signature @@ -71,8 +72,7 @@ def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str, timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() query = urllib.parse.urlencode( { @@ -92,8 +92,7 @@ def verify_plugin_file_signature( """Verify the signature used by the plugin-facing file upload endpoint.""" data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() if sign != recalculated_encoded_sign: diff --git a/api/core/tools/tool_file_manager.py b/api/core/tools/tool_file_manager.py index c87e8a3ae0..f2552e7cbd 100644 --- a/api/core/tools/tool_file_manager.py +++ b/api/core/tools/tool_file_manager.py @@ -51,8 +51,11 @@ class ToolFileManager: timestamp = str(int(time.time())) nonce = os.urandom(16).hex() data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" @@ -63,8 +66,11 @@ class ToolFileManager: verify signature """ data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" - recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_sign = hmac.new( + dify_config.SECRET_KEY.encode(), + data_to_sign.encode(), + hashlib.sha256, + ).digest() recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() # verify signature diff --git a/api/extensions/ext_set_secretkey.py b/api/extensions/ext_set_secretkey.py index dfb87c0167..ca59a2de4d 100644 --- a/api/extensions/ext_set_secretkey.py +++ b/api/extensions/ext_set_secretkey.py @@ -1,6 +1,13 @@ from configs import dify_config +from configs.secret_key import resolve_secret_key from dify_app import DifyApp -def init_app(app: DifyApp): - app.secret_key = 
dify_config.SECRET_KEY +def init_app(app: DifyApp) -> None: + """Resolve SECRET_KEY after config loading and before session/login setup.""" + secret_key = dify_config.SECRET_KEY + if not secret_key: + secret_key = resolve_secret_key(secret_key) + dify_config.SECRET_KEY = secret_key + app.config["SECRET_KEY"] = secret_key + app.secret_key = secret_key diff --git a/api/models/dataset.py b/api/models/dataset.py index f823e0aa10..65ea39969c 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -945,7 +945,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() @@ -962,7 +962,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() @@ -981,7 +981,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() @@ -1019,7 +1019,7 @@ class DocumentSegment(Base): nonce = os.urandom(16).hex() timestamp = str(int(time.time())) data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}" - secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + secret_key = dify_config.SECRET_KEY.encode() sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() encoded_sign = base64.urlsafe_b64encode(sign).decode() diff --git a/api/tests/unit_tests/configs/test_dify_config.py b/api/tests/unit_tests/configs/test_dify_config.py index 57dbf453de..919ebbc656 100644 --- a/api/tests/unit_tests/configs/test_dify_config.py +++ b/api/tests/unit_tests/configs/test_dify_config.py @@ -8,6 +8,47 @@ from yarl import URL from configs.app_config import DifyConfig +def _set_basic_config_env(monkeypatch: pytest.MonkeyPatch) -> None: + os.environ.clear() + monkeypatch.setenv("CONSOLE_API_URL", "https://example.com") + monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com") + monkeypatch.setenv("DB_TYPE", "postgresql") + monkeypatch.setenv("DB_USERNAME", "postgres") + monkeypatch.setenv("DB_PASSWORD", "postgres") + monkeypatch.setenv("DB_HOST", "localhost") + monkeypatch.setenv("DB_PORT", "5432") + monkeypatch.setenv("DB_DATABASE", "dify") + + +def test_dify_config_keeps_secret_key_empty_when_missing( + monkeypatch: pytest.MonkeyPatch, + tmp_path, +) -> None: + _set_basic_config_env(monkeypatch) + monkeypatch.delenv("SECRET_KEY", raising=False) + monkeypatch.setenv("OPENDAL_FS_ROOT", str(tmp_path)) + + config = DifyConfig(_env_file=None) + + assert config.SECRET_KEY == "" + assert not hasattr(config, "OPENDAL_FS_ROOT") + assert not (tmp_path / ".dify_secret_key").exists() + + +def 
test_dify_config_preserves_explicit_secret_key( + monkeypatch: pytest.MonkeyPatch, + tmp_path, +) -> None: + _set_basic_config_env(monkeypatch) + monkeypatch.setenv("SECRET_KEY", "explicit") + monkeypatch.setenv("OPENDAL_FS_ROOT", str(tmp_path)) + + config = DifyConfig(_env_file=None) + + assert config.SECRET_KEY == "explicit" + assert not (tmp_path / ".dify_secret_key").exists() + + def test_dify_config(monkeypatch: pytest.MonkeyPatch): # clear system environment variables os.environ.clear() diff --git a/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py index 4f39d38831..cee7d46083 100644 --- a/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py +++ b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py @@ -34,20 +34,6 @@ class TestDatasourceFileManager: assert f"nonce={mock_urandom.return_value.hex()}" in signed_url assert "sign=" in signed_url - @patch("core.datasource.datasource_file_manager.time.time") - @patch("core.datasource.datasource_file_manager.os.urandom") - @patch("core.datasource.datasource_file_manager.dify_config") - def test_sign_file_empty_secret(self, mock_config, mock_urandom, mock_time): - # Setup - mock_config.FILES_URL = "http://localhost:5001" - mock_config.SECRET_KEY = None # Empty secret - mock_time.return_value = 1700000000 - mock_urandom.return_value = b"1234567890abcdef" - - # Execute - signed_url = DatasourceFileManager.sign_file("file_id", ".png") - assert "sign=" in signed_url - @patch("core.datasource.datasource_file_manager.time.time") @patch("core.datasource.datasource_file_manager.dify_config") def test_verify_file(self, mock_config, mock_time): @@ -76,25 +62,6 @@ class TestDatasourceFileManager: mock_time.return_value = 1700000500 # 700 seconds after timestamp (300 is timeout) assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is False - @patch("core.datasource.datasource_file_manager.time.time") - @patch("core.datasource.datasource_file_manager.dify_config") - def test_verify_file_empty_secret(self, mock_config, mock_time): - # Setup - mock_config.SECRET_KEY = "" # Empty string secret - mock_config.FILES_ACCESS_TIMEOUT = 300 - mock_time.return_value = 1700000000 - - datasource_file_id = "file_id_123" - timestamp = "1699999800" - nonce = "some_nonce" - - # Calculate with empty secret - data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" - sign = hmac.new(b"", data_to_sign.encode(), hashlib.sha256).digest() - encoded_sign = base64.urlsafe_b64encode(sign).decode() - - assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is True - @patch("core.datasource.datasource_file_manager.db") @patch("core.datasource.datasource_file_manager.storage") @patch("core.datasource.datasource_file_manager.uuid4") diff --git a/api/tests/unit_tests/extensions/test_set_secretkey.py b/api/tests/unit_tests/extensions/test_set_secretkey.py new file mode 100644 index 0000000000..8a8e4e2b19 --- /dev/null +++ b/api/tests/unit_tests/extensions/test_set_secretkey.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import pytest +from flask import Flask + +from extensions import ext_set_secretkey + + +class InMemoryStorage: + def __init__(self, files: dict[str, bytes] | None = None) -> None: + self.files = files or {} + self.saved_files: list[tuple[str, bytes]] = [] + + def load_once(self, filename: str) -> bytes: + try: + return self.files[filename] + 
except KeyError: + raise FileNotFoundError(filename) + + def save(self, filename: str, data: bytes) -> None: + self.files[filename] = data + self.saved_files.append((filename, data)) + + +def test_init_app_uses_configured_secret_key(monkeypatch: pytest.MonkeyPatch) -> None: + secret_key = "configured-secret-key" + storage = InMemoryStorage() + monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", secret_key) + monkeypatch.setattr("configs.secret_key.storage", storage) + app = Flask(__name__) + app.config["SECRET_KEY"] = secret_key + + ext_set_secretkey.init_app(app) + + assert app.secret_key == secret_key + assert app.config["SECRET_KEY"] == secret_key + assert storage.saved_files == [] + + +def test_init_app_generates_and_persists_secret_key_when_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + storage = InMemoryStorage() + monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", "") + monkeypatch.setattr("configs.secret_key.storage", storage) + app = Flask(__name__) + app.config["SECRET_KEY"] = "" + + ext_set_secretkey.init_app(app) + + persisted_key = storage.files[".dify_secret_key"].decode("utf-8").strip() + assert persisted_key + assert storage.saved_files == [(".dify_secret_key", f"{persisted_key}\n".encode())] + assert persisted_key == ext_set_secretkey.dify_config.SECRET_KEY + assert persisted_key == app.config["SECRET_KEY"] + assert persisted_key == app.secret_key + + +def test_init_app_reuses_persisted_secret_key_when_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + persisted_key = "persisted-secret-key" + storage = InMemoryStorage({".dify_secret_key": f"{persisted_key}\n".encode()}) + monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", "") + monkeypatch.setattr("configs.secret_key.storage", storage) + app = Flask(__name__) + app.config["SECRET_KEY"] = "" + + ext_set_secretkey.init_app(app) + + assert persisted_key == ext_set_secretkey.dify_config.SECRET_KEY + assert persisted_key == app.config["SECRET_KEY"] + assert persisted_key == app.secret_key + assert storage.saved_files == [] diff --git a/api/tests/unit_tests/libs/test_passport.py b/api/tests/unit_tests/libs/test_passport.py index f33484c18d..90b58ae548 100644 --- a/api/tests/unit_tests/libs/test_passport.py +++ b/api/tests/unit_tests/libs/test_passport.py @@ -143,28 +143,13 @@ class TestPassportService: assert str(exc_info.value) == "401 Unauthorized: Token has expired." 
# Configuration tests - def test_should_handle_empty_secret_key(self): - """Test behavior when SECRET_KEY is empty""" + def test_should_use_configured_secret_key_without_policy_validation(self): + """Test that policy decisions are owned by config, not PassportService.""" with patch("libs.passport.dify_config") as mock_config: - mock_config.SECRET_KEY = "" + mock_config.SECRET_KEY = "configured" service = PassportService() - # Empty secret key should still work but is insecure - payload = {"test": "data"} - token = service.issue(payload) - decoded = service.verify(token) - assert decoded == payload - - def test_should_handle_none_secret_key(self): - """Test behavior when SECRET_KEY is None""" - with patch("libs.passport.dify_config") as mock_config: - mock_config.SECRET_KEY = None - service = PassportService() - - payload = {"test": "data"} - # JWT library will raise TypeError when secret is None - with pytest.raises((TypeError, jwt.exceptions.InvalidKeyError)): - service.issue(payload) + assert service.sk == "configured" # Boundary condition tests def test_should_handle_large_payload(self, passport_service): diff --git a/docker/.env.example b/docker/.env.example index 5a012973c0..c708a40c15 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -28,7 +28,8 @@ LANG=C.UTF-8 LC_ALL=C.UTF-8 PYTHONIOENCODING=utf-8 UV_CACHE_DIR=/tmp/.uv-cache -SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U +# Leave empty to auto-generate a persistent key in the storage directory. +SECRET_KEY= INIT_PASSWORD= DEPLOY_ENV=PRODUCTION CHECK_UPDATE_URL=https://updates.dify.ai diff --git a/docker/README.md b/docker/README.md index a2d9b2eeba..26b1dac9ac 100644 --- a/docker/README.md +++ b/docker/README.md @@ -87,7 +87,7 @@ The root `.env.example` file contains the essential startup settings. Optional a 1. **Server Configuration**: - `LOG_LEVEL`, `DEBUG`, `FLASK_DEBUG`: Logging and debug settings. - - `SECRET_KEY`: A key for encrypting session cookies and other sensitive data. + - `SECRET_KEY`: A key for signing sessions, JWTs, and file URLs. Leave it empty to let Dify generate a persistent key in the storage directory, or set a unique value yourself. 1. **Database Configuration**: diff --git a/docker/envs/security.env.example b/docker/envs/security.env.example index 787aef2706..d7556d91e5 100644 --- a/docker/envs/security.env.example +++ b/docker/envs/security.env.example @@ -36,5 +36,6 @@ TIDB_PUBLIC_KEY=dify TIDB_PRIVATE_KEY=dify VIKINGDB_ACCESS_KEY=your-ak VIKINGDB_SECRET_KEY=your-sk -SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U +# Leave empty to auto-generate a persistent key in the storage directory. +SECRET_KEY= INIT_PASSWORD= From 9424bf60b0daf564179446c0b5b189bf8aaf5cc8 Mon Sep 17 00:00:00 2001 From: orbisai0security Date: Tue, 12 May 2026 11:13:37 +0530 Subject: [PATCH 5/8] fix: the /threads and /db-pool-stat endpoints in api... in... 
(#35665) --- api/extensions/ext_app_metrics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/extensions/ext_app_metrics.py b/api/extensions/ext_app_metrics.py index 4a6490b9f0..914baaadaf 100644 --- a/api/extensions/ext_app_metrics.py +++ b/api/extensions/ext_app_metrics.py @@ -5,6 +5,7 @@ import threading from flask import Response from configs import dify_config +from controllers.console.admin import admin_required from dify_app import DifyApp @@ -25,6 +26,7 @@ def init_app(app: DifyApp): ) @app.route("/threads") + @admin_required def threads(): # pyright: ignore[reportUnusedFunction] num_threads = threading.active_count() threads = threading.enumerate() @@ -50,6 +52,7 @@ def init_app(app: DifyApp): } @app.route("/db-pool-stat") + @admin_required def pool_stat(): # pyright: ignore[reportUnusedFunction] from extensions.ext_database import db From bb73776339b1af45be0d1f1a4d39ff5ee916a7e4 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Tue, 12 May 2026 14:56:16 +0800 Subject: [PATCH 6/8] chore(release): bump version to 1.14.1 (#36034) --- api/pyproject.toml | 2 +- api/uv.lock | 2 +- docker/docker-compose-template.yaml | 10 +++++----- docker/docker-compose.yaml | 10 +++++----- web/package.json | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index 604d01594e..40834b806f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dify-api" -version = "1.14.0" +version = "1.14.1" requires-python = "~=3.12.0" dependencies = [ diff --git a/api/uv.lock b/api/uv.lock index 6861abdbdc..634dcc74b8 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1292,7 +1292,7 @@ wheels = [ [[package]] name = "dify-api" -version = "1.14.0" +version = "1.14.1" source = { virtual = "." } dependencies = [ { name = "aliyun-log-python-sdk" }, diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 72c9d4fd90..d9e2fc5bc9 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -220,7 +220,7 @@ services: # API service api: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: api SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -264,7 +264,7 @@ services: # WebSocket service for workflow collaboration. api_websocket: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 profiles: - collaboration environment: @@ -290,7 +290,7 @@ services: # The Celery worker for processing all queues (dataset, workflow, mail, etc.) worker: <<: *shared-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: worker SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -333,7 +333,7 @@ services: # Celery beat for scheduling periodic tasks. worker_beat: <<: *shared-worker-beat-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: beat depends_on: @@ -366,7 +366,7 @@ services: # Frontend web application. 
web: - image: langgenius/dify-web:1.14.0 + image: langgenius/dify-web:1.14.1 restart: always env_file: - path: ./envs/core-services/web.env diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index c1d75e01f4..004140abfb 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -226,7 +226,7 @@ services: # API service api: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: api SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -270,7 +270,7 @@ services: # WebSocket service for workflow collaboration. api_websocket: <<: *shared-api-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 profiles: - collaboration environment: @@ -296,7 +296,7 @@ services: # The Celery worker for processing all queues (dataset, workflow, mail, etc.) worker: <<: *shared-worker-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: worker SENTRY_DSN: ${API_SENTRY_DSN:-} @@ -339,7 +339,7 @@ services: # Celery beat for scheduling periodic tasks. worker_beat: <<: *shared-worker-beat-config - image: langgenius/dify-api:1.14.0 + image: langgenius/dify-api:1.14.1 environment: MODE: beat depends_on: @@ -372,7 +372,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:1.14.0 + image: langgenius/dify-web:1.14.1 restart: always env_file: - path: ./envs/core-services/web.env diff --git a/web/package.json b/web/package.json index df1ceed01f..be373a1e68 100644 --- a/web/package.json +++ b/web/package.json @@ -1,7 +1,7 @@ { "name": "dify-web", "type": "module", - "version": "1.14.0", + "version": "1.14.1", "private": true, "imports": { "#i18n": { From 51a8f79d67de30f77f34d22105039aeb5526811f Mon Sep 17 00:00:00 2001 From: Asuka Minato Date: Tue, 12 May 2026 16:02:17 +0900 Subject: [PATCH 7/8] chore: DocumentSegment to Typebase (#35635) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../rag/datasource/keyword/jieba/jieba.py | 2 + api/core/rag/datasource/retrieval_service.py | 6 +- api/core/rag/docstore/dataset_docstore.py | 3 +- .../rag/index_processor/index_processor.py | 2 +- api/models/dataset.py | 58 ++++++++++--------- api/services/vector_service.py | 2 + api/tasks/batch_clean_document_task.py | 2 +- .../batch_create_segment_to_index_task.py | 3 +- api/tasks/clean_document_task.py | 2 +- api/tasks/clean_notion_document_task.py | 2 +- api/tasks/disable_segment_from_index_task.py | 4 +- api/tasks/disable_segments_from_index_task.py | 2 +- api/tasks/document_indexing_sync_task.py | 2 +- api/tasks/document_indexing_update_task.py | 2 +- api/tasks/duplicate_document_indexing_task.py | 2 +- api/tasks/remove_document_from_index_task.py | 2 +- api/tasks/retry_document_indexing_task.py | 2 +- .../sync_website_document_indexing_task.py | 2 +- .../test_dataset_service_get_segments.py | 30 +++++----- .../tasks/test_add_document_to_index_task.py | 6 -- .../tasks/test_batch_clean_document_task.py | 6 +- .../tasks/test_clean_dataset_task.py | 13 ----- .../tasks/test_clean_notion_document_task.py | 28 +++------ .../test_deal_dataset_vector_index_task.py | 25 +------- .../test_delete_segment_from_index_task.py | 39 ++++++------- .../test_disable_segment_from_index_task.py | 2 +- .../test_disable_segments_from_index_task.py | 49 ++++++++-------- .../test_duplicate_document_indexing_task.py | 1 - .../test_enable_segments_to_index_task.py | 1 - .../services/test_dataset_service_segment.py | 6 +- 30 files changed, 132 
insertions(+), 174 deletions(-) diff --git a/api/core/rag/datasource/keyword/jieba/jieba.py b/api/core/rag/datasource/keyword/jieba/jieba.py index 392af351b6..b3f174bf78 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba.py +++ b/api/core/rag/datasource/keyword/jieba/jieba.py @@ -245,6 +245,7 @@ class Jieba(BaseKeyword): segment = pre_segment_data["segment"] if pre_segment_data["keywords"]: segment.keywords = pre_segment_data["keywords"] + assert segment.index_node_id keyword_table = self._add_text_to_keyword_table( keyword_table or {}, segment.index_node_id, pre_segment_data["keywords"] ) @@ -253,6 +254,7 @@ class Jieba(BaseKeyword): keywords = keyword_table_handler.extract_keywords(segment.content, keyword_number) segment.keywords = list(keywords) + assert segment.index_node_id keyword_table = self._add_text_to_keyword_table( keyword_table or {}, segment.index_node_id, list(keywords) ) diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 7769878e70..8cc2be8feb 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -1,5 +1,6 @@ import concurrent.futures import logging +from collections.abc import Sequence from concurrent.futures import ThreadPoolExecutor from typing import Any, NotRequired, TypedDict @@ -526,7 +527,7 @@ class RetrievalService: index_node_ids = [i for i in index_node_ids if i] segment_ids: list[str] = [] - index_node_segments: list[DocumentSegment] = [] + index_node_segments: Sequence[DocumentSegment] = [] segments: list[DocumentSegment] = [] attachment_map: dict[str, list[AttachmentInfoDict]] = {} child_chunk_map: dict[str, list[ChildChunk]] = {} @@ -568,8 +569,9 @@ class RetrievalService: DocumentSegment.status == "completed", DocumentSegment.index_node_id.in_(index_node_ids), ) - index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore + index_node_segments = session.execute(document_segment_stmt).scalars().all() for index_node_segment in index_node_segments: + assert index_node_segment.index_node_id doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id] if segment_ids: diff --git a/api/core/rag/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py index 78305a6ac0..c7d52d74cb 100644 --- a/api/core/rag/docstore/dataset_docstore.py +++ b/api/core/rag/docstore/dataset_docstore.py @@ -50,6 +50,7 @@ class DatasetDocumentStore: output = {} for document_segment in document_segments: + assert document_segment.index_node_id doc_id = document_segment.index_node_id output[doc_id] = Document( page_content=document_segment.content, @@ -103,7 +104,7 @@ class DatasetDocumentStore: if not segment_document: max_position += 1 - + assert self._document_id segment_document = DocumentSegment( tenant_id=self._dataset.tenant_id, dataset_id=self._dataset.id, diff --git a/api/core/rag/index_processor/index_processor.py b/api/core/rag/index_processor/index_processor.py index aded5315bd..757134e734 100644 --- a/api/core/rag/index_processor/index_processor.py +++ b/api/core/rag/index_processor/index_processor.py @@ -84,7 +84,7 @@ class IndexProcessor: select(DocumentSegment).where(DocumentSegment.document_id == original_document_id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] indexing_start_at = time.perf_counter() # delete from vector index diff --git 
a/api/models/dataset.py b/api/models/dataset.py index 65ea39969c..8137ed4ff3 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -8,7 +8,6 @@ import os import pickle import re import time -from collections.abc import Sequence from datetime import datetime from json import JSONDecodeError from typing import Any, ClassVar, TypedDict, cast @@ -831,7 +830,7 @@ class Document(Base): ) -class DocumentSegment(Base): +class DocumentSegment(TypeBase): __tablename__ = "document_segments" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="document_segment_pkey"), @@ -844,35 +843,40 @@ class DocumentSegment(Base): ) # initial fields - id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) - tenant_id = mapped_column(StringUUID, nullable=False) - dataset_id = mapped_column(StringUUID, nullable=False) - document_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, nullable=False, default_factory=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + document_id: Mapped[str] = mapped_column(StringUUID, nullable=False) position: Mapped[int] - content = mapped_column(LongText, nullable=False) - answer = mapped_column(LongText, nullable=True) + content: Mapped[str] = mapped_column(LongText, nullable=False) word_count: Mapped[int] tokens: Mapped[int] - # indexing fields - keywords = mapped_column(sa.JSON, nullable=True) - index_node_id = mapped_column(String(255), nullable=True) - index_node_hash = mapped_column(String(255), nullable=True) - + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) # basic fields + # indexing fields + index_node_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None) + index_node_hash: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None) + enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"), default=True) + answer: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) + keywords: Mapped[Any] = mapped_column(sa.JSON, nullable=True, default=None) + disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + disabled_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) + status: Mapped[SegmentStatus] = mapped_column( + EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'"), default=SegmentStatus.WAITING + ) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) + error: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) + stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) hit_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) - enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) - disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - disabled_by = 
mapped_column(StringUUID, nullable=True) - status: Mapped[str] = mapped_column(EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'")) - created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_by = mapped_column(StringUUID, nullable=True) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - error = mapped_column(LongText, nullable=True) - stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) @property def dataset(self): @@ -899,7 +903,7 @@ class DocumentSegment(Base): ) @property - def child_chunks(self) -> Sequence[Any]: + def child_chunks(self): if not self.document: return [] process_rule = self.document.dataset_process_rule @@ -914,7 +918,7 @@ class DocumentSegment(Base): return child_chunks or [] return [] - def get_child_chunks(self) -> Sequence[Any]: + def get_child_chunks(self): if not self.document: return [] process_rule = self.document.dataset_process_rule diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 7e689af35d..49c3b85831 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -111,6 +111,7 @@ class VectorService: "dataset_id": segment.dataset_id, }, ) + assert segment.index_node_id if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY: # update vector index vector = Vector(dataset=dataset) @@ -138,6 +139,7 @@ class VectorService: regenerate: bool = False, ): index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor() + assert segment.index_node_id if regenerate: # delete child chunks index_processor.clean(dataset, [segment.index_node_id], with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index 56c371fcc1..5794726716 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -50,7 +50,7 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] segment_ids = [segment.id for segment in segments] # Collect image file IDs from segment content diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index beb23d8354..9f19b03544 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -19,6 +19,7 @@ from graphon.model_runtime.entities.model_entities import ModelType from libs import helper from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment +from models.enums import SegmentStatus from models.model import UploadFile from services.vector_service import VectorService @@ -156,7 +157,7 @@ def batch_create_segment_to_index_task( tokens=tokens, created_by=user_id, indexing_at=naive_utc_now(), - status="completed", + status=SegmentStatus.COMPLETED, completed_at=naive_utc_now(), ) if document_config["doc_form"] == IndexStructureType.QA_INDEX: diff --git a/api/tasks/clean_document_task.py 
b/api/tasks/clean_document_task.py index c8d0e31c06..869e2b3028 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -53,7 +53,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i binding_ids = [binding.id for binding, _ in attachments_with_bindings] total_attachment_files.extend([attachment_file.key for _, attachment_file in attachments_with_bindings]) - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] segment_contents = [segment.content for segment in segments] except Exception: logger.exception("Cleaned document when document deleted failed") diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index 017d60efac..782d7d0226 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -38,7 +38,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str): for document_id in document_ids: segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - total_index_node_ids.extend([segment.index_node_id for segment in segments]) + total_index_node_ids.extend([segment.index_node_id for segment in segments if segment.index_node_id]) # Wrap vector / keyword index cleanup in try/except so that a transient # failure here (e.g. billing API hiccup propagated via FeatureService when diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py index dd1a40844b..d00e143093 100644 --- a/api/tasks/disable_segment_from_index_task.py +++ b/api/tasks/disable_segment_from_index_task.py @@ -9,6 +9,7 @@ from core.db.session_factory import session_factory from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_redis import redis_client from models.dataset import DocumentSegment +from models.enums import SegmentStatus logger = logging.getLogger(__name__) @@ -30,7 +31,7 @@ def disable_segment_from_index_task(segment_id: str): logger.info(click.style(f"Segment not found: {segment_id}", fg="red")) return - if segment.status != "completed": + if segment.status != SegmentStatus.COMPLETED: logger.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red")) return @@ -59,6 +60,7 @@ def disable_segment_from_index_task(segment_id: str): index_type = dataset_document.doc_form index_processor = IndexProcessorFactory(index_type).init_index_processor() + assert segment.index_node_id index_processor.clean(dataset, [segment.index_node_id]) # Disable summary index for this segment diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index 86e96ea3f0..cd91ddd074 100644 --- a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -55,7 +55,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen return try: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] if dataset.is_multimodal: segment_ids = [segment.id for segment in segments] segment_attachment_bindings = session.scalars( diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index 90c80be3a1..842e7dcdb2 100644 --- a/api/tasks/document_indexing_sync_task.py 
+++ b/api/tasks/document_indexing_sync_task.py @@ -69,7 +69,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): index_type = document.doc_form segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # Get credentials from datasource provider datasource_provider_service = DatasourceProviderService() diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py index 15f0e0162b..39564bbede 100644 --- a/api/tasks/document_indexing_update_task.py +++ b/api/tasks/document_indexing_update_task.py @@ -45,7 +45,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str): index_type = document.doc_form segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] clean_success = False try: diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index 6bc58bdf9c..71f367c5e7 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -137,7 +137,7 @@ def _duplicate_document_indexing_task(dataset_id: str, document_ids: Sequence[st select(DocumentSegment).where(DocumentSegment.document_id == document.id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index 74e8a012cf..2314d32232 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -61,7 +61,7 @@ def remove_document_from_index_task(document_id: str): except Exception as e: logger.warning("Failed to disable summaries for document %s: %s", document.id, str(e)) - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] if index_node_ids: try: index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py index 7cc28d5226..0df5896ce3 100644 --- a/api/tasks/retry_document_indexing_task.py +++ b/api/tasks/retry_document_indexing_task.py @@ -85,7 +85,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_ select(DocumentSegment).where(DocumentSegment.document_id == document_id) ).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tasks/sync_website_document_indexing_task.py b/api/tasks/sync_website_document_indexing_task.py index ab21f63f7e..06eb460311 100644 --- a/api/tasks/sync_website_document_indexing_task.py +++ 
b/api/tasks/sync_website_document_indexing_task.py @@ -70,7 +70,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all() if segments: - index_node_ids = [segment.index_node_id for segment in segments] + index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id] # delete from vector index index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) diff --git a/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py b/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py index 87239b2cb3..bd8f5371b8 100644 --- a/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py +++ b/api/tests/test_containers_integration_tests/services/test_dataset_service_get_segments.py @@ -13,9 +13,9 @@ from uuid import uuid4 from sqlalchemy.orm import Session from core.rag.index_processor.constant.index_type import IndexTechniqueType -from models import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole, TenantStatus from models.dataset import Dataset, DatasetPermissionEnum, Document, DocumentSegment -from models.enums import DataSourceType, DocumentCreatedFrom +from models.enums import DataSourceType, DocumentCreatedFrom, SegmentStatus from services.dataset_service import SegmentService @@ -35,13 +35,13 @@ class SegmentServiceTestDataFactory: email=f"{uuid4()}@example.com", name=f"user-{uuid4()}", interface_language="en-US", - status="active", + status=AccountStatus.ACTIVE, ) db_session_with_containers.add(account) db_session_with_containers.commit() if tenant is None: - tenant = Tenant(name=f"tenant-{uuid4()}", status="normal") + tenant = Tenant(name=f"tenant-{uuid4()}", status=TenantStatus.NORMAL) db_session_with_containers.add(tenant) db_session_with_containers.commit() @@ -103,7 +103,7 @@ class SegmentServiceTestDataFactory: created_by: str, position: int = 1, content: str = "Test content", - status: str = "completed", + status: SegmentStatus = SegmentStatus.COMPLETED, word_count: int = 10, tokens: int = 15, ) -> DocumentSegment: @@ -203,7 +203,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -212,7 +212,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -221,7 +221,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=3, - status="waiting", + status=SegmentStatus.WAITING, ) # Act @@ -257,7 +257,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -266,7 +266,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, ) # Act @@ -415,7 +415,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - 
status="completed", + status=SegmentStatus.COMPLETED, content="This is important information", ) SegmentServiceTestDataFactory.create_segment( @@ -425,7 +425,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="indexing", + status=SegmentStatus.INDEXING, content="This is also important", ) SegmentServiceTestDataFactory.create_segment( @@ -435,7 +435,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=3, - status="completed", + status=SegmentStatus.COMPLETED, content="This is irrelevant", ) @@ -477,7 +477,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=1, - status="completed", + status=SegmentStatus.COMPLETED, ) SegmentServiceTestDataFactory.create_segment( db_session_with_containers, @@ -486,7 +486,7 @@ class TestSegmentServiceGetSegments: document_id=document.id, created_by=owner.id, position=2, - status="waiting", + status=SegmentStatus.WAITING, ) # Act diff --git a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py index fcc15aad42..94fd7602f5 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py @@ -128,7 +128,6 @@ class TestAddDocumentToIndexTask: for i in range(3): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -451,7 +450,6 @@ class TestAddDocumentToIndexTask: segments = [] for i in range(3): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -630,7 +628,6 @@ class TestAddDocumentToIndexTask: # Segment 1: Should be processed (enabled=False, status=SegmentStatus.COMPLETED) segment1 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -650,7 +647,6 @@ class TestAddDocumentToIndexTask: # Segment 2: Should be processed (enabled=True, status=SegmentStatus.COMPLETED) # Note: Implementation doesn't filter by enabled status, only by status=SegmentStatus.COMPLETED segment2 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -669,7 +665,6 @@ class TestAddDocumentToIndexTask: # Segment 3: Should NOT be processed (enabled=False, status="processing") segment3 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, @@ -688,7 +683,6 @@ class TestAddDocumentToIndexTask: # Segment 4: Should be processed (enabled=False, status=SegmentStatus.COMPLETED) segment4 = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py index e29ca7ebab..436c8f11b0 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py @@ -177,7 +177,6 @@ class TestBatchCleanDocumentTask: fake = Faker() segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, 
document_id=document.id, @@ -290,10 +289,9 @@ class TestBatchCleanDocumentTask: account = self._create_test_account(db_session_with_containers) dataset = self._create_test_dataset(db_session_with_containers, account) document = self._create_test_document(db_session_with_containers, dataset, account) - + assert account.current_tenant # Create segment with simple content (no image references) segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, document_id=document.id, @@ -692,9 +690,9 @@ class TestBatchCleanDocumentTask: # Create multiple segments for the document segments = [] + assert account.current_tenant for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=document.dataset_id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py index 32bc2fc0bd..a31552a09e 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py @@ -220,7 +220,6 @@ class TestCleanDatasetTask: DocumentSegment: Created document segment instance """ segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -232,8 +231,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash", - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) @@ -614,7 +611,6 @@ class TestCleanDatasetTask: """ segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -626,8 +622,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash", - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) @@ -729,8 +723,6 @@ class TestCleanDatasetTask: type=DatasetMetadataType.STRING, created_by=account.id, ) - metadata.id = str(uuid.uuid4()) - metadata.created_at = datetime.now() metadata_items.append(metadata) # Create binding for each metadata item @@ -741,8 +733,6 @@ class TestCleanDatasetTask: document_id=documents[i % len(documents)].id, created_by=account.id, ) - binding.id = str(uuid.uuid4()) - binding.created_at = datetime.now() bindings.append(binding) db_session_with_containers.add_all(metadata_items) @@ -946,7 +936,6 @@ class TestCleanDatasetTask: long_content = "Very long content " * 100 # Long content within reasonable limits segment_content = f"Segment with special chars: {special_content}\n{long_content}" segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -958,8 +947,6 @@ class TestCleanDatasetTask: status=SegmentStatus.COMPLETED, index_node_id=str(uuid.uuid4()), index_node_hash="test_hash_" + "x" * 50, # Long hash within limits - created_at=datetime.now(), - updated_at=datetime.now(), ) db_session_with_containers.add(segment) db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py index 1c8d5969e0..ef65b90508 100644 --- 
a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py @@ -132,11 +132,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() document_ids.append(document.id) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -297,10 +296,9 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create test segment segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -379,12 +377,11 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create segments without index_node_ids segments = [] for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -468,11 +465,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -569,10 +565,9 @@ class TestCleanNotionDocumentTask: segment_statuses = [SegmentStatus.WAITING, SegmentStatus.INDEXING, SegmentStatus.COMPLETED, SegmentStatus.ERROR] segments = [] index_node_ids = [] - + assert tenant for i, status in enumerate(segment_statuses): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -665,10 +660,9 @@ class TestCleanNotionDocumentTask: ) db_session_with_containers.add(document) db_session_with_containers.flush() - + assert tenant # Create segment segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -765,12 +759,11 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create multiple segments for each document num_segments_per_doc = 5 for j in range(num_segments_per_doc): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -875,7 +868,6 @@ class TestCleanNotionDocumentTask: # Create segments for each document for j in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=account.current_tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -984,11 +976,10 @@ class TestCleanNotionDocumentTask: db_session_with_containers.add(document) db_session_with_containers.flush() documents.append(document) - + assert tenant # Create segments for each document for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -1093,10 +1084,9 @@ class TestCleanNotionDocumentTask: # Create segments with metadata segments = [] index_node_ids = [] - + assert tenant for i in range(3): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, diff --git 
a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py index e4cbb9e589..aba2458d55 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py @@ -90,7 +90,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -150,7 +149,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -202,7 +200,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -253,7 +250,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset with parent-child index dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -305,7 +301,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -371,7 +366,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without documents dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -403,7 +397,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -461,7 +454,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without documents dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -494,7 +486,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -546,7 +537,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -592,7 +582,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset with custom index type dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -624,7 +613,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -670,7 +658,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset without doc_form (should use default) dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -702,7 +689,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -748,7 +734,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -806,7 +791,6 @@ class 
TestDealDatasetVectorIndexTask: for i, document in enumerate(documents): for j in range(2): segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -832,6 +816,7 @@ class TestDealDatasetVectorIndexTask: updated_document = db_session_with_containers.scalar( select(Document).where(Document.id == document.id).limit(1) ) + assert updated_document assert updated_document.indexing_status == IndexingStatus.COMPLETED # Verify index processor load was called multiple times @@ -853,7 +838,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -905,7 +889,6 @@ class TestDealDatasetVectorIndexTask: # Create segments segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=document.id, @@ -952,7 +935,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1024,7 +1006,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for enabled document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=enabled_document.id, @@ -1075,7 +1056,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1147,7 +1127,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for active document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=active_document.id, @@ -1198,7 +1177,6 @@ class TestDealDatasetVectorIndexTask: # Create dataset dataset = Dataset( - id=str(uuid.uuid4()), tenant_id=tenant.id, name=fake.company(), description=fake.text(max_nb_chars=100), @@ -1270,7 +1248,6 @@ class TestDealDatasetVectorIndexTask: # Create segments for completed document only segment = DocumentSegment( - id=str(uuid.uuid4()), tenant_id=tenant.id, dataset_id=dataset.id, document_id=completed_document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py index f4a71040c1..a7edf4f77a 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py @@ -209,26 +209,25 @@ class TestDeleteSegmentFromIndexTask: segments = [] for i in range(count): - segment = DocumentSegment() - segment.id = fake.uuid4() - segment.tenant_id = document.tenant_id - segment.dataset_id = document.dataset_id - segment.document_id = document.id - segment.position = i + 1 - segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}" - segment.answer = f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}" - segment.word_count = fake.random_int(min=10, max=100) - segment.tokens = fake.random_int(min=5, max=50) - segment.keywords = [fake.word() for _ in range(3)] - segment.index_node_id = f"node_{fake.uuid4()}" - segment.index_node_hash = fake.sha256() - segment.hit_count = 0 - segment.enabled = True - segment.status = SegmentStatus.COMPLETED - segment.created_by = account.id - segment.created_at = fake.date_time_this_year() - 
segment.updated_by = account.id - segment.updated_at = segment.created_at + created_at = fake.date_time_this_year() + segment = DocumentSegment( + tenant_id=document.tenant_id, + dataset_id=document.dataset_id, + document_id=document.id, + position=i + 1, + content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}", + answer=f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}", + word_count=fake.random_int(min=10, max=100), + tokens=fake.random_int(min=5, max=50), + keywords=[fake.word() for _ in range(3)], + index_node_id=f"node_{fake.uuid4()}", + index_node_hash=fake.sha256(), + hit_count=0, + enabled=True, + status=SegmentStatus.COMPLETED, + created_by=account.id, + updated_by=account.id, + ) db_session_with_containers.add(segment) segments.append(segment) diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py index 5bdf7d1389..34e2ce4e80 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segment_from_index_task.py @@ -159,7 +159,7 @@ class TestDisableSegmentFromIndexTask: dataset: Dataset, tenant: Tenant, account: Account, - status: str = "completed", + status: SegmentStatus = SegmentStatus.COMPLETED, enabled: bool = True, ) -> DocumentSegment: """ diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py index 6a95bfc425..cb5fb5483c 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py @@ -185,30 +185,31 @@ class TestDisableSegmentsFromIndexTask: segments = [] for i in range(count): - segment = DocumentSegment() - segment.id = fake.uuid4() - segment.tenant_id = dataset.tenant_id - segment.dataset_id = dataset.id - segment.document_id = document.id - segment.position = i + 1 - segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}" - segment.answer = f"Test answer {i + 1}" if i % 2 == 0 else None - segment.word_count = fake.random_int(min=10, max=100) - segment.tokens = fake.random_int(min=5, max=50) - segment.keywords = [fake.word() for _ in range(3)] - segment.index_node_id = f"node_{segment.id}" - segment.index_node_hash = fake.sha256() - segment.hit_count = 0 - segment.enabled = True - segment.disabled_at = None - segment.disabled_by = None - segment.status = SegmentStatus.COMPLETED - segment.created_by = account.id - segment.updated_by = account.id - segment.indexing_at = fake.date_time_this_year() - segment.completed_at = fake.date_time_this_year() - segment.error = None - segment.stopped_at = None + id = fake.uuid4() + segment = DocumentSegment( + tenant_id=dataset.tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=i + 1, + content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}", + answer=f"Test answer {i + 1}" if i % 2 == 0 else None, + word_count=fake.random_int(min=10, max=100), + tokens=fake.random_int(min=5, max=50), + keywords=[fake.word() for _ in range(3)], + index_node_id=f"node_{id}", + index_node_hash=fake.sha256(), + hit_count=0, + enabled=True, + disabled_at=None, + disabled_by=None, + status=SegmentStatus.COMPLETED, + created_by=account.id, + 
updated_by=account.id, + indexing_at=fake.date_time_this_year(), + completed_at=fake.date_time_this_year(), + error=None, + stopped_at=None, + ) segments.append(segment) diff --git a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py index 12440f3e6b..e1c7e3e09a 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py @@ -175,7 +175,6 @@ class TestDuplicateDocumentIndexingTasks: for document in documents: for i in range(segments_per_doc): segment = DocumentSegment( - id=fake.uuid4(), tenant_id=dataset.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py index e2f35067e3..6d3b90d41c 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py @@ -139,7 +139,6 @@ class TestEnableSegmentsToIndexTask: for i in range(count): text = fake.text(max_nb_chars=200) segment = DocumentSegment( - id=fake.uuid4(), tenant_id=document.tenant_id, dataset_id=dataset.id, document_id=document.id, diff --git a/api/tests/unit_tests/services/test_dataset_service_segment.py b/api/tests/unit_tests/services/test_dataset_service_segment.py index 6330e53765..1f8586e32f 100644 --- a/api/tests/unit_tests/services/test_dataset_service_segment.py +++ b/api/tests/unit_tests/services/test_dataset_service_segment.py @@ -282,7 +282,6 @@ class TestSegmentServiceQueries: def test_get_segment_by_id_returns_only_document_segment_instances(self): segment = DocumentSegment( - id="segment-1", tenant_id="tenant-1", dataset_id="dataset-1", document_id="doc-1", @@ -292,7 +291,7 @@ class TestSegmentServiceQueries: tokens=2, created_by="user-1", ) - + segment.id = "segment-1" with patch("services.dataset_service.db") as mock_db: mock_db.session.scalar.return_value = segment result = SegmentService.get_segment_by_id("segment-1", "tenant-1") @@ -307,7 +306,6 @@ class TestSegmentServiceQueries: def test_get_segments_by_document_and_dataset_returns_scalars_result(self): segment = DocumentSegment( - id="segment-1", tenant_id="tenant-1", dataset_id="dataset-1", document_id="doc-1", @@ -318,6 +316,7 @@ class TestSegmentServiceQueries: created_by="user-1", ) + segment.id = "segment-1" with patch("services.dataset_service.db") as mock_db: mock_db.session.scalars.return_value.all.return_value = [segment] @@ -461,6 +460,7 @@ class TestSegmentServiceMutations: vector_service.create_segments_vector.side_effect = RuntimeError("vector failed") result = SegmentService.multi_create_segment(segments, document, dataset) + assert result assert len(result) == 2 assert [segment.position for segment in result] == [2, 3] From 7aa8f1a0b6595a0803fc898a46c42726dbc25192 Mon Sep 17 00:00:00 2001 From: twwu Date: Tue, 12 May 2026 16:26:15 +0800 Subject: [PATCH 8/8] fix: Fix frontend build error caused by merging main --- pnpm-lock.yaml | 21 +------ .../components/app/app-publisher/index.tsx | 3 +- .../tool-selector/index.tsx | 56 +++---------------- .../workflow/block-selector/tool-picker.tsx | 23 +++----- 4 files changed, 18 insertions(+), 85 deletions(-) diff --git 
index 53cae0b179..40a7e3e935 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1806,12 +1806,6 @@
   '@emnapi/wasi-threads@1.2.1':
     resolution: {integrity: sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==}

-  '@emnapi/runtime@1.9.2':
-    resolution: {integrity: sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==}
-
-  '@emnapi/wasi-threads@1.2.1':
-    resolution: {integrity: sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==}
-
   '@emoji-mart/data@1.2.1':
     resolution: {integrity: sha512-no2pQMWiBy6gpBEiqGeU77/bFejDqUTRY7KX+0+iur13op3bqUsXdnwoZs6Xb1zbv0gAj5VvS1PWoUUckSr5Dw==}
@@ -2770,9 +2764,6 @@
   '@oxc-project/types@0.128.0':
     resolution: {integrity: sha512-huv1Y/LzBJkBVHt3OlC7u0zHBW9qXf1FdD7sGmc1rXc2P1mTwHssYv7jyGx5KAACSCH+9B3Bhn6Z9luHRvf7pQ==}

-  '@oxc-project/types@0.127.0':
-    resolution: {integrity: sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ==}
-
   '@oxc-resolver/binding-android-arm-eabi@11.19.1':
     resolution: {integrity: sha512-aUs47y+xyXHUKlbhqHUjBABjvycq6YSD7bpxSW7vplUmdzAlJ93yXY6ZR0c1o1x5A/QKbENCvs3+NlY8IpIVzg==}
     cpu: [arm]
@@ -9086,16 +9077,6 @@ snapshots:
       tslib: 2.8.1
     optional: true

-  '@emnapi/runtime@1.9.2':
-    dependencies:
-      tslib: 2.8.1
-    optional: true
-
-  '@emnapi/wasi-threads@1.2.1':
-    dependencies:
-      tslib: 2.8.1
-    optional: true
-
   '@emoji-mart/data@1.2.1': {}

   '@es-joy/jsdoccomment@0.84.0':
@@ -16578,4 +16559,4 @@ time:
   vitest-canvas-mock@1.1.4: '2026-03-24T14:42:39.285Z'
   zod@4.4.3: '2026-05-04T07:06:40.819Z'
   zundo@2.3.0: '2024-11-17T16:35:11.372Z'
-  zustand@5.0.13: '2026-05-05T00:04:17.510Z'
+  zustand@5.0.13: '2026-05-05T00:04:17.510Z'
\ No newline at end of file
diff --git a/web/app/components/app/app-publisher/index.tsx b/web/app/components/app/app-publisher/index.tsx
index d437e1de09..96fe9d3e38 100644
--- a/web/app/components/app/app-publisher/index.tsx
+++ b/web/app/components/app/app-publisher/index.tsx
@@ -7,7 +7,6 @@ import type { PublishWorkflowParams } from '@/types/workflow'
 import { Button } from '@langgenius/dify-ui/button'
 import { Popover, PopoverContent, PopoverTrigger } from '@langgenius/dify-ui/popover'
 import { toast } from '@langgenius/dify-ui/toast'
-import { RiStoreLine } from '@remixicon/react'
 import { useSuspenseQuery } from '@tanstack/react-query'
 import { useKeyPress } from 'ahooks'
 import {
@@ -39,7 +38,7 @@ import { appDefaultIconBackground } from '@/config'
 import { useAsyncWindowOpen } from '@/hooks/use-async-window-open'
 import { useFormatTimeFromNow } from '@/hooks/use-format-time-from-now'
 import { AccessMode } from '@/models/access-control'
-import { useAppWhiteListSubjects, useGetUserCanAccessApp } from '@/service/access-control'
+import { useAppWhiteListSubjects, useGetUserCanAccessApp } from '@/service/access-control/use-app-access-control'
 import { fetchAppDetailDirect, publishToCreatorsPlatform } from '@/service/apps'
 import { fetchInstalledAppList } from '@/service/explore'
 import { systemFeaturesQueryOptions } from '@/service/system-features'
diff --git a/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx b/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx
index cda0daf49a..40166ff9ec 100644
--- a/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx
+++ b/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx
@@ -6,14 +6,13 @@ import type { Node } from 'reactflow'
 import type { ToolValue } from '@/app/components/workflow/block-selector/types'
 import type { NodeOutPutVar } from '@/app/components/workflow/types'
 import { cn } from '@langgenius/dify-ui/cn'
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from '@langgenius/dify-ui/popover'
 import * as React from 'react'
 import { useTranslation } from 'react-i18next'
-// eslint-disable-next-line no-restricted-imports -- legacy overlay migration is handled separately from this change
-import {
-  PortalToFollowElem,
-  PortalToFollowElemContent,
-  PortalToFollowElemTrigger,
-} from '@/app/components/base/portal-to-follow-elem'
 import { CollectionType } from '@/app/components/tools/types'
 import Link from '@/next/link'
 import {
@@ -134,9 +133,7 @@ const ToolSelector: FC<Props> = ({
   )
   return (
-    <PortalToFollowElem
@@ -186,43 +183,6 @@
       alignOffset={alignOffset}
       popupClassName="border-none bg-transparent shadow-none"
     >
-      {trigger}
-
-      {/* Default trigger - no value */}
-      {!trigger && !value?.provider_name && (
-      )}
-
-      {/* Default trigger - with value */}
-      {!trigger && value?.provider_name && (
-      )}
-
-
-
@@ ... @@ const ToolSelector: FC<Props> = ({
             onParamsFormChange={handleParamsFormChange}
           />
-      </PortalToFollowElemContent>
-    </PortalToFollowElem>
+      </PopoverContent>
+    </Popover>
   )
 }
diff --git a/web/app/components/workflow/block-selector/tool-picker.tsx b/web/app/components/workflow/block-selector/tool-picker.tsx
index 01ccf6242b..93ee1882db 100644
--- a/web/app/components/workflow/block-selector/tool-picker.tsx
+++ b/web/app/components/workflow/block-selector/tool-picker.tsx
@@ -6,18 +6,17 @@ import type { ToolDefaultValue, ToolValue } from './types'
 import type { CustomCollectionBackend } from '@/app/components/tools/types'
 import type { BlockEnum, OnSelectBlock } from '@/app/components/workflow/types'
 import { cn } from '@langgenius/dify-ui/cn'
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from '@langgenius/dify-ui/popover'
 import { toast } from '@langgenius/dify-ui/toast'
 import { useSuspenseQuery } from '@tanstack/react-query'
 import { useBoolean } from 'ahooks'
 import * as React from 'react'
 import { useMemo, useState } from 'react'
 import { useTranslation } from 'react-i18next'
-// eslint-disable-next-line no-restricted-imports -- legacy overlay migration is handled separately from this change
-import {
-  PortalToFollowElem,
-  PortalToFollowElemContent,
-  PortalToFollowElemTrigger,
-} from '@/app/components/base/portal-to-follow-elem'
 import SearchBox from '@/app/components/plugins/marketplace/search-box'
 import EditCustomToolModal from '@/app/components/tools/edit-custom-collection-modal'
 import AllTools from '@/app/components/workflow/block-selector/all-tools'
@@ -159,9 +158,7 @@ const ToolPicker: FC<Props> = ({
   }

   return (
-    <PortalToFollowElem
@@ -178,10 +175,6 @@
       alignOffset={alignOffset}
       popupClassName="border-none bg-transparent shadow-none"
     >
-      {trigger}
-
-
-
@@ ... @@ const ToolPicker: FC<Props> = ({
         }}
       />
-      </PortalToFollowElemContent>
-    </PortalToFollowElem>
+      </PopoverContent>
+    </Popover>
   )
 }