diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index f398989d27..92c85b4951 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -162,6 +162,7 @@ class DatasetDocumentListApi(Resource): "keyword": "Search keyword", "sort": "Sort order (default: -created_at)", "fetch": "Fetch full details (default: false)", + "status": "Filter documents by display status", } ) @api.response(200, "Documents retrieved successfully") @@ -175,6 +176,7 @@ class DatasetDocumentListApi(Resource): limit = request.args.get("limit", default=20, type=int) search = request.args.get("keyword", default=None, type=str) sort = request.args.get("sort", default="-created_at", type=str) + status = request.args.get("status", default=None, type=str) # "yes", "true", "t", "y", "1" convert to True, while others convert to False. try: fetch_val = request.args.get("fetch", default="false") @@ -203,6 +205,9 @@ class DatasetDocumentListApi(Resource): query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=current_tenant_id) + if status: + query = DocumentService.apply_display_status_filter(query, status) + if search: search = f"%{search}%" query = query.where(Document.name.like(search)) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 358605e8a8..7c525845a7 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -456,12 +456,16 @@ class DocumentListApi(DatasetApiResource): page = request.args.get("page", default=1, type=int) limit = request.args.get("limit", default=20, type=int) search = request.args.get("keyword", default=None, type=str) + status = request.args.get("status", default=None, type=str) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=tenant_id) + if status: + query = DocumentService.apply_display_status_filter(query, status) + if search: search = f"%{search}%" query = query.where(Document.name.like(search)) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 78de76df7e..037ef469d2 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -1082,6 +1082,62 @@ class DocumentService: }, } + DISPLAY_STATUS_ALIASES: dict[str, str] = { + "active": "available", + "enabled": "available", + } + + _INDEXING_STATUSES: tuple[str, ...] = ("parsing", "cleaning", "splitting", "indexing") + + DISPLAY_STATUS_FILTERS: dict[str, tuple[Any, ...]] = { + "queuing": (Document.indexing_status == "waiting",), + "indexing": ( + Document.indexing_status.in_(_INDEXING_STATUSES), + Document.is_paused.is_not(True), + ), + "paused": ( + Document.indexing_status.in_(_INDEXING_STATUSES), + Document.is_paused.is_(True), + ), + "error": (Document.indexing_status == "error",), + "available": ( + Document.indexing_status == "completed", + Document.archived.is_(False), + Document.enabled.is_(True), + ), + "disabled": ( + Document.indexing_status == "completed", + Document.archived.is_(False), + Document.enabled.is_(False), + ), + "archived": ( + Document.indexing_status == "completed", + Document.archived.is_(True), + ), + } + + @classmethod + def normalize_display_status(cls, status: str | None) -> str | None: + if not status: + return None + normalized = status.lower() + normalized = cls.DISPLAY_STATUS_ALIASES.get(normalized, normalized) + return normalized if normalized in cls.DISPLAY_STATUS_FILTERS else None + + @classmethod + def build_display_status_filters(cls, status: str | None) -> tuple[Any, ...]: + normalized = cls.normalize_display_status(status) + if not normalized: + return () + return cls.DISPLAY_STATUS_FILTERS[normalized] + + @classmethod + def apply_display_status_filter(cls, query, status: str | None): + filters = cls.build_display_status_filters(status) + if not filters: + return query + return query.where(*filters) + DOCUMENT_METADATA_SCHEMA: dict[str, Any] = { "book": { "title": str, diff --git a/api/tests/unit_tests/services/test_document_service_display_status.py b/api/tests/unit_tests/services/test_document_service_display_status.py new file mode 100644 index 0000000000..a2f4e30c97 --- /dev/null +++ b/api/tests/unit_tests/services/test_document_service_display_status.py @@ -0,0 +1,33 @@ +import sqlalchemy as sa + +from models.dataset import Document +from services.dataset_service import DocumentService + + +def test_normalize_display_status_alias_mapping(): + assert DocumentService.normalize_display_status("ACTIVE") == "available" + assert DocumentService.normalize_display_status("enabled") == "available" + assert DocumentService.normalize_display_status("archived") == "archived" + assert DocumentService.normalize_display_status("unknown") is None + + +def test_build_display_status_filters_available(): + filters = DocumentService.build_display_status_filters("available") + assert len(filters) == 3 + for condition in filters: + assert condition is not None + + +def test_apply_display_status_filter_applies_when_status_present(): + query = sa.select(Document) + filtered = DocumentService.apply_display_status_filter(query, "queuing") + compiled = str(filtered.compile(compile_kwargs={"literal_binds": True})) + assert "WHERE" in compiled + assert "document.indexing_status = 'waiting'" in compiled + + +def test_apply_display_status_filter_returns_same_when_invalid(): + query = sa.select(Document) + filtered = DocumentService.apply_display_status_filter(query, "invalid") + compiled = str(filtered.compile(compile_kwargs={"literal_binds": True})) + assert "WHERE" not in compiled diff --git a/web/app/components/base/sort/index.tsx b/web/app/components/base/sort/index.tsx index af90233575..3823b13d1a 100644 --- a/web/app/components/base/sort/index.tsx +++ b/web/app/components/base/sort/index.tsx @@ -47,10 +47,10 @@ const Sort: FC = ({ className='block' >
-
+
{t('appLog.filter.sortBy')}
{triggerContent} diff --git a/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts b/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts index 4531b7e658..f2a251d99d 100644 --- a/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts +++ b/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts @@ -1,16 +1,31 @@ import { type ReadonlyURLSearchParams, usePathname, useRouter, useSearchParams } from 'next/navigation' import { useCallback, useMemo } from 'react' +import { sanitizeStatusValue } from '../status-filter' +import type { SortType } from '@/service/datasets' + +const ALLOWED_SORT_VALUES: SortType[] = ['-created_at', 'created_at', '-hit_count', 'hit_count'] + +const sanitizeSortValue = (value?: string | null): SortType => { + if (!value) + return '-created_at' + + return (ALLOWED_SORT_VALUES.includes(value as SortType) ? value : '-created_at') as SortType +} export type DocumentListQuery = { page: number limit: number keyword: string + status: string + sort: SortType } const DEFAULT_QUERY: DocumentListQuery = { page: 1, limit: 10, keyword: '', + status: 'all', + sort: '-created_at', } // Parse the query parameters from the URL search string. @@ -18,17 +33,21 @@ function parseParams(params: ReadonlyURLSearchParams): DocumentListQuery { const page = Number.parseInt(params.get('page') || '1', 10) const limit = Number.parseInt(params.get('limit') || '10', 10) const keyword = params.get('keyword') || '' + const status = sanitizeStatusValue(params.get('status')) + const sort = sanitizeSortValue(params.get('sort')) return { page: page > 0 ? page : 1, limit: (limit > 0 && limit <= 100) ? limit : 10, keyword: keyword ? decodeURIComponent(keyword) : '', + status, + sort, } } // Update the URL search string with the given query parameters. function updateSearchParams(query: DocumentListQuery, searchParams: URLSearchParams) { - const { page, limit, keyword } = query || {} + const { page, limit, keyword, status, sort } = query || {} const hasNonDefaultParams = (page && page > 1) || (limit && limit !== 10) || (keyword && keyword.trim()) @@ -45,6 +64,18 @@ function updateSearchParams(query: DocumentListQuery, searchParams: URLSearchPar searchParams.set('keyword', encodeURIComponent(keyword)) else searchParams.delete('keyword') + + const sanitizedStatus = sanitizeStatusValue(status) + if (sanitizedStatus && sanitizedStatus !== 'all') + searchParams.set('status', sanitizedStatus) + else + searchParams.delete('status') + + const sanitizedSort = sanitizeSortValue(sort) + if (sanitizedSort !== '-created_at') + searchParams.set('sort', sanitizedSort) + else + searchParams.delete('sort') } function useDocumentListQueryState() { @@ -57,6 +88,8 @@ function useDocumentListQueryState() { // Helper function to update specific query parameters const updateQuery = useCallback((updates: Partial) => { const newQuery = { ...query, ...updates } + newQuery.status = sanitizeStatusValue(newQuery.status) + newQuery.sort = sanitizeSortValue(newQuery.sort) const params = new URLSearchParams() updateSearchParams(newQuery, params) const search = params.toString() diff --git a/web/app/components/datasets/documents/index.tsx b/web/app/components/datasets/documents/index.tsx index 613257efee..40e6d48a41 100644 --- a/web/app/components/datasets/documents/index.tsx +++ b/web/app/components/datasets/documents/index.tsx @@ -25,10 +25,12 @@ import useEditDocumentMetadata from '../metadata/hooks/use-edit-dataset-metadata import DatasetMetadataDrawer from '../metadata/metadata-dataset/dataset-metadata-drawer' import StatusWithAction from '../common/document-status-with-action/status-with-action' import { useDocLink } from '@/context/i18n' -import { SimpleSelect } from '../../base/select' -import StatusItem from './detail/completed/status-item' +import Chip from '../../base/chip' +import Sort from '../../base/sort' +import type { SortType } from '@/service/datasets' import type { Item } from '@/app/components/base/select' import { useIndexStatus } from './status-item/hooks' +import { normalizeStatusForQuery, sanitizeStatusValue } from './status-filter' const FolderPlusIcon = ({ className }: React.SVGProps) => { return @@ -84,13 +86,12 @@ const Documents: FC = ({ datasetId }) => { const docLink = useDocLink() const { plan } = useProviderContext() const isFreePlan = plan.type === 'sandbox' + const { query, updateQuery } = useDocumentListQueryState() const [inputValue, setInputValue] = useState('') // the input value const [searchValue, setSearchValue] = useState('') - const [statusFilter, setStatusFilter] = useState({ value: 'all', name: 'All Status' }) + const [statusFilterValue, setStatusFilterValue] = useState(() => sanitizeStatusValue(query.status)) + const [sortValue, setSortValue] = useState(query.sort) const DOC_INDEX_STATUS_MAP = useIndexStatus() - - // Use the new hook for URL state management - const { query, updateQuery } = useDocumentListQueryState() const [currPage, setCurrPage] = React.useState(query.page - 1) // Convert to 0-based index const [limit, setLimit] = useState(query.limit) @@ -104,7 +105,7 @@ const Documents: FC = ({ datasetId }) => { const debouncedSearchValue = useDebounce(searchValue, { wait: 500 }) const statusFilterItems: Item[] = useMemo(() => [ - { value: 'all', name: 'All Status' }, + { value: 'all', name: t('datasetDocuments.list.index.all') as string }, { value: 'queuing', name: DOC_INDEX_STATUS_MAP.queuing.text }, { value: 'indexing', name: DOC_INDEX_STATUS_MAP.indexing.text }, { value: 'paused', name: DOC_INDEX_STATUS_MAP.paused.text }, @@ -114,6 +115,11 @@ const Documents: FC = ({ datasetId }) => { { value: 'disabled', name: DOC_INDEX_STATUS_MAP.disabled.text }, { value: 'archived', name: DOC_INDEX_STATUS_MAP.archived.text }, ], [DOC_INDEX_STATUS_MAP, t]) + const normalizedStatusFilterValue = useMemo(() => normalizeStatusForQuery(statusFilterValue), [statusFilterValue]) + const sortItems: Item[] = useMemo(() => [ + { value: 'created_at', name: t('datasetDocuments.list.sort.uploadTime') as string }, + { value: 'hit_count', name: t('datasetDocuments.list.sort.hitCount') as string }, + ], [t]) // Initialize search value from URL on mount useEffect(() => { @@ -131,12 +137,17 @@ const Documents: FC = ({ datasetId }) => { setInputValue(query.keyword) setSearchValue(query.keyword) } + setStatusFilterValue((prev) => { + const nextValue = sanitizeStatusValue(query.status) + return prev === nextValue ? prev : nextValue + }) + setSortValue(query.sort) }, [query]) // Update URL when pagination changes const handlePageChange = (newPage: number) => { setCurrPage(newPage) - updateQuery({ page: newPage + 1 }) // Convert to 1-based index + updateQuery({ page: newPage + 1 }) // Pagination emits 0-based page, convert to 1-based for URL } // Update URL when limit changes @@ -160,6 +171,8 @@ const Documents: FC = ({ datasetId }) => { page: currPage + 1, limit, keyword: debouncedSearchValue, + status: normalizedStatusFilterValue, + sort: sortValue, }, refetchInterval: timerCanRun ? 2500 : 0, }) @@ -211,8 +224,14 @@ const Documents: FC = ({ datasetId }) => { percent, } }) - setTimerCanRun(completedNum !== documentsRes?.data?.length) - }, [documentsRes]) + + const hasIncompleteDocuments = completedNum !== documentsRes?.data?.length + const transientStatuses = ['queuing', 'indexing', 'paused'] + const shouldForcePolling = normalizedStatusFilterValue === 'all' + ? false + : transientStatuses.includes(normalizedStatusFilterValue) + setTimerCanRun(shouldForcePolling || hasIncompleteDocuments) + }, [documentsRes, normalizedStatusFilterValue]) const total = documentsRes?.total || 0 const routeToDocCreate = () => { @@ -233,6 +252,10 @@ const Documents: FC = ({ datasetId }) => { setSelectedIds([]) }, [searchValue, query.keyword]) + useEffect(() => { + setSelectedIds([]) + }, [normalizedStatusFilterValue]) + const { run: handleSearch } = useDebounceFn(() => { setSearchValue(inputValue) }, { wait: 500 }) @@ -278,17 +301,24 @@ const Documents: FC = ({ datasetId }) => {
- { - setStatusFilter(item) - }} + } - optionClassName='p-0' - notClearable + onSelect={(item) => { + const selectedValue = sanitizeStatusValue(item?.value ? String(item.value) : '') + setStatusFilterValue(selectedValue) + setCurrPage(0) + updateQuery({ status: selectedValue, page: 1 }) + }} + onClear={() => { + if (statusFilterValue === 'all') + return + setStatusFilterValue('all') + setCurrPage(0) + updateQuery({ status: 'all', page: 1 }) + }} /> = ({ datasetId }) => { onChange={e => handleInputChange(e.target.value)} onClear={() => handleInputChange('')} /> +
+ { + const next = String(value) as SortType + if (next === sortValue) + return + setSortValue(next) + setCurrPage(0) + updateQuery({ sort: next, page: 1 }) + }} + />
{!isFreePlan && } @@ -343,7 +387,8 @@ const Documents: FC = ({ datasetId }) => { onUpdate={handleUpdate} selectedIds={selectedIds} onSelectedIdChange={setSelectedIds} - statusFilter={statusFilter} + statusFilterValue={normalizedStatusFilterValue} + remoteSortValue={sortValue} pagination={{ total, limit, diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index 9659925b3a..b6b0335375 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import React, { useCallback, useMemo, useState } from 'react' +import React, { useCallback, useEffect, useMemo, useState } from 'react' import { useBoolean } from 'ahooks' import { ArrowDownIcon } from '@heroicons/react/24/outline' import { pick, uniq } from 'lodash-es' @@ -18,7 +18,6 @@ import BatchAction from './detail/completed/common/batch-action' import cn from '@/utils/classnames' import Tooltip from '@/app/components/base/tooltip' import Toast from '@/app/components/base/toast' -import type { Item } from '@/app/components/base/select' import { asyncRunSafe } from '@/utils' import { formatNumber } from '@/utils/format' import NotionIcon from '@/app/components/base/notion-icon' @@ -37,6 +36,7 @@ import EditMetadataBatchModal from '@/app/components/datasets/metadata/edit-meta import StatusItem from './status-item' import Operations from './operations' import { DatasourceType } from '@/models/pipeline' +import { normalizeStatusForQuery } from '@/app/components/datasets/documents/status-filter' export const renderTdValue = (value: string | number | null, isEmptyStyle = false) => { return ( @@ -66,7 +66,8 @@ type IDocumentListProps = { pagination: PaginationProps onUpdate: () => void onManageMetadata: () => void - statusFilter: Item + statusFilterValue: string + remoteSortValue: string } /** @@ -81,7 +82,8 @@ const DocumentList: FC = ({ pagination, onUpdate, onManageMetadata, - statusFilter, + statusFilterValue, + remoteSortValue, }) => { const { t } = useTranslation() const { formatTime } = useTimestamp() @@ -90,9 +92,14 @@ const DocumentList: FC = ({ const chunkingMode = datasetConfig?.doc_form const isGeneralMode = chunkingMode !== ChunkingMode.parentChild const isQAMode = chunkingMode === ChunkingMode.qa - const [sortField, setSortField] = useState<'name' | 'word_count' | 'hit_count' | 'created_at' | null>('created_at') + const [sortField, setSortField] = useState<'name' | 'word_count' | 'hit_count' | 'created_at' | null>(null) const [sortOrder, setSortOrder] = useState<'asc' | 'desc'>('desc') + useEffect(() => { + setSortField(null) + setSortOrder('desc') + }, [remoteSortValue]) + const { isShowEditModal, showEditModal, @@ -109,11 +116,10 @@ const DocumentList: FC = ({ const localDocs = useMemo(() => { let filteredDocs = documents - if (statusFilter.value !== 'all') { + if (statusFilterValue && statusFilterValue !== 'all') { filteredDocs = filteredDocs.filter(doc => typeof doc.display_status === 'string' - && typeof statusFilter.value === 'string' - && doc.display_status.toLowerCase() === statusFilter.value.toLowerCase(), + && normalizeStatusForQuery(doc.display_status) === statusFilterValue, ) } @@ -156,7 +162,7 @@ const DocumentList: FC = ({ }) return sortedDocs - }, [documents, sortField, sortOrder, statusFilter]) + }, [documents, sortField, sortOrder, statusFilterValue]) const handleSort = (field: 'name' | 'word_count' | 'hit_count' | 'created_at') => { if (sortField === field) { diff --git a/web/app/components/datasets/documents/status-filter.ts b/web/app/components/datasets/documents/status-filter.ts new file mode 100644 index 0000000000..d345774351 --- /dev/null +++ b/web/app/components/datasets/documents/status-filter.ts @@ -0,0 +1,33 @@ +import { DisplayStatusList } from '@/models/datasets' + +const KNOWN_STATUS_VALUES = new Set([ + 'all', + ...DisplayStatusList.map(item => item.toLowerCase()), +]) + +const URL_STATUS_ALIASES: Record = { + active: 'available', +} + +const QUERY_STATUS_ALIASES: Record = { + enabled: 'available', +} + +export const sanitizeStatusValue = (value?: string | null) => { + if (!value) + return 'all' + + const normalized = value.toLowerCase() + if (URL_STATUS_ALIASES[normalized]) + return URL_STATUS_ALIASES[normalized] + + return KNOWN_STATUS_VALUES.has(normalized) ? normalized : 'all' +} + +export const normalizeStatusForQuery = (value?: string | null) => { + const sanitized = sanitizeStatusValue(value) + if (sanitized === 'all') + return 'all' + + return QUERY_STATUS_ALIASES[sanitized] || sanitized +} diff --git a/web/i18n/en-US/dataset-documents.ts b/web/i18n/en-US/dataset-documents.ts index 31704636ea..5d337ae892 100644 --- a/web/i18n/en-US/dataset-documents.ts +++ b/web/i18n/en-US/dataset-documents.ts @@ -40,6 +40,10 @@ const translation = { enableTip: 'The file can be indexed', disableTip: 'The file cannot be indexed', }, + sort: { + uploadTime: 'Upload Time', + hitCount: 'Retrieval Count', + }, status: { queuing: 'Queuing', indexing: 'Indexing', diff --git a/web/i18n/zh-Hans/dataset-documents.ts b/web/i18n/zh-Hans/dataset-documents.ts index dd9c6ba3af..6b22871611 100644 --- a/web/i18n/zh-Hans/dataset-documents.ts +++ b/web/i18n/zh-Hans/dataset-documents.ts @@ -40,6 +40,10 @@ const translation = { enableTip: '该文件可以被索引', disableTip: '该文件无法被索引', }, + sort: { + uploadTime: '上传时间', + hitCount: '召回次数', + }, status: { queuing: '排队中', indexing: '索引中', diff --git a/web/service/knowledge/use-document.ts b/web/service/knowledge/use-document.ts index 5691128e7d..c3321b7a76 100644 --- a/web/service/knowledge/use-document.ts +++ b/web/service/knowledge/use-document.ts @@ -9,6 +9,7 @@ import { pauseDocIndexing, resumeDocIndexing } from '../datasets' import type { DocumentDetailResponse, DocumentListResponse, UpdateDocumentBatchParams } from '@/models/datasets' import { DocumentActionType } from '@/models/datasets' import type { CommonResponse } from '@/models/common' +import { normalizeStatusForQuery } from '@/app/components/datasets/documents/status-filter' const NAME_SPACE = 'knowledge/document' @@ -20,15 +21,26 @@ export const useDocumentList = (payload: { page: number limit: number sort?: SortType + status?: string }, refetchInterval?: number | false }) => { const { query, datasetId, refetchInterval } = payload - const { keyword, page, limit, sort } = query + const { keyword, page, limit, sort, status } = query + const normalizedStatus = normalizeStatusForQuery(status) + const params: Record = { + keyword, + page, + limit, + } + if (sort) + params.sort = sort + if (normalizedStatus && normalizedStatus !== 'all') + params.status = normalizedStatus return useQuery({ - queryKey: [...useDocumentListKey, datasetId, keyword, page, limit, sort], + queryKey: [...useDocumentListKey, datasetId, keyword, page, limit, sort, normalizedStatus], queryFn: () => get(`/datasets/${datasetId}/documents`, { - params: query, + params, }), refetchInterval, })