diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 8a5389a341..023f555681 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -96,6 +96,8 @@ export enum IndexingType { } const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' +const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500 +const DEFAULT_OVERLAP = 50 type ParentChildConfig = { chunkForContext: ParentMode @@ -155,9 +157,9 @@ const StepTwo = ({ const setSegmentIdentifier = useCallback((value: string) => { doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER) }, []) - const [maxChunkLength, setMaxChunkLength] = useState(4000) // default chunk length + const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH) // default chunk length const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000) - const [overlap, setOverlap] = useState(50) + const [overlap, setOverlap] = useState(DEFAULT_OVERLAP) const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() const hasSetIndexType = !!indexingType diff --git a/web/app/components/datasets/create/step-two/inputs.tsx b/web/app/components/datasets/create/step-two/inputs.tsx index 7a35db9153..ab62003da8 100644 --- a/web/app/components/datasets/create/step-two/inputs.tsx +++ b/web/app/components/datasets/create/step-two/inputs.tsx @@ -32,7 +32,7 @@ export const DelimiterInput: FC = (props) => { @@ -46,7 +46,7 @@ export const MaxLengthInput: FC = (props) => { = ({ }}>
e.stopPropagation()}> - = (props) => { > {label} diff --git a/web/app/components/datasets/hit-testing/assets/test-data.ts b/web/app/components/datasets/hit-testing/assets/test-data.ts new file mode 100644 index 0000000000..39a9788afa --- /dev/null +++ b/web/app/components/datasets/hit-testing/assets/test-data.ts @@ -0,0 +1,137 @@ +export const generalResultData = [ + { + segment: { + id: 'b621b153-f8a7-4e85-bd3d-07feaf61bd9e', + position: 1, + document_id: '990c1ba7-a170-42ed-a71f-579e4875eaba', + content: '张家界森林覆盖率达 90%以上,生物多样性丰富。这里是许多珍稀动植物的栖息地,例\r\n如银杉、中华秋沙鸭等。清新的空气和丰富的负氧离子,让它成为“ 天然氧吧”。\r\n历史背景\r\n1. 古代历史\r\n张家界地区在古代是土家族和苗族等少数民族的居住地,历史可以追溯到新石器时代。\r\n这里曾是楚国的属地,后来成为武陵山地区的重要组成部分。\r\n2. 近代发展\r\n张家界介绍\r\n张家界概述\r\n张家界位于中国湖南省西北部,是中国知名的旅游胜地,以独特的喀斯特地貌和壮美的\r\n自然风光闻名世界。它不仅是自然景观的瑰宝,还蕴含了丰富的历史与人文底蕴。\r\n地理特色\r\n1. 地貌特征\r\n张家界以其石英砂岩峰林地貌而著称,峰峦如刀劈斧削,形态各异,被誉为“ 天然山水\r\n画卷”。\r\n• 武陵源风景名胜区\r\n被列入联合国教科文组织世界自然遗产名录,其中包括张家界国家森林公园、天子山、\r\n索溪峪等景区。', + answer: null, + word_count: 387, + tokens: 471, + keywords: [ + '氧吧', + '丰富', + '90%', + '天子山', + '地貌', + '历史', + '张家界', + '索溪峪', + '天然', + '负氧离子', + ], + index_node_id: '483fad87-3b7e-486d-afae-75e4f0b2f3dd', + index_node_hash: '61bb7556a32e3e09ed83f2de731c2ac2d669c598de6d85708e11f78817c882bb', + hit_count: 0, + enabled: true, + disabled_at: null, + disabled_by: null, + status: 'completed', + created_by: '6d8ad01f-edf9-43a6-b863-a034b1828ac7', + created_at: 1732605173, + indexing_at: 1732605173, + completed_at: 1732605177, + error: null, + stopped_at: null, + document: { + id: '990c1ba7-a170-42ed-a71f-579e4875eaba', + data_source_type: 'upload_file', + name: '张家界介绍.pdf', + doc_type: null, + }, + }, + child_chunks: null, + score: 0.8771945, + tsne_position: null, + }, + { + segment: { + id: '0859a14d-697e-4703-b59d-2ff69a7a9795', + position: 5, + document_id: '990c1ba7-a170-42ed-a71f-579e4875eaba', + content: 
'茅岩河漂流和黄石寨徒步是体验张家界山水魅力的绝佳方式。\r\n总结\r\n张家界是集自然奇观与人文风情于一体的旅游胜地。无论是其独特的地貌景观,还是浓\r\n郁的土家文化,都展现了人与自然的和谐之美。这里的每一座山、每一片森林,似乎都\r\n在诉说着古老的故事,吸引着来自世界各地的游客流连忘返。', + answer: null, + word_count: 140, + tokens: 173, + keywords: [ + '绝佳', + '徒步', + '人与自然', + '流连忘返', + '河漂流', + '之美', + '张家界', + '黄石寨', + '诉说着', + '茅岩', + ], + index_node_id: '1d8e46bd-27ea-47fa-b8c4-87737bf2e021', + index_node_hash: '8ac318494724ac44120b2f9db397bb02186b456fff76f9f8b86156fb8a864999', + hit_count: 0, + enabled: true, + disabled_at: null, + disabled_by: null, + status: 'completed', + created_by: '6d8ad01f-edf9-43a6-b863-a034b1828ac7', + created_at: 1732605173, + indexing_at: 1732605173, + completed_at: 1732605177, + error: null, + stopped_at: null, + document: { + id: '990c1ba7-a170-42ed-a71f-579e4875eaba', + data_source_type: 'upload_file', + name: '张家界介绍.pdf', + doc_type: null, + }, + }, + child_chunks: null, + score: 0.8642928, + tsne_position: null, + }, + { + segment: { + id: 'f5e63d62-984f-419f-a8ec-781e1280c739', + position: 4, + document_id: '990c1ba7-a170-42ed-a71f-579e4875eaba', + content: '葛粉汤\r\n一种用当地葛根制成的食品,清热解毒,深受游客喜爱。\r\n3. 
艺术与传说\r\n张家界的山水常与中国传统文化和神话传说相结合,例如天子山据说是土家族起义领袖', + answer: null, + word_count: 80, + tokens: 94, + keywords: [ + '葛根', + '清热解毒', + '葛粉', + '天子山', + '起义领袖', + '深受', + '张家界', + '神话传说', + '土家族', + '山水', + ], + index_node_id: '80f71f0d-6218-4160-8575-c59d58ac15e3', + index_node_hash: '155ad96a96b984d7058fdb377f98bd50158d58574b75bea0187c9e3af5680ad5', + hit_count: 0, + enabled: true, + disabled_at: null, + disabled_by: null, + status: 'completed', + created_by: '6d8ad01f-edf9-43a6-b863-a034b1828ac7', + created_at: 1732605173, + indexing_at: 1732605173, + completed_at: 1732605177, + error: null, + stopped_at: null, + document: { + id: '990c1ba7-a170-42ed-a71f-579e4875eaba', + data_source_type: 'upload_file', + name: '张家界介绍.pdf', + doc_type: null, + }, + }, + child_chunks: null, + score: 0.80618876, + tsne_position: null, + }, +] diff --git a/web/app/components/datasets/hit-testing/components/result-item.tsx b/web/app/components/datasets/hit-testing/components/result-item.tsx new file mode 100644 index 0000000000..35d9d1bdf9 --- /dev/null +++ b/web/app/components/datasets/hit-testing/components/result-item.tsx @@ -0,0 +1,33 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import { SegmentIndexTag } from '../../documents/detail/completed' +import type { HitTesting } from '@/models/datasets' +import cn from '@/utils/classnames' +type Props = { + payload: HitTesting +} + +const ResultItem: FC = ({ + payload, +}) => { + const { t } = useTranslation() + const { segment } = payload + const { position, word_count } = segment + + return ( +
+
+
+ +
·
+
{word_count} {t('datasetDocuments.segment.characters')}
+
+ {/* Score */} +
+ +
+ ) +} +export default React.memo(ResultItem) diff --git a/web/app/components/datasets/hit-testing/index.tsx b/web/app/components/datasets/hit-testing/index.tsx index ce47f2bfa6..bf3c02a84f 100644 --- a/web/app/components/datasets/hit-testing/index.tsx +++ b/web/app/components/datasets/hit-testing/index.tsx @@ -7,11 +7,11 @@ import { omit } from 'lodash-es' import { useBoolean } from 'ahooks' import { useContext } from 'use-context-selector' import SegmentCard from '../documents/detail/completed/SegmentCard' -import docStyle from '../documents/detail/completed/style.module.css' import Textarea from './textarea' import s from './style.module.css' import HitDetail from './hit-detail' import ModifyRetrievalModal from './modify-retrieval-modal' +import { generalResultData } from './assets/test-data' import cn from '@/utils/classnames' import type { ExternalKnowledgeBaseHitTestingResponse, ExternalKnowledgeBaseHitTesting as ExternalKnowledgeBaseHitTestingType, HitTestingResponse, HitTesting as HitTestingType } from '@/models/datasets' import Loading from '@/app/components/base/loading' @@ -24,7 +24,6 @@ import DatasetDetailContext from '@/context/dataset-detail' import type { RetrievalConfig } from '@/types/app' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import useTimestamp from '@/hooks/use-timestamp' - const limit = 10 type Props = { @@ -49,6 +48,7 @@ const HitTesting: FC = ({ datasetId }: Props) => { const isMobile = media === MediaType.mobile const [hitResult, setHitResult] = useState() // 初始化记录为空数组 + // console.log(hitResult?.records) const [externalHitResult, setExternalHitResult] = useState() const [submitLoading, setSubmitLoading] = useState(false) const [currParagraph, setCurrParagraph] = useState<{ paraInfo?: HitTestingType; showModal: boolean }>({ showModal: false }) @@ -77,7 +77,6 @@ const HitTesting: FC = ({ datasetId }: Props) => { const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict as 
RetrievalConfig) const [isShowModifyRetrievalModal, setIsShowModifyRetrievalModal] = useState(false) const [isShowRightPanel, { setTrue: showRightPanel, setFalse: hideRightPanel, set: setShowRightPanel }] = useBoolean(!isMobile) - const renderHitResults = (results: any[], onClickCard: (record: any) => void) => ( <>
{t('datasetHitTesting.hit.title')}
@@ -87,7 +86,7 @@ const HitTesting: FC = ({ datasetId }: Props) => { = ({ datasetId }: Props) => { ) const renderEmptyState = () => ( -
-
-
- {t('datasetHitTesting.hit.emptyTip')} -
-
+ // for test +
+ //
+ //
+ //
+ // {t('datasetHitTesting.hit.emptyTip')} + //
+ //
) useEffect(() => { @@ -190,6 +191,7 @@ const HitTesting: FC = ({ datasetId }: Props) => {
+ {renderHitResults(generalResultData, onClickCard)} {submitLoading ?