diff --git a/web/app/components/base/divider/index.tsx b/web/app/components/base/divider/index.tsx
index 4b351dea99..2f1245e782 100644
--- a/web/app/components/base/divider/index.tsx
+++ b/web/app/components/base/divider/index.tsx
@@ -18,7 +18,7 @@ const dividerVariants = cva(
   },
 )
 
-type DividerProps = {
+export type DividerProps = {
   className?: string
   style?: CSSProperties
 } & VariantProps
diff --git a/web/app/components/base/divider/with-label.tsx b/web/app/components/base/divider/with-label.tsx
new file mode 100644
index 0000000000..608bc79998
--- /dev/null
+++ b/web/app/components/base/divider/with-label.tsx
@@ -0,0 +1,23 @@
+import type { FC } from 'react'
+import type { DividerProps } from '.'
+import Divider from '.'
+import classNames from '@/utils/classnames'
+
+export type DividerWithLabelProps = DividerProps & {
+  label: string
+}
+
+export const DividerWithLabel: FC = (props) => {
+  const { label, className, ...rest } = props
+  return
+
+
+    {label}
+
+
+
+}
+
+export default DividerWithLabel
diff --git a/web/app/components/datasets/assets/selection-mod-nocolor.svg b/web/app/components/datasets/assets/selection-mod-nocolor.svg
new file mode 100644
index 0000000000..ae3c9c5c75
--- /dev/null
+++ b/web/app/components/datasets/assets/selection-mod-nocolor.svg
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/web/app/components/datasets/chunk.tsx b/web/app/components/datasets/chunk.tsx
new file mode 100644
index 0000000000..08797fbb59
--- /dev/null
+++ b/web/app/components/datasets/chunk.tsx
@@ -0,0 +1,55 @@
+import type { FC, PropsWithChildren } from 'react'
+import Image from 'next/image'
+import SelectionMod from './assets/selection-mod-nocolor.svg'
+import type { QA } from '@/models/datasets'
+
+export type ChunkLabelProps = {
+  label: string
+  characterCount: number
+}
+
+export const ChunkLabel: FC = (props) => {
+  const { label, characterCount } = props
+  return
+ Selection Mod +

+ {label} + + + · + + + {`${characterCount} characters`} +

+
+} + +export type ChunkContainerProps = ChunkLabelProps & PropsWithChildren + +export const ChunkContainer: FC = (props) => { + const { label, characterCount, children } = props + return
+ +

+ {children} +

+
+} + +export type QAPreviewProps = { + qa: QA +} + +export const QAPreview: FC = (props) => { + const { qa } = props + return
+
+ +

{qa.question}

+
+
+ +

{qa.answer}

+
+
+} diff --git a/web/app/components/datasets/create/step-two/index.module.css b/web/app/components/datasets/create/step-two/index.module.css index 4d857968b7..85a7f8ab35 100644 --- a/web/app/components/datasets/create/step-two/index.module.css +++ b/web/app/components/datasets/create/step-two/index.module.css @@ -394,19 +394,6 @@ max-width: 524px; } -.previewHeader { - position: sticky; - top: 0; - left: 0; - padding-top: 42px; - background-color: #fff; - font-weight: 600; - font-size: 18px; - line-height: 28px; - color: #101828; - z-index: 10; -} - /* * `fixed` must under `previewHeader` because of style override would not work */ diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index ac8f2c873a..b904ed17bc 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -1,17 +1,14 @@ 'use client' -import type { FC, PropsWithChildren, ReactNode } from 'react' -import React, { useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react' +import type { FC, PropsWithChildren } from 'react' +import React, { useCallback, useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' -import { useBoolean } from 'ahooks' -import { XMarkIcon } from '@heroicons/react/20/solid' import { RiArrowLeftLine, RiCloseLine, RiSearchEyeLine, } from '@remixicon/react' import Link from 'next/link' -import { groupBy } from 'lodash-es' import Image from 'next/image' import SettingCog from '../assets/setting-gear-mod.svg' import OrangeEffect from '../assets/option-card-effect-orange.svg' @@ -19,7 +16,9 @@ import FamilyMod from '../assets/family-mod.svg' import Note from '../assets/note-mod.svg' import FileList from '../assets/file-list-3-fill.svg' import { indexMethodIcon } from '../icons' -import PreviewItem, { PreviewType } from './preview-item' +import { PreviewContainer } from '../../preview/container' +import { ChunkContainer, QAPreview } from '../../chunk' +import { PreviewHeader } from '../../preview/header' import s from './index.module.css' import unescape from './unescape' import escape from './escape' @@ -27,15 +26,9 @@ import { OptionCard } from './option-card' import LanguageSelect from './language-select' import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' import cn from '@/utils/classnames' -import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' -import { - createDocument, - createFirstDocument, - fetchFileIndexingEstimate as didFetchFileIndexingEstimate, - fetchDefaultProcessRule, -} from '@/service/datasets' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FullDocumentDetail, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' + import Button from '@/app/components/base/button' -import Loading from '@/app/components/base/loading' import FloatRightContainer from '@/app/components/base/float-right-container' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' @@ -60,26 +53,20 @@ import { MessageChatSquare } from 
'@/app/components/base/icons/src/public/common import { IS_CE_EDITION } from '@/config' import Switch from '@/app/components/base/switch' import Divider from '@/app/components/base/divider' +import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument, useFetchDefaultProcessRule, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/use-datasets' +import Loading from '@/app/components/base/loading' const TextLabel: FC = (props) => { return } -const FormField: FC> = (props) => { - return
- {props.label} - {props.children} -
-} - -type ValueOf = T[keyof T] type StepTwoProps = { isSetting?: boolean documentDetail?: FullDocumentDetail isAPIKeySet: boolean onSetting: () => void datasetId?: string - indexingType?: ValueOf + indexingType?: IndexingType retrievalMethod?: string dataSourceType: DataSourceType files: CustomFile[] @@ -96,11 +83,11 @@ type StepTwoProps = { onCancel?: () => void } -enum SegmentType { +export enum SegmentType { AUTO = 'automatic', CUSTOM = 'custom', } -enum IndexingType { +export enum IndexingType { QUALIFIED = 'high_quality', ECONOMICAL = 'economy', } @@ -117,7 +104,6 @@ type ParentChildConfig = { delimiter: string maxLength: number } - rules: PreProcessingRule[] } const defaultParentChildConfig: ParentChildConfig = { @@ -130,7 +116,6 @@ const defaultParentChildConfig: ParentChildConfig = { delimiter: '\\n\\n', maxLength: 4000, }, - rules: [], } const StepTwo = ({ @@ -162,10 +147,6 @@ const StepTwo = ({ const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext() const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type - const scrollRef = useRef(null) - const [scrolled, setScrolled] = useState(false) - const previewScrollRef = useRef(null) - const [previewScrolled, setPreviewScrolled] = useState(false) const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO) const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) const setSegmentIdentifier = useCallback((value: string) => { @@ -176,7 +157,7 @@ const StepTwo = ({ const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() const hasSetIndexType = !!indexingType - const [indexType, setIndexType] = useState>( + const [indexType, setIndexType] = useState( (indexingType || isAPIKeySet) ? IndexingType.QUALIFIED @@ -190,37 +171,96 @@ const StepTwo = ({ (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'), ) const [QATipHide, setQATipHide] = useState(false) - const [previewSwitched, setPreviewSwitched] = useState(false) - const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean() - const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState(null) - const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState(null) - - const fileIndexingEstimate = (() => { - return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate - })() - const [isCreating, setIsCreating] = useState(false) + const [qaPreviewSwitched, setQAPreviewSwitched] = useState(false) const [parentChildConfig, setParentChildConfig] = useState(defaultParentChildConfig) - const scrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setScrolled(true) + const getIndexing_technique = () => indexingType || indexType - else - setScrolled(false) + const getProcessRule = () => { + const processRule: ProcessRule = { + rules: {} as any, // api will check this. It will be removed after api refactored. 
+ mode: segmentationType, + } + if (segmentationType === SegmentType.CUSTOM) { + const ruleObj = { + pre_processing_rules: rules, + segmentation: { + separator: unescape(segmentIdentifier), + max_tokens: max, + chunk_overlap: overlap, + }, + } + processRule.rules = ruleObj + } + return processRule } - const previewScrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setPreviewScrolled(true) + const fileIndexingEstimateQuery = useFetchFileIndexingEstimateForFile({ + docForm: docForm as DocForm, + docLanguage, + dataSourceType: DataSourceType.FILE, + files, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId!, + }) + const notionIndexingEstimateQuery = useFetchFileIndexingEstimateForNotion({ + docForm: docForm as DocForm, + docLanguage, + dataSourceType: DataSourceType.NOTION, + notionPages, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) - else - setPreviewScrolled(false) - } - const getFileName = (name: string) => { - const arr = name.split('.') - return arr.slice(0, -1).join('.') - } + const websiteIndexingEstimateQuery = useFetchFileIndexingEstimateForWeb({ + docForm: docForm as DocForm, + docLanguage, + dataSourceType: DataSourceType.WEB, + websitePages, + crawlOptions, + websiteCrawlProvider, + websiteCrawlJobId, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) + + const fetchEstimate = useCallback(() => { + if (dataSourceType === DataSourceType.FILE) + fileIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.NOTION) + notionIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.WEB) + websiteIndexingEstimateQuery.mutate() + }, [dataSourceType, fileIndexingEstimateQuery, notionIndexingEstimateQuery, websiteIndexingEstimateQuery]) + + const estimate + = dataSourceType === DataSourceType.FILE + ? fileIndexingEstimateQuery.data + : dataSourceType === DataSourceType.NOTION + ? notionIndexingEstimateQuery.data + : websiteIndexingEstimateQuery.data + + // const getIsEstimateReady = useCallback(() => { + // if (dataSourceType === DataSourceType.FILE) + // return fileIndexingEstimateQuery.isSuccess + + // if (dataSourceType === DataSourceType.NOTION) + // return notionIndexingEstimateQuery.isSuccess + + // if (dataSourceType === DataSourceType.WEB) + // return websiteIndexingEstimateQuery.isSuccess + // }, [dataSourceType, fileIndexingEstimateQuery.isSuccess, notionIndexingEstimateQuery.isSuccess, websiteIndexingEstimateQuery.isSuccess]) + + // const getFileName = (name: string) => { + // const arr = name.split('.') + // return arr.slice(0, -1).join('.') + // } const getRuleName = (key: string) => { if (key === 'remove_extra_spaces') @@ -248,129 +288,21 @@ const StepTwo = ({ if (defaultConfig) { setSegmentIdentifier(defaultConfig.segmentation.separator) setMax(defaultConfig.segmentation.max_tokens) - setOverlap(defaultConfig.segmentation.chunk_overlap) + setOverlap(defaultConfig.segmentation.chunk_overlap!) setRules(defaultConfig.pre_processing_rules) } setParentChildConfig(defaultParentChildConfig) } - const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT, language?: string) => { - // eslint-disable-next-line @typescript-eslint/no-use-before-define - const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm, language)!) 
- if (segmentationType === SegmentType.CUSTOM) - setCustomFileIndexingEstimate(res) - else - setAutomaticFileIndexingEstimate(res) - } - - const confirmChangeCustomConfig = () => { + const updatePreview = () => { if (segmentationType === SegmentType.CUSTOM && max > 4000) { Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) return } - setCustomFileIndexingEstimate(null) - setShowPreview() - fetchFileIndexingEstimate() - setPreviewSwitched(false) + fetchEstimate() + setQAPreviewSwitched(false) } - const getIndexing_technique = () => indexingType || indexType - - const getProcessRule = () => { - const processRule: ProcessRule = { - rules: {} as any, // api will check this. It will be removed after api refactored. - mode: segmentationType, - } - if (segmentationType === SegmentType.CUSTOM) { - const ruleObj = { - pre_processing_rules: rules, - segmentation: { - separator: unescape(segmentIdentifier), - max_tokens: max, - chunk_overlap: overlap, - }, - } - processRule.rules = ruleObj - } - return processRule - } - - const getNotionInfo = () => { - const workspacesMap = groupBy(notionPages, 'workspace_id') - const workspaces = Object.keys(workspacesMap).map((workspaceId) => { - return { - workspaceId, - pages: workspacesMap[workspaceId], - } - }) - return workspaces.map((workspace) => { - return { - workspace_id: workspace.workspaceId, - pages: workspace.pages.map((page) => { - const { page_id, page_name, page_icon, type } = page - return { - page_id, - page_name, - page_icon, - type, - } - }), - } - }) as NotionInfo[] - } - - const getWebsiteInfo = () => { - return { - provider: websiteCrawlProvider, - job_id: websiteCrawlJobId, - urls: websitePages.map(page => page.source_url), - only_main_content: crawlOptions?.only_main_content, - } - } - - const getFileIndexingEstimateParams = (docForm: DocForm, language?: string): IndexingEstimateParams | undefined => { - if (dataSourceType === DataSourceType.FILE) { - return { - info_list: { - data_source_type: dataSourceType, - file_info_list: { - file_ids: files.map(file => file.id) as string[], - }, - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.NOTION) { - return { - info_list: { - data_source_type: dataSourceType, - notion_info_list: getNotionInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.WEB) { - return { - info_list: { - data_source_type: dataSourceType, - website_info_list: getWebsiteInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - } const { modelList: rerankModelList, defaultModel: rerankDefaultModel, @@ -454,28 +386,35 @@ const StepTwo = ({ } } if (dataSourceType === DataSourceType.NOTION) - params.data_source.info_list.notion_info_list = getNotionInfo() + params.data_source.info_list.notion_info_list = getNotionInfo(notionPages) - if (dataSourceType === DataSourceType.WEB) - params.data_source.info_list.website_info_list = getWebsiteInfo() + if (dataSourceType === DataSourceType.WEB) { + params.data_source.info_list.website_info_list = getWebsiteInfo({ + 
websiteCrawlProvider, + websiteCrawlJobId, + websitePages, + }) + } } return params } - const getRules = async () => { - try { - const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) - const separator = res.rules.segmentation.separator + const fetchDefaultProcessRuleMutation = useFetchDefaultProcessRule({ + onSuccess(data) { + const separator = data.rules.segmentation.separator setSegmentIdentifier(separator) - setMax(res.rules.segmentation.max_tokens) - setOverlap(res.rules.segmentation.chunk_overlap) - setRules(res.rules.pre_processing_rules) - setDefaultConfig(res.rules) - } - catch (err) { - console.log(err) - } - } + setMax(data.rules.segmentation.max_tokens) + setOverlap(data.rules.segmentation.chunk_overlap!) + setRules(data.rules.pre_processing_rules) + setDefaultConfig(data.rules) + }, + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) const getRulesFromDetail = () => { if (documentDetail) { @@ -485,7 +424,7 @@ const StepTwo = ({ const overlap = rules.segmentation.chunk_overlap setSegmentIdentifier(separator) setMax(max) - setOverlap(overlap) + setOverlap(overlap as number) setRules(rules.pre_processing_rules) setDefaultConfig(rules) } @@ -496,77 +435,75 @@ const StepTwo = ({ setSegmentationType(documentDetail.dataset_process_rule.mode) } - const createHandle = async () => { - if (isCreating) - return - setIsCreating(true) - try { - let res - const params = getCreationParams() - if (!params) - return false - - setIsCreating(true) - if (!datasetId) { - res = await createFirstDocument({ - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - // eslint-disable-next-line @typescript-eslint/no-use-before-define - updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) - } - else { - res = await createDocument({ - datasetId, - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - } - if (mutateDatasetRes) - mutateDatasetRes() - onStepChange && onStepChange(+1) - isSetting && onSave && onSave() - } - catch (err) { + const createFirstDocumentMutation = useCreateFirstDocument({ + onError(error) { Toast.notify({ type: 'error', - message: `${err}`, + message: `${error}`, + }) + }, + }) + const createDocumentMutation = useCreateDocument(datasetId!, { + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) + + const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending + + const createHandle = async () => { + const params = getCreationParams() + if (!params) + return false + + if (!datasetId) { + await createFirstDocumentMutation.mutateAsync( + params, + { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) + }, + }, + ) + } + else { + await createDocumentMutation.mutateAsync(params, { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + }, }) } - finally { - setIsCreating(false) - } + if (mutateDatasetRes) + mutateDatasetRes() + onStepChange && onStepChange(+1) + 
isSetting && onSave && onSave() } - const handleSwitch = (state: boolean) => { - if (state) + const handleDocformSwitch = (isQAMode: boolean) => { + if (isQAMode) setDocForm(DocForm.QA) else setDocForm(DocForm.TEXT) } - const previewSwitch = async (language?: string) => { - setPreviewSwitched(true) + const previewSwitch = () => { + setQAPreviewSwitched(true) setIsLanguageSelectDisabled(true) - if (segmentationType === SegmentType.AUTO) - setAutomaticFileIndexingEstimate(null) - else - setCustomFileIndexingEstimate(null) - try { - await fetchFileIndexingEstimate(DocForm.QA, language) - } - finally { - setIsLanguageSelectDisabled(false) - } + fetchEstimate() } const handleSelect = (language: string) => { setDocLanguage(language) // Switch language, re-cutter - if (docForm === DocForm.QA && previewSwitched) - previewSwitch(language) + if (docForm === DocForm.QA && qaPreviewSwitched) + previewSwitch() } const changeToEconomicalType = () => { @@ -579,7 +516,7 @@ const StepTwo = ({ useEffect(() => { // fetch rules if (!isSetting) { - getRules() + fetchDefaultProcessRuleMutation.mutate('/datasets/process-rule') } else { getRulesFromDetail() @@ -587,22 +524,6 @@ const StepTwo = ({ } }, []) - useEffect(() => { - scrollRef.current?.addEventListener('scroll', scrollHandle) - return () => { - scrollRef.current?.removeEventListener('scroll', scrollHandle) - } - }, []) - - useLayoutEffect(() => { - if (showPreview) { - previewScrollRef.current?.addEventListener('scroll', previewScrollHandle) - return () => { - previewScrollRef.current?.removeEventListener('scroll', previewScrollHandle) - } - } - }, [showPreview]) - useEffect(() => { if (indexingType === IndexingType.ECONOMICAL && docForm === DocForm.QA) setDocForm(DocForm.TEXT) @@ -617,20 +538,6 @@ const StepTwo = ({ setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL) }, [isAPIKeySet, indexingType, datasetId]) - useEffect(() => { - if (segmentationType === SegmentType.AUTO) { - setAutomaticFileIndexingEstimate(null) - !isMobile && setShowPreview() - fetchFileIndexingEstimate() - setPreviewSwitched(false) - } - else { - hidePreview() - setCustomFileIndexingEstimate(null) - setPreviewSwitched(false) - } - }, [segmentationType, indexType]) - const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || { search_method: RETRIEVE_METHOD.semantic, reranking_enable: false, @@ -659,7 +566,7 @@ const StepTwo = ({ onClick={() => setSegmentationType(SegmentType.AUTO)} actions={ <> - @@ -714,7 +621,7 @@ const StepTwo = ({ onClick={() => setSegmentationType(SegmentType.CUSTOM)} actions={ <> - @@ -910,7 +817,7 @@ const StepTwo = ({ @@ -1000,70 +907,40 @@ const StepTwo = ({ { }} footer={null}> - {showPreview &&
+ } + className={cn(s.previewWrap, isMobile && s.isMobile, 'relative h-full overflow-y-scroll space-y-4')} > -
-
-
-
{t('datasetCreation.stepTwo.previewTitle')}
- {docForm === DocForm.QA && !previewSwitched && ( - - )} -
-
- -
+ {qaPreviewSwitched && docForm === DocForm.QA && estimate?.qa_preview && ( + estimate?.qa_preview.map(item => ( + + )) + )} + {(docForm === DocForm.TEXT || !qaPreviewSwitched) && estimate?.preview && ( + estimate?.preview.map((item, index) => ( + + {item} + + )) + )} + {qaPreviewSwitched && docForm === DocForm.QA && !estimate?.qa_preview && ( +
+
- {docForm === DocForm.QA && !previewSwitched && ( -
- {t('datasetCreation.stepTwo.previewSwitchTipStart')} - {t('datasetCreation.stepTwo.previewSwitchTipEnd')} -
- )} -
-
- {previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && ( - <> - {fileIndexingEstimate?.qa_preview.map((item, index) => ( - - ))} - - )} - {(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && ( - <> - {fileIndexingEstimate?.preview.map((item, index) => ( - - ))} - - )} - {previewSwitched && docForm === DocForm.QA && !fileIndexingEstimate?.qa_preview && ( -
- -
- )} - {!previewSwitched && !fileIndexingEstimate?.preview && ( -
- -
- )} -
-
} - {!showPreview && ( -
-
- -
{t('datasetCreation.stepTwo.sideTipTitle')}
-
-

{t('datasetCreation.stepTwo.sideTipP1')}

-

{t('datasetCreation.stepTwo.sideTipP2')}

-

{t('datasetCreation.stepTwo.sideTipP3')}

-

{t('datasetCreation.stepTwo.sideTipP4')}

-
+ )} + {!qaPreviewSwitched && !estimate?.preview && ( +
+
-
- )} + )} +
) diff --git a/web/app/components/datasets/formatted-text/flavours/shared.tsx b/web/app/components/datasets/formatted-text/flavours/shared.tsx index b8102e4ebf..0ce17db7e4 100644 --- a/web/app/components/datasets/formatted-text/flavours/shared.tsx +++ b/web/app/components/datasets/formatted-text/flavours/shared.tsx @@ -34,7 +34,7 @@ export const SliceContent: FC = forwardRef((props, ref) => { const { className, children, ...rest } = props return {children} diff --git a/web/app/components/datasets/preview/container.tsx b/web/app/components/datasets/preview/container.tsx new file mode 100644 index 0000000000..7ce8e226e2 --- /dev/null +++ b/web/app/components/datasets/preview/container.tsx @@ -0,0 +1,27 @@ +import type { ComponentProps, FC, ReactNode } from 'react' +import { forwardRef } from 'react' +import classNames from '@/utils/classnames' + +export type PreviewContainerProps = ComponentProps<'div'> & { + header: ReactNode +} + +export const PreviewContainer: FC = forwardRef((props, ref) => { + const { children, className, header, ...rest } = props + return
+
+ {header} +
+
+ {children} +
+
+}) +PreviewContainer.displayName = 'PreviewContainer' diff --git a/web/app/components/datasets/preview/header.tsx b/web/app/components/datasets/preview/header.tsx new file mode 100644 index 0000000000..1f17f2ca2d --- /dev/null +++ b/web/app/components/datasets/preview/header.tsx @@ -0,0 +1,23 @@ +import type { ComponentProps, FC } from 'react' +import classNames from '@/utils/classnames' + +export type PreviewHeaderProps = Omit, 'title'> & { + title: string +} + +export const PreviewHeader: FC = (props) => { + const { title, className, children, ...rest } = props + return
+
+ {title} +
+ {children} +
+} diff --git a/web/app/components/datasets/preview/index.tsx b/web/app/components/datasets/preview/index.tsx new file mode 100644 index 0000000000..e69de29bb2 diff --git a/web/app/dev-preview/page.tsx b/web/app/dev-preview/page.tsx index 72434cafd9..92263d99a0 100644 --- a/web/app/dev-preview/page.tsx +++ b/web/app/dev-preview/page.tsx @@ -1,35 +1,76 @@ 'use client' +import { useState } from 'react' import { FormattedText } from '../components/datasets/formatted-text/formatted' import { PreviewSlice } from '../components/datasets/formatted-text/flavours/preview-slice' -import { EditSlice } from '../components/datasets/formatted-text/flavours/edit-slice' +import { PreviewContainer } from '../components/datasets/preview/container' +import { PreviewHeader } from '../components/datasets/preview/header' +import FileIcon from '../components/base/file-icon' +import { ChevronDown } from '../components/base/icons/src/vender/solid/arrows' +import Badge from '../components/base/badge' +import { DividerWithLabel } from '../components/base/divider/with-label' +import Button from '../components/base/button' +import { ChunkContainer, QAPreview } from '../components/datasets/chunk' +import classNames from '@/utils/classnames' export default function Page() { + const [parentChild, setParentChild] = useState(false) + const [vertical, setVertical] = useState(false) + const [qa, setQa] = useState(false) return
- - - - - - - - -
- - - - - +
+ + +
+ +
+ +

EOS R3 Tech Sheet.pdf

+ + +
+ + }> +
{parentChild
+            ? Array.from({ length: 4 }, (_, i) => {
+              return
+
+                {Array.from({ length: 4 }, (_, i) => {
+                  return
+                })}
+
+
+            })
+            : Array.from({ length: 2 }, (_, i) => {
+              return
+                {
+                  qa
+                    ?
+                    : 'In December of 2009, I was preparing to teach SI502 - Networked Programming at the University of Michigan for the fifth semester in a row and decided it was time to write a Python textbook that focused on exploring data instead of understanding algorithms and abstractions. My goal in SI502 is to teach people life-long data handling skills using Python. Few of my students were planning to be professional computer programmers. Instead, they planned to be librarians, managers, lawyers, biologists, economists, etc. who happened to want to skillfully use technology in their chosen field.'
+                }
+
+            })
+          }
+ +
} diff --git a/web/models/datasets.ts b/web/models/datasets.ts index fc0e91f06d..ed4ab2fb7d 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -330,6 +330,7 @@ export type NotionPage = { } export type ProcessRule = { + processRule: { pre_processing_rules: PreProcessingRule[]; segmentation: { separator: string; max_tokens: number; chunk_overlap: number } } mode: string rules: Rules } diff --git a/web/service/use-datasets.ts b/web/service/use-datasets.ts new file mode 100644 index 0000000000..a00c34ec12 --- /dev/null +++ b/web/service/use-datasets.ts @@ -0,0 +1,223 @@ +import groupBy from 'lodash-es/groupBy' +import type { MutationOptions } from '@tanstack/react-query' +import { useMutation } from '@tanstack/react-query' +import { createDocument, createFirstDocument, fetchDefaultProcessRule, fetchFileIndexingEstimate } from './datasets' +import { type IndexingType } from '@/app/components/datasets/create/step-two' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DataSourceType, DocForm, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule, ProcessRuleResponse, createDocumentResponse } from '@/models/datasets' +import type { DataSourceProvider, NotionPage } from '@/models/common' + +export const getNotionInfo = ( + notionPages: NotionPage[], +) => { + const workspacesMap = groupBy(notionPages, 'workspace_id') + const workspaces = Object.keys(workspacesMap).map((workspaceId) => { + return { + workspaceId, + pages: workspacesMap[workspaceId], + } + }) + return workspaces.map((workspace) => { + return { + workspace_id: workspace.workspaceId, + pages: workspace.pages.map((page) => { + const { page_id, page_name, page_icon, type } = page + return { + page_id, + page_name, + page_icon, + type, + } + }), + } + }) as NotionInfo[] +} + +export const getWebsiteInfo = ( + opts: { + websiteCrawlProvider: DataSourceProvider + websiteCrawlJobId: string + websitePages: CrawlResultItem[] + crawlOptions?: CrawlOptions + }, +) => { + const { websiteCrawlProvider, websiteCrawlJobId, websitePages, crawlOptions } = opts + return { + provider: websiteCrawlProvider, + job_id: websiteCrawlJobId, + urls: websitePages.map(page => page.source_url), + only_main_content: crawlOptions?.only_main_content, + } +} + +type GetFileIndexingEstimateParamsOptionBase = { + docForm: DocForm + docLanguage: string + indexingTechnique: IndexingType + processRule: ProcessRule + dataset_id: string +} + +type GetFileIndexingEstimateParamsOptionFile = GetFileIndexingEstimateParamsOptionBase & { + dataSourceType: DataSourceType.FILE + files: CustomFile[] +} + +const getFileIndexingEstimateParamsForFile = ({ + docForm, + docLanguage, + dataSourceType, + files, + indexingTechnique, + processRule, + dataset_id, +}: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => { + return { + info_list: { + data_source_type: dataSourceType, + file_info_list: { + file_ids: files.map(file => file.id) as string[], + }, + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } +} + +export const useFetchFileIndexingEstimateForFile = ( + options: GetFileIndexingEstimateParamsOptionFile, + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForFile(options)) + }, + ...mutationOptions, + }) +} + +type GetFileIndexingEstimateParamsOptionNotion = GetFileIndexingEstimateParamsOptionBase & 
{ + dataSourceType: DataSourceType.NOTION + notionPages: NotionPage[] +} + +const getFileIndexingEstimateParamsForNotion = ({ + docForm, + docLanguage, + dataSourceType, + notionPages, + indexingTechnique, + processRule, + dataset_id, +}: GetFileIndexingEstimateParamsOptionNotion): IndexingEstimateParams => { + return { + info_list: { + data_source_type: dataSourceType, + notion_info_list: getNotionInfo(notionPages), + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } +} + +export const useFetchFileIndexingEstimateForNotion = ( + options: GetFileIndexingEstimateParamsOptionNotion, + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForNotion(options)) + }, + ...mutationOptions, + }) +} + +type GetFileIndexingEstimateParamsOptionWeb = GetFileIndexingEstimateParamsOptionBase & { + dataSourceType: DataSourceType.WEB + websitePages: CrawlResultItem[] + crawlOptions?: CrawlOptions + websiteCrawlProvider: DataSourceProvider + websiteCrawlJobId: string +} + +const getFileIndexingEstimateParamsForWeb = ({ + docForm, + docLanguage, + dataSourceType, + websitePages, + crawlOptions, + websiteCrawlProvider, + websiteCrawlJobId, + indexingTechnique, + processRule, + dataset_id, +}: GetFileIndexingEstimateParamsOptionWeb): IndexingEstimateParams => { + return { + info_list: { + data_source_type: dataSourceType, + website_info_list: getWebsiteInfo({ + websiteCrawlProvider, + websiteCrawlJobId, + websitePages, + crawlOptions, + }), + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } +} + +export const useFetchFileIndexingEstimateForWeb = ( + options: GetFileIndexingEstimateParamsOptionWeb, + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForWeb(options)) + }, + ...mutationOptions, + }) +} + +export const useCreateFirstDocument = ( + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async (createDocumentReq: CreateDocumentReq, + ) => { + return createFirstDocument({ body: createDocumentReq }) + }, + ...mutationOptions, + }) +} + +export const useCreateDocument = ( + datasetId: string, + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async (req: CreateDocumentReq) => { + return createDocument({ datasetId, body: req }) + }, + ...mutationOptions, + }) +} + +export const useFetchDefaultProcessRule = ( + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async (url: string) => { + return fetchDefaultProcessRule({ url }) + }, + ...mutationOptions, + }) +}
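
Usage sketch for the new mutation hooks in `web/service/use-datasets.ts` (the `CreateDocumentDemo` component, its props, and the button markup are hypothetical; the hook APIs, the `'/datasets/process-rule'` URL, and the `Toast.notify` error pattern are the ones this diff wires into step-two):

```tsx
// Hypothetical consumer showing the intended call pattern of the new hooks.
// Only the hook APIs and the Toast.notify pattern come from this diff; the rest is illustrative.
import { useCallback } from 'react'
import Toast from '@/app/components/base/toast' // import path assumed; step-two already calls Toast.notify this way
import { useCreateDocument, useFetchDefaultProcessRule } from '@/service/use-datasets'
import type { CreateDocumentReq } from '@/models/datasets'

export const CreateDocumentDemo = ({ datasetId, params }: { datasetId: string; params: CreateDocumentReq }) => {
  // Default process rules arrive via onSuccess; failures surface via onError,
  // replacing the old try/catch around fetchDefaultProcessRule.
  const defaultProcessRuleMutation = useFetchDefaultProcessRule({
    // step-two stores these values in component state; logging keeps the sketch short
    onSuccess: data => console.log(data.rules.segmentation.separator, data.rules.pre_processing_rules),
    onError: error => Toast.notify({ type: 'error', message: `${error}` }),
  })

  // Document creation is also a mutation; isPending replaces the old manual isCreating flag.
  const createDocumentMutation = useCreateDocument(datasetId, {
    onError: error => Toast.notify({ type: 'error', message: `${error}` }),
  })

  const handleCreate = useCallback(async () => {
    await createDocumentMutation.mutateAsync(params)
  }, [createDocumentMutation, params])

  return (
    <div>
      <button onClick={() => defaultProcessRuleMutation.mutate('/datasets/process-rule')}>
        Load default rules
      </button>
      <button disabled={createDocumentMutation.isPending} onClick={handleCreate}>
        Create document
      </button>
    </div>
  )
}
```

Because errors are handled in each hook's `mutationOptions`, the caller no longer needs its own try/catch or `isCreating` state — `isPending` on the mutation covers loading, which is exactly how `createHandle` in step-two shrinks to the happy path.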
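The new presentational pieces compose in the same spirit. A minimal composition sketch (the `PreviewPanelDemo` wrapper and the sample data are made up; the props — `header`, `title`, `label`, `characterCount`, `qa` — are the ones declared in `preview/container.tsx`, `preview/header.tsx` and `chunk.tsx` above):

```tsx
// Illustrative composition only; PreviewPanelDemo and the sample data are not part of this diff.
import { PreviewContainer } from '@/app/components/datasets/preview/container'
import { PreviewHeader } from '@/app/components/datasets/preview/header'
import { ChunkContainer, QAPreview } from '@/app/components/datasets/chunk'
import type { QA } from '@/models/datasets'

const sampleChunks = ['First estimated chunk of the document.', 'Second estimated chunk of the document.']
// field names taken from QAPreview's use of qa.question / qa.answer
const sampleQA: QA = { question: 'What does this preview show?', answer: 'The estimated segmentation result.' }

export const PreviewPanelDemo = () => (
  // PreviewContainer renders the `header` slot above a scrollable body.
  <PreviewContainer header={<PreviewHeader title='Preview' />}>
    {sampleChunks.map((chunk, index) => (
      // ChunkContainer prints a "label · n characters" line, then its children.
      <ChunkContainer key={index} label={`Chunk-${index + 1}`} characterCount={chunk.length}>
        {chunk}
      </ChunkContainer>
    ))}
    {/* In QA mode, each item is rendered through QAPreview instead of raw text. */}
    <ChunkContainer
      label='Chunk-3'
      characterCount={sampleQA.question.length + sampleQA.answer.length}
    >
      <QAPreview qa={sampleQA} />
    </ChunkContainer>
  </PreviewContainer>
)
```

This mirrors how step-two now feeds `estimate?.preview` and `estimate?.qa_preview` into the panel: one `ChunkContainer` per estimate item, with `QAPreview` inside it when the QA switch is on.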