From f71b0eccb2a14026a0e81a335963c538a1b2a3d1 Mon Sep 17 00:00:00 2001 From: twwu Date: Mon, 28 Apr 2025 13:33:16 +0800 Subject: [PATCH] Refactor: dataset creation components and implement Firecrawl functionality --- .../base/notion-page-selector/base.tsx | 8 +- .../create/website/base/header.tsx} | 28 ++- .../create/website/firecrawl/header.tsx | 43 ---- .../create/website/firecrawl/index.tsx | 18 +- .../jina-reader/base/error-message.tsx | 30 --- .../create/website/jina-reader/base/field.tsx | 54 ----- .../create/website/jina-reader/base/input.tsx | 58 ----- .../website/jina-reader/base/options-wrap.tsx | 55 ----- .../website/jina-reader/base/url-input.tsx | 48 ----- .../create/website/jina-reader/header.tsx | 43 ---- .../create/website/jina-reader/index.tsx | 10 +- .../create/website/watercrawl/header.tsx | 43 ---- .../create/website/watercrawl/index.tsx | 10 +- .../input-field/editor/form/index.tsx | 7 +- .../components/panel/test-run/consts.ts | 11 + .../website}/base/checkbox-with-label.tsx | 11 +- .../website/base/crawled-result-item.tsx | 45 ++++ .../website/base/crawled-result.tsx | 77 +++++++ .../data-source/website/base/crawling.tsx | 89 ++++++++ .../website/base/error-message.tsx | 34 +++ .../data-source/website/firecrawl/hooks.ts | 89 ++++++++ .../data-source/website/firecrawl/index.tsx | 201 ++++++++++++++++++ .../data-source/website/firecrawl/options.tsx | 118 ++++++++++ .../components/panel/test-run/index.tsx | 104 +++++---- web/i18n/en-US/dataset-creation.ts | 1 + web/i18n/zh-Hans/dataset-creation.ts | 1 + 26 files changed, 788 insertions(+), 448 deletions(-) rename web/app/components/{base/notion-page-selector/header/index.tsx => datasets/create/website/base/header.tsx} (68%) delete mode 100644 web/app/components/datasets/create/website/firecrawl/header.tsx delete mode 100644 web/app/components/datasets/create/website/jina-reader/base/error-message.tsx delete mode 100644 web/app/components/datasets/create/website/jina-reader/base/field.tsx delete mode 100644 web/app/components/datasets/create/website/jina-reader/base/input.tsx delete mode 100644 web/app/components/datasets/create/website/jina-reader/base/options-wrap.tsx delete mode 100644 web/app/components/datasets/create/website/jina-reader/base/url-input.tsx delete mode 100644 web/app/components/datasets/create/website/jina-reader/header.tsx delete mode 100644 web/app/components/datasets/create/website/watercrawl/header.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/consts.ts rename web/app/components/{datasets/create/website/jina-reader => rag-pipeline/components/panel/test-run/data-source/website}/base/checkbox-with-label.tsx (74%) create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawled-result-item.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawled-result.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawling.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/error-message.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/firecrawl/hooks.ts create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/firecrawl/index.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/firecrawl/options.tsx diff --git a/web/app/components/base/notion-page-selector/base.tsx b/web/app/components/base/notion-page-selector/base.tsx index cbeba00ffd..203eee77e6 100644 --- a/web/app/components/base/notion-page-selector/base.tsx +++ b/web/app/components/base/notion-page-selector/base.tsx @@ -6,7 +6,7 @@ import type { DataSourceNotionPageMap, DataSourceNotionWorkspace, NotionPage } f import { useModalContextSelector } from '@/context/modal-context' import NotionConnector from '../notion-connector' import { usePreImportNotionPages } from '@/service/knowledge/use-import' -import Header from './header' +import Header from '../../datasets/create/website/base/header' type NotionPageSelectorProps = { value?: string[] @@ -99,7 +99,11 @@ const NotionPageSelector = ({
diff --git a/web/app/components/base/notion-page-selector/header/index.tsx b/web/app/components/datasets/create/website/base/header.tsx similarity index 68% rename from web/app/components/base/notion-page-selector/header/index.tsx rename to web/app/components/datasets/create/website/base/header.tsx index 5aec106561..dc6191d78f 100644 --- a/web/app/components/base/notion-page-selector/header/index.tsx +++ b/web/app/components/datasets/create/website/base/header.tsx @@ -1,17 +1,25 @@ import React from 'react' -import Divider from '../../divider' -import Button from '../../button' +import Divider from '@/app/components/base/divider' +import Button from '@/app/components/base/button' import cn from '@/utils/classnames' import { RiBookOpenLine, RiEqualizer2Line } from '@remixicon/react' type HeaderProps = { isInPipeline?: boolean - handleConfigureNotion: () => void + onClickConfiguration: () => void + title: string + buttonText: string + docTitle: string + docLink: string } const Header = ({ isInPipeline = false, - handleConfigureNotion, + onClickConfiguration, + title, + buttonText, + docTitle, + docLink, }: HeaderProps) => { return (
@@ -20,7 +28,7 @@ const Header = ({ 'text-text-secondary', isInPipeline ? 'system-sm-semibold' : 'system-md-semibold', )}> - Choose notion pages + {title}
- Notion docs + {docTitle}
) diff --git a/web/app/components/datasets/create/website/firecrawl/header.tsx b/web/app/components/datasets/create/website/firecrawl/header.tsx deleted file mode 100644 index d23c57f6b4..0000000000 --- a/web/app/components/datasets/create/website/firecrawl/header.tsx +++ /dev/null @@ -1,43 +0,0 @@ -'use client' -import type { FC } from 'react' -import React from 'react' -import { useTranslation } from 'react-i18next' -import { RiBookOpenLine, RiEqualizer2Line } from '@remixicon/react' -import Button from '@/app/components/base/button' - -const I18N_PREFIX = 'datasetCreation.stepOne.website' - -type Props = { - onSetting: () => void -} - -const Header: FC = ({ - onSetting, -}) => { - const { t } = useTranslation() - - return ( -
-
-
{t(`${I18N_PREFIX}.firecrawlTitle`)}
-
- -
- - - {t(`${I18N_PREFIX}.firecrawlDoc`)} - -
- ) -} -export default React.memo(Header) diff --git a/web/app/components/datasets/create/website/firecrawl/index.tsx b/web/app/components/datasets/create/website/firecrawl/index.tsx index 77f9666a48..d542769e2d 100644 --- a/web/app/components/datasets/create/website/firecrawl/index.tsx +++ b/web/app/components/datasets/create/website/firecrawl/index.tsx @@ -7,13 +7,13 @@ import OptionsWrap from '../base/options-wrap' import CrawledResult from '../base/crawled-result' import Crawling from '../base/crawling' import ErrorMessage from '../base/error-message' -import Header from './header' import Options from './options' -import { useModalContext } from '@/context/modal-context' +import { useModalContextSelector } from '@/context/modal-context' import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' import Toast from '@/app/components/base/toast' import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets' import { sleep } from '@/utils' +import Header from '../base/header' const ERROR_I18N_PREFIX = 'common.errorMsg' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -48,7 +48,7 @@ const FireCrawl: FC = ({ if (step !== Step.init) setControlFoldOptions(Date.now()) }, [step]) - const { setShowAccountSettingModal } = useModalContext() + const setShowAccountSettingModal = useModalContextSelector(s => s.setShowAccountSettingModal) const handleSetting = useCallback(() => { setShowAccountSettingModal({ payload: 'data-source', @@ -131,7 +131,7 @@ const FireCrawl: FC = ({ }, } } - }, [crawlOptions.limit]) + }, [crawlOptions.limit, onCheckedCrawlResultChange]) const handleRun = useCallback(async (url: string) => { const { isValid, errorMsg } = checkValid(url) @@ -173,11 +173,17 @@ const FireCrawl: FC = ({ finally { setStep(Step.finished) } - }, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished]) + }, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished, onCheckedCrawlResultChange]) return (
-
+
= ({ - className, - title, - errorMsg, -}) => { - return ( -
-
- -
{title}
-
- {errorMsg && ( -
{errorMsg}
- )} -
- ) -} -export default React.memo(ErrorMessage) diff --git a/web/app/components/datasets/create/website/jina-reader/base/field.tsx b/web/app/components/datasets/create/website/jina-reader/base/field.tsx deleted file mode 100644 index 13477ff5ff..0000000000 --- a/web/app/components/datasets/create/website/jina-reader/base/field.tsx +++ /dev/null @@ -1,54 +0,0 @@ -'use client' -import type { FC } from 'react' -import React from 'react' -import Input from './input' -import cn from '@/utils/classnames' -import Tooltip from '@/app/components/base/tooltip' - -type Props = { - className?: string - label: string - labelClassName?: string - value: string | number - onChange: (value: string | number) => void - isRequired?: boolean - placeholder?: string - isNumber?: boolean - tooltip?: string -} - -const Field: FC = ({ - className, - label, - labelClassName, - value, - onChange, - isRequired = false, - placeholder = '', - isNumber = false, - tooltip, -}) => { - return ( -
-
-
{label}
- {isRequired && *} - {tooltip && ( - {tooltip}
- } - triggerClassName='ml-0.5 w-4 h-4' - /> - )} -
- -
- ) -} -export default React.memo(Field) diff --git a/web/app/components/datasets/create/website/jina-reader/base/input.tsx b/web/app/components/datasets/create/website/jina-reader/base/input.tsx deleted file mode 100644 index bc7a15a23e..0000000000 --- a/web/app/components/datasets/create/website/jina-reader/base/input.tsx +++ /dev/null @@ -1,58 +0,0 @@ -'use client' -import type { FC } from 'react' -import React, { useCallback } from 'react' - -type Props = { - value: string | number - onChange: (value: string | number) => void - placeholder?: string - isNumber?: boolean -} - -const MIN_VALUE = 0 - -const Input: FC = ({ - value, - onChange, - placeholder = '', - isNumber = false, -}) => { - const handleChange = useCallback((e: React.ChangeEvent) => { - const value = e.target.value - if (isNumber) { - let numberValue = Number.parseInt(value, 10) // integer only - if (isNaN(numberValue)) { - onChange('') - return - } - if (numberValue < MIN_VALUE) - numberValue = MIN_VALUE - - onChange(numberValue) - return - } - onChange(value) - }, [isNumber, onChange]) - - const otherOption = (() => { - if (isNumber) { - return { - min: MIN_VALUE, - } - } - return { - - } - })() - return ( - - ) -} -export default React.memo(Input) diff --git a/web/app/components/datasets/create/website/jina-reader/base/options-wrap.tsx b/web/app/components/datasets/create/website/jina-reader/base/options-wrap.tsx deleted file mode 100644 index a5fa6871cb..0000000000 --- a/web/app/components/datasets/create/website/jina-reader/base/options-wrap.tsx +++ /dev/null @@ -1,55 +0,0 @@ -'use client' -import { useBoolean } from 'ahooks' -import type { FC } from 'react' -import React, { useEffect } from 'react' -import { useTranslation } from 'react-i18next' -import cn from '@/utils/classnames' -import { Settings04 } from '@/app/components/base/icons/src/vender/line/general' -import { ChevronRight } from '@/app/components/base/icons/src/vender/line/arrows' -const I18N_PREFIX = 'datasetCreation.stepOne.website' - -type Props = { - className?: string - children: React.ReactNode - controlFoldOptions?: number -} - -const OptionsWrap: FC = ({ - className = '', - children, - controlFoldOptions, -}) => { - const { t } = useTranslation() - - const [fold, { - toggle: foldToggle, - setTrue: foldHide, - }] = useBoolean(false) - - useEffect(() => { - if (controlFoldOptions) - foldHide() - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [controlFoldOptions]) - return ( -
-
-
- -
{t(`${I18N_PREFIX}.options`)}
-
- -
- {!fold && ( -
- {children} -
- )} - -
- ) -} -export default React.memo(OptionsWrap) diff --git a/web/app/components/datasets/create/website/jina-reader/base/url-input.tsx b/web/app/components/datasets/create/website/jina-reader/base/url-input.tsx deleted file mode 100644 index e6b0475874..0000000000 --- a/web/app/components/datasets/create/website/jina-reader/base/url-input.tsx +++ /dev/null @@ -1,48 +0,0 @@ -'use client' -import type { FC } from 'react' -import React, { useCallback, useState } from 'react' -import { useTranslation } from 'react-i18next' -import Input from './input' -import Button from '@/app/components/base/button' - -const I18N_PREFIX = 'datasetCreation.stepOne.website' - -type Props = { - isRunning: boolean - onRun: (url: string) => void -} - -const UrlInput: FC = ({ - isRunning, - onRun, -}) => { - const { t } = useTranslation() - const [url, setUrl] = useState('') - const handleUrlChange = useCallback((url: string | number) => { - setUrl(url as string) - }, []) - const handleOnRun = useCallback(() => { - if (isRunning) - return - onRun(url) - }, [isRunning, onRun, url]) - - return ( -
- - -
- ) -} -export default React.memo(UrlInput) diff --git a/web/app/components/datasets/create/website/jina-reader/header.tsx b/web/app/components/datasets/create/website/jina-reader/header.tsx deleted file mode 100644 index 13b8a9e6fe..0000000000 --- a/web/app/components/datasets/create/website/jina-reader/header.tsx +++ /dev/null @@ -1,43 +0,0 @@ -'use client' -import type { FC } from 'react' -import React from 'react' -import { useTranslation } from 'react-i18next' -import { RiBookOpenLine, RiEqualizer2Line } from '@remixicon/react' -import Button from '@/app/components/base/button' - -const I18N_PREFIX = 'datasetCreation.stepOne.website' - -type Props = { - onSetting: () => void -} - -const Header: FC = ({ - onSetting, -}) => { - const { t } = useTranslation() - - return ( -
-
-
{t(`${I18N_PREFIX}.jinaReaderTitle`)}
-
- -
- - - {t(`${I18N_PREFIX}.jinaReaderDoc`)} - -
- ) -} -export default React.memo(Header) diff --git a/web/app/components/datasets/create/website/jina-reader/index.tsx b/web/app/components/datasets/create/website/jina-reader/index.tsx index 2ab0444586..6e513dd797 100644 --- a/web/app/components/datasets/create/website/jina-reader/index.tsx +++ b/web/app/components/datasets/create/website/jina-reader/index.tsx @@ -7,13 +7,13 @@ import OptionsWrap from '../base/options-wrap' import CrawledResult from '../base/crawled-result' import Crawling from '../base/crawling' import ErrorMessage from '../base/error-message' -import Header from './header' import Options from './options' import { useModalContext } from '@/context/modal-context' import Toast from '@/app/components/base/toast' import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datasets' import { sleep } from '@/utils' import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' +import Header from '../base/header' const ERROR_I18N_PREFIX = 'common.errorMsg' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -190,7 +190,13 @@ const JinaReader: FC = ({ return (
-
+
void -} - -const Header: FC = ({ - onSetting, -}) => { - const { t } = useTranslation() - - return ( -
-
-
{t(`${I18N_PREFIX}.watercrawlTitle`)}
-
- -
- - - {t(`${I18N_PREFIX}.watercrawlDoc`)} - -
- ) -} -export default React.memo(Header) diff --git a/web/app/components/datasets/create/website/watercrawl/index.tsx b/web/app/components/datasets/create/website/watercrawl/index.tsx index bd4faa1383..640b1c2063 100644 --- a/web/app/components/datasets/create/website/watercrawl/index.tsx +++ b/web/app/components/datasets/create/website/watercrawl/index.tsx @@ -7,13 +7,13 @@ import OptionsWrap from '../base/options-wrap' import CrawledResult from '../base/crawled-result' import Crawling from '../base/crawling' import ErrorMessage from '../base/error-message' -import Header from './header' import Options from './options' import { useModalContext } from '@/context/modal-context' import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' import Toast from '@/app/components/base/toast' import { checkWatercrawlTaskStatus, createWatercrawlTask } from '@/service/datasets' import { sleep } from '@/utils' +import Header from '../base/header' const ERROR_I18N_PREFIX = 'common.errorMsg' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -177,7 +177,13 @@ const WaterCrawl: FC = ({ return (
-
+
void @@ -14,18 +13,18 @@ type Props = { tooltip?: string } -const CheckboxWithLabel: FC = ({ +const CheckboxWithLabel = ({ className = '', isChecked, onChange, label, labelClassName, tooltip, -}) => { +}: CheckboxWithLabelProps) => { return ( -
) } diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts index 5a2ad90620..bb23020e6e 100644 --- a/web/i18n/en-US/dataset-creation.ts +++ b/web/i18n/en-US/dataset-creation.ts @@ -78,6 +78,7 @@ const translation = { configureWatercrawl: 'Configure Watercrawl', configureJinaReader: 'Configure Jina Reader', run: 'Run', + running: 'Running', firecrawlTitle: 'Extract web content with 🔥Firecrawl', firecrawlDoc: 'Firecrawl docs', firecrawlDocLink: 'https://docs.dify.ai/guides/knowledge-base/sync-from-website', diff --git a/web/i18n/zh-Hans/dataset-creation.ts b/web/i18n/zh-Hans/dataset-creation.ts index aec029be2e..815b121ee4 100644 --- a/web/i18n/zh-Hans/dataset-creation.ts +++ b/web/i18n/zh-Hans/dataset-creation.ts @@ -77,6 +77,7 @@ const translation = { configureFirecrawl: '配置 Firecrawl', configureJinaReader: '配置 Jina Reader', run: '运行', + running: '运行中', firecrawlTitle: '使用 🔥Firecrawl 提取网页内容', firecrawlDoc: 'Firecrawl 文档', firecrawlDocLink: 'https://docs.dify.ai/v/zh-hans/guides/knowledge-base/sync-from-website',