From 49bc602fb237183de94cae25761033bd768e0109 Mon Sep 17 00:00:00 2001 From: eux Date: Tue, 24 Dec 2024 21:58:05 +0800 Subject: [PATCH 01/39] fix: --name option for the create-tenant command does not take effect (#11993) --- api/commands.py | 9 +++++++-- api/services/account_service.py | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/api/commands.py b/api/commands.py index ad7ad972f3..59dfce68e0 100644 --- a/api/commands.py +++ b/api/commands.py @@ -561,8 +561,13 @@ def create_tenant(email: str, language: Optional[str] = None, name: Optional[str new_password = secrets.token_urlsafe(16) # register account - account = RegisterService.register(email=email, name=account_name, password=new_password, language=language) - + account = RegisterService.register( + email=email, + name=account_name, + password=new_password, + language=language, + create_workspace_required=False, + ) TenantService.create_owner_tenant_if_not_exist(account, name) click.echo( diff --git a/api/services/account_service.py b/api/services/account_service.py index 91075ec46b..2d37db391c 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -797,6 +797,7 @@ class RegisterService: language: Optional[str] = None, status: Optional[AccountStatus] = None, is_setup: Optional[bool] = False, + create_workspace_required: Optional[bool] = True, ) -> Account: db.session.begin_nested() """Register account""" @@ -814,7 +815,7 @@ class RegisterService: if open_id is not None and provider is not None: AccountService.link_account_integrate(provider, open_id, account) - if FeatureService.get_system_features().is_allow_create_workspace: + if FeatureService.get_system_features().is_allow_create_workspace and create_workspace_required: tenant = TenantService.create_tenant(f"{account.name}'s Workspace") TenantService.create_tenant_member(tenant, account, role="owner") account.current_tenant = tenant From 0ea6a926c5e7d213b06c3825b29cbd4563f47ced Mon Sep 17 00:00:00 2001 From: yihong Date: Tue, 24 Dec 2024 23:14:32 +0800 Subject: [PATCH 02/39] fix: tool can not run (#12054) Signed-off-by: yihong0618 --- api/models/tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/models/tools.py b/api/models/tools.py index 4151a2e9f6..13a112ee83 100644 --- a/api/models/tools.py +++ b/api/models/tools.py @@ -1,5 +1,5 @@ import json -from typing import Optional +from typing import Any, Optional import sqlalchemy as sa from sqlalchemy import ForeignKey, func @@ -282,8 +282,8 @@ class ToolConversationVariables(db.Model): # type: ignore[name-defined] updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) @property - def variables(self) -> dict: - return dict(json.loads(self.variables_str)) + def variables(self) -> Any: + return json.loads(self.variables_str) class ToolFile(db.Model): # type: ignore[name-defined] From 7a24c957bdb3a1477e5d8e2128af25fcb91d6627 Mon Sep 17 00:00:00 2001 From: yihong Date: Tue, 24 Dec 2024 23:14:51 +0800 Subject: [PATCH 03/39] fix: i18n error (#12052) Signed-off-by: yihong0618 --- api/core/tools/entities/tool_entities.py | 8 +++++--- api/services/tools/tools_transform_service.py | 5 +---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py index 260e4e457f..c87a90c03a 100644 --- a/api/core/tools/entities/tool_entities.py +++ b/api/core/tools/entities/tool_entities.py @@ -243,9 +243,11 @@ class ToolParameter(BaseModel): :param options: the options of the parameter """ # convert options to ToolParameterOption + # FIXME fix the type error if options: - options_tool_parametor = [ - ToolParameterOption(value=option, label=I18nObject(en_US=option, zh_Hans=option)) for option in options + options = [ + ToolParameterOption(value=option, label=I18nObject(en_US=option, zh_Hans=option)) # type: ignore + for option in options # type: ignore ] return cls( name=name, @@ -256,7 +258,7 @@ class ToolParameter(BaseModel): form=cls.ToolParameterForm.LLM, llm_description=llm_description, required=required, - options=options_tool_parametor, + options=options, # type: ignore ) diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py index b501554bcd..6e3a45be0d 100644 --- a/api/services/tools/tools_transform_service.py +++ b/api/services/tools/tools_transform_service.py @@ -275,10 +275,7 @@ class ToolTransformService: author=tool.identity.author, name=tool.identity.name, label=tool.identity.label, - description=I18nObject( - en_US=tool.description.human if tool.description else "", - zh_Hans=tool.description.human if tool.description else "", - ), + description=tool.description.human if tool.description else "", # type: ignore parameters=current_parameters, labels=labels, ) From 7da4fb68da065385b78db63ee51183615f74b351 Mon Sep 17 00:00:00 2001 From: yihong Date: Wed, 25 Dec 2024 08:42:52 +0800 Subject: [PATCH 04/39] fix: can not find model bug (#12051) Signed-off-by: yihong0618 --- api/core/entities/provider_configuration.py | 2 +- api/services/entities/model_provider_entities.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index 2e27b362d3..bff5a0ec9c 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -872,7 +872,7 @@ class ProviderConfiguration(BaseModel): # if llm name not in restricted llm list, remove it restrict_model_names = [rm.model for rm in restrict_models] for model in provider_models: - if model.model_type == ModelType.LLM and m.model not in restrict_model_names: + if model.model_type == ModelType.LLM and model.model not in restrict_model_names: model.status = ModelStatus.NO_PERMISSION elif not quota_configuration.is_valid: model.status = ModelStatus.QUOTA_EXCEEDED diff --git a/api/services/entities/model_provider_entities.py b/api/services/entities/model_provider_entities.py index 334d009ee5..f1417c6cb9 100644 --- a/api/services/entities/model_provider_entities.py +++ b/api/services/entities/model_provider_entities.py @@ -7,7 +7,6 @@ from configs import dify_config from core.entities.model_entities import ( ModelWithProviderEntity, ProviderModelWithStatusEntity, - SimpleModelProviderEntity, ) from core.entities.provider_entities import QuotaConfiguration from core.model_runtime.entities.common_entities import I18nObject @@ -152,7 +151,8 @@ class ModelWithProviderEntityResponse(ModelWithProviderEntity): Model with provider entity. """ - provider: SimpleModelProviderEntity + # FIXME type error ignore here + provider: SimpleProviderEntityResponse # type: ignore def __init__(self, model: ModelWithProviderEntity) -> None: super().__init__(**model.model_dump()) From 1d3f218662527db991e2b520e961ec3fa07603df Mon Sep 17 00:00:00 2001 From: yihong Date: Wed, 25 Dec 2024 10:57:52 +0800 Subject: [PATCH 05/39] fix: like failed close #12057 (#12058) Signed-off-by: yihong0618 --- api/controllers/console/explore/message.py | 2 +- api/controllers/service_api/app/message.py | 2 +- api/controllers/web/message.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index c3488de299..690297048e 100644 --- a/api/controllers/console/explore/message.py +++ b/api/controllers/console/explore/message.py @@ -69,7 +69,7 @@ class MessageFeedbackApi(InstalledAppResource): args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, current_user, args["rating"], args["content"]) + MessageService.create_feedback(app_model, message_id, current_user, args.get("rating"), args.get("content")) except services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index 522c7509b9..bed89a99a5 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -108,7 +108,7 @@ class MessageFeedbackApi(Resource): args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, end_user, args["rating"], args["content"]) + MessageService.create_feedback(app_model, message_id, end_user, args.get("rating"), args.get("content")) except services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index 0f47e64370..b636e6be62 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -108,7 +108,7 @@ class MessageFeedbackApi(WebApiResource): args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, end_user, args["rating"], args["content"]) + MessageService.create_feedback(app_model, message_id, end_user, args.get("rating"), args.get("content")) except services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") From 3ea54e9d2574446664a137280875ea3ec04ba7a7 Mon Sep 17 00:00:00 2001 From: Kalo Chin <91766386+fdb02983rhy@users.noreply.github.com> Date: Wed, 25 Dec 2024 12:00:45 +0900 Subject: [PATCH 06/39] =?UTF-8?q?fix:=20update=20S3=20and=20Azure=20config?= =?UTF-8?q?uration=20typos=20in=20.env.example=20and=20corr=E2=80=A6=20(#1?= =?UTF-8?q?2055)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/.env.example | 6 +++--- api/.ruff.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/.env.example b/api/.env.example index 071a200e68..cc3e868717 100644 --- a/api/.env.example +++ b/api/.env.example @@ -65,7 +65,7 @@ OPENDAL_FS_ROOT=storage # S3 Storage configuration S3_USE_AWS_MANAGED_IAM=false -S3_ENDPOINT=https://your-bucket-name.storage.s3.clooudflare.com +S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com S3_BUCKET_NAME=your-bucket-name S3_ACCESS_KEY=your-access-key S3_SECRET_KEY=your-secret-key @@ -74,7 +74,7 @@ S3_REGION=your-region # Azure Blob Storage configuration AZURE_BLOB_ACCOUNT_NAME=your-account-name AZURE_BLOB_ACCOUNT_KEY=your-account-key -AZURE_BLOB_CONTAINER_NAME=yout-container-name +AZURE_BLOB_CONTAINER_NAME=your-container-name AZURE_BLOB_ACCOUNT_URL=https://.blob.core.windows.net # Aliyun oss Storage configuration @@ -88,7 +88,7 @@ ALIYUN_OSS_REGION=your-region ALIYUN_OSS_PATH=your-path # Google Storage configuration -GOOGLE_STORAGE_BUCKET_NAME=yout-bucket-name +GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string # Tencent COS Storage configuration diff --git a/api/.ruff.toml b/api/.ruff.toml index 26a1b977a9..f30275a943 100644 --- a/api/.ruff.toml +++ b/api/.ruff.toml @@ -67,7 +67,7 @@ ignore = [ "SIM105", # suppressible-exception "SIM107", # return-in-try-except-finally "SIM108", # if-else-block-instead-of-if-exp - "SIM113", # eumerate-for-loop + "SIM113", # enumerate-for-loop "SIM117", # multiple-with-statements "SIM210", # if-expr-with-true-false ] From c98d91e44d75cf03f395eee521e5af9a36a45ad8 Mon Sep 17 00:00:00 2001 From: jiangbo721 <365065261@qq.com> Date: Wed, 25 Dec 2024 13:29:43 +0800 Subject: [PATCH 07/39] fix: o1 model error, use max_completion_tokens instead of max_tokens. (#12037) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: 刘江波 --- .../model_providers/azure_openai/llm/llm.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/api/core/model_runtime/model_providers/azure_openai/llm/llm.py b/api/core/model_runtime/model_providers/azure_openai/llm/llm.py index c5d7a83a4e..03818741f6 100644 --- a/api/core/model_runtime/model_providers/azure_openai/llm/llm.py +++ b/api/core/model_runtime/model_providers/azure_openai/llm/llm.py @@ -113,7 +113,7 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel): try: client = AzureOpenAI(**self._to_credential_kwargs(credentials)) - if "o1" in model: + if model.startswith("o1"): client.chat.completions.create( messages=[{"role": "user", "content": "ping"}], model=model, @@ -311,7 +311,10 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel): prompt_messages = self._clear_illegal_prompt_messages(model, prompt_messages) block_as_stream = False - if "o1" in model: + if model.startswith("o1"): + if "max_tokens" in model_parameters: + model_parameters["max_completion_tokens"] = model_parameters["max_tokens"] + del model_parameters["max_tokens"] if stream: block_as_stream = True stream = False @@ -404,7 +407,7 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel): ] ) - if "o1" in model: + if model.startswith("o1"): system_message_count = len([m for m in prompt_messages if isinstance(m, SystemPromptMessage)]) if system_message_count > 0: new_prompt_messages = [] From b281a80150139d419e43df3ee08286aa4c4f6513 Mon Sep 17 00:00:00 2001 From: marvin-season <64943287+marvin-season@users.noreply.github.com> Date: Wed, 25 Dec 2024 13:30:51 +0800 Subject: [PATCH 08/39] fix: zoom in/out click (#12056) Co-authored-by: marvin --- .../components/workflow/operator/zoom-in-out.tsx | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/web/app/components/workflow/operator/zoom-in-out.tsx b/web/app/components/workflow/operator/zoom-in-out.tsx index 6c4bed3751..90b5b46256 100644 --- a/web/app/components/workflow/operator/zoom-in-out.tsx +++ b/web/app/components/workflow/operator/zoom-in-out.tsx @@ -129,7 +129,7 @@ const ZoomInOut: FC = () => { crossAxis: -2, }} > - +
{ shortcuts={['ctrl', '-']} >
{ + if (zoom <= 0.25) + return + e.stopPropagation() zoomOut() }} @@ -153,14 +156,17 @@ const ZoomInOut: FC = () => {
-
{parseFloat(`${zoom * 100}`).toFixed(0)}%
+
{parseFloat(`${zoom * 100}`).toFixed(0)}%
= 2 ? 'cursor-not-allowed' : 'cursor-pointer hover:bg-black/5'}`} onClick={(e) => { + if (zoom >= 2) + return + e.stopPropagation() zoomIn() }} From 83ea931e3cfc4467003b93949f86281052325902 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Wed, 25 Dec 2024 16:24:52 +0800 Subject: [PATCH 09/39] refactor: optimize database usage (#12071) Signed-off-by: -LAN- --- .../advanced_chat/generate_task_pipeline.py | 348 +++++++++--------- .../app/apps/message_based_app_generator.py | 1 - .../apps/workflow/generate_task_pipeline.py | 180 +++++---- .../based_generate_task_pipeline.py | 36 +- .../easy_ui_based_generate_task_pipeline.py | 145 ++++---- .../app/task_pipeline/message_cycle_manage.py | 4 +- .../task_pipeline/workflow_cycle_manage.py | 182 +++++---- api/core/ops/ops_trace_manager.py | 176 +++++---- api/core/ops/utils.py | 2 +- api/models/account.py | 3 +- api/models/model.py | 10 +- api/models/workflow.py | 48 +-- 12 files changed, 574 insertions(+), 561 deletions(-) diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index 1073a0f2e4..691d178ba2 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -5,6 +5,9 @@ from collections.abc import Generator, Mapping from threading import Thread from typing import Any, Optional, Union +from sqlalchemy import select +from sqlalchemy.orm import Session + from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom @@ -79,8 +82,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc _task_state: WorkflowTaskState _application_generate_entity: AdvancedChatAppGenerateEntity - _workflow: Workflow - _user: Union[Account, EndUser] _workflow_system_variables: dict[SystemVariableKey, Any] _wip_workflow_node_executions: dict[str, WorkflowNodeExecution] _conversation_name_generate_thread: Optional[Thread] = None @@ -96,32 +97,35 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc stream: bool, dialogue_count: int, ) -> None: - """ - Initialize AdvancedChatAppGenerateTaskPipeline. - :param application_generate_entity: application generate entity - :param workflow: workflow - :param queue_manager: queue manager - :param conversation: conversation - :param message: message - :param user: user - :param stream: stream - :param dialogue_count: dialogue count - """ - super().__init__(application_generate_entity, queue_manager, user, stream) + super().__init__( + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + stream=stream, + ) - if isinstance(self._user, EndUser): - user_id = self._user.session_id + if isinstance(user, EndUser): + self._user_id = user.session_id + self._created_by_role = CreatedByRole.END_USER + elif isinstance(user, Account): + self._user_id = user.id + self._created_by_role = CreatedByRole.ACCOUNT else: - user_id = self._user.id + raise NotImplementedError(f"User type not supported: {type(user)}") + + self._workflow_id = workflow.id + self._workflow_features_dict = workflow.features_dict + + self._conversation_id = conversation.id + self._conversation_mode = conversation.mode + + self._message_id = message.id + self._message_created_at = int(message.created_at.timestamp()) - self._workflow = workflow - self._conversation = conversation - self._message = message self._workflow_system_variables = { SystemVariableKey.QUERY: message.query, SystemVariableKey.FILES: application_generate_entity.files, SystemVariableKey.CONVERSATION_ID: conversation.id, - SystemVariableKey.USER_ID: user_id, + SystemVariableKey.USER_ID: self._user_id, SystemVariableKey.DIALOGUE_COUNT: dialogue_count, SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id, SystemVariableKey.WORKFLOW_ID: workflow.id, @@ -139,13 +143,9 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc Process generate task pipeline. :return: """ - db.session.refresh(self._workflow) - db.session.refresh(self._user) - db.session.close() - # start generate conversation name thread self._conversation_name_generate_thread = self._generate_conversation_name( - self._conversation, self._application_generate_entity.query + conversation_id=self._conversation_id, query=self._application_generate_entity.query ) generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) @@ -171,12 +171,12 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc return ChatbotAppBlockingResponse( task_id=stream_response.task_id, data=ChatbotAppBlockingResponse.Data( - id=self._message.id, - mode=self._conversation.mode, - conversation_id=self._conversation.id, - message_id=self._message.id, + id=self._message_id, + mode=self._conversation_mode, + conversation_id=self._conversation_id, + message_id=self._message_id, answer=self._task_state.answer, - created_at=int(self._message.created_at.timestamp()), + created_at=self._message_created_at, **extras, ), ) @@ -194,9 +194,9 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc """ for stream_response in generator: yield ChatbotAppStreamResponse( - conversation_id=self._conversation.id, - message_id=self._message.id, - created_at=int(self._message.created_at.timestamp()), + conversation_id=self._conversation_id, + message_id=self._message_id, + created_at=self._message_created_at, stream_response=stream_response, ) @@ -214,7 +214,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc tts_publisher = None task_id = self._application_generate_entity.task_id tenant_id = self._application_generate_entity.app_config.tenant_id - features_dict = self._workflow.features_dict + features_dict = self._workflow_features_dict if ( features_dict.get("text_to_speech") @@ -274,26 +274,33 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if isinstance(event, QueuePingEvent): yield self._ping_stream_response() elif isinstance(event, QueueErrorEvent): - err = self._handle_error(event, self._message) + with Session(db.engine) as session: + err = self._handle_error(event=event, session=session, message_id=self._message_id) + session.commit() yield self._error_to_stream_response(err) break elif isinstance(event, QueueWorkflowStartedEvent): # override graph runtime state graph_runtime_state = event.graph_runtime_state - # init workflow run - workflow_run = self._handle_workflow_run_start() + with Session(db.engine) as session: + # init workflow run + workflow_run = self._handle_workflow_run_start( + session=session, + workflow_id=self._workflow_id, + user_id=self._user_id, + created_by_role=self._created_by_role, + ) + message = self._get_message(session=session) + if not message: + raise ValueError(f"Message not found: {self._message_id}") + message.workflow_run_id = workflow_run.id + session.commit() - self._refetch_message() - self._message.workflow_run_id = workflow_run.id - - db.session.commit() - db.session.refresh(self._message) - db.session.close() - - yield self._workflow_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_start_resp = self._workflow_start_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + yield workflow_start_resp elif isinstance( event, QueueNodeRetryEvent, @@ -304,28 +311,28 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc workflow_run=workflow_run, event=event ) - response = self._workflow_node_retry_to_stream_response( + node_retry_resp = self._workflow_node_retry_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, ) - if response: - yield response + if node_retry_resp: + yield node_retry_resp elif isinstance(event, QueueNodeStartedEvent): if not workflow_run: raise ValueError("workflow run not initialized.") workflow_node_execution = self._handle_node_execution_start(workflow_run=workflow_run, event=event) - response_start = self._workflow_node_start_to_stream_response( + node_start_resp = self._workflow_node_start_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, ) - if response_start: - yield response_start + if node_start_resp: + yield node_start_resp elif isinstance(event, QueueNodeSucceededEvent): workflow_node_execution = self._handle_workflow_node_execution_success(event) @@ -333,25 +340,24 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if event.node_type in [NodeType.ANSWER, NodeType.END]: self._recorded_files.extend(self._fetch_files_from_node_outputs(event.outputs or {})) - response_finish = self._workflow_node_finish_to_stream_response( + node_finish_resp = self._workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, ) - if response_finish: - yield response_finish + if node_finish_resp: + yield node_finish_resp elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent): workflow_node_execution = self._handle_workflow_node_execution_failed(event) - response_finish = self._workflow_node_finish_to_stream_response( + node_finish_resp = self._workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, ) - - if response: - yield response + if node_finish_resp: + yield node_finish_resp elif isinstance(event, QueueParallelBranchRunStartedEvent): if not workflow_run: @@ -395,20 +401,24 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if not graph_runtime_state: raise ValueError("workflow run not initialized.") - workflow_run = self._handle_workflow_run_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - conversation_id=self._conversation.id, - trace_manager=trace_manager, - ) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_success( + session=session, + workflow_run=workflow_run, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + conversation_id=self._conversation_id, + trace_manager=trace_manager, + ) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + yield workflow_finish_resp self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE) elif isinstance(event, QueueWorkflowPartialSuccessEvent): if not workflow_run: @@ -417,21 +427,25 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_partial_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - exceptions_count=event.exceptions_count, - conversation_id=None, - trace_manager=trace_manager, - ) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_partial_success( + session=session, + workflow_run=workflow_run, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + exceptions_count=event.exceptions_count, + conversation_id=None, + trace_manager=trace_manager, + ) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + yield workflow_finish_resp self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE) elif isinstance(event, QueueWorkflowFailedEvent): if not workflow_run: @@ -440,71 +454,73 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_failed( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - status=WorkflowRunStatus.FAILED, - error=event.error, - conversation_id=self._conversation.id, - trace_manager=trace_manager, - exceptions_count=event.exceptions_count, - ) - - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) - - err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_run.error}")) - yield self._error_to_stream_response(self._handle_error(err_event, self._message)) - break - elif isinstance(event, QueueStopEvent): - if workflow_run and graph_runtime_state: + with Session(db.engine) as session: workflow_run = self._handle_workflow_run_failed( + session=session, workflow_run=workflow_run, start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, - status=WorkflowRunStatus.STOPPED, - error=event.get_stop_reason(), - conversation_id=self._conversation.id, + status=WorkflowRunStatus.FAILED, + error=event.error, + conversation_id=self._conversation_id, trace_manager=trace_manager, + exceptions_count=event.exceptions_count, ) - - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run ) + err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_run.error}")) + err = self._handle_error(event=err_event, session=session, message_id=self._message_id) + session.commit() + yield workflow_finish_resp + yield self._error_to_stream_response(err) + break + elif isinstance(event, QueueStopEvent): + if workflow_run and graph_runtime_state: + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_failed( + session=session, + workflow_run=workflow_run, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + status=WorkflowRunStatus.STOPPED, + error=event.get_stop_reason(), + conversation_id=self._conversation_id, + trace_manager=trace_manager, + ) - # Save message - self._save_message(graph_runtime_state=graph_runtime_state) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + ) + # Save message + self._save_message(session=session, graph_runtime_state=graph_runtime_state) + session.commit() + yield workflow_finish_resp yield self._message_end_to_stream_response() break elif isinstance(event, QueueRetrieverResourcesEvent): self._handle_retriever_resources(event) - self._refetch_message() - - self._message.message_metadata = ( - json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None - ) - - db.session.commit() - db.session.refresh(self._message) - db.session.close() + with Session(db.engine) as session: + message = self._get_message(session=session) + message.message_metadata = ( + json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None + ) + session.commit() elif isinstance(event, QueueAnnotationReplyEvent): self._handle_annotation_reply(event) - self._refetch_message() - - self._message.message_metadata = ( - json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None - ) - - db.session.commit() - db.session.refresh(self._message) - db.session.close() + with Session(db.engine) as session: + message = self._get_message(session=session) + message.message_metadata = ( + json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None + ) + session.commit() elif isinstance(event, QueueTextChunkEvent): delta_text = event.text if delta_text is None: @@ -521,7 +537,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc self._task_state.answer += delta_text yield self._message_to_stream_response( - answer=delta_text, message_id=self._message.id, from_variable_selector=event.from_variable_selector + answer=delta_text, message_id=self._message_id, from_variable_selector=event.from_variable_selector ) elif isinstance(event, QueueMessageReplaceEvent): # published by moderation @@ -536,7 +552,9 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc yield self._message_replace_to_stream_response(answer=output_moderation_answer) # Save message - self._save_message(graph_runtime_state=graph_runtime_state) + with Session(db.engine) as session: + self._save_message(session=session, graph_runtime_state=graph_runtime_state) + session.commit() yield self._message_end_to_stream_response() else: @@ -549,54 +567,46 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc if self._conversation_name_generate_thread: self._conversation_name_generate_thread.join() - def _save_message(self, graph_runtime_state: Optional[GraphRuntimeState] = None) -> None: - self._refetch_message() - - self._message.answer = self._task_state.answer - self._message.provider_response_latency = time.perf_counter() - self._start_at - self._message.message_metadata = ( + def _save_message(self, *, session: Session, graph_runtime_state: Optional[GraphRuntimeState] = None) -> None: + message = self._get_message(session=session) + message.answer = self._task_state.answer + message.provider_response_latency = time.perf_counter() - self._start_at + message.message_metadata = ( json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None ) message_files = [ MessageFile( - message_id=self._message.id, + message_id=message.id, type=file["type"], transfer_method=file["transfer_method"], url=file["remote_url"], belongs_to="assistant", upload_file_id=file["related_id"], created_by_role=CreatedByRole.ACCOUNT - if self._message.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} + if message.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else CreatedByRole.END_USER, - created_by=self._message.from_account_id or self._message.from_end_user_id or "", + created_by=message.from_account_id or message.from_end_user_id or "", ) for file in self._recorded_files ] - db.session.add_all(message_files) + session.add_all(message_files) if graph_runtime_state and graph_runtime_state.llm_usage: usage = graph_runtime_state.llm_usage - self._message.message_tokens = usage.prompt_tokens - self._message.message_unit_price = usage.prompt_unit_price - self._message.message_price_unit = usage.prompt_price_unit - self._message.answer_tokens = usage.completion_tokens - self._message.answer_unit_price = usage.completion_unit_price - self._message.answer_price_unit = usage.completion_price_unit - self._message.total_price = usage.total_price - self._message.currency = usage.currency - + message.message_tokens = usage.prompt_tokens + message.message_unit_price = usage.prompt_unit_price + message.message_price_unit = usage.prompt_price_unit + message.answer_tokens = usage.completion_tokens + message.answer_unit_price = usage.completion_unit_price + message.answer_price_unit = usage.completion_price_unit + message.total_price = usage.total_price + message.currency = usage.currency self._task_state.metadata["usage"] = jsonable_encoder(usage) else: self._task_state.metadata["usage"] = jsonable_encoder(LLMUsage.empty_usage()) - - db.session.commit() - message_was_created.send( - self._message, + message, application_generate_entity=self._application_generate_entity, - conversation=self._conversation, - is_first_message=self._application_generate_entity.conversation_id is None, - extras=self._application_generate_entity.extras, ) def _message_end_to_stream_response(self) -> MessageEndStreamResponse: @@ -613,7 +623,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc return MessageEndStreamResponse( task_id=self._application_generate_entity.task_id, - id=self._message.id, + id=self._message_id, files=self._recorded_files, metadata=extras.get("metadata", {}), ) @@ -641,11 +651,9 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc return False - def _refetch_message(self) -> None: - """ - Refetch message. - :return: - """ - message = db.session.query(Message).filter(Message.id == self._message.id).first() - if message: - self._message = message + def _get_message(self, *, session: Session): + stmt = select(Message).where(Message.id == self._message_id) + message = session.scalar(stmt) + if not message: + raise ValueError(f"Message not found: {self._message_id}") + return message diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index c2e35faf89..dcd9463b8a 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -70,7 +70,6 @@ class MessageBasedAppGenerator(BaseAppGenerator): queue_manager=queue_manager, conversation=conversation, message=message, - user=user, stream=stream, ) diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index c47b38f560..574596d4f5 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -3,6 +3,8 @@ import time from collections.abc import Generator from typing import Any, Optional, Union +from sqlalchemy.orm import Session + from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager @@ -50,6 +52,7 @@ from core.ops.ops_trace_manager import TraceQueueManager from core.workflow.enums import SystemVariableKey from extensions.ext_database import db from models.account import Account +from models.enums import CreatedByRole from models.model import EndUser from models.workflow import ( Workflow, @@ -68,8 +71,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa WorkflowAppGenerateTaskPipeline is a class that generate stream output and state management for Application. """ - _workflow: Workflow - _user: Union[Account, EndUser] _task_state: WorkflowTaskState _application_generate_entity: WorkflowAppGenerateEntity _workflow_system_variables: dict[SystemVariableKey, Any] @@ -83,25 +84,27 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa user: Union[Account, EndUser], stream: bool, ) -> None: - """ - Initialize GenerateTaskPipeline. - :param application_generate_entity: application generate entity - :param workflow: workflow - :param queue_manager: queue manager - :param user: user - :param stream: is streamed - """ - super().__init__(application_generate_entity, queue_manager, user, stream) + super().__init__( + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + stream=stream, + ) - if isinstance(self._user, EndUser): - user_id = self._user.session_id + if isinstance(user, EndUser): + self._user_id = user.session_id + self._created_by_role = CreatedByRole.END_USER + elif isinstance(user, Account): + self._user_id = user.id + self._created_by_role = CreatedByRole.ACCOUNT else: - user_id = self._user.id + raise ValueError(f"Invalid user type: {type(user)}") + + self._workflow_id = workflow.id + self._workflow_features_dict = workflow.features_dict - self._workflow = workflow self._workflow_system_variables = { SystemVariableKey.FILES: application_generate_entity.files, - SystemVariableKey.USER_ID: user_id, + SystemVariableKey.USER_ID: self._user_id, SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id, SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, @@ -115,10 +118,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa Process generate task pipeline. :return: """ - db.session.refresh(self._workflow) - db.session.refresh(self._user) - db.session.close() - generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) if self._stream: return self._to_stream_response(generator) @@ -185,7 +184,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa tts_publisher = None task_id = self._application_generate_entity.task_id tenant_id = self._application_generate_entity.app_config.tenant_id - features_dict = self._workflow.features_dict + features_dict = self._workflow_features_dict if ( features_dict.get("text_to_speech") @@ -242,18 +241,26 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa if isinstance(event, QueuePingEvent): yield self._ping_stream_response() elif isinstance(event, QueueErrorEvent): - err = self._handle_error(event) + err = self._handle_error(event=event) yield self._error_to_stream_response(err) break elif isinstance(event, QueueWorkflowStartedEvent): # override graph runtime state graph_runtime_state = event.graph_runtime_state - # init workflow run - workflow_run = self._handle_workflow_run_start() - yield self._workflow_start_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + with Session(db.engine) as session: + # init workflow run + workflow_run = self._handle_workflow_run_start( + session=session, + workflow_id=self._workflow_id, + user_id=self._user_id, + created_by_role=self._created_by_role, + ) + start_resp = self._workflow_start_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + yield start_resp elif isinstance( event, QueueNodeRetryEvent, @@ -350,22 +357,28 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - conversation_id=None, - trace_manager=trace_manager, - ) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_success( + session=session, + workflow_run=workflow_run, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + conversation_id=None, + trace_manager=trace_manager, + ) - # save workflow app log - self._save_workflow_app_log(workflow_run) + # save workflow app log + self._save_workflow_app_log(session=session, workflow_run=workflow_run) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_run=workflow_run, + ) + session.commit() + yield workflow_finish_resp elif isinstance(event, QueueWorkflowPartialSuccessEvent): if not workflow_run: raise ValueError("workflow run not initialized.") @@ -373,49 +386,58 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_partial_success( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - exceptions_count=event.exceptions_count, - conversation_id=None, - trace_manager=trace_manager, - ) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_partial_success( + session=session, + workflow_run=workflow_run, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + exceptions_count=event.exceptions_count, + conversation_id=None, + trace_manager=trace_manager, + ) - # save workflow app log - self._save_workflow_app_log(workflow_run) + # save workflow app log + self._save_workflow_app_log(session=session, workflow_run=workflow_run) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + + yield workflow_finish_resp elif isinstance(event, QueueWorkflowFailedEvent | QueueStopEvent): if not workflow_run: raise ValueError("workflow run not initialized.") if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") - workflow_run = self._handle_workflow_run_failed( - workflow_run=workflow_run, - start_at=graph_runtime_state.start_at, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - status=WorkflowRunStatus.FAILED - if isinstance(event, QueueWorkflowFailedEvent) - else WorkflowRunStatus.STOPPED, - error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(), - conversation_id=None, - trace_manager=trace_manager, - exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, - ) + with Session(db.engine) as session: + workflow_run = self._handle_workflow_run_failed( + session=session, + workflow_run=workflow_run, + start_at=graph_runtime_state.start_at, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + status=WorkflowRunStatus.FAILED + if isinstance(event, QueueWorkflowFailedEvent) + else WorkflowRunStatus.STOPPED, + error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(), + conversation_id=None, + trace_manager=trace_manager, + exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, + ) - # save workflow app log - self._save_workflow_app_log(workflow_run) + # save workflow app log + self._save_workflow_app_log(session=session, workflow_run=workflow_run) - yield self._workflow_finish_to_stream_response( - task_id=self._application_generate_entity.task_id, workflow_run=workflow_run - ) + workflow_finish_resp = self._workflow_finish_to_stream_response( + session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + ) + session.commit() + yield workflow_finish_resp elif isinstance(event, QueueTextChunkEvent): delta_text = event.text if delta_text is None: @@ -435,7 +457,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa if tts_publisher: tts_publisher.publish(None) - def _save_workflow_app_log(self, workflow_run: WorkflowRun) -> None: + def _save_workflow_app_log(self, *, session: Session, workflow_run: WorkflowRun) -> None: """ Save workflow app log. :return: @@ -457,12 +479,10 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa workflow_app_log.workflow_id = workflow_run.workflow_id workflow_app_log.workflow_run_id = workflow_run.id workflow_app_log.created_from = created_from.value - workflow_app_log.created_by_role = "account" if isinstance(self._user, Account) else "end_user" - workflow_app_log.created_by = self._user.id + workflow_app_log.created_by_role = self._created_by_role + workflow_app_log.created_by = self._user_id - db.session.add(workflow_app_log) - db.session.commit() - db.session.close() + session.add(workflow_app_log) def _text_chunk_to_stream_response( self, text: str, from_variable_selector: Optional[list[str]] = None diff --git a/api/core/app/task_pipeline/based_generate_task_pipeline.py b/api/core/app/task_pipeline/based_generate_task_pipeline.py index 03a81353d0..e363a7f642 100644 --- a/api/core/app/task_pipeline/based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/based_generate_task_pipeline.py @@ -1,6 +1,9 @@ import logging import time -from typing import Optional, Union +from typing import Optional + +from sqlalchemy import select +from sqlalchemy.orm import Session from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import ( @@ -17,9 +20,7 @@ from core.app.entities.task_entities import ( from core.errors.error import QuotaExceededError from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError from core.moderation.output_moderation import ModerationRule, OutputModeration -from extensions.ext_database import db -from models.account import Account -from models.model import EndUser, Message +from models.model import Message logger = logging.getLogger(__name__) @@ -36,7 +37,6 @@ class BasedGenerateTaskPipeline: self, application_generate_entity: AppGenerateEntity, queue_manager: AppQueueManager, - user: Union[Account, EndUser], stream: bool, ) -> None: """ @@ -48,18 +48,11 @@ class BasedGenerateTaskPipeline: """ self._application_generate_entity = application_generate_entity self._queue_manager = queue_manager - self._user = user self._start_at = time.perf_counter() self._output_moderation_handler = self._init_output_moderation() self._stream = stream - def _handle_error(self, event: QueueErrorEvent, message: Optional[Message] = None): - """ - Handle error event. - :param event: event - :param message: message - :return: - """ + def _handle_error(self, *, event: QueueErrorEvent, session: Session | None = None, message_id: str = ""): logger.debug("error: %s", event.error) e = event.error err: Exception @@ -71,16 +64,17 @@ class BasedGenerateTaskPipeline: else: err = Exception(e.description if getattr(e, "description", None) is not None else str(e)) - if message: - refetch_message = db.session.query(Message).filter(Message.id == message.id).first() + if not message_id or not session: + return err - if refetch_message: - err_desc = self._error_to_desc(err) - refetch_message.status = "error" - refetch_message.error = err_desc - - db.session.commit() + stmt = select(Message).where(Message.id == message_id) + message = session.scalar(stmt) + if not message: + return err + err_desc = self._error_to_desc(err) + message.status = "error" + message.error = err_desc return err def _error_to_desc(self, e: Exception) -> str: diff --git a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py index b9f8e7ca56..c84f8ba3e4 100644 --- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py @@ -5,6 +5,9 @@ from collections.abc import Generator from threading import Thread from typing import Optional, Union, cast +from sqlalchemy import select +from sqlalchemy.orm import Session + from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom @@ -55,8 +58,7 @@ from core.prompt.utils.prompt_message_util import PromptMessageUtil from core.prompt.utils.prompt_template_parser import PromptTemplateParser from events.message_event import message_was_created from extensions.ext_database import db -from models.account import Account -from models.model import AppMode, Conversation, EndUser, Message, MessageAgentThought +from models.model import AppMode, Conversation, Message, MessageAgentThought logger = logging.getLogger(__name__) @@ -77,23 +79,21 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan queue_manager: AppQueueManager, conversation: Conversation, message: Message, - user: Union[Account, EndUser], stream: bool, ) -> None: - """ - Initialize GenerateTaskPipeline. - :param application_generate_entity: application generate entity - :param queue_manager: queue manager - :param conversation: conversation - :param message: message - :param user: user - :param stream: stream - """ - super().__init__(application_generate_entity, queue_manager, user, stream) + super().__init__( + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + stream=stream, + ) self._model_config = application_generate_entity.model_conf self._app_config = application_generate_entity.app_config - self._conversation = conversation - self._message = message + + self._conversation_id = conversation.id + self._conversation_mode = conversation.mode + + self._message_id = message.id + self._message_created_at = int(message.created_at.timestamp()) self._task_state = EasyUITaskState( llm_result=LLMResult( @@ -113,18 +113,10 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan CompletionAppBlockingResponse, Generator[Union[ChatbotAppStreamResponse, CompletionAppStreamResponse], None, None], ]: - """ - Process generate task pipeline. - :return: - """ - db.session.refresh(self._conversation) - db.session.refresh(self._message) - db.session.close() - if self._application_generate_entity.app_config.app_mode != AppMode.COMPLETION: # start generate conversation name thread self._conversation_name_generate_thread = self._generate_conversation_name( - self._conversation, self._application_generate_entity.query or "" + conversation_id=self._conversation_id, query=self._application_generate_entity.query or "" ) generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) @@ -148,15 +140,15 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan if self._task_state.metadata: extras["metadata"] = self._task_state.metadata response: Union[ChatbotAppBlockingResponse, CompletionAppBlockingResponse] - if self._conversation.mode == AppMode.COMPLETION.value: + if self._conversation_mode == AppMode.COMPLETION.value: response = CompletionAppBlockingResponse( task_id=self._application_generate_entity.task_id, data=CompletionAppBlockingResponse.Data( - id=self._message.id, - mode=self._conversation.mode, - message_id=self._message.id, + id=self._message_id, + mode=self._conversation_mode, + message_id=self._message_id, answer=cast(str, self._task_state.llm_result.message.content), - created_at=int(self._message.created_at.timestamp()), + created_at=self._message_created_at, **extras, ), ) @@ -164,12 +156,12 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan response = ChatbotAppBlockingResponse( task_id=self._application_generate_entity.task_id, data=ChatbotAppBlockingResponse.Data( - id=self._message.id, - mode=self._conversation.mode, - conversation_id=self._conversation.id, - message_id=self._message.id, + id=self._message_id, + mode=self._conversation_mode, + conversation_id=self._conversation_id, + message_id=self._message_id, answer=cast(str, self._task_state.llm_result.message.content), - created_at=int(self._message.created_at.timestamp()), + created_at=self._message_created_at, **extras, ), ) @@ -190,15 +182,15 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan for stream_response in generator: if isinstance(self._application_generate_entity, CompletionAppGenerateEntity): yield CompletionAppStreamResponse( - message_id=self._message.id, - created_at=int(self._message.created_at.timestamp()), + message_id=self._message_id, + created_at=self._message_created_at, stream_response=stream_response, ) else: yield ChatbotAppStreamResponse( - conversation_id=self._conversation.id, - message_id=self._message.id, - created_at=int(self._message.created_at.timestamp()), + conversation_id=self._conversation_id, + message_id=self._message_id, + created_at=self._message_created_at, stream_response=stream_response, ) @@ -265,7 +257,9 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan event = message.event if isinstance(event, QueueErrorEvent): - err = self._handle_error(event, self._message) + with Session(db.engine) as session: + err = self._handle_error(event=event, session=session, message_id=self._message_id) + session.commit() yield self._error_to_stream_response(err) break elif isinstance(event, QueueStopEvent | QueueMessageEndEvent): @@ -283,10 +277,12 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan self._task_state.llm_result.message.content = output_moderation_answer yield self._message_replace_to_stream_response(answer=output_moderation_answer) - # Save message - self._save_message(trace_manager) - - yield self._message_end_to_stream_response() + with Session(db.engine) as session: + # Save message + self._save_message(session=session, trace_manager=trace_manager) + session.commit() + message_end_resp = self._message_end_to_stream_response() + yield message_end_resp elif isinstance(event, QueueRetrieverResourcesEvent): self._handle_retriever_resources(event) elif isinstance(event, QueueAnnotationReplyEvent): @@ -320,9 +316,15 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan self._task_state.llm_result.message.content = current_content if isinstance(event, QueueLLMChunkEvent): - yield self._message_to_stream_response(cast(str, delta_text), self._message.id) + yield self._message_to_stream_response( + answer=cast(str, delta_text), + message_id=self._message_id, + ) else: - yield self._agent_message_to_stream_response(cast(str, delta_text), self._message.id) + yield self._agent_message_to_stream_response( + answer=cast(str, delta_text), + message_id=self._message_id, + ) elif isinstance(event, QueueMessageReplaceEvent): yield self._message_replace_to_stream_response(answer=event.text) elif isinstance(event, QueuePingEvent): @@ -334,7 +336,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan if self._conversation_name_generate_thread: self._conversation_name_generate_thread.join() - def _save_message(self, trace_manager: Optional[TraceQueueManager] = None) -> None: + def _save_message(self, *, session: Session, trace_manager: Optional[TraceQueueManager] = None) -> None: """ Save message. :return: @@ -342,53 +344,46 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan llm_result = self._task_state.llm_result usage = llm_result.usage - message = db.session.query(Message).filter(Message.id == self._message.id).first() + message_stmt = select(Message).where(Message.id == self._message_id) + message = session.scalar(message_stmt) if not message: - raise Exception(f"Message {self._message.id} not found") - self._message = message - conversation = db.session.query(Conversation).filter(Conversation.id == self._conversation.id).first() + raise ValueError(f"message {self._message_id} not found") + conversation_stmt = select(Conversation).where(Conversation.id == self._conversation_id) + conversation = session.scalar(conversation_stmt) if not conversation: - raise Exception(f"Conversation {self._conversation.id} not found") - self._conversation = conversation + raise ValueError(f"Conversation {self._conversation_id} not found") - self._message.message = PromptMessageUtil.prompt_messages_to_prompt_for_saving( + message.message = PromptMessageUtil.prompt_messages_to_prompt_for_saving( self._model_config.mode, self._task_state.llm_result.prompt_messages ) - self._message.message_tokens = usage.prompt_tokens - self._message.message_unit_price = usage.prompt_unit_price - self._message.message_price_unit = usage.prompt_price_unit - self._message.answer = ( + message.message_tokens = usage.prompt_tokens + message.message_unit_price = usage.prompt_unit_price + message.message_price_unit = usage.prompt_price_unit + message.answer = ( PromptTemplateParser.remove_template_variables(cast(str, llm_result.message.content).strip()) if llm_result.message.content else "" ) - self._message.answer_tokens = usage.completion_tokens - self._message.answer_unit_price = usage.completion_unit_price - self._message.answer_price_unit = usage.completion_price_unit - self._message.provider_response_latency = time.perf_counter() - self._start_at - self._message.total_price = usage.total_price - self._message.currency = usage.currency - self._message.message_metadata = ( + message.answer_tokens = usage.completion_tokens + message.answer_unit_price = usage.completion_unit_price + message.answer_price_unit = usage.completion_price_unit + message.provider_response_latency = time.perf_counter() - self._start_at + message.total_price = usage.total_price + message.currency = usage.currency + message.message_metadata = ( json.dumps(jsonable_encoder(self._task_state.metadata)) if self._task_state.metadata else None ) - db.session.commit() - if trace_manager: trace_manager.add_trace_task( TraceTask( - TraceTaskName.MESSAGE_TRACE, conversation_id=self._conversation.id, message_id=self._message.id + TraceTaskName.MESSAGE_TRACE, conversation_id=self._conversation_id, message_id=self._message_id ) ) message_was_created.send( - self._message, + message, application_generate_entity=self._application_generate_entity, - conversation=self._conversation, - is_first_message=self._application_generate_entity.app_config.app_mode in {AppMode.AGENT_CHAT, AppMode.CHAT} - and hasattr(self._application_generate_entity, "conversation_id") - and self._application_generate_entity.conversation_id is None, - extras=self._application_generate_entity.extras, ) def _handle_stop(self, event: QueueStopEvent) -> None: @@ -434,7 +429,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan return MessageEndStreamResponse( task_id=self._application_generate_entity.task_id, - id=self._message.id, + id=self._message_id, metadata=extras.get("metadata", {}), ) diff --git a/api/core/app/task_pipeline/message_cycle_manage.py b/api/core/app/task_pipeline/message_cycle_manage.py index 007543f6d0..15f2c25c66 100644 --- a/api/core/app/task_pipeline/message_cycle_manage.py +++ b/api/core/app/task_pipeline/message_cycle_manage.py @@ -36,7 +36,7 @@ class MessageCycleManage: ] _task_state: Union[EasyUITaskState, WorkflowTaskState] - def _generate_conversation_name(self, conversation: Conversation, query: str) -> Optional[Thread]: + def _generate_conversation_name(self, *, conversation_id: str, query: str) -> Optional[Thread]: """ Generate conversation name. :param conversation: conversation @@ -56,7 +56,7 @@ class MessageCycleManage: target=self._generate_conversation_name_worker, kwargs={ "flask_app": current_app._get_current_object(), # type: ignore - "conversation_id": conversation.id, + "conversation_id": conversation_id, "query": query, }, ) diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py index f581e564f2..2692008c66 100644 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ b/api/core/app/task_pipeline/workflow_cycle_manage.py @@ -5,6 +5,7 @@ from datetime import UTC, datetime from typing import Any, Optional, Union, cast from uuid import uuid4 +from sqlalchemy import func, select from sqlalchemy.orm import Session from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity @@ -63,27 +64,34 @@ from .exc import WorkflowNodeExecutionNotFoundError, WorkflowRunNotFoundError class WorkflowCycleManage: _application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity] - _workflow: Workflow - _user: Union[Account, EndUser] _task_state: WorkflowTaskState _workflow_system_variables: dict[SystemVariableKey, Any] _wip_workflow_node_executions: dict[str, WorkflowNodeExecution] - def _handle_workflow_run_start(self) -> WorkflowRun: - max_sequence = ( - db.session.query(db.func.max(WorkflowRun.sequence_number)) - .filter(WorkflowRun.tenant_id == self._workflow.tenant_id) - .filter(WorkflowRun.app_id == self._workflow.app_id) - .scalar() - or 0 + def _handle_workflow_run_start( + self, + *, + session: Session, + workflow_id: str, + user_id: str, + created_by_role: CreatedByRole, + ) -> WorkflowRun: + workflow_stmt = select(Workflow).where(Workflow.id == workflow_id) + workflow = session.scalar(workflow_stmt) + if not workflow: + raise ValueError(f"Workflow not found: {workflow_id}") + + max_sequence_stmt = select(func.max(WorkflowRun.sequence_number)).where( + WorkflowRun.tenant_id == workflow.tenant_id, + WorkflowRun.app_id == workflow.app_id, ) + max_sequence = session.scalar(max_sequence_stmt) or 0 new_sequence_number = max_sequence + 1 inputs = {**self._application_generate_entity.inputs} for key, value in (self._workflow_system_variables or {}).items(): if key.value == "conversation": continue - inputs[f"sys.{key.value}"] = value triggered_from = ( @@ -96,33 +104,32 @@ class WorkflowCycleManage: inputs = dict(WorkflowEntry.handle_special_values(inputs) or {}) # init workflow run - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = WorkflowRun() - system_id = self._workflow_system_variables[SystemVariableKey.WORKFLOW_RUN_ID] - workflow_run.id = system_id or str(uuid4()) - workflow_run.tenant_id = self._workflow.tenant_id - workflow_run.app_id = self._workflow.app_id - workflow_run.sequence_number = new_sequence_number - workflow_run.workflow_id = self._workflow.id - workflow_run.type = self._workflow.type - workflow_run.triggered_from = triggered_from.value - workflow_run.version = self._workflow.version - workflow_run.graph = self._workflow.graph - workflow_run.inputs = json.dumps(inputs) - workflow_run.status = WorkflowRunStatus.RUNNING - workflow_run.created_by_role = ( - CreatedByRole.ACCOUNT if isinstance(self._user, Account) else CreatedByRole.END_USER - ) - workflow_run.created_by = self._user.id - workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None) + workflow_run_id = str(self._workflow_system_variables.get(SystemVariableKey.WORKFLOW_RUN_ID, uuid4())) - session.add(workflow_run) - session.commit() + workflow_run = WorkflowRun() + workflow_run.id = workflow_run_id + workflow_run.tenant_id = workflow.tenant_id + workflow_run.app_id = workflow.app_id + workflow_run.sequence_number = new_sequence_number + workflow_run.workflow_id = workflow.id + workflow_run.type = workflow.type + workflow_run.triggered_from = triggered_from.value + workflow_run.version = workflow.version + workflow_run.graph = workflow.graph + workflow_run.inputs = json.dumps(inputs) + workflow_run.status = WorkflowRunStatus.RUNNING + workflow_run.created_by_role = created_by_role + workflow_run.created_by = user_id + workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None) + + session.add(workflow_run) return workflow_run def _handle_workflow_run_success( self, + *, + session: Session, workflow_run: WorkflowRun, start_at: float, total_tokens: int, @@ -141,7 +148,7 @@ class WorkflowCycleManage: :param conversation_id: conversation id :return: """ - workflow_run = self._refetch_workflow_run(workflow_run.id) + workflow_run = self._refetch_workflow_run(session=session, workflow_run_id=workflow_run.id) outputs = WorkflowEntry.handle_special_values(outputs) @@ -152,9 +159,6 @@ class WorkflowCycleManage: workflow_run.total_steps = total_steps workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) - db.session.commit() - db.session.refresh(workflow_run) - if trace_manager: trace_manager.add_trace_task( TraceTask( @@ -165,12 +169,12 @@ class WorkflowCycleManage: ) ) - db.session.close() - return workflow_run def _handle_workflow_run_partial_success( self, + *, + session: Session, workflow_run: WorkflowRun, start_at: float, total_tokens: int, @@ -190,7 +194,7 @@ class WorkflowCycleManage: :param conversation_id: conversation id :return: """ - workflow_run = self._refetch_workflow_run(workflow_run.id) + workflow_run = self._refetch_workflow_run(session=session, workflow_run_id=workflow_run.id) outputs = WorkflowEntry.handle_special_values(dict(outputs) if outputs else None) @@ -201,8 +205,6 @@ class WorkflowCycleManage: workflow_run.total_steps = total_steps workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) workflow_run.exceptions_count = exceptions_count - db.session.commit() - db.session.refresh(workflow_run) if trace_manager: trace_manager.add_trace_task( @@ -214,12 +216,12 @@ class WorkflowCycleManage: ) ) - db.session.close() - return workflow_run def _handle_workflow_run_failed( self, + *, + session: Session, workflow_run: WorkflowRun, start_at: float, total_tokens: int, @@ -240,7 +242,7 @@ class WorkflowCycleManage: :param error: error message :return: """ - workflow_run = self._refetch_workflow_run(workflow_run.id) + workflow_run = self._refetch_workflow_run(session=session, workflow_run_id=workflow_run.id) workflow_run.status = status.value workflow_run.error = error @@ -249,21 +251,18 @@ class WorkflowCycleManage: workflow_run.total_steps = total_steps workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) workflow_run.exceptions_count = exceptions_count - db.session.commit() - running_workflow_node_executions = ( - db.session.query(WorkflowNodeExecution) - .filter( - WorkflowNodeExecution.tenant_id == workflow_run.tenant_id, - WorkflowNodeExecution.app_id == workflow_run.app_id, - WorkflowNodeExecution.workflow_id == workflow_run.workflow_id, - WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value, - WorkflowNodeExecution.workflow_run_id == workflow_run.id, - WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING.value, - ) - .all() + stmt = select(WorkflowNodeExecution).where( + WorkflowNodeExecution.tenant_id == workflow_run.tenant_id, + WorkflowNodeExecution.app_id == workflow_run.app_id, + WorkflowNodeExecution.workflow_id == workflow_run.workflow_id, + WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value, + WorkflowNodeExecution.workflow_run_id == workflow_run.id, + WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING.value, ) + running_workflow_node_executions = session.scalars(stmt).all() + for workflow_node_execution in running_workflow_node_executions: workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value workflow_node_execution.error = error @@ -271,13 +270,6 @@ class WorkflowCycleManage: workflow_node_execution.elapsed_time = ( workflow_node_execution.finished_at - workflow_node_execution.created_at ).total_seconds() - db.session.commit() - - db.session.close() - - # with Session(db.engine, expire_on_commit=False) as session: - # session.add(workflow_run) - # session.refresh(workflow_run) if trace_manager: trace_manager.add_trace_task( @@ -485,14 +477,14 @@ class WorkflowCycleManage: ################################################# def _workflow_start_to_stream_response( - self, task_id: str, workflow_run: WorkflowRun + self, + *, + session: Session, + task_id: str, + workflow_run: WorkflowRun, ) -> WorkflowStartStreamResponse: - """ - Workflow start to stream response. - :param task_id: task id - :param workflow_run: workflow run - :return: - """ + # receive session to make sure the workflow_run won't be expired, need a more elegant way to handle this + _ = session return WorkflowStartStreamResponse( task_id=task_id, workflow_run_id=workflow_run.id, @@ -506,36 +498,32 @@ class WorkflowCycleManage: ) def _workflow_finish_to_stream_response( - self, task_id: str, workflow_run: WorkflowRun + self, + *, + session: Session, + task_id: str, + workflow_run: WorkflowRun, ) -> WorkflowFinishStreamResponse: - """ - Workflow finish to stream response. - :param task_id: task id - :param workflow_run: workflow run - :return: - """ - # Attach WorkflowRun to an active session so "created_by_role" can be accessed. - workflow_run = db.session.merge(workflow_run) - - # Refresh to ensure any expired attributes are fully loaded - db.session.refresh(workflow_run) - created_by = None - if workflow_run.created_by_role == CreatedByRole.ACCOUNT.value: - created_by_account = workflow_run.created_by_account - if created_by_account: + if workflow_run.created_by_role == CreatedByRole.ACCOUNT: + stmt = select(Account).where(Account.id == workflow_run.created_by) + account = session.scalar(stmt) + if account: created_by = { - "id": created_by_account.id, - "name": created_by_account.name, - "email": created_by_account.email, + "id": account.id, + "name": account.name, + "email": account.email, + } + elif workflow_run.created_by_role == CreatedByRole.END_USER: + stmt = select(EndUser).where(EndUser.id == workflow_run.created_by) + end_user = session.scalar(stmt) + if end_user: + created_by = { + "id": end_user.id, + "user": end_user.session_id, } else: - created_by_end_user = workflow_run.created_by_end_user - if created_by_end_user: - created_by = { - "id": created_by_end_user.id, - "user": created_by_end_user.session_id, - } + raise NotImplementedError(f"unknown created_by_role: {workflow_run.created_by_role}") return WorkflowFinishStreamResponse( task_id=task_id, @@ -895,14 +883,14 @@ class WorkflowCycleManage: return None - def _refetch_workflow_run(self, workflow_run_id: str) -> WorkflowRun: + def _refetch_workflow_run(self, *, session: Session, workflow_run_id: str) -> WorkflowRun: """ Refetch workflow run :param workflow_run_id: workflow run id :return: """ - workflow_run = db.session.query(WorkflowRun).filter(WorkflowRun.id == workflow_run_id).first() - + stmt = select(WorkflowRun).where(WorkflowRun.id == workflow_run_id) + workflow_run = session.scalar(stmt) if not workflow_run: raise WorkflowRunNotFoundError(workflow_run_id) diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index f538eaef5b..691cb8d400 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -9,6 +9,8 @@ from typing import Any, Optional, Union from uuid import UUID, uuid4 from flask import current_app +from sqlalchemy import select +from sqlalchemy.orm import Session from core.helper.encrypter import decrypt_token, encrypt_token, obfuscated_token from core.ops.entities.config_entity import ( @@ -329,15 +331,15 @@ class TraceTask: ): self.trace_type = trace_type self.message_id = message_id - self.workflow_run = workflow_run + self.workflow_run_id = workflow_run.id if workflow_run else None self.conversation_id = conversation_id self.user_id = user_id self.timer = timer - self.kwargs = kwargs self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") - self.app_id = None + self.kwargs = kwargs + def execute(self): return self.preprocess() @@ -345,19 +347,23 @@ class TraceTask: preprocess_map = { TraceTaskName.CONVERSATION_TRACE: lambda: self.conversation_trace(**self.kwargs), TraceTaskName.WORKFLOW_TRACE: lambda: self.workflow_trace( - self.workflow_run, self.conversation_id, self.user_id + workflow_run_id=self.workflow_run_id, conversation_id=self.conversation_id, user_id=self.user_id + ), + TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(message_id=self.message_id), + TraceTaskName.MODERATION_TRACE: lambda: self.moderation_trace( + message_id=self.message_id, timer=self.timer, **self.kwargs ), - TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(self.message_id), - TraceTaskName.MODERATION_TRACE: lambda: self.moderation_trace(self.message_id, self.timer, **self.kwargs), TraceTaskName.SUGGESTED_QUESTION_TRACE: lambda: self.suggested_question_trace( - self.message_id, self.timer, **self.kwargs + message_id=self.message_id, timer=self.timer, **self.kwargs ), TraceTaskName.DATASET_RETRIEVAL_TRACE: lambda: self.dataset_retrieval_trace( - self.message_id, self.timer, **self.kwargs + message_id=self.message_id, timer=self.timer, **self.kwargs + ), + TraceTaskName.TOOL_TRACE: lambda: self.tool_trace( + message_id=self.message_id, timer=self.timer, **self.kwargs ), - TraceTaskName.TOOL_TRACE: lambda: self.tool_trace(self.message_id, self.timer, **self.kwargs), TraceTaskName.GENERATE_NAME_TRACE: lambda: self.generate_name_trace( - self.conversation_id, self.timer, **self.kwargs + conversation_id=self.conversation_id, timer=self.timer, **self.kwargs ), } @@ -367,86 +373,100 @@ class TraceTask: def conversation_trace(self, **kwargs): return kwargs - def workflow_trace(self, workflow_run: WorkflowRun | None, conversation_id, user_id): - if not workflow_run: - raise ValueError("Workflow run not found") + def workflow_trace( + self, + *, + workflow_run_id: str | None, + conversation_id: str | None, + user_id: str | None, + ): + if not workflow_run_id: + return {} - db.session.merge(workflow_run) - db.session.refresh(workflow_run) + with Session(db.engine) as session: + workflow_run_stmt = select(WorkflowRun).where(WorkflowRun.id == workflow_run_id) + workflow_run = session.scalars(workflow_run_stmt).first() + if not workflow_run: + raise ValueError("Workflow run not found") - workflow_id = workflow_run.workflow_id - tenant_id = workflow_run.tenant_id - workflow_run_id = workflow_run.id - workflow_run_elapsed_time = workflow_run.elapsed_time - workflow_run_status = workflow_run.status - workflow_run_inputs = workflow_run.inputs_dict - workflow_run_outputs = workflow_run.outputs_dict - workflow_run_version = workflow_run.version - error = workflow_run.error or "" + workflow_id = workflow_run.workflow_id + tenant_id = workflow_run.tenant_id + workflow_run_id = workflow_run.id + workflow_run_elapsed_time = workflow_run.elapsed_time + workflow_run_status = workflow_run.status + workflow_run_inputs = workflow_run.inputs_dict + workflow_run_outputs = workflow_run.outputs_dict + workflow_run_version = workflow_run.version + error = workflow_run.error or "" - total_tokens = workflow_run.total_tokens + total_tokens = workflow_run.total_tokens - file_list = workflow_run_inputs.get("sys.file") or [] - query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" + file_list = workflow_run_inputs.get("sys.file") or [] + query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" - # get workflow_app_log_id - workflow_app_log_data = ( - db.session.query(WorkflowAppLog) - .filter_by(tenant_id=tenant_id, app_id=workflow_run.app_id, workflow_run_id=workflow_run.id) - .first() - ) - workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None - # get message_id - message_data = ( - db.session.query(Message.id) - .filter_by(conversation_id=conversation_id, workflow_run_id=workflow_run_id) - .first() - ) - message_id = str(message_data.id) if message_data else None + # get workflow_app_log_id + workflow_app_log_data_stmt = select(WorkflowAppLog.id).where( + WorkflowAppLog.tenant_id == tenant_id, + WorkflowAppLog.app_id == workflow_run.app_id, + WorkflowAppLog.workflow_run_id == workflow_run.id, + ) + workflow_app_log_id = session.scalar(workflow_app_log_data_stmt) + # get message_id + message_id = None + if conversation_id: + message_data_stmt = select(Message.id).where( + Message.conversation_id == conversation_id, + Message.workflow_run_id == workflow_run_id, + ) + message_id = session.scalar(message_data_stmt) - metadata = { - "workflow_id": workflow_id, - "conversation_id": conversation_id, - "workflow_run_id": workflow_run_id, - "tenant_id": tenant_id, - "elapsed_time": workflow_run_elapsed_time, - "status": workflow_run_status, - "version": workflow_run_version, - "total_tokens": total_tokens, - "file_list": file_list, - "triggered_form": workflow_run.triggered_from, - "user_id": user_id, - } - - workflow_trace_info = WorkflowTraceInfo( - workflow_data=workflow_run.to_dict(), - conversation_id=conversation_id, - workflow_id=workflow_id, - tenant_id=tenant_id, - workflow_run_id=workflow_run_id, - workflow_run_elapsed_time=workflow_run_elapsed_time, - workflow_run_status=workflow_run_status, - workflow_run_inputs=workflow_run_inputs, - workflow_run_outputs=workflow_run_outputs, - workflow_run_version=workflow_run_version, - error=error, - total_tokens=total_tokens, - file_list=file_list, - query=query, - metadata=metadata, - workflow_app_log_id=workflow_app_log_id, - message_id=message_id, - start_time=workflow_run.created_at, - end_time=workflow_run.finished_at, - ) + metadata = { + "workflow_id": workflow_id, + "conversation_id": conversation_id, + "workflow_run_id": workflow_run_id, + "tenant_id": tenant_id, + "elapsed_time": workflow_run_elapsed_time, + "status": workflow_run_status, + "version": workflow_run_version, + "total_tokens": total_tokens, + "file_list": file_list, + "triggered_form": workflow_run.triggered_from, + "user_id": user_id, + } + workflow_trace_info = WorkflowTraceInfo( + workflow_data=workflow_run.to_dict(), + conversation_id=conversation_id, + workflow_id=workflow_id, + tenant_id=tenant_id, + workflow_run_id=workflow_run_id, + workflow_run_elapsed_time=workflow_run_elapsed_time, + workflow_run_status=workflow_run_status, + workflow_run_inputs=workflow_run_inputs, + workflow_run_outputs=workflow_run_outputs, + workflow_run_version=workflow_run_version, + error=error, + total_tokens=total_tokens, + file_list=file_list, + query=query, + metadata=metadata, + workflow_app_log_id=workflow_app_log_id, + message_id=message_id, + start_time=workflow_run.created_at, + end_time=workflow_run.finished_at, + ) return workflow_trace_info - def message_trace(self, message_id): + def message_trace(self, message_id: str | None): + if not message_id: + return {} message_data = get_message_data(message_id) if not message_data: return {} - conversation_mode = db.session.query(Conversation.mode).filter_by(id=message_data.conversation_id).first() + conversation_mode_stmt = select(Conversation.mode).where(Conversation.id == message_data.conversation_id) + conversation_mode = db.session.scalars(conversation_mode_stmt).all() + if not conversation_mode or len(conversation_mode) == 0: + return {} conversation_mode = conversation_mode[0] created_at = message_data.created_at inputs = message_data.message diff --git a/api/core/ops/utils.py b/api/core/ops/utils.py index 998eba9ea9..8b06df1930 100644 --- a/api/core/ops/utils.py +++ b/api/core/ops/utils.py @@ -18,7 +18,7 @@ def filter_none_values(data: dict): return new_data -def get_message_data(message_id): +def get_message_data(message_id: str): return db.session.query(Message).filter(Message.id == message_id).first() diff --git a/api/models/account.py b/api/models/account.py index 88c96da1a1..35a28df750 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -3,6 +3,7 @@ import json from flask_login import UserMixin # type: ignore from sqlalchemy import func +from sqlalchemy.orm import Mapped, mapped_column from .engine import db from .types import StringUUID @@ -20,7 +21,7 @@ class Account(UserMixin, db.Model): # type: ignore[name-defined] __tablename__ = "accounts" __table_args__ = (db.PrimaryKeyConstraint("id", name="account_pkey"), db.Index("account_email_idx", "email")) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) name = db.Column(db.String(255), nullable=False) email = db.Column(db.String(255), nullable=False) password = db.Column(db.String(255), nullable=True) diff --git a/api/models/model.py b/api/models/model.py index 2a593f0829..d2d4d5853f 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -530,13 +530,13 @@ class Conversation(db.Model): # type: ignore[name-defined] db.Index("conversation_app_from_user_idx", "app_id", "from_source", "from_end_user_id"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) app_id = db.Column(StringUUID, nullable=False) app_model_config_id = db.Column(StringUUID, nullable=True) model_provider = db.Column(db.String(255), nullable=True) override_model_configs = db.Column(db.Text) model_id = db.Column(db.String(255), nullable=True) - mode = db.Column(db.String(255), nullable=False) + mode: Mapped[str] = mapped_column(db.String(255)) name = db.Column(db.String(255), nullable=False) summary = db.Column(db.Text) _inputs: Mapped[dict] = mapped_column("inputs", db.JSON) @@ -770,7 +770,7 @@ class Message(db.Model): # type: ignore[name-defined] db.Index("message_created_at_idx", "created_at"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) app_id = db.Column(StringUUID, nullable=False) model_provider = db.Column(db.String(255), nullable=True) model_id = db.Column(db.String(255), nullable=True) @@ -797,7 +797,7 @@ class Message(db.Model): # type: ignore[name-defined] from_source = db.Column(db.String(255), nullable=False) from_end_user_id: Mapped[Optional[str]] = db.Column(StringUUID) from_account_id: Mapped[Optional[str]] = db.Column(StringUUID) - created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column(db.DateTime, server_default=func.current_timestamp()) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) agent_based = db.Column(db.Boolean, nullable=False, server_default=db.text("false")) workflow_run_id = db.Column(StringUUID) @@ -1322,7 +1322,7 @@ class EndUser(UserMixin, db.Model): # type: ignore[name-defined] external_user_id = db.Column(db.String(255), nullable=True) name = db.Column(db.String(255)) is_anonymous = db.Column(db.Boolean, nullable=False, server_default=db.text("true")) - session_id = db.Column(db.String(255), nullable=False) + session_id: Mapped[str] = mapped_column() created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) diff --git a/api/models/workflow.py b/api/models/workflow.py index 880e044d07..78a7f8169f 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -392,40 +392,28 @@ class WorkflowRun(db.Model): # type: ignore[name-defined] db.Index("workflow_run_tenant_app_sequence_idx", "tenant_id", "app_id", "sequence_number"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) - tenant_id = db.Column(StringUUID, nullable=False) - app_id = db.Column(StringUUID, nullable=False) - sequence_number = db.Column(db.Integer, nullable=False) - workflow_id = db.Column(StringUUID, nullable=False) - type = db.Column(db.String(255), nullable=False) - triggered_from = db.Column(db.String(255), nullable=False) - version = db.Column(db.String(255), nullable=False) - graph = db.Column(db.Text) - inputs = db.Column(db.Text) - status = db.Column(db.String(255), nullable=False) # running, succeeded, failed, stopped, partial-succeeded + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) + tenant_id: Mapped[str] = mapped_column(StringUUID) + app_id: Mapped[str] = mapped_column(StringUUID) + sequence_number: Mapped[int] = mapped_column() + workflow_id: Mapped[str] = mapped_column(StringUUID) + type: Mapped[str] = mapped_column(db.String(255)) + triggered_from: Mapped[str] = mapped_column(db.String(255)) + version: Mapped[str] = mapped_column(db.String(255)) + graph: Mapped[str] = mapped_column(db.Text) + inputs: Mapped[str] = mapped_column(db.Text) + status: Mapped[str] = mapped_column(db.String(255)) # running, succeeded, failed, stopped, partial-succeeded outputs: Mapped[Optional[str]] = mapped_column(sa.Text, default="{}") - error = db.Column(db.Text) + error: Mapped[str] = mapped_column(db.Text) elapsed_time = db.Column(db.Float, nullable=False, server_default=db.text("0")) - total_tokens = db.Column(db.Integer, nullable=False, server_default=db.text("0")) + total_tokens: Mapped[int] = mapped_column(server_default=db.text("0")) total_steps = db.Column(db.Integer, server_default=db.text("0")) - created_by_role = db.Column(db.String(255), nullable=False) # account, end_user + created_by_role: Mapped[str] = mapped_column(db.String(255)) # account, end_user created_by = db.Column(StringUUID, nullable=False) created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) finished_at = db.Column(db.DateTime) exceptions_count = db.Column(db.Integer, server_default=db.text("0")) - @property - def created_by_account(self): - created_by_role = CreatedByRole(self.created_by_role) - return db.session.get(Account, self.created_by) if created_by_role == CreatedByRole.ACCOUNT else None - - @property - def created_by_end_user(self): - from models.model import EndUser - - created_by_role = CreatedByRole(self.created_by_role) - return db.session.get(EndUser, self.created_by) if created_by_role == CreatedByRole.END_USER else None - @property def graph_dict(self): return json.loads(self.graph) if self.graph else {} @@ -750,11 +738,11 @@ class WorkflowAppLog(db.Model): # type: ignore[name-defined] db.Index("workflow_app_log_app_idx", "tenant_id", "app_id"), ) - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) - tenant_id = db.Column(StringUUID, nullable=False) - app_id = db.Column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) + tenant_id: Mapped[str] = mapped_column(StringUUID) + app_id: Mapped[str] = mapped_column(StringUUID) workflow_id = db.Column(StringUUID, nullable=False) - workflow_run_id = db.Column(StringUUID, nullable=False) + workflow_run_id: Mapped[str] = mapped_column(StringUUID) created_from = db.Column(db.String(255), nullable=False) created_by_role = db.Column(db.String(255), nullable=False) created_by = db.Column(StringUUID, nullable=False) From 1885d3df9968d778664bacdde4ea7a4d9448070f Mon Sep 17 00:00:00 2001 From: Cemre Mengu Date: Wed, 25 Dec 2024 11:31:01 +0300 Subject: [PATCH 10/39] fix: unquote urls in docker-compose.yaml (#12072) Signed-off-by: -LAN- Co-authored-by: -LAN- --- docker/docker-compose.yaml | 56 +++++++++++++++++----------------- docker/generate_docker_compose | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 7122f4a6d0..e65ca45858 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -15,15 +15,15 @@ x-shared-env: &shared-api-worker-env LOG_FILE: ${LOG_FILE:-/app/logs/server.log} LOG_FILE_MAX_SIZE: ${LOG_FILE_MAX_SIZE:-20} LOG_FILE_BACKUP_COUNT: ${LOG_FILE_BACKUP_COUNT:-5} - LOG_DATEFORMAT: ${LOG_DATEFORMAT:-"%Y-%m-%d %H:%M:%S"} + LOG_DATEFORMAT: ${LOG_DATEFORMAT:-%Y-%m-%d %H:%M:%S} LOG_TZ: ${LOG_TZ:-UTC} DEBUG: ${DEBUG:-false} FLASK_DEBUG: ${FLASK_DEBUG:-false} SECRET_KEY: ${SECRET_KEY:-sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U} INIT_PASSWORD: ${INIT_PASSWORD:-} DEPLOY_ENV: ${DEPLOY_ENV:-PRODUCTION} - CHECK_UPDATE_URL: ${CHECK_UPDATE_URL:-"https://updates.dify.ai"} - OPENAI_API_BASE: ${OPENAI_API_BASE:-"https://api.openai.com/v1"} + CHECK_UPDATE_URL: ${CHECK_UPDATE_URL:-https://updates.dify.ai} + OPENAI_API_BASE: ${OPENAI_API_BASE:-https://api.openai.com/v1} MIGRATION_ENABLED: ${MIGRATION_ENABLED:-true} FILES_ACCESS_TIMEOUT: ${FILES_ACCESS_TIMEOUT:-300} ACCESS_TOKEN_EXPIRE_MINUTES: ${ACCESS_TOKEN_EXPIRE_MINUTES:-60} @@ -69,7 +69,7 @@ x-shared-env: &shared-api-worker-env REDIS_USE_CLUSTERS: ${REDIS_USE_CLUSTERS:-false} REDIS_CLUSTERS: ${REDIS_CLUSTERS:-} REDIS_CLUSTERS_PASSWORD: ${REDIS_CLUSTERS_PASSWORD:-} - CELERY_BROKER_URL: ${CELERY_BROKER_URL:-"redis://:difyai123456@redis:6379/1"} + CELERY_BROKER_URL: ${CELERY_BROKER_URL:-redis://:difyai123456@redis:6379/1} BROKER_USE_SSL: ${BROKER_USE_SSL:-false} CELERY_USE_SENTINEL: ${CELERY_USE_SENTINEL:-false} CELERY_SENTINEL_MASTER_NAME: ${CELERY_SENTINEL_MASTER_NAME:-} @@ -88,13 +88,13 @@ x-shared-env: &shared-api-worker-env AZURE_BLOB_ACCOUNT_NAME: ${AZURE_BLOB_ACCOUNT_NAME:-difyai} AZURE_BLOB_ACCOUNT_KEY: ${AZURE_BLOB_ACCOUNT_KEY:-difyai} AZURE_BLOB_CONTAINER_NAME: ${AZURE_BLOB_CONTAINER_NAME:-difyai-container} - AZURE_BLOB_ACCOUNT_URL: ${AZURE_BLOB_ACCOUNT_URL:-"https://.blob.core.windows.net"} + AZURE_BLOB_ACCOUNT_URL: ${AZURE_BLOB_ACCOUNT_URL:-https://.blob.core.windows.net} GOOGLE_STORAGE_BUCKET_NAME: ${GOOGLE_STORAGE_BUCKET_NAME:-your-bucket-name} GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64: ${GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64:-your-google-service-account-json-base64-string} ALIYUN_OSS_BUCKET_NAME: ${ALIYUN_OSS_BUCKET_NAME:-your-bucket-name} ALIYUN_OSS_ACCESS_KEY: ${ALIYUN_OSS_ACCESS_KEY:-your-access-key} ALIYUN_OSS_SECRET_KEY: ${ALIYUN_OSS_SECRET_KEY:-your-secret-key} - ALIYUN_OSS_ENDPOINT: ${ALIYUN_OSS_ENDPOINT:-"https://oss-ap-southeast-1-internal.aliyuncs.com"} + ALIYUN_OSS_ENDPOINT: ${ALIYUN_OSS_ENDPOINT:-https://oss-ap-southeast-1-internal.aliyuncs.com} ALIYUN_OSS_REGION: ${ALIYUN_OSS_REGION:-ap-southeast-1} ALIYUN_OSS_AUTH_VERSION: ${ALIYUN_OSS_AUTH_VERSION:-v4} ALIYUN_OSS_PATH: ${ALIYUN_OSS_PATH:-your-path} @@ -103,7 +103,7 @@ x-shared-env: &shared-api-worker-env TENCENT_COS_SECRET_ID: ${TENCENT_COS_SECRET_ID:-your-secret-id} TENCENT_COS_REGION: ${TENCENT_COS_REGION:-your-region} TENCENT_COS_SCHEME: ${TENCENT_COS_SCHEME:-your-scheme} - OCI_ENDPOINT: ${OCI_ENDPOINT:-"https://objectstorage.us-ashburn-1.oraclecloud.com"} + OCI_ENDPOINT: ${OCI_ENDPOINT:-https://objectstorage.us-ashburn-1.oraclecloud.com} OCI_BUCKET_NAME: ${OCI_BUCKET_NAME:-your-bucket-name} OCI_ACCESS_KEY: ${OCI_ACCESS_KEY:-your-access-key} OCI_SECRET_KEY: ${OCI_SECRET_KEY:-your-secret-key} @@ -125,14 +125,14 @@ x-shared-env: &shared-api-worker-env SUPABASE_API_KEY: ${SUPABASE_API_KEY:-your-access-key} SUPABASE_URL: ${SUPABASE_URL:-your-server-url} VECTOR_STORE: ${VECTOR_STORE:-weaviate} - WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-"http://weaviate:8080"} + WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-http://weaviate:8080} WEAVIATE_API_KEY: ${WEAVIATE_API_KEY:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih} - QDRANT_URL: ${QDRANT_URL:-"http://qdrant:6333"} + QDRANT_URL: ${QDRANT_URL:-http://qdrant:6333} QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456} QDRANT_CLIENT_TIMEOUT: ${QDRANT_CLIENT_TIMEOUT:-20} QDRANT_GRPC_ENABLED: ${QDRANT_GRPC_ENABLED:-false} QDRANT_GRPC_PORT: ${QDRANT_GRPC_PORT:-6334} - MILVUS_URI: ${MILVUS_URI:-"http://127.0.0.1:19530"} + MILVUS_URI: ${MILVUS_URI:-http://127.0.0.1:19530} MILVUS_TOKEN: ${MILVUS_TOKEN:-} MILVUS_USER: ${MILVUS_USER:-root} MILVUS_PASSWORD: ${MILVUS_PASSWORD:-Milvus} @@ -142,7 +142,7 @@ x-shared-env: &shared-api-worker-env MYSCALE_PASSWORD: ${MYSCALE_PASSWORD:-} MYSCALE_DATABASE: ${MYSCALE_DATABASE:-dify} MYSCALE_FTS_PARAMS: ${MYSCALE_FTS_PARAMS:-} - COUCHBASE_CONNECTION_STRING: ${COUCHBASE_CONNECTION_STRING:-"couchbase://couchbase-server"} + COUCHBASE_CONNECTION_STRING: ${COUCHBASE_CONNECTION_STRING:-couchbase://couchbase-server} COUCHBASE_USER: ${COUCHBASE_USER:-Administrator} COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password} COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings} @@ -176,15 +176,15 @@ x-shared-env: &shared-api-worker-env TIDB_VECTOR_USER: ${TIDB_VECTOR_USER:-} TIDB_VECTOR_PASSWORD: ${TIDB_VECTOR_PASSWORD:-} TIDB_VECTOR_DATABASE: ${TIDB_VECTOR_DATABASE:-dify} - TIDB_ON_QDRANT_URL: ${TIDB_ON_QDRANT_URL:-"http://127.0.0.1"} + TIDB_ON_QDRANT_URL: ${TIDB_ON_QDRANT_URL:-http://127.0.0.1} TIDB_ON_QDRANT_API_KEY: ${TIDB_ON_QDRANT_API_KEY:-dify} TIDB_ON_QDRANT_CLIENT_TIMEOUT: ${TIDB_ON_QDRANT_CLIENT_TIMEOUT:-20} TIDB_ON_QDRANT_GRPC_ENABLED: ${TIDB_ON_QDRANT_GRPC_ENABLED:-false} TIDB_ON_QDRANT_GRPC_PORT: ${TIDB_ON_QDRANT_GRPC_PORT:-6334} TIDB_PUBLIC_KEY: ${TIDB_PUBLIC_KEY:-dify} TIDB_PRIVATE_KEY: ${TIDB_PRIVATE_KEY:-dify} - TIDB_API_URL: ${TIDB_API_URL:-"http://127.0.0.1"} - TIDB_IAM_API_URL: ${TIDB_IAM_API_URL:-"http://127.0.0.1"} + TIDB_API_URL: ${TIDB_API_URL:-http://127.0.0.1} + TIDB_IAM_API_URL: ${TIDB_IAM_API_URL:-http://127.0.0.1} TIDB_REGION: ${TIDB_REGION:-regions/aws-us-east-1} TIDB_PROJECT_ID: ${TIDB_PROJECT_ID:-dify} TIDB_SPEND_LIMIT: ${TIDB_SPEND_LIMIT:-100} @@ -209,7 +209,7 @@ x-shared-env: &shared-api-worker-env OPENSEARCH_USER: ${OPENSEARCH_USER:-admin} OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin} OPENSEARCH_SECURE: ${OPENSEARCH_SECURE:-true} - TENCENT_VECTOR_DB_URL: ${TENCENT_VECTOR_DB_URL:-"http://127.0.0.1"} + TENCENT_VECTOR_DB_URL: ${TENCENT_VECTOR_DB_URL:-http://127.0.0.1} TENCENT_VECTOR_DB_API_KEY: ${TENCENT_VECTOR_DB_API_KEY:-dify} TENCENT_VECTOR_DB_TIMEOUT: ${TENCENT_VECTOR_DB_TIMEOUT:-30} TENCENT_VECTOR_DB_USERNAME: ${TENCENT_VECTOR_DB_USERNAME:-dify} @@ -221,7 +221,7 @@ x-shared-env: &shared-api-worker-env ELASTICSEARCH_USERNAME: ${ELASTICSEARCH_USERNAME:-elastic} ELASTICSEARCH_PASSWORD: ${ELASTICSEARCH_PASSWORD:-elastic} KIBANA_PORT: ${KIBANA_PORT:-5601} - BAIDU_VECTOR_DB_ENDPOINT: ${BAIDU_VECTOR_DB_ENDPOINT:-"http://127.0.0.1:5287"} + BAIDU_VECTOR_DB_ENDPOINT: ${BAIDU_VECTOR_DB_ENDPOINT:-http://127.0.0.1:5287} BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS: ${BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS:-30000} BAIDU_VECTOR_DB_ACCOUNT: ${BAIDU_VECTOR_DB_ACCOUNT:-root} BAIDU_VECTOR_DB_API_KEY: ${BAIDU_VECTOR_DB_API_KEY:-dify} @@ -235,7 +235,7 @@ x-shared-env: &shared-api-worker-env VIKINGDB_SCHEMA: ${VIKINGDB_SCHEMA:-http} VIKINGDB_CONNECTION_TIMEOUT: ${VIKINGDB_CONNECTION_TIMEOUT:-30} VIKINGDB_SOCKET_TIMEOUT: ${VIKINGDB_SOCKET_TIMEOUT:-30} - LINDORM_URL: ${LINDORM_URL:-"http://lindorm:30070"} + LINDORM_URL: ${LINDORM_URL:-http://lindorm:30070} LINDORM_USERNAME: ${LINDORM_USERNAME:-lindorm} LINDORM_PASSWORD: ${LINDORM_PASSWORD:-lindorm} OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase} @@ -245,7 +245,7 @@ x-shared-env: &shared-api-worker-env OCEANBASE_VECTOR_DATABASE: ${OCEANBASE_VECTOR_DATABASE:-test} OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} - UPSTASH_VECTOR_URL: ${UPSTASH_VECTOR_URL:-"https://xxx-vector.upstash.io"} + UPSTASH_VECTOR_URL: ${UPSTASH_VECTOR_URL:-https://xxx-vector.upstash.io} UPSTASH_VECTOR_TOKEN: ${UPSTASH_VECTOR_TOKEN:-dify} UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15} UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5} @@ -270,7 +270,7 @@ x-shared-env: &shared-api-worker-env NOTION_INTERNAL_SECRET: ${NOTION_INTERNAL_SECRET:-} MAIL_TYPE: ${MAIL_TYPE:-resend} MAIL_DEFAULT_SEND_FROM: ${MAIL_DEFAULT_SEND_FROM:-} - RESEND_API_URL: ${RESEND_API_URL:-"https://api.resend.com"} + RESEND_API_URL: ${RESEND_API_URL:-https://api.resend.com} RESEND_API_KEY: ${RESEND_API_KEY:-your-resend-api-key} SMTP_SERVER: ${SMTP_SERVER:-} SMTP_PORT: ${SMTP_PORT:-465} @@ -281,7 +281,7 @@ x-shared-env: &shared-api-worker-env INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-4000} INVITE_EXPIRY_HOURS: ${INVITE_EXPIRY_HOURS:-72} RESET_PASSWORD_TOKEN_EXPIRY_MINUTES: ${RESET_PASSWORD_TOKEN_EXPIRY_MINUTES:-5} - CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-"http://sandbox:8194"} + CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194} CODE_EXECUTION_API_KEY: ${CODE_EXECUTION_API_KEY:-dify-sandbox} CODE_MAX_NUMBER: ${CODE_MAX_NUMBER:-9223372036854775807} CODE_MIN_NUMBER: ${CODE_MIN_NUMBER:--9223372036854775808} @@ -303,8 +303,8 @@ x-shared-env: &shared-api-worker-env WORKFLOW_FILE_UPLOAD_LIMIT: ${WORKFLOW_FILE_UPLOAD_LIMIT:-10} HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760} HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576} - SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-"http://ssrf_proxy:3128"} - SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-"http://ssrf_proxy:3128"} + SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128} + SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128} TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} PGUSER: ${PGUSER:-${DB_USERNAME}} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-${DB_PASSWORD}} @@ -314,8 +314,8 @@ x-shared-env: &shared-api-worker-env SANDBOX_GIN_MODE: ${SANDBOX_GIN_MODE:-release} SANDBOX_WORKER_TIMEOUT: ${SANDBOX_WORKER_TIMEOUT:-15} SANDBOX_ENABLE_NETWORK: ${SANDBOX_ENABLE_NETWORK:-true} - SANDBOX_HTTP_PROXY: ${SANDBOX_HTTP_PROXY:-"http://ssrf_proxy:3128"} - SANDBOX_HTTPS_PROXY: ${SANDBOX_HTTPS_PROXY:-"http://ssrf_proxy:3128"} + SANDBOX_HTTP_PROXY: ${SANDBOX_HTTP_PROXY:-http://ssrf_proxy:3128} + SANDBOX_HTTPS_PROXY: ${SANDBOX_HTTPS_PROXY:-http://ssrf_proxy:3128} SANDBOX_PORT: ${SANDBOX_PORT:-8194} WEAVIATE_PERSISTENCE_DATA_PATH: ${WEAVIATE_PERSISTENCE_DATA_PATH:-/var/lib/weaviate} WEAVIATE_QUERY_DEFAULTS_LIMIT: ${WEAVIATE_QUERY_DEFAULTS_LIMIT:-25} @@ -338,8 +338,8 @@ x-shared-env: &shared-api-worker-env ETCD_SNAPSHOT_COUNT: ${ETCD_SNAPSHOT_COUNT:-50000} MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin} MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin} - ETCD_ENDPOINTS: ${ETCD_ENDPOINTS:-"etcd:2379"} - MINIO_ADDRESS: ${MINIO_ADDRESS:-"minio:9000"} + ETCD_ENDPOINTS: ${ETCD_ENDPOINTS:-etcd:2379} + MINIO_ADDRESS: ${MINIO_ADDRESS:-minio:9000} MILVUS_AUTHORIZATION_ENABLED: ${MILVUS_AUTHORIZATION_ENABLED:-true} PGVECTOR_PGUSER: ${PGVECTOR_PGUSER:-postgres} PGVECTOR_POSTGRES_PASSWORD: ${PGVECTOR_POSTGRES_PASSWORD:-difyai123456} @@ -360,7 +360,7 @@ x-shared-env: &shared-api-worker-env NGINX_SSL_PORT: ${NGINX_SSL_PORT:-443} NGINX_SSL_CERT_FILENAME: ${NGINX_SSL_CERT_FILENAME:-dify.crt} NGINX_SSL_CERT_KEY_FILENAME: ${NGINX_SSL_CERT_KEY_FILENAME:-dify.key} - NGINX_SSL_PROTOCOLS: ${NGINX_SSL_PROTOCOLS:-"TLSv1.1 TLSv1.2 TLSv1.3"} + NGINX_SSL_PROTOCOLS: ${NGINX_SSL_PROTOCOLS:-TLSv1.1 TLSv1.2 TLSv1.3} NGINX_WORKER_PROCESSES: ${NGINX_WORKER_PROCESSES:-auto} NGINX_CLIENT_MAX_BODY_SIZE: ${NGINX_CLIENT_MAX_BODY_SIZE:-15M} NGINX_KEEPALIVE_TIMEOUT: ${NGINX_KEEPALIVE_TIMEOUT:-65} @@ -374,7 +374,7 @@ x-shared-env: &shared-api-worker-env SSRF_COREDUMP_DIR: ${SSRF_COREDUMP_DIR:-/var/spool/squid} SSRF_REVERSE_PROXY_PORT: ${SSRF_REVERSE_PROXY_PORT:-8194} SSRF_SANDBOX_HOST: ${SSRF_SANDBOX_HOST:-sandbox} - COMPOSE_PROFILES: ${COMPOSE_PROFILES:-"${VECTOR_STORE:-weaviate}"} + COMPOSE_PROFILES: ${COMPOSE_PROFILES:-${VECTOR_STORE:-weaviate}} EXPOSE_NGINX_PORT: ${EXPOSE_NGINX_PORT:-80} EXPOSE_NGINX_SSL_PORT: ${EXPOSE_NGINX_SSL_PORT:-443} POSITION_TOOL_PINS: ${POSITION_TOOL_PINS:-} diff --git a/docker/generate_docker_compose b/docker/generate_docker_compose index 54b6d55217..dc4460f96c 100755 --- a/docker/generate_docker_compose +++ b/docker/generate_docker_compose @@ -43,7 +43,7 @@ def generate_shared_env_block(env_vars, anchor_name="shared-api-worker-env"): else: # If default value contains special characters, wrap it in quotes if re.search(r"[:\s]", default): - default = f'"{default}"' + default = f"{default}" lines.append(f" {key}: ${{{key}:-{default}}}") return "\n".join(lines) From 39ace9bdee5426f09197d7d8ab0bd347db6fc1c1 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Wed, 25 Dec 2024 16:34:38 +0800 Subject: [PATCH 11/39] =?UTF-8?q?fix(app=5Fgenerator):=20improve=20error?= =?UTF-8?q?=20handling=20for=20closed=20file=20I/O=20operat=E2=80=A6=20(#1?= =?UTF-8?q?2073)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: -LAN- --- api/core/app/apps/advanced_chat/app_generator.py | 2 +- api/core/app/apps/message_based_app_generator.py | 2 +- api/core/app/apps/workflow/app_generator.py | 2 +- api/core/tools/tool_engine.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index a18b40712b..b006de2369 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -383,7 +383,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): try: return generate_task_pipeline.process() except ValueError as e: - if e.args[0] == "I/O operation on closed file.": # ignore this error + if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: logger.exception(f"Failed to process generate task pipeline, conversation_id: {conversation.id}") diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index dcd9463b8a..4e3aa840ce 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -76,7 +76,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): try: return generate_task_pipeline.process() except ValueError as e: - if e.args[0] == "I/O operation on closed file.": # ignore this error + if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: logger.exception(f"Failed to handle response, conversation_id: {conversation.id}") diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 1d5f21b9e0..42bc17277f 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -309,7 +309,7 @@ class WorkflowAppGenerator(BaseAppGenerator): try: return generate_task_pipeline.process() except ValueError as e: - if e.args[0] == "I/O operation on closed file.": # ignore this error + if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: logger.exception( diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py index 425a892527..f7a8ed63f4 100644 --- a/api/core/tools/tool_engine.py +++ b/api/core/tools/tool_engine.py @@ -113,7 +113,7 @@ class ToolEngine: error_response = f"tool invoke error: {e}" agent_tool_callback.on_tool_error(e) except ToolEngineInvokeError as e: - meta = e.args[0] + meta = e.meta error_response = f"tool invoke error: {meta.error}" agent_tool_callback.on_tool_error(e) return error_response, [], meta From 2b2263a349326e21da813640435e8fcd4c9ce536 Mon Sep 17 00:00:00 2001 From: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Date: Wed, 25 Dec 2024 18:17:15 +0800 Subject: [PATCH 12/39] Feat/parent child retrieval (#12086) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: yihong0618 Signed-off-by: -LAN- Co-authored-by: AkaraChen Co-authored-by: nite-knite Co-authored-by: Joel Co-authored-by: Warren Chen Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: Yi Xiao <54782454+YIXIAO0@users.noreply.github.com> Co-authored-by: yihong Co-authored-by: -LAN- Co-authored-by: KVOJJJin Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com> Co-authored-by: Charlie.Wei Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: huayaoyue6 Co-authored-by: kurokobo Co-authored-by: Matsuda Co-authored-by: shirochan Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: Huỳnh Gia Bôi Co-authored-by: Julian Huynh Co-authored-by: Hash Brown Co-authored-by: 非法操作 Co-authored-by: Kazuki Takamatsu Co-authored-by: Trey Dong <1346650911@qq.com> Co-authored-by: VoidIsVoid <343750470@qq.com> Co-authored-by: Gimling Co-authored-by: xiandan-erizo Co-authored-by: Muneyuki Noguchi Co-authored-by: zhaobingshuang <1475195565@qq.com> Co-authored-by: zhaobs Co-authored-by: suzuki.sh Co-authored-by: Yingchun Lai Co-authored-by: huanshare Co-authored-by: huanshare Co-authored-by: orangeclk Co-authored-by: 문정현 <120004247+JungHyunMoon@users.noreply.github.com> Co-authored-by: barabicu Co-authored-by: Wei Mingzhi Co-authored-by: Paul van Oorschot <20116814+pvoo@users.noreply.github.com> Co-authored-by: zkyTech Co-authored-by: zhangkunyuan Co-authored-by: Tommy <34446820+Asterovim@users.noreply.github.com> Co-authored-by: zxhlyh Co-authored-by: Novice <857526207@qq.com> Co-authored-by: Novice Lee Co-authored-by: Novice Lee Co-authored-by: zxhlyh <16177003+zxhlyh@users.noreply.github.com> Co-authored-by: liuzhenghua <1090179900@qq.com> Co-authored-by: Jiang <65766008+AlwaysBluer@users.noreply.github.com> Co-authored-by: jiangzhijie Co-authored-by: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Co-authored-by: Alok Shrivastwa Co-authored-by: Alok Shrivastwa Co-authored-by: JasonVV Co-authored-by: Hiroshi Fujita Co-authored-by: Kevin9703 <51311316+Kevin9703@users.noreply.github.com> Co-authored-by: NFish Co-authored-by: Junyan Qin <1010553892@qq.com> Co-authored-by: IWAI, Masaharu Co-authored-by: IWAI, Masaharu Co-authored-by: Bowen Liang Co-authored-by: luckylhb90 Co-authored-by: hobo.l Co-authored-by: douxc <7553076+douxc@users.noreply.github.com> --- api/poetry.lock | 13 +- .../[datasetId]/layout.tsx | 156 +- .../[datasetId]/settings/page.tsx | 6 +- .../[datasetId]/style.module.css | 9 - web/app/(commonLayout)/datasets/Container.tsx | 17 +- .../(commonLayout)/datasets/DatasetCard.tsx | 10 +- .../components/app-sidebar/dataset-info.tsx | 45 + web/app/components/app-sidebar/index.tsx | 30 +- .../dataset-config/settings-modal/index.tsx | 16 +- .../components/base/app-icon/style.module.css | 19 + .../base/auto-height-textarea/common.tsx | 2 + web/app/components/base/badge.tsx | 6 +- .../components/base/checkbox/assets/mixed.svg | 5 + .../components/base/checkbox/index.module.css | 10 + web/app/components/base/checkbox/index.tsx | 5 +- web/app/components/base/divider/index.tsx | 2 +- .../components/base/divider/with-label.tsx | 23 + web/app/components/base/drawer/index.tsx | 4 +- .../base/file-uploader/file-type-icon.tsx | 7 +- .../icons/assets/public/knowledge/chunk.svg | 13 + .../assets/public/knowledge/collapse.svg | 9 + .../assets/public/knowledge/general-type.svg | 5 + .../knowledge/layout-right-2-line-mod.svg | 5 + .../public/knowledge/parent-child-type.svg | 7 + .../assets/public/knowledge/selection-mod.svg | 13 + .../icons/src/public/knowledge/Chunk.json | 116 ++ .../base/icons/src/public/knowledge/Chunk.tsx | 16 + .../icons/src/public/knowledge/Collapse.json | 62 + .../icons/src/public/knowledge/Collapse.tsx | 16 + .../src/public/knowledge/GeneralType.json | 38 + .../src/public/knowledge/GeneralType.tsx | 16 + .../public/knowledge/LayoutRight2LineMod.json | 36 + .../public/knowledge/LayoutRight2LineMod.tsx | 16 + .../src/public/knowledge/ParentChildType.json | 56 + .../src/public/knowledge/ParentChildType.tsx | 16 + .../src/public/knowledge/SelectionMod.json | 116 ++ .../src/public/knowledge/SelectionMod.tsx | 16 + .../base/icons/src/public/knowledge/index.ts | 6 + .../base/icons/src/vender/features/index.ts | 2 +- .../components/base/input-number/index.tsx | 86 + .../base/linked-apps-panel/index.tsx | 62 + web/app/components/base/pagination/index.tsx | 2 +- web/app/components/base/param-item/index.tsx | 36 +- web/app/components/base/radio-card/index.tsx | 23 +- .../components/base/retry-button/index.tsx | 85 - .../base/retry-button/style.module.css | 4 - .../base/simple-pie-chart/index.tsx | 7 +- web/app/components/base/skeleton/index.tsx | 13 +- web/app/components/base/switch/index.tsx | 3 + web/app/components/base/tag-input/index.tsx | 51 +- web/app/components/base/toast/index.tsx | 19 +- web/app/components/base/tooltip/index.tsx | 4 +- .../billing/priority-label/index.tsx | 19 +- web/app/components/datasets/chunk.tsx | 54 + .../datasets/common/chunking-mode-label.tsx | 29 + .../datasets/common/document-file-icon.tsx | 40 + .../common/document-picker/document-list.tsx | 42 + .../datasets/common/document-picker/index.tsx | 118 ++ .../preview-document-picker.tsx | 82 + .../auto-disabled-document.tsx | 38 + .../index-failed.tsx | 69 + .../status-with-action.tsx | 65 + .../index.tsx | 27 +- .../common/retrieval-method-config/index.tsx | 89 +- .../common/retrieval-method-info/index.tsx | 20 +- .../common/retrieval-param-config/index.tsx | 38 +- .../datasets/create/assets/family-mod.svg | 6 + .../create/assets/file-list-3-fill.svg | 5 + .../datasets/create/assets/gold.svg | 4 + .../datasets/create/assets/note-mod.svg | 5 + .../create/assets/option-card-effect-blue.svg | 12 + .../assets/option-card-effect-orange.svg | 12 + .../assets/option-card-effect-purple.svg | 12 + .../create/assets/pattern-recognition-mod.svg | 12 + .../datasets/create/assets/piggy-bank-mod.svg | 7 + .../create/assets/progress-indicator.svg | 8 + .../datasets/create/assets/rerank.svg | 13 + .../datasets/create/assets/research-mod.svg | 6 + .../datasets/create/assets/selection-mod.svg | 12 + .../create/assets/setting-gear-mod.svg | 4 + .../create/embedding-process/index.module.css | 52 +- .../create/embedding-process/index.tsx | 192 +- .../create/file-preview/index.module.css | 3 +- .../datasets/create/file-preview/index.tsx | 4 +- .../create/file-uploader/index.module.css | 67 +- .../datasets/create/file-uploader/index.tsx | 84 +- web/app/components/datasets/create/icons.ts | 16 + web/app/components/datasets/create/index.tsx | 58 +- .../create/notion-page-preview/index.tsx | 4 +- .../datasets/create/step-one/index.module.css | 38 +- .../datasets/create/step-one/index.tsx | 293 +-- .../datasets/create/step-three/index.tsx | 53 +- .../datasets/create/step-two/index.module.css | 38 +- .../datasets/create/step-two/index.tsx | 1579 +++++++++-------- .../datasets/create/step-two/inputs.tsx | 77 + .../create/step-two/language-select/index.tsx | 33 +- .../datasets/create/step-two/option-card.tsx | 98 + .../datasets/create/stepper/index.tsx | 27 + .../datasets/create/stepper/step.tsx | 46 + .../datasets/create/top-bar/index.tsx | 41 + .../create/website/base/error-message.tsx | 2 +- .../create/website/jina-reader/index.tsx | 1 - .../datasets/create/website/preview.tsx | 4 +- .../detail/batch-modal/csv-downloader.tsx | 14 +- .../documents/detail/batch-modal/index.tsx | 4 +- .../detail/completed/InfiniteVirtualList.tsx | 98 - .../detail/completed/SegmentCard.tsx | 20 +- .../detail/completed/child-segment-detail.tsx | 134 ++ .../detail/completed/child-segment-list.tsx | 195 ++ .../completed/common/action-buttons.tsx | 86 + .../detail/completed/common/add-another.tsx | 32 + .../detail/completed/common/batch-action.tsx | 103 ++ .../detail/completed/common/chunk-content.tsx | 192 ++ .../documents/detail/completed/common/dot.tsx | 11 + .../detail/completed/common/empty.tsx | 78 + .../completed/common/full-screen-drawer.tsx | 35 + .../detail/completed/common/keywords.tsx | 47 + .../completed/common/regeneration-modal.tsx | 131 ++ .../completed/common/segment-index-tag.tsx | 40 + .../documents/detail/completed/common/tag.tsx | 15 + .../detail/completed/display-toggle.tsx | 40 + .../documents/detail/completed/index.tsx | 877 +++++---- .../detail/completed/new-child-segment.tsx | 175 ++ .../detail/completed/segment-card.tsx | 280 +++ .../detail/completed/segment-detail.tsx | 190 ++ .../detail/completed/segment-list.tsx | 116 ++ .../skeleton/full-doc-list-skeleton.tsx | 25 + .../skeleton/general-list-skeleton.tsx | 74 + .../skeleton/paragraph-list-skeleton.tsx | 76 + .../skeleton/parent-chunk-card-skeleton.tsx | 45 + .../detail/completed/status-item.tsx | 22 + .../detail/completed/style.module.css | 13 +- .../documents/detail/embedding/index.tsx | 302 ++-- .../detail/embedding/skeleton/index.tsx | 66 + .../datasets/documents/detail/index.tsx | 219 ++- .../documents/detail/metadata/index.tsx | 18 +- .../detail/metadata/style.module.css | 13 +- .../documents/detail/new-segment-modal.tsx | 156 -- .../datasets/documents/detail/new-segment.tsx | 208 +++ .../documents/detail/segment-add/index.tsx | 114 +- .../documents/detail/settings/index.tsx | 40 +- .../documents/detail/style.module.css | 10 +- .../components/datasets/documents/index.tsx | 77 +- .../components/datasets/documents/list.tsx | 440 +++-- .../datasets/documents/style.module.css | 17 +- .../formatted-text/flavours/edit-slice.tsx | 115 ++ .../formatted-text/flavours/preview-slice.tsx | 56 + .../formatted-text/flavours/shared.tsx | 60 + .../datasets/formatted-text/flavours/type.ts | 5 + .../datasets/formatted-text/formatted.tsx | 12 + .../components/child-chunks-item.tsx | 30 + .../components/chunk-detail-modal.tsx | 89 + .../hit-testing/components/result-item.tsx | 121 ++ .../datasets/hit-testing/components/score.tsx | 25 + .../datasets/hit-testing/hit-detail.tsx | 68 - .../components/datasets/hit-testing/index.tsx | 144 +- .../hit-testing/modify-retrieval-modal.tsx | 8 +- .../datasets/hit-testing/style.module.css | 36 +- .../datasets/hit-testing/textarea.tsx | 59 +- .../utils/extension-to-file-type.ts | 31 + web/app/components/datasets/loading.tsx | 0 .../components/datasets/preview/container.tsx | 29 + .../components/datasets/preview/header.tsx | 23 + web/app/components/datasets/preview/index.tsx | 0 .../datasets/settings/form/index.tsx | 130 +- .../index-method-radio/index.module.css | 54 - .../settings/index-method-radio/index.tsx | 105 +- .../model-selector/model-trigger.tsx | 16 +- web/app/components/header/indicator/index.tsx | 18 +- web/context/dataset-detail.ts | 13 +- web/hooks/use-metadata.ts | 4 +- web/i18n/en-US/common.ts | 8 +- web/i18n/en-US/dataset-creation.ts | 50 +- web/i18n/en-US/dataset-documents.ts | 66 +- web/i18n/en-US/dataset-hit-testing.ts | 14 +- web/i18n/en-US/dataset-settings.ts | 9 +- web/i18n/en-US/dataset.ts | 22 +- web/i18n/zh-Hans/common.ts | 8 +- web/i18n/zh-Hans/dataset-creation.ts | 29 +- web/i18n/zh-Hans/dataset-documents.ts | 54 +- web/i18n/zh-Hans/dataset-hit-testing.ts | 10 +- web/i18n/zh-Hans/dataset-settings.ts | 9 +- web/i18n/zh-Hans/dataset.ts | 22 +- web/i18n/zh-Hant/dataset-creation.ts | 3 +- web/models/datasets.ts | 111 +- web/package.json | 2 + web/public/screenshots/Light/Agent.png | Bin 0 -> 36209 bytes web/public/screenshots/Light/Agent@2x.png | Bin 0 -> 103245 bytes web/public/screenshots/Light/Agent@3x.png | Bin 0 -> 209674 bytes web/public/screenshots/Light/ChatFlow.png | Bin 0 -> 28423 bytes web/public/screenshots/Light/ChatFlow@2x.png | Bin 0 -> 81229 bytes web/public/screenshots/Light/ChatFlow@3x.png | Bin 0 -> 160820 bytes web/public/screenshots/Light/Chatbot.png | Bin 0 -> 31633 bytes web/public/screenshots/Light/Chatbot@2x.png | Bin 0 -> 84515 bytes web/public/screenshots/Light/Chatbot@3x.png | Bin 0 -> 142013 bytes web/public/screenshots/Light/Chatflow.png | Bin 0 -> 28423 bytes web/public/screenshots/Light/Chatflow@2x.png | Bin 0 -> 81229 bytes web/public/screenshots/Light/Chatflow@3x.png | Bin 0 -> 160820 bytes .../screenshots/Light/TextGenerator.png | Bin 0 -> 26627 bytes .../screenshots/Light/TextGenerator@2x.png | Bin 0 -> 63818 bytes .../screenshots/Light/TextGenerator@3x.png | Bin 0 -> 122391 bytes web/public/screenshots/Light/Workflow.png | Bin 0 -> 22110 bytes web/public/screenshots/Light/Workflow@2x.png | Bin 0 -> 62688 bytes web/public/screenshots/Light/Workflow@3x.png | Bin 0 -> 147073 bytes web/service/datasets.ts | 71 - web/service/knowledge/use-create-dataset.ts | 223 +++ web/service/knowledge/use-dateset.ts | 0 web/service/knowledge/use-document.ts | 124 ++ web/service/knowledge/use-hit-testing.ts | 0 web/service/knowledge/use-import.ts | 0 web/service/knowledge/use-segment.ts | 169 ++ web/tailwind.config.js | 23 +- web/themes/manual-dark.css | 8 + web/themes/manual-light.css | 8 + web/utils/time.ts | 12 + web/yarn.lock | 10 + 216 files changed, 9066 insertions(+), 3116 deletions(-) create mode 100644 web/app/components/app-sidebar/dataset-info.tsx create mode 100644 web/app/components/base/app-icon/style.module.css create mode 100644 web/app/components/base/checkbox/assets/mixed.svg create mode 100644 web/app/components/base/checkbox/index.module.css create mode 100644 web/app/components/base/divider/with-label.tsx create mode 100644 web/app/components/base/icons/assets/public/knowledge/chunk.svg create mode 100644 web/app/components/base/icons/assets/public/knowledge/collapse.svg create mode 100644 web/app/components/base/icons/assets/public/knowledge/general-type.svg create mode 100644 web/app/components/base/icons/assets/public/knowledge/layout-right-2-line-mod.svg create mode 100644 web/app/components/base/icons/assets/public/knowledge/parent-child-type.svg create mode 100644 web/app/components/base/icons/assets/public/knowledge/selection-mod.svg create mode 100644 web/app/components/base/icons/src/public/knowledge/Chunk.json create mode 100644 web/app/components/base/icons/src/public/knowledge/Chunk.tsx create mode 100644 web/app/components/base/icons/src/public/knowledge/Collapse.json create mode 100644 web/app/components/base/icons/src/public/knowledge/Collapse.tsx create mode 100644 web/app/components/base/icons/src/public/knowledge/GeneralType.json create mode 100644 web/app/components/base/icons/src/public/knowledge/GeneralType.tsx create mode 100644 web/app/components/base/icons/src/public/knowledge/LayoutRight2LineMod.json create mode 100644 web/app/components/base/icons/src/public/knowledge/LayoutRight2LineMod.tsx create mode 100644 web/app/components/base/icons/src/public/knowledge/ParentChildType.json create mode 100644 web/app/components/base/icons/src/public/knowledge/ParentChildType.tsx create mode 100644 web/app/components/base/icons/src/public/knowledge/SelectionMod.json create mode 100644 web/app/components/base/icons/src/public/knowledge/SelectionMod.tsx create mode 100644 web/app/components/base/icons/src/public/knowledge/index.ts create mode 100644 web/app/components/base/input-number/index.tsx create mode 100644 web/app/components/base/linked-apps-panel/index.tsx delete mode 100644 web/app/components/base/retry-button/index.tsx delete mode 100644 web/app/components/base/retry-button/style.module.css create mode 100644 web/app/components/datasets/chunk.tsx create mode 100644 web/app/components/datasets/common/chunking-mode-label.tsx create mode 100644 web/app/components/datasets/common/document-file-icon.tsx create mode 100644 web/app/components/datasets/common/document-picker/document-list.tsx create mode 100644 web/app/components/datasets/common/document-picker/index.tsx create mode 100644 web/app/components/datasets/common/document-picker/preview-document-picker.tsx create mode 100644 web/app/components/datasets/common/document-status-with-action/auto-disabled-document.tsx create mode 100644 web/app/components/datasets/common/document-status-with-action/index-failed.tsx create mode 100644 web/app/components/datasets/common/document-status-with-action/status-with-action.tsx create mode 100644 web/app/components/datasets/create/assets/family-mod.svg create mode 100644 web/app/components/datasets/create/assets/file-list-3-fill.svg create mode 100644 web/app/components/datasets/create/assets/gold.svg create mode 100644 web/app/components/datasets/create/assets/note-mod.svg create mode 100644 web/app/components/datasets/create/assets/option-card-effect-blue.svg create mode 100644 web/app/components/datasets/create/assets/option-card-effect-orange.svg create mode 100644 web/app/components/datasets/create/assets/option-card-effect-purple.svg create mode 100644 web/app/components/datasets/create/assets/pattern-recognition-mod.svg create mode 100644 web/app/components/datasets/create/assets/piggy-bank-mod.svg create mode 100644 web/app/components/datasets/create/assets/progress-indicator.svg create mode 100644 web/app/components/datasets/create/assets/rerank.svg create mode 100644 web/app/components/datasets/create/assets/research-mod.svg create mode 100644 web/app/components/datasets/create/assets/selection-mod.svg create mode 100644 web/app/components/datasets/create/assets/setting-gear-mod.svg create mode 100644 web/app/components/datasets/create/icons.ts create mode 100644 web/app/components/datasets/create/step-two/inputs.tsx create mode 100644 web/app/components/datasets/create/step-two/option-card.tsx create mode 100644 web/app/components/datasets/create/stepper/index.tsx create mode 100644 web/app/components/datasets/create/stepper/step.tsx create mode 100644 web/app/components/datasets/create/top-bar/index.tsx delete mode 100644 web/app/components/datasets/documents/detail/completed/InfiniteVirtualList.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/child-segment-detail.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/child-segment-list.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/add-another.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/batch-action.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/chunk-content.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/dot.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/empty.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/full-screen-drawer.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/keywords.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/regeneration-modal.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/segment-index-tag.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/common/tag.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/display-toggle.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/new-child-segment.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/segment-card.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/segment-detail.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/segment-list.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/skeleton/full-doc-list-skeleton.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/skeleton/general-list-skeleton.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/skeleton/paragraph-list-skeleton.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/skeleton/parent-chunk-card-skeleton.tsx create mode 100644 web/app/components/datasets/documents/detail/completed/status-item.tsx create mode 100644 web/app/components/datasets/documents/detail/embedding/skeleton/index.tsx delete mode 100644 web/app/components/datasets/documents/detail/new-segment-modal.tsx create mode 100644 web/app/components/datasets/documents/detail/new-segment.tsx create mode 100644 web/app/components/datasets/formatted-text/flavours/edit-slice.tsx create mode 100644 web/app/components/datasets/formatted-text/flavours/preview-slice.tsx create mode 100644 web/app/components/datasets/formatted-text/flavours/shared.tsx create mode 100644 web/app/components/datasets/formatted-text/flavours/type.ts create mode 100644 web/app/components/datasets/formatted-text/formatted.tsx create mode 100644 web/app/components/datasets/hit-testing/components/child-chunks-item.tsx create mode 100644 web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx create mode 100644 web/app/components/datasets/hit-testing/components/result-item.tsx create mode 100644 web/app/components/datasets/hit-testing/components/score.tsx delete mode 100644 web/app/components/datasets/hit-testing/hit-detail.tsx create mode 100644 web/app/components/datasets/hit-testing/utils/extension-to-file-type.ts create mode 100644 web/app/components/datasets/loading.tsx create mode 100644 web/app/components/datasets/preview/container.tsx create mode 100644 web/app/components/datasets/preview/header.tsx create mode 100644 web/app/components/datasets/preview/index.tsx delete mode 100644 web/app/components/datasets/settings/index-method-radio/index.module.css create mode 100644 web/public/screenshots/Light/Agent.png create mode 100644 web/public/screenshots/Light/Agent@2x.png create mode 100644 web/public/screenshots/Light/Agent@3x.png create mode 100644 web/public/screenshots/Light/ChatFlow.png create mode 100644 web/public/screenshots/Light/ChatFlow@2x.png create mode 100644 web/public/screenshots/Light/ChatFlow@3x.png create mode 100644 web/public/screenshots/Light/Chatbot.png create mode 100644 web/public/screenshots/Light/Chatbot@2x.png create mode 100644 web/public/screenshots/Light/Chatbot@3x.png create mode 100644 web/public/screenshots/Light/Chatflow.png create mode 100644 web/public/screenshots/Light/Chatflow@2x.png create mode 100644 web/public/screenshots/Light/Chatflow@3x.png create mode 100644 web/public/screenshots/Light/TextGenerator.png create mode 100644 web/public/screenshots/Light/TextGenerator@2x.png create mode 100644 web/public/screenshots/Light/TextGenerator@3x.png create mode 100644 web/public/screenshots/Light/Workflow.png create mode 100644 web/public/screenshots/Light/Workflow@2x.png create mode 100644 web/public/screenshots/Light/Workflow@3x.png create mode 100644 web/service/knowledge/use-create-dataset.ts create mode 100644 web/service/knowledge/use-dateset.ts create mode 100644 web/service/knowledge/use-document.ts create mode 100644 web/service/knowledge/use-hit-testing.ts create mode 100644 web/service/knowledge/use-import.ts create mode 100644 web/service/knowledge/use-segment.ts create mode 100644 web/utils/time.ts diff --git a/api/poetry.lock b/api/poetry.lock index b42eb22dd4..b2d22a887d 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -1,4 +1,15 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. + +[[package]] +name = "aiofiles" +version = "24.1.0" +description = "File support for asyncio." +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5"}, + {file = "aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c"}, +] [[package]] name = "aiofiles" diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx index b416659a6a..a6fb116fa8 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout.tsx @@ -7,85 +7,36 @@ import { useTranslation } from 'react-i18next' import { useBoolean } from 'ahooks' import { Cog8ToothIcon, - // CommandLineIcon, - Squares2X2Icon, - // eslint-disable-next-line sort-imports - PuzzlePieceIcon, DocumentTextIcon, PaperClipIcon, - QuestionMarkCircleIcon, } from '@heroicons/react/24/outline' import { Cog8ToothIcon as Cog8ToothSolidIcon, // CommandLineIcon as CommandLineSolidIcon, DocumentTextIcon as DocumentTextSolidIcon, } from '@heroicons/react/24/solid' -import Link from 'next/link' +import { RiApps2AddLine, RiInformation2Line } from '@remixicon/react' import s from './style.module.css' import classNames from '@/utils/classnames' import { fetchDatasetDetail, fetchDatasetRelatedApps } from '@/service/datasets' -import type { RelatedApp, RelatedAppResponse } from '@/models/datasets' +import type { RelatedAppResponse } from '@/models/datasets' import AppSideBar from '@/app/components/app-sidebar' -import Divider from '@/app/components/base/divider' -import AppIcon from '@/app/components/base/app-icon' import Loading from '@/app/components/base/loading' -import FloatPopoverContainer from '@/app/components/base/float-popover-container' import DatasetDetailContext from '@/context/dataset-detail' import { DataSourceType } from '@/models/datasets' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import { LanguagesSupported } from '@/i18n/language' import { useStore } from '@/app/components/app/store' -import { AiText, ChatBot, CuteRobot } from '@/app/components/base/icons/src/vender/solid/communication' -import { Route } from '@/app/components/base/icons/src/vender/solid/mapsAndTravel' import { getLocaleOnClient } from '@/i18n' import { useAppContext } from '@/context/app-context' +import Tooltip from '@/app/components/base/tooltip' +import LinkedAppsPanel from '@/app/components/base/linked-apps-panel' export type IAppDetailLayoutProps = { children: React.ReactNode params: { datasetId: string } } -type ILikedItemProps = { - type?: 'plugin' | 'app' - appStatus?: boolean - detail: RelatedApp - isMobile: boolean -} - -const LikedItem = ({ - type = 'app', - detail, - isMobile, -}: ILikedItemProps) => { - return ( - -
- - {type === 'app' && ( - - {detail.mode === 'advanced-chat' && ( - - )} - {detail.mode === 'agent-chat' && ( - - )} - {detail.mode === 'chat' && ( - - )} - {detail.mode === 'completion' && ( - - )} - {detail.mode === 'workflow' && ( - - )} - - )} -
- {!isMobile &&
{detail?.name || '--'}
} - - ) -} - const TargetIcon = ({ className }: SVGProps) => { return @@ -117,65 +68,80 @@ const BookOpenIcon = ({ className }: SVGProps) => { type IExtraInfoProps = { isMobile: boolean relatedApps?: RelatedAppResponse + expand: boolean } -const ExtraInfo = ({ isMobile, relatedApps }: IExtraInfoProps) => { +const ExtraInfo = ({ isMobile, relatedApps, expand }: IExtraInfoProps) => { const locale = getLocaleOnClient() const [isShowTips, { toggle: toggleTips, set: setShowTips }] = useBoolean(!isMobile) const { t } = useTranslation() + const hasRelatedApps = relatedApps?.data && relatedApps?.data?.length > 0 + const relatedAppsTotal = relatedApps?.data?.length || 0 + useEffect(() => { setShowTips(!isMobile) }, [isMobile, setShowTips]) - return
- - {(relatedApps?.data && relatedApps?.data?.length > 0) && ( + return
+ {hasRelatedApps && ( <> - {!isMobile &&
{relatedApps?.total || '--'} {t('common.datasetMenus.relatedApp')}
} + {!isMobile && ( + + } + > +
+ {relatedAppsTotal || '--'} {t('common.datasetMenus.relatedApp')} + +
+
+ )} + {isMobile &&
- {relatedApps?.total || '--'} + {relatedAppsTotal || '--'}
} - {relatedApps?.data?.map((item, index) => ())} )} - {!relatedApps?.data?.length && ( - - + {!hasRelatedApps && !expand && ( + +
+ +
+
{t('common.datasetMenus.emptyTip')}
+ + + {t('common.datasetMenus.viewDoc')} +
} > -
-
-
- -
-
- -
-
-
{t('common.datasetMenus.emptyTip')}
- - - {t('common.datasetMenus.viewDoc')} - +
+ {t('common.datasetMenus.noRelatedApp')} +
- + )}
} @@ -235,7 +201,7 @@ const DatasetDetailLayout: FC = (props) => { }, [isMobile, setAppSiderbarExpand]) if (!datasetRes && !error) - return + return return (
@@ -246,7 +212,7 @@ const DatasetDetailLayout: FC = (props) => { desc={datasetRes?.description || '--'} isExternal={datasetRes?.provider === 'external'} navigation={navigation} - extraInfo={!isCurrentWorkspaceDatasetOperator ? mode => : undefined} + extraInfo={!isCurrentWorkspaceDatasetOperator ? mode => : undefined} iconType={datasetRes?.data_source_type === DataSourceType.NOTION ? 'notion' : 'dataset'} />} = (props) => { dataset: datasetRes, mutateDatasetRes: () => mutateDatasetRes(), }}> -
{children}
+
{children}
) diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx index df314ddafe..3a65f1d30f 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/settings/page.tsx @@ -7,10 +7,10 @@ const Settings = async () => { const { t } = await translate(locale, 'dataset-settings') return ( -
+
-
{t('title')}
-
{t('desc')}
+
{t('title')}
+
{t('desc')}
diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css index 0ee64b4fcd..516b124809 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/style.module.css @@ -1,12 +1,3 @@ -.itemWrapper { - @apply flex items-center w-full h-10 rounded-lg hover:bg-gray-50 cursor-pointer; -} -.appInfo { - @apply truncate text-gray-700 text-sm font-normal; -} -.iconWrapper { - @apply relative w-6 h-6 rounded-lg; -} .statusPoint { @apply flex justify-center items-center absolute -right-0.5 -bottom-0.5 w-2.5 h-2.5 bg-white rounded; } diff --git a/web/app/(commonLayout)/datasets/Container.tsx b/web/app/(commonLayout)/datasets/Container.tsx index a30521d998..a0edb1cd61 100644 --- a/web/app/(commonLayout)/datasets/Container.tsx +++ b/web/app/(commonLayout)/datasets/Container.tsx @@ -17,7 +17,6 @@ import TagManagementModal from '@/app/components/base/tag-management' import TagFilter from '@/app/components/base/tag-management/filter' import Button from '@/app/components/base/button' import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development' -import SearchInput from '@/app/components/base/search-input' // Services import { fetchDatasetApiBaseUrl } from '@/service/datasets' @@ -29,6 +28,7 @@ import { useAppContext } from '@/context/app-context' import { useExternalApiPanel } from '@/context/external-api-panel-context' // eslint-disable-next-line import/order import { useQuery } from '@tanstack/react-query' +import Input from '@/app/components/base/input' const Container = () => { const { t } = useTranslation() @@ -81,17 +81,24 @@ const Container = () => { }, [currentWorkspace, router]) return ( -
-
+
+
setActiveTab(newActiveTab)} options={options} /> {activeTab === 'dataset' && ( -
+
- + handleKeywordsChange(e.target.value)} + onClear={() => handleKeywordsChange('')} + />
+ +
+
+} diff --git a/web/app/components/base/linked-apps-panel/index.tsx b/web/app/components/base/linked-apps-panel/index.tsx new file mode 100644 index 0000000000..4320cb0fc6 --- /dev/null +++ b/web/app/components/base/linked-apps-panel/index.tsx @@ -0,0 +1,62 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import Link from 'next/link' +import { useTranslation } from 'react-i18next' +import { RiArrowRightUpLine } from '@remixicon/react' +import cn from '@/utils/classnames' +import AppIcon from '@/app/components/base/app-icon' +import type { RelatedApp } from '@/models/datasets' + +type ILikedItemProps = { + appStatus?: boolean + detail: RelatedApp + isMobile: boolean +} + +const appTypeMap = { + 'chat': 'Chatbot', + 'completion': 'Completion', + 'agent-chat': 'Agent', + 'advanced-chat': 'Chatflow', + 'workflow': 'Workflow', +} + +const LikedItem = ({ + detail, + isMobile, +}: ILikedItemProps) => { + return ( + +
+
+ +
+ {!isMobile &&
{detail?.name || '--'}
} +
+
{appTypeMap[detail.mode]}
+ + + ) +} + +type Props = { + relatedApps: RelatedApp[] + isMobile: boolean +} + +const LinkedAppsPanel: FC = ({ + relatedApps, + isMobile, +}) => { + const { t } = useTranslation() + return ( +
+
{relatedApps.length || '--'} {t('common.datasetMenus.relatedApp')}
+ {relatedApps.map((item, index) => ( + + ))} +
+ ) +} +export default React.memo(LinkedAppsPanel) diff --git a/web/app/components/base/pagination/index.tsx b/web/app/components/base/pagination/index.tsx index b64c712425..c0cc9f86ec 100644 --- a/web/app/components/base/pagination/index.tsx +++ b/web/app/components/base/pagination/index.tsx @@ -8,7 +8,7 @@ import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import cn from '@/utils/classnames' -type Props = { +export type Props = { className?: string current: number onChange: (cur: number) => void diff --git a/web/app/components/base/param-item/index.tsx b/web/app/components/base/param-item/index.tsx index 49acc81484..68c980ad09 100644 --- a/web/app/components/base/param-item/index.tsx +++ b/web/app/components/base/param-item/index.tsx @@ -1,5 +1,6 @@ 'use client' import type { FC } from 'react' +import { InputNumber } from '../input-number' import Tooltip from '@/app/components/base/tooltip' import Slider from '@/app/components/base/slider' import Switch from '@/app/components/base/switch' @@ -23,39 +24,44 @@ type Props = { const ParamItem: FC = ({ className, id, name, noTooltip, tip, step = 0.1, min = 0, max, value, enable, onChange, hasSwitch, onSwitchChange }) => { return (
-
-
+
+
{hasSwitch && ( { onSwitchChange?.(id, val) }} /> )} - {name} + {name} {!noTooltip && ( {tip}
} /> )} -
-
-
-
- { - const value = parseFloat(e.target.value) - if (value < min || value > max) - return - - onChange(id, value) - }} /> +
+
+ { + onChange(id, value) + }} + className='w-[72px]' + />
-
+
= ({ onChosen = () => { }, chosenConfig, chosenConfigWrapClassName, + className, }) => { return (
-
-
+
+
{icon}
-
{title}
-
{description}
+
{title}
+
{description}
{!noRadio && ( -
+
= ({ )}
{((isChosen && chosenConfig) || noRadio) && ( -
- {chosenConfig} +
+
+
+ {chosenConfig} +
)}
diff --git a/web/app/components/base/retry-button/index.tsx b/web/app/components/base/retry-button/index.tsx deleted file mode 100644 index 689827af7b..0000000000 --- a/web/app/components/base/retry-button/index.tsx +++ /dev/null @@ -1,85 +0,0 @@ -'use client' -import type { FC } from 'react' -import React, { useEffect, useReducer } from 'react' -import { useTranslation } from 'react-i18next' -import useSWR from 'swr' -import s from './style.module.css' -import classNames from '@/utils/classnames' -import Divider from '@/app/components/base/divider' -import { getErrorDocs, retryErrorDocs } from '@/service/datasets' -import type { IndexingStatusResponse } from '@/models/datasets' - -const WarningIcon = () => - - - - -type Props = { - datasetId: string -} -type IIndexState = { - value: string -} -type ActionType = 'retry' | 'success' | 'error' - -type IAction = { - type: ActionType -} -const indexStateReducer = (state: IIndexState, action: IAction) => { - const actionMap = { - retry: 'retry', - success: 'success', - error: 'error', - } - - return { - ...state, - value: actionMap[action.type] || state.value, - } -} - -const RetryButton: FC = ({ datasetId }) => { - const { t } = useTranslation() - const [indexState, dispatch] = useReducer(indexStateReducer, { value: 'success' }) - const { data: errorDocs } = useSWR({ datasetId }, getErrorDocs) - - const onRetryErrorDocs = async () => { - dispatch({ type: 'retry' }) - const document_ids = errorDocs?.data.map((doc: IndexingStatusResponse) => doc.id) || [] - const res = await retryErrorDocs({ datasetId, document_ids }) - if (res.result === 'success') - dispatch({ type: 'success' }) - else - dispatch({ type: 'error' }) - } - - useEffect(() => { - if (errorDocs?.total === 0) - dispatch({ type: 'success' }) - else - dispatch({ type: 'error' }) - }, [errorDocs?.total]) - - if (indexState.value === 'success') - return null - - return ( -
- - - {errorDocs?.total} {t('dataset.docsFailedNotice')} - - - - {t('dataset.retry')} - -
- ) -} -export default RetryButton diff --git a/web/app/components/base/retry-button/style.module.css b/web/app/components/base/retry-button/style.module.css deleted file mode 100644 index 99a0947576..0000000000 --- a/web/app/components/base/retry-button/style.module.css +++ /dev/null @@ -1,4 +0,0 @@ -.retryBtn { - @apply inline-flex justify-center items-center content-center h-9 leading-5 rounded-lg px-4 py-2 text-base; - @apply border-solid border border-gray-200 text-gray-500 hover:bg-white hover:shadow-sm hover:border-gray-300; -} diff --git a/web/app/components/base/simple-pie-chart/index.tsx b/web/app/components/base/simple-pie-chart/index.tsx index 7de539cbb1..4b987ab42d 100644 --- a/web/app/components/base/simple-pie-chart/index.tsx +++ b/web/app/components/base/simple-pie-chart/index.tsx @@ -10,10 +10,11 @@ export type SimplePieChartProps = { fill?: string stroke?: string size?: number + animationDuration?: number className?: string } -const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', size = 12, className }: SimplePieChartProps) => { +const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', size = 12, animationDuration, className }: SimplePieChartProps) => { const option: EChartsOption = useMemo(() => ({ series: [ { @@ -34,7 +35,7 @@ const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', { type: 'pie', radius: '83%', - animationDuration: 600, + animationDuration: animationDuration ?? 600, data: [ { value: percentage, itemStyle: { color: fill } }, { value: 100 - percentage, itemStyle: { color: '#fff' } }, @@ -48,7 +49,7 @@ const SimplePieChart = ({ percentage = 80, fill = '#fdb022', stroke = '#f79009', cursor: 'default', }, ], - }), [stroke, fill, percentage]) + }), [stroke, fill, percentage, animationDuration]) return ( -export const SkeletonContanier: FC = (props) => { +export const SkeletonContainer: FC = (props) => { const { className, children, ...rest } = props return (
@@ -30,11 +30,14 @@ export const SkeletonRectangle: FC = (props) => { ) } -export const SkeletonPoint: FC = () => -
·
- +export const SkeletonPoint: FC = (props) => { + const { className, ...rest } = props + return ( +
·
+ ) +} /** Usage - * + * * * * diff --git a/web/app/components/base/switch/index.tsx b/web/app/components/base/switch/index.tsx index f61c6f46ff..8bf32b1311 100644 --- a/web/app/components/base/switch/index.tsx +++ b/web/app/components/base/switch/index.tsx @@ -64,4 +64,7 @@ const Switch = ({ onChange, size = 'md', defaultValue = false, disabled = false, ) } + +Switch.displayName = 'Switch' + export default React.memo(Switch) diff --git a/web/app/components/base/tag-input/index.tsx b/web/app/components/base/tag-input/index.tsx index b26d0c6438..ec6c1cee34 100644 --- a/web/app/components/base/tag-input/index.tsx +++ b/web/app/components/base/tag-input/index.tsx @@ -3,8 +3,8 @@ import type { ChangeEvent, FC, KeyboardEvent } from 'react' import { } from 'use-context-selector' import { useTranslation } from 'react-i18next' import AutosizeInput from 'react-18-input-autosize' +import { RiAddLine, RiCloseLine } from '@remixicon/react' import cn from '@/utils/classnames' -import { X } from '@/app/components/base/icons/src/vender/line/general' import { useToastContext } from '@/app/components/base/toast' type TagInputProps = { @@ -75,14 +75,14 @@ const TagInput: FC = ({ (items || []).map((item, index) => (
+ className={cn('flex items-center mr-1 mt-1 pl-1.5 pr-1 py-1 system-xs-regular text-text-secondary border border-divider-deep bg-components-badge-white-to-dark rounded-md')} + > {item} { !disableRemove && ( - handleRemove(index)} - /> +
handleRemove(index)}> + +
) }
@@ -90,24 +90,27 @@ const TagInput: FC = ({ } { !disableAdd && ( - setFocused(true)} - onBlur={handleBlur} - value={value} - onChange={(e: ChangeEvent) => { - setValue(e.target.value) - }} - onKeyDown={handleKeyDown} - placeholder={t(placeholder || (isSpecialMode ? 'common.model.params.stop_sequencesPlaceholder' : 'datasetDocuments.segment.addKeyWord'))} - /> +
+ {!isSpecialMode && !focused && } + setFocused(true)} + onBlur={handleBlur} + value={value} + onChange={(e: ChangeEvent) => { + setValue(e.target.value) + }} + onKeyDown={handleKeyDown} + placeholder={t(placeholder || (isSpecialMode ? 'common.model.params.stop_sequencesPlaceholder' : 'datasetDocuments.segment.addKeyWord'))} + /> +
) }
diff --git a/web/app/components/base/toast/index.tsx b/web/app/components/base/toast/index.tsx index b9a6de9fe5..ba7d8af518 100644 --- a/web/app/components/base/toast/index.tsx +++ b/web/app/components/base/toast/index.tsx @@ -21,6 +21,7 @@ export type IToastProps = { children?: ReactNode onClose?: () => void className?: string + customComponent?: ReactNode } type IToastContext = { notify: (props: IToastProps) => void @@ -35,6 +36,7 @@ const Toast = ({ message, children, className, + customComponent, }: IToastProps) => { const { close } = useToastContext() // sometimes message is react node array. Not handle it. @@ -49,8 +51,7 @@ const Toast = ({ 'top-0', 'right-0', )}> - -
-
{message}
+
+
+
{message}
+ {customComponent} +
{children &&
{children}
}
- +
@@ -117,7 +121,8 @@ Toast.notify = ({ message, duration, className, -}: Pick) => { + customComponent, +}: Pick) => { const defaultDuring = (type === 'success' || type === 'info') ? 3000 : 6000 if (typeof window === 'object') { const holder = document.createElement('div') @@ -133,7 +138,7 @@ Toast.notify = ({ } }, }}> - + , ) document.body.appendChild(holder) diff --git a/web/app/components/base/tooltip/index.tsx b/web/app/components/base/tooltip/index.tsx index 8ec3cd8c7a..65b5a99077 100644 --- a/web/app/components/base/tooltip/index.tsx +++ b/web/app/components/base/tooltip/index.tsx @@ -14,6 +14,7 @@ export type TooltipProps = { popupContent?: React.ReactNode children?: React.ReactNode popupClassName?: string + noDecoration?: boolean offset?: OffsetOptions needsDelay?: boolean asChild?: boolean @@ -27,6 +28,7 @@ const Tooltip: FC = ({ popupContent, children, popupClassName, + noDecoration, offset, asChild = true, needsDelay = false, @@ -96,7 +98,7 @@ const Tooltip: FC = ({ > {popupContent && (
triggerMethod === 'hover' && setHoverPopup()} diff --git a/web/app/components/billing/priority-label/index.tsx b/web/app/components/billing/priority-label/index.tsx index 36338cf4a8..6ecac4a79e 100644 --- a/web/app/components/billing/priority-label/index.tsx +++ b/web/app/components/billing/priority-label/index.tsx @@ -4,6 +4,7 @@ import { DocumentProcessingPriority, Plan, } from '../type' +import cn from '@/utils/classnames' import { useProviderContext } from '@/context/provider-context' import { ZapFast, @@ -11,7 +12,11 @@ import { } from '@/app/components/base/icons/src/vender/solid/general' import Tooltip from '@/app/components/base/tooltip' -const PriorityLabel = () => { +type PriorityLabelProps = { + className?: string +} + +const PriorityLabel = ({ className }: PriorityLabelProps) => { const { t } = useTranslation() const { plan } = useProviderContext() @@ -37,18 +42,18 @@ const PriorityLabel = () => { }
}> - + { plan.type === Plan.professional && ( - + ) } { (plan.type === Plan.team || plan.type === Plan.enterprise) && ( - + ) } {t(`billing.plansCommon.priority.${priority}`)} diff --git a/web/app/components/datasets/chunk.tsx b/web/app/components/datasets/chunk.tsx new file mode 100644 index 0000000000..bf2835dbdb --- /dev/null +++ b/web/app/components/datasets/chunk.tsx @@ -0,0 +1,54 @@ +import type { FC, PropsWithChildren } from 'react' +import { SelectionMod } from '../base/icons/src/public/knowledge' +import type { QA } from '@/models/datasets' + +export type ChunkLabelProps = { + label: string + characterCount: number +} + +export const ChunkLabel: FC = (props) => { + const { label, characterCount } = props + return
+ +

+ {label} + + + · + + + {`${characterCount} characters`} +

+
+} + +export type ChunkContainerProps = ChunkLabelProps & PropsWithChildren + +export const ChunkContainer: FC = (props) => { + const { label, characterCount, children } = props + return
+ +
+ {children} +
+
+} + +export type QAPreviewProps = { + qa: QA +} + +export const QAPreview: FC = (props) => { + const { qa } = props + return
+
+ +

{qa.question}

+
+
+ +

{qa.answer}

+
+
+} diff --git a/web/app/components/datasets/common/chunking-mode-label.tsx b/web/app/components/datasets/common/chunking-mode-label.tsx new file mode 100644 index 0000000000..7c6e924009 --- /dev/null +++ b/web/app/components/datasets/common/chunking-mode-label.tsx @@ -0,0 +1,29 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import Badge from '@/app/components/base/badge' +import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge' + +type Props = { + isGeneralMode: boolean + isQAMode: boolean +} + +const ChunkingModeLabel: FC = ({ + isGeneralMode, + isQAMode, +}) => { + const { t } = useTranslation() + const TypeIcon = isGeneralMode ? GeneralType : ParentChildType + + return ( + +
+ + {isGeneralMode ? `${t('dataset.chunkingMode.general')}${isQAMode ? ' · QA' : ''}` : t('dataset.chunkingMode.parentChild')} +
+
+ ) +} +export default React.memo(ChunkingModeLabel) diff --git a/web/app/components/datasets/common/document-file-icon.tsx b/web/app/components/datasets/common/document-file-icon.tsx new file mode 100644 index 0000000000..5842cbbc7c --- /dev/null +++ b/web/app/components/datasets/common/document-file-icon.tsx @@ -0,0 +1,40 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import FileTypeIcon from '../../base/file-uploader/file-type-icon' +import type { FileAppearanceType } from '@/app/components/base/file-uploader/types' +import { FileAppearanceTypeEnum } from '@/app/components/base/file-uploader/types' + +const extendToFileTypeMap: { [key: string]: FileAppearanceType } = { + pdf: FileAppearanceTypeEnum.pdf, + json: FileAppearanceTypeEnum.document, + html: FileAppearanceTypeEnum.document, + txt: FileAppearanceTypeEnum.document, + markdown: FileAppearanceTypeEnum.markdown, + md: FileAppearanceTypeEnum.markdown, + xlsx: FileAppearanceTypeEnum.excel, + xls: FileAppearanceTypeEnum.excel, + csv: FileAppearanceTypeEnum.excel, + doc: FileAppearanceTypeEnum.word, + docx: FileAppearanceTypeEnum.word, +} + +type Props = { + extension?: string + name?: string + size?: 'sm' | 'lg' | 'md' + className?: string +} + +const DocumentFileIcon: FC = ({ + extension, + name, + size = 'md', + className, +}) => { + const localExtension = extension?.toLowerCase() || name?.split('.')?.pop()?.toLowerCase() + return ( + + ) +} +export default React.memo(DocumentFileIcon) diff --git a/web/app/components/datasets/common/document-picker/document-list.tsx b/web/app/components/datasets/common/document-picker/document-list.tsx new file mode 100644 index 0000000000..3e320d7507 --- /dev/null +++ b/web/app/components/datasets/common/document-picker/document-list.tsx @@ -0,0 +1,42 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import FileIcon from '../document-file-icon' +import cn from '@/utils/classnames' +import type { DocumentItem } from '@/models/datasets' + +type Props = { + className?: string + list: DocumentItem[] + onChange: (value: DocumentItem) => void +} + +const DocumentList: FC = ({ + className, + list, + onChange, +}) => { + const handleChange = useCallback((item: DocumentItem) => { + return () => onChange(item) + }, [onChange]) + + return ( +
+ {list.map((item) => { + const { id, name, extension } = item + return ( +
+ +
{name}
+
+ ) + })} +
+ ) +} + +export default React.memo(DocumentList) diff --git a/web/app/components/datasets/common/document-picker/index.tsx b/web/app/components/datasets/common/document-picker/index.tsx new file mode 100644 index 0000000000..30690fca00 --- /dev/null +++ b/web/app/components/datasets/common/document-picker/index.tsx @@ -0,0 +1,118 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback, useState } from 'react' +import { useBoolean } from 'ahooks' +import { RiArrowDownSLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import FileIcon from '../document-file-icon' +import DocumentList from './document-list' +import type { DocumentItem, ParentMode, SimpleDocumentDetail } from '@/models/datasets' +import { ProcessMode } from '@/models/datasets' +import { + PortalToFollowElem, + PortalToFollowElemContent, + PortalToFollowElemTrigger, +} from '@/app/components/base/portal-to-follow-elem' +import cn from '@/utils/classnames' +import SearchInput from '@/app/components/base/search-input' +import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge' +import { useDocumentList } from '@/service/knowledge/use-document' +import Loading from '@/app/components/base/loading' + +type Props = { + datasetId: string + value: { + name?: string + extension?: string + processMode?: ProcessMode + parentMode?: ParentMode + } + onChange: (value: SimpleDocumentDetail) => void +} + +const DocumentPicker: FC = ({ + datasetId, + value, + onChange, +}) => { + const { t } = useTranslation() + const { + name, + extension, + processMode, + parentMode, + } = value + const [query, setQuery] = useState('') + + const { data } = useDocumentList({ + datasetId, + query: { + keyword: query, + page: 1, + limit: 20, + }, + }) + const documentsList = data?.data + const isParentChild = processMode === ProcessMode.parentChild + const TypeIcon = isParentChild ? ParentChildType : GeneralType + + const [open, { + set: setOpen, + toggle: togglePopup, + }] = useBoolean(false) + const ArrowIcon = RiArrowDownSLine + + const handleChange = useCallback(({ id }: DocumentItem) => { + onChange(documentsList?.find(item => item.id === id) as SimpleDocumentDetail) + setOpen(false) + }, [documentsList, onChange, setOpen]) + + return ( + + +
+ +
+
+ {name || '--'} + +
+
+ + + {isParentChild ? t('dataset.chunkingMode.parentChild') : t('dataset.chunkingMode.general')} + {isParentChild && ` · ${!parentMode ? '--' : parentMode === 'paragraph' ? t('dataset.parentMode.paragraph') : t('dataset.parentMode.fullDoc')}`} + +
+
+
+
+ +
+ + {documentsList + ? ( + ({ + id: d.id, + name: d.name, + extension: d.data_source_detail_dict?.upload_file?.extension || '', + }))} + onChange={handleChange} + /> + ) + : (
+ +
)} +
+ +
+
+ ) +} +export default React.memo(DocumentPicker) diff --git a/web/app/components/datasets/common/document-picker/preview-document-picker.tsx b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx new file mode 100644 index 0000000000..2a35b75471 --- /dev/null +++ b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx @@ -0,0 +1,82 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import { useBoolean } from 'ahooks' +import { RiArrowDownSLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import FileIcon from '../document-file-icon' +import DocumentList from './document-list' +import { + PortalToFollowElem, + PortalToFollowElemContent, + PortalToFollowElemTrigger, +} from '@/app/components/base/portal-to-follow-elem' +import cn from '@/utils/classnames' +import Loading from '@/app/components/base/loading' +import type { DocumentItem } from '@/models/datasets' + +type Props = { + className?: string + value: DocumentItem + files: DocumentItem[] + onChange: (value: DocumentItem) => void +} + +const PreviewDocumentPicker: FC = ({ + className, + value, + files, + onChange, +}) => { + const { t } = useTranslation() + const { name, extension } = value + + const [open, { + set: setOpen, + toggle: togglePopup, + }] = useBoolean(false) + const ArrowIcon = RiArrowDownSLine + + const handleChange = useCallback((item: DocumentItem) => { + onChange(item) + setOpen(false) + }, [onChange, setOpen]) + + return ( + + +
+ +
+
+ {name || '--'} + +
+
+
+
+ +
+ {files?.length > 1 &&
{t('dataset.preprocessDocument', { num: files.length })}
} + {files?.length > 0 + ? ( + + ) + : (
+ +
)} +
+ +
+
+ ) +} +export default React.memo(PreviewDocumentPicker) diff --git a/web/app/components/datasets/common/document-status-with-action/auto-disabled-document.tsx b/web/app/components/datasets/common/document-status-with-action/auto-disabled-document.tsx new file mode 100644 index 0000000000..b687c004e5 --- /dev/null +++ b/web/app/components/datasets/common/document-status-with-action/auto-disabled-document.tsx @@ -0,0 +1,38 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import { useTranslation } from 'react-i18next' +import StatusWithAction from './status-with-action' +import { useAutoDisabledDocuments, useDocumentEnable, useInvalidDisabledDocument } from '@/service/knowledge/use-document' +import Toast from '@/app/components/base/toast' +type Props = { + datasetId: string +} + +const AutoDisabledDocument: FC = ({ + datasetId, +}) => { + const { t } = useTranslation() + const { data, isLoading } = useAutoDisabledDocuments(datasetId) + const invalidDisabledDocument = useInvalidDisabledDocument() + const documentIds = data?.document_ids + const hasDisabledDocument = documentIds && documentIds.length > 0 + const { mutateAsync: enableDocument } = useDocumentEnable() + const handleEnableDocuments = useCallback(async () => { + await enableDocument({ datasetId, documentIds }) + invalidDisabledDocument() + Toast.notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') }) + }, []) + if (!hasDisabledDocument || isLoading) + return null + + return ( + + ) +} +export default React.memo(AutoDisabledDocument) diff --git a/web/app/components/datasets/common/document-status-with-action/index-failed.tsx b/web/app/components/datasets/common/document-status-with-action/index-failed.tsx new file mode 100644 index 0000000000..37311768b9 --- /dev/null +++ b/web/app/components/datasets/common/document-status-with-action/index-failed.tsx @@ -0,0 +1,69 @@ +'use client' +import type { FC } from 'react' +import React, { useEffect, useReducer } from 'react' +import { useTranslation } from 'react-i18next' +import useSWR from 'swr' +import StatusWithAction from './status-with-action' +import { getErrorDocs, retryErrorDocs } from '@/service/datasets' +import type { IndexingStatusResponse } from '@/models/datasets' + +type Props = { + datasetId: string +} +type IIndexState = { + value: string +} +type ActionType = 'retry' | 'success' | 'error' + +type IAction = { + type: ActionType +} +const indexStateReducer = (state: IIndexState, action: IAction) => { + const actionMap = { + retry: 'retry', + success: 'success', + error: 'error', + } + + return { + ...state, + value: actionMap[action.type] || state.value, + } +} + +const RetryButton: FC = ({ datasetId }) => { + const { t } = useTranslation() + const [indexState, dispatch] = useReducer(indexStateReducer, { value: 'success' }) + const { data: errorDocs, isLoading } = useSWR({ datasetId }, getErrorDocs) + + const onRetryErrorDocs = async () => { + dispatch({ type: 'retry' }) + const document_ids = errorDocs?.data.map((doc: IndexingStatusResponse) => doc.id) || [] + const res = await retryErrorDocs({ datasetId, document_ids }) + if (res.result === 'success') + dispatch({ type: 'success' }) + else + dispatch({ type: 'error' }) + } + + useEffect(() => { + if (errorDocs?.total === 0) + dispatch({ type: 'success' }) + else + dispatch({ type: 'error' }) + }, [errorDocs?.total]) + + if (isLoading || indexState.value === 'success') + return null + + return ( + { }} + /> + ) +} +export default RetryButton diff --git a/web/app/components/datasets/common/document-status-with-action/status-with-action.tsx b/web/app/components/datasets/common/document-status-with-action/status-with-action.tsx new file mode 100644 index 0000000000..a8da9bf6cc --- /dev/null +++ b/web/app/components/datasets/common/document-status-with-action/status-with-action.tsx @@ -0,0 +1,65 @@ +'use client' +import { RiAlertFill, RiCheckboxCircleFill, RiErrorWarningFill, RiInformation2Fill } from '@remixicon/react' +import type { FC } from 'react' +import React from 'react' +import cn from '@/utils/classnames' +import Divider from '@/app/components/base/divider' + +type Status = 'success' | 'error' | 'warning' | 'info' +type Props = { + type?: Status + description: string + actionText: string + onAction: () => void + disabled?: boolean +} + +const IconMap = { + success: { + Icon: RiCheckboxCircleFill, + color: 'text-text-success', + }, + error: { + Icon: RiErrorWarningFill, + color: 'text-text-destructive', + }, + warning: { + Icon: RiAlertFill, + color: 'text-text-warning-secondary', + }, + info: { + Icon: RiInformation2Fill, + color: 'text-text-accent', + }, +} + +const getIcon = (type: Status) => { + return IconMap[type] +} + +const StatusAction: FC = ({ + type = 'info', + description, + actionText, + onAction, + disabled, +}) => { + const { Icon, color } = getIcon(type) + return ( +
+
+
+ +
{description}
+ +
{actionText}
+
+
+ ) +} +export default React.memo(StatusAction) diff --git a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx index f3da67b92c..9236858ae4 100644 --- a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx @@ -2,10 +2,11 @@ import type { FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' import RetrievalParamConfig from '../retrieval-param-config' +import { OptionCard } from '../../create/step-two/option-card' +import { retrievalIcon } from '../../create/icons' import { RETRIEVE_METHOD } from '@/types/app' -import RadioCard from '@/app/components/base/radio-card' -import { HighPriority } from '@/app/components/base/icons/src/vender/solid/arrows' import type { RetrievalConfig } from '@/types/app' type Props = { @@ -21,19 +22,17 @@ const EconomicalRetrievalMethodConfig: FC = ({ return (
- } + } title={t('dataset.retrieval.invertedIndex.title')} - description={t('dataset.retrieval.invertedIndex.description')} - noRadio - chosenConfig={ - - } - /> + description={t('dataset.retrieval.invertedIndex.description')} isActive + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + +
) } diff --git a/web/app/components/datasets/common/retrieval-method-config/index.tsx b/web/app/components/datasets/common/retrieval-method-config/index.tsx index 20d93568ad..9ab157571b 100644 --- a/web/app/components/datasets/common/retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-config/index.tsx @@ -2,12 +2,13 @@ import type { FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' import RetrievalParamConfig from '../retrieval-param-config' +import { OptionCard } from '../../create/step-two/option-card' +import Effect from '../../create/assets/option-card-effect-purple.svg' +import { retrievalIcon } from '../../create/icons' import type { RetrievalConfig } from '@/types/app' import { RETRIEVE_METHOD } from '@/types/app' -import RadioCard from '@/app/components/base/radio-card' -import { PatternRecognition, Semantic } from '@/app/components/base/icons/src/vender/solid/development' -import { FileSearch02 } from '@/app/components/base/icons/src/vender/solid/files' import { useProviderContext } from '@/context/provider-context' import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' @@ -16,6 +17,7 @@ import { RerankingModeEnum, WeightedScoreEnum, } from '@/models/datasets' +import Badge from '@/app/components/base/badge' type Props = { value: RetrievalConfig @@ -56,67 +58,72 @@ const RetrievalMethodConfig: FC = ({ return (
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.semantic_search.title')} description={t('dataset.retrieval.semantic_search.description')} - isChosen={value.search_method === RETRIEVE_METHOD.semantic} - onChosen={() => onChange({ + isActive={ + value.search_method === RETRIEVE_METHOD.semantic + } + onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.semantic, })} - chosenConfig={ - - } - /> + effectImg={Effect.src} + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + + )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.full_text_search.title')} description={t('dataset.retrieval.full_text_search.description')} - isChosen={value.search_method === RETRIEVE_METHOD.fullText} - onChosen={() => onChange({ + isActive={ + value.search_method === RETRIEVE_METHOD.fullText + } + onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.fullText, })} - chosenConfig={ - - } - /> + effectImg={Effect.src} + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + + )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={
{t('dataset.retrieval.hybrid_search.title')}
-
{t('dataset.retrieval.hybrid_search.recommend')}
+
} - description={t('dataset.retrieval.hybrid_search.description')} - isChosen={value.search_method === RETRIEVE_METHOD.hybrid} - onChosen={() => onChange({ + description={t('dataset.retrieval.hybrid_search.description')} isActive={ + value.search_method === RETRIEVE_METHOD.hybrid + } + onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.hybrid, reranking_enable: true, })} - chosenConfig={ - - } - /> + effectImg={Effect.src} + activeHeaderClassName='bg-dataset-option-card-purple-gradient' + > + +
)}
) diff --git a/web/app/components/datasets/common/retrieval-method-info/index.tsx b/web/app/components/datasets/common/retrieval-method-info/index.tsx index 7d9b999c53..fc3020d4a9 100644 --- a/web/app/components/datasets/common/retrieval-method-info/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-info/index.tsx @@ -2,12 +2,11 @@ import type { FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' +import { retrievalIcon } from '../../create/icons' import type { RetrievalConfig } from '@/types/app' import { RETRIEVE_METHOD } from '@/types/app' import RadioCard from '@/app/components/base/radio-card' -import { HighPriority } from '@/app/components/base/icons/src/vender/solid/arrows' -import { PatternRecognition, Semantic } from '@/app/components/base/icons/src/vender/solid/development' -import { FileSearch02 } from '@/app/components/base/icons/src/vender/solid/files' type Props = { value: RetrievalConfig @@ -15,11 +14,12 @@ type Props = { export const getIcon = (type: RETRIEVE_METHOD) => { return ({ - [RETRIEVE_METHOD.semantic]: Semantic, - [RETRIEVE_METHOD.fullText]: FileSearch02, - [RETRIEVE_METHOD.hybrid]: PatternRecognition, - [RETRIEVE_METHOD.invertedIndex]: HighPriority, - })[type] || FileSearch02 + [RETRIEVE_METHOD.semantic]: retrievalIcon.vector, + [RETRIEVE_METHOD.fullText]: retrievalIcon.fullText, + [RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid, + [RETRIEVE_METHOD.invertedIndex]: retrievalIcon.vector, + [RETRIEVE_METHOD.keywordSearch]: retrievalIcon.vector, + })[type] || retrievalIcon.vector } const EconomicalRetrievalMethodConfig: FC = ({ @@ -28,11 +28,11 @@ const EconomicalRetrievalMethodConfig: FC = ({ }) => { const { t } = useTranslation() const type = value.search_method - const Icon = getIcon(type) + const icon = return (
} + icon={icon} title={t(`dataset.retrieval.${type}.title`)} description={t(`dataset.retrieval.${type}.description`)} noRadio diff --git a/web/app/components/datasets/common/retrieval-param-config/index.tsx b/web/app/components/datasets/common/retrieval-param-config/index.tsx index 9d48d56a8d..5136ac1659 100644 --- a/web/app/components/datasets/common/retrieval-param-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-param-config/index.tsx @@ -3,6 +3,9 @@ import type { FC } from 'react' import React, { useCallback } from 'react' import { useTranslation } from 'react-i18next' +import Image from 'next/image' +import ProgressIndicator from '../../create/assets/progress-indicator.svg' +import Reranking from '../../create/assets/rerank.svg' import cn from '@/utils/classnames' import TopKItem from '@/app/components/base/param-item/top-k-item' import ScoreThresholdItem from '@/app/components/base/param-item/score-threshold-item' @@ -20,6 +23,7 @@ import { } from '@/models/datasets' import WeightedScore from '@/app/components/app/configuration/dataset-config/params-config/weighted-score' import Toast from '@/app/components/base/toast' +import RadioCard from '@/app/components/base/radio-card' type Props = { type: RETRIEVE_METHOD @@ -116,7 +120,7 @@ const RetrievalParamConfig: FC = ({
{!isEconomical && !isHybridSearch && (
-
+
{canToggleRerankModalEnable && (
= ({
)}
- {t('common.modelProvider.rerankModel.key')} + {t('common.modelProvider.rerankModel.key')} {t('common.modelProvider.rerankModel.tip')}
@@ -163,7 +167,7 @@ const RetrievalParamConfig: FC = ({ )} { !isHybridSearch && ( -
+
= ({ { isHybridSearch && ( <> -
+
{ rerankingModeOptions.map(option => ( -
handleChangeRerankMode(option.value)} - > -
{option.label}
- {option.tips}
} - triggerClassName='ml-0.5 w-3.5 h-3.5' - /> -
+ isChosen={value.reranking_mode === option.value} + onChosen={() => handleChangeRerankMode(option.value)} + icon={} + title={option.label} + description={option.tips} + className='flex-1' + /> )) }
diff --git a/web/app/components/datasets/create/assets/family-mod.svg b/web/app/components/datasets/create/assets/family-mod.svg new file mode 100644 index 0000000000..b1c4e6f566 --- /dev/null +++ b/web/app/components/datasets/create/assets/family-mod.svg @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/file-list-3-fill.svg b/web/app/components/datasets/create/assets/file-list-3-fill.svg new file mode 100644 index 0000000000..a4e6c4da97 --- /dev/null +++ b/web/app/components/datasets/create/assets/file-list-3-fill.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/app/components/datasets/create/assets/gold.svg b/web/app/components/datasets/create/assets/gold.svg new file mode 100644 index 0000000000..b48ac0eae5 --- /dev/null +++ b/web/app/components/datasets/create/assets/gold.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/note-mod.svg b/web/app/components/datasets/create/assets/note-mod.svg new file mode 100644 index 0000000000..b9e81f6bd5 --- /dev/null +++ b/web/app/components/datasets/create/assets/note-mod.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/app/components/datasets/create/assets/option-card-effect-blue.svg b/web/app/components/datasets/create/assets/option-card-effect-blue.svg new file mode 100644 index 0000000000..00a8afad8b --- /dev/null +++ b/web/app/components/datasets/create/assets/option-card-effect-blue.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/assets/option-card-effect-orange.svg b/web/app/components/datasets/create/assets/option-card-effect-orange.svg new file mode 100644 index 0000000000..d833764f0c --- /dev/null +++ b/web/app/components/datasets/create/assets/option-card-effect-orange.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/assets/option-card-effect-purple.svg b/web/app/components/datasets/create/assets/option-card-effect-purple.svg new file mode 100644 index 0000000000..a7857f8e57 --- /dev/null +++ b/web/app/components/datasets/create/assets/option-card-effect-purple.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/assets/pattern-recognition-mod.svg b/web/app/components/datasets/create/assets/pattern-recognition-mod.svg new file mode 100644 index 0000000000..1083e888ed --- /dev/null +++ b/web/app/components/datasets/create/assets/pattern-recognition-mod.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/piggy-bank-mod.svg b/web/app/components/datasets/create/assets/piggy-bank-mod.svg new file mode 100644 index 0000000000..b1120ad9a9 --- /dev/null +++ b/web/app/components/datasets/create/assets/piggy-bank-mod.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/progress-indicator.svg b/web/app/components/datasets/create/assets/progress-indicator.svg new file mode 100644 index 0000000000..3c99713636 --- /dev/null +++ b/web/app/components/datasets/create/assets/progress-indicator.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/web/app/components/datasets/create/assets/rerank.svg b/web/app/components/datasets/create/assets/rerank.svg new file mode 100644 index 0000000000..409b52e6e2 --- /dev/null +++ b/web/app/components/datasets/create/assets/rerank.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/assets/research-mod.svg b/web/app/components/datasets/create/assets/research-mod.svg new file mode 100644 index 0000000000..1f0bb34233 --- /dev/null +++ b/web/app/components/datasets/create/assets/research-mod.svg @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/selection-mod.svg b/web/app/components/datasets/create/assets/selection-mod.svg new file mode 100644 index 0000000000..2d0dd3b5f7 --- /dev/null +++ b/web/app/components/datasets/create/assets/selection-mod.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/assets/setting-gear-mod.svg b/web/app/components/datasets/create/assets/setting-gear-mod.svg new file mode 100644 index 0000000000..c782caade8 --- /dev/null +++ b/web/app/components/datasets/create/assets/setting-gear-mod.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/web/app/components/datasets/create/embedding-process/index.module.css b/web/app/components/datasets/create/embedding-process/index.module.css index 1ebb006b54..f2ab4d85a2 100644 --- a/web/app/components/datasets/create/embedding-process/index.module.css +++ b/web/app/components/datasets/create/embedding-process/index.module.css @@ -14,24 +14,7 @@ border-radius: 6px; overflow: hidden; } -.sourceItem.error { - background: #FEE4E2; -} -.sourceItem.success { - background: #D1FADF; -} -.progressbar { - position: absolute; - top: 0; - left: 0; - height: 100%; - background-color: #B2CCFF; -} -.sourceItem .info { - display: flex; - align-items: center; - z-index: 1; -} + .sourceItem .info .name { font-weight: 500; font-size: 12px; @@ -55,13 +38,6 @@ color: #05603A; } - -.cost { - @apply flex justify-between items-center text-xs text-gray-700; -} -.embeddingStatus { - @apply flex items-center justify-between text-gray-900 font-medium text-sm mr-2; -} .commonIcon { @apply w-3 h-3 mr-1 inline-block align-middle; } @@ -81,35 +57,33 @@ @apply text-xs font-medium; } -.fileIcon { - @apply w-4 h-4 mr-1 bg-center bg-no-repeat; +.unknownFileIcon { background-image: url(../assets/unknown.svg); - background-size: 16px; } -.fileIcon.csv { +.csv { background-image: url(../assets/csv.svg); } -.fileIcon.docx { +.docx { background-image: url(../assets/docx.svg); } -.fileIcon.xlsx, -.fileIcon.xls { +.xlsx, +.xls { background-image: url(../assets/xlsx.svg); } -.fileIcon.pdf { +.pdf { background-image: url(../assets/pdf.svg); } -.fileIcon.html, -.fileIcon.htm { +.html, +.htm { background-image: url(../assets/html.svg); } -.fileIcon.md, -.fileIcon.markdown { +.md, +.markdown { background-image: url(../assets/md.svg); } -.fileIcon.txt { +.txt { background-image: url(../assets/txt.svg); } -.fileIcon.json { +.json { background-image: url(../assets/json.svg); } diff --git a/web/app/components/datasets/create/embedding-process/index.tsx b/web/app/components/datasets/create/embedding-process/index.tsx index 7786582085..201333ffce 100644 --- a/web/app/components/datasets/create/embedding-process/index.tsx +++ b/web/app/components/datasets/create/embedding-process/index.tsx @@ -6,32 +6,44 @@ import { useTranslation } from 'react-i18next' import { omit } from 'lodash-es' import { ArrowRightIcon } from '@heroicons/react/24/solid' import { + RiCheckboxCircleFill, RiErrorWarningFill, + RiLoader2Fill, + RiTerminalBoxLine, } from '@remixicon/react' -import s from './index.module.css' +import Image from 'next/image' +import { indexMethodIcon, retrievalIcon } from '../icons' +import { IndexingType } from '../step-two' +import DocumentFileIcon from '../../common/document-file-icon' import cn from '@/utils/classnames' import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata' import Button from '@/app/components/base/button' import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets' import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets' -import { DataSourceType } from '@/models/datasets' +import { DataSourceType, ProcessMode } from '@/models/datasets' import NotionIcon from '@/app/components/base/notion-icon' import PriorityLabel from '@/app/components/billing/priority-label' import { Plan } from '@/app/components/billing/type' import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general' import UpgradeBtn from '@/app/components/billing/upgrade-btn' import { useProviderContext } from '@/context/provider-context' -import Tooltip from '@/app/components/base/tooltip' import { sleep } from '@/utils' +import { RETRIEVE_METHOD } from '@/types/app' +import Tooltip from '@/app/components/base/tooltip' type Props = { datasetId: string batchId: string documents?: FullDocumentDetail[] indexingType?: string + retrievalMethod?: string } -const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => { +const RuleDetail: FC<{ + sourceData?: ProcessRuleResponse + indexingType?: string + retrievalMethod?: string +}> = ({ sourceData, indexingType, retrievalMethod }) => { const { t } = useTranslation() const segmentationRuleMap = { @@ -51,29 +63,47 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => return t('datasetCreation.stepTwo.removeStopwords') } + const isNumber = (value: unknown) => { + return typeof value === 'number' + } + const getValue = useCallback((field: string) => { let value: string | number | undefined = '-' + const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens) + ? sourceData.rules.segmentation.max_tokens + : value + const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens) + ? sourceData.rules.subchunk_segmentation.max_tokens + : value switch (field) { case 'mode': - value = sourceData?.mode === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string) + value = !sourceData?.mode + ? value + : sourceData.mode === ProcessMode.general + ? (t('datasetDocuments.embedding.custom') as string) + : `${t('datasetDocuments.embedding.hierarchical')} · ${sourceData?.rules?.parent_mode === 'paragraph' + ? t('dataset.parentMode.paragraph') + : t('dataset.parentMode.fullDoc')}` break case 'segmentLength': - value = sourceData?.rules?.segmentation?.max_tokens + value = !sourceData?.mode + ? value + : sourceData.mode === ProcessMode.general + ? maxTokens + : `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}` break default: - value = sourceData?.mode === 'automatic' - ? (t('datasetDocuments.embedding.automatic') as string) - // eslint-disable-next-line array-callback-return - : sourceData?.rules?.pre_processing_rules?.map((rule) => { - if (rule.enabled) - return getRuleName(rule.id) - }).filter(Boolean).join(';') + value = !sourceData?.mode + ? value + : sourceData?.rules?.pre_processing_rules?.filter(rule => + rule.enabled).map(rule => getRuleName(rule.id)).join(',') break } return value + // eslint-disable-next-line react-hooks/exhaustive-deps }, [sourceData]) - return
+ return
{Object.keys(segmentationRuleMap).map((field) => { return = ({ sourceData }) => displayedValue={String(getValue(field))} /> })} + + } + /> + + } + />
} -const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], indexingType }) => { +const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => { const { t } = useTranslation() const { enableBilling, plan } = useProviderContext() @@ -127,6 +190,7 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index } useEffect(() => { + setIsStopQuery(false) startQueryStatus() return () => { stopQueryStatus() @@ -146,6 +210,9 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index const navToDocumentList = () => { router.push(`/datasets/${datasetId}/documents`) } + const navToApiDocs = () => { + router.push('/datasets?category=api') + } const isEmbedding = useMemo(() => { return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || '')) @@ -177,13 +244,17 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index return doc?.data_source_info.notion_page_icon } - const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') + const isSourceEmbedding = (detail: IndexingStatusResponse) => + ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') return ( <> -
-
- {isEmbedding && t('datasetDocuments.embedding.processing')} +
+
+ {isEmbedding &&
+ + {t('datasetDocuments.embedding.processing')} +
} {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
@@ -200,69 +271,80 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index
) } -
+
{indexingStatusBatchDetail.map(indexingStatusDetail => (
{isSourceEmbedding(indexingStatusDetail) && ( -
+
)} -
+
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && ( -
+ //
+ )} {getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && ( )} -
{getSourceName(indexingStatusDetail.id)}
- { - enableBilling && ( - - ) - } -
-
+
+
+ {getSourceName(indexingStatusDetail.id)} +
+ { + enableBilling && ( + + ) + } +
{isSourceEmbedding(indexingStatusDetail) && ( -
{`${getSourcePercent(indexingStatusDetail)}%`}
+
{`${getSourcePercent(indexingStatusDetail)}%`}
)} - {indexingStatusDetail.indexing_status === 'error' && indexingStatusDetail.error && ( + {indexingStatusDetail.indexing_status === 'error' && ( - {indexingStatusDetail.error} -
- )} + popupClassName='px-4 py-[14px] max-w-60 text-sm leading-4 text-text-secondary border-[0.5px] border-components-panel-border rounded-xl' + offset={4} + popupContent={indexingStatusDetail.error} > -
- Error - -
+ + + )} - {indexingStatusDetail.indexing_status === 'error' && !indexingStatusDetail.error && ( -
- Error -
- )} {indexingStatusDetail.indexing_status === 'completed' && ( -
100%
+ )}
))}
- -
+
+ +
+
diff --git a/web/app/components/datasets/create/file-preview/index.module.css b/web/app/components/datasets/create/file-preview/index.module.css index d87522e6d0..929002e1e2 100644 --- a/web/app/components/datasets/create/file-preview/index.module.css +++ b/web/app/components/datasets/create/file-preview/index.module.css @@ -1,6 +1,6 @@ .filePreview { @apply flex flex-col border-l border-gray-200 shrink-0; - width: 528px; + width: 100%; background-color: #fcfcfd; } @@ -48,5 +48,6 @@ } .fileContent { white-space: pre-line; + word-break: break-all; } \ No newline at end of file diff --git a/web/app/components/datasets/create/file-preview/index.tsx b/web/app/components/datasets/create/file-preview/index.tsx index e20af64386..cb1f1d6908 100644 --- a/web/app/components/datasets/create/file-preview/index.tsx +++ b/web/app/components/datasets/create/file-preview/index.tsx @@ -44,7 +44,7 @@ const FilePreview = ({ }, [file]) return ( -
+
{t('datasetCreation.stepOne.filePreview')} @@ -59,7 +59,7 @@ const FilePreview = ({
{loading &&
} {!loading && ( -
{previewContent}
+
{previewContent}
)}
diff --git a/web/app/components/datasets/create/file-uploader/index.module.css b/web/app/components/datasets/create/file-uploader/index.module.css index bf5b7dcaf5..7d29f2ef9c 100644 --- a/web/app/components/datasets/create/file-uploader/index.module.css +++ b/web/app/components/datasets/create/file-uploader/index.module.css @@ -1,68 +1,3 @@ -.fileUploader { - @apply mb-6; -} - -.fileUploader .title { - @apply mb-2; - font-weight: 500; - font-size: 16px; - line-height: 24px; - color: #344054; -} - -.fileUploader .tip { - font-weight: 400; - font-size: 12px; - line-height: 18px; - color: #667085; -} - -.uploader { - @apply relative box-border flex justify-center items-center mb-2 p-3; - flex-direction: column; - max-width: 640px; - min-height: 80px; - background: #F9FAFB; - border: 1px dashed #EAECF0; - border-radius: 12px; - font-weight: 400; - font-size: 14px; - line-height: 20px; - color: #667085; -} - -.uploader.dragging { - background: #F5F8FF; - border: 1px dashed #B2CCFF; -} - -.uploader .draggingCover { - position: absolute; - top: 0; - left: 0; - width: 100%; - height: 100%; -} - -.uploader .uploadIcon { - content: ''; - display: block; - margin-right: 8px; - width: 24px; - height: 24px; - background: center no-repeat url(../assets/upload-cloud-01.svg); - background-size: contain; -} - -.uploader .browse { - @apply pl-1 cursor-pointer; - color: #155eef; -} - -.fileList { - @apply space-y-2; -} - .file { @apply box-border relative flex items-center justify-between; padding: 8px 12px 8px 8px; @@ -193,4 +128,4 @@ .file:hover .actionWrapper .remove { display: block; -} \ No newline at end of file +} diff --git a/web/app/components/datasets/create/file-uploader/index.tsx b/web/app/components/datasets/create/file-uploader/index.tsx index adb4bed0d1..e42a24cfef 100644 --- a/web/app/components/datasets/create/file-uploader/index.tsx +++ b/web/app/components/datasets/create/file-uploader/index.tsx @@ -3,10 +3,12 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import useSWR from 'swr' -import s from './index.module.css' +import { RiDeleteBinLine, RiUploadCloud2Line } from '@remixicon/react' +import DocumentFileIcon from '../../common/document-file-icon' import cn from '@/utils/classnames' import type { CustomFile as File, FileItem } from '@/models/datasets' import { ToastContext } from '@/app/components/base/toast' +import SimplePieChart from '@/app/components/base/simple-pie-chart' import { upload } from '@/service/base' import { fetchFileUploadConfig } from '@/service/common' @@ -14,6 +16,8 @@ import { fetchSupportFileTypes } from '@/service/datasets' import I18n from '@/context/i18n' import { LanguagesSupported } from '@/i18n/language' import { IS_CE_EDITION } from '@/config' +import { useAppContext } from '@/context/app-context' +import { Theme } from '@/types/app' const FILES_NUMBER_LIMIT = 20 @@ -222,6 +226,9 @@ const FileUploader = ({ initialUpload(files.filter(isValid)) }, [isValid, initialUpload]) + const { theme } = useAppContext() + const chartColor = useMemo(() => theme === Theme.dark ? '#5289ff' : '#296dff', [theme]) + useEffect(() => { dropRef.current?.addEventListener('dragenter', handleDragEnter) dropRef.current?.addEventListener('dragover', handleDragOver) @@ -236,12 +243,12 @@ const FileUploader = ({ }, [handleDrop]) return ( -
+
{!hideUpload && ( )} -
{t('datasetCreation.stepOne.uploader.title')}
- {!hideUpload && ( +
{t('datasetCreation.stepOne.uploader.title')}
+ + {!hideUpload && ( +
+
+ -
-
- {t('datasetCreation.stepOne.uploader.button')} - + {supportTypes.length > 0 && ( + + )}
-
{t('datasetCreation.stepOne.uploader.tip', { +
{t('datasetCreation.stepOne.uploader.tip', { size: fileUploadConfig.file_size_limit, supportTypes: supportTypesShowNames, })}
- {dragging &&
} + {dragging &&
}
)} -
+
+ {fileList.map((fileItem, index) => (
fileItem.file?.id && onPreview(fileItem.file)} className={cn( - s.file, - fileItem.progress < 100 && s.uploading, + 'flex items-center h-12 max-w-[640px] bg-components-panel-on-panel-item-bg text-xs leading-3 text-text-tertiary border border-components-panel-border rounded-lg shadow-xs', + // 'border-state-destructive-border bg-state-destructive-hover', )} > - {fileItem.progress < 100 && ( -
- )} -
-
-
{fileItem.file.name}
-
{getFileSize(fileItem.file.size)}
+
+
-
+
+
+
{fileItem.file.name}
+
+
+ {getFileType(fileItem.file)} + · + {getFileSize(fileItem.file.size)} + {/* · + 10k characters */} +
+
+
+ {/* + + */} {(fileItem.progress < 100 && fileItem.progress >= 0) && ( -
{`${fileItem.progress}%`}
- )} - {fileItem.progress === 100 && ( -
{ - e.stopPropagation() - removeFile(fileItem.fileID) - }} /> + //
{`${fileItem.progress}%`}
+ )} + { + e.stopPropagation() + removeFile(fileItem.fileID) + }}> + +
))} diff --git a/web/app/components/datasets/create/icons.ts b/web/app/components/datasets/create/icons.ts new file mode 100644 index 0000000000..80c4b6c944 --- /dev/null +++ b/web/app/components/datasets/create/icons.ts @@ -0,0 +1,16 @@ +import GoldIcon from './assets/gold.svg' +import Piggybank from './assets/piggy-bank-mod.svg' +import Selection from './assets/selection-mod.svg' +import Research from './assets/research-mod.svg' +import PatternRecognition from './assets/pattern-recognition-mod.svg' + +export const indexMethodIcon = { + high_quality: GoldIcon, + economical: Piggybank, +} + +export const retrievalIcon = { + vector: Selection, + fullText: Research, + hybrid: PatternRecognition, +} diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx index 98098445c7..9556b9fad5 100644 --- a/web/app/components/datasets/create/index.tsx +++ b/web/app/components/datasets/create/index.tsx @@ -3,10 +3,10 @@ import React, { useCallback, useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import AppUnavailable from '../../base/app-unavailable' import { ModelTypeEnum } from '../../header/account-setting/model-provider-page/declarations' -import StepsNavBar from './steps-nav-bar' import StepOne from './step-one' import StepTwo from './step-two' import StepThree from './step-three' +import { Topbar } from './top-bar' import { DataSourceType } from '@/models/datasets' import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets' import { fetchDataSource } from '@/service/common' @@ -36,6 +36,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { const [dataSourceType, setDataSourceType] = useState(DataSourceType.FILE) const [step, setStep] = useState(1) const [indexingTypeCache, setIndexTypeCache] = useState('') + const [retrievalMethodCache, setRetrievalMethodCache] = useState('') const [fileList, setFiles] = useState([]) const [result, setResult] = useState() const [hasError, setHasError] = useState(false) @@ -80,6 +81,9 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { const updateResultCache = (res?: createDocumentResponse) => { setResult(res) } + const updateRetrievalMethodCache = (method: string) => { + setRetrievalMethodCache(method) + } const nextStep = useCallback(() => { setStep(step + 1) @@ -118,33 +122,29 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { return return ( -
-
- -
-
-
- setShowAccountSettingModal({ payload: 'data-source' })} - datasetId={datasetId} - dataSourceType={dataSourceType} - dataSourceTypeDisable={!!detail?.data_source_type} - changeType={setDataSourceType} - files={fileList} - updateFile={updateFile} - updateFileList={updateFileList} - notionPages={notionPages} - updateNotionPages={updateNotionPages} - onStepChange={nextStep} - websitePages={websitePages} - updateWebsitePages={setWebsitePages} - onWebsiteCrawlProviderChange={setWebsiteCrawlProvider} - onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId} - crawlOptions={crawlOptions} - onCrawlOptionsChange={setCrawlOptions} - /> -
+
+ +
+ {step === 1 && setShowAccountSettingModal({ payload: 'data-source' })} + datasetId={datasetId} + dataSourceType={dataSourceType} + dataSourceTypeDisable={!!detail?.data_source_type} + changeType={setDataSourceType} + files={fileList} + updateFile={updateFile} + updateFileList={updateFileList} + notionPages={notionPages} + updateNotionPages={updateNotionPages} + onStepChange={nextStep} + websitePages={websitePages} + updateWebsitePages={setWebsitePages} + onWebsiteCrawlProviderChange={setWebsiteCrawlProvider} + onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId} + crawlOptions={crawlOptions} + onCrawlOptionsChange={setCrawlOptions} + />} {(step === 2 && (!datasetId || (datasetId && !!detail))) && setShowAccountSettingModal({ payload: 'provider' })} @@ -158,6 +158,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { websiteCrawlJobId={websiteCrawlJobId} onStepChange={changeStep} updateIndexingTypeCache={updateIndexingTypeCache} + updateRetrievalMethodCache={updateRetrievalMethodCache} updateResultCache={updateResultCache} crawlOptions={crawlOptions} />} @@ -165,6 +166,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { datasetId={datasetId} datasetName={detail?.name} indexingType={detail?.indexing_technique || indexingTypeCache} + retrievalMethod={detail?.retrieval_model_dict?.search_method || retrievalMethodCache} creationCache={result} />}
diff --git a/web/app/components/datasets/create/notion-page-preview/index.tsx b/web/app/components/datasets/create/notion-page-preview/index.tsx index 8225e56f04..f658f213e8 100644 --- a/web/app/components/datasets/create/notion-page-preview/index.tsx +++ b/web/app/components/datasets/create/notion-page-preview/index.tsx @@ -44,7 +44,7 @@ const NotionPagePreview = ({ }, [currentPage]) return ( -
+
{t('datasetCreation.stepOne.pagePreview')} @@ -64,7 +64,7 @@ const NotionPagePreview = ({
{loading &&
} {!loading && ( -
{previewContent}
+
{previewContent}
)}
diff --git a/web/app/components/datasets/create/step-one/index.module.css b/web/app/components/datasets/create/step-one/index.module.css index 4e3cf67cd6..bb8dd9b895 100644 --- a/web/app/components/datasets/create/step-one/index.module.css +++ b/web/app/components/datasets/create/step-one/index.module.css @@ -2,21 +2,19 @@ position: sticky; top: 0; left: 0; - padding: 42px 64px 12px; + padding: 42px 64px 12px 0; font-weight: 600; font-size: 18px; line-height: 28px; - color: #101828; } .form { position: relative; padding: 12px 64px; - background-color: #fff; } .dataSourceItem { - @apply box-border relative shrink-0 flex items-center mr-3 p-3 h-14 bg-white rounded-xl cursor-pointer; + @apply box-border relative grow shrink-0 flex items-center p-3 h-14 bg-white rounded-xl cursor-pointer; border: 0.5px solid #EAECF0; box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); font-weight: 500; @@ -24,27 +22,32 @@ line-height: 20px; color: #101828; } + .dataSourceItem:hover { background-color: #f5f8ff; border: 0.5px solid #B2CCFF; box-shadow: 0px 12px 16px -4px rgba(16, 24, 40, 0.08), 0px 4px 6px -2px rgba(16, 24, 40, 0.03); } + .dataSourceItem.active { background-color: #f5f8ff; border: 1.5px solid #528BFF; box-shadow: 0px 1px 3px rgba(16, 24, 40, 0.1), 0px 1px 2px rgba(16, 24, 40, 0.06); } + .dataSourceItem.disabled { background-color: #f9fafb; border: 0.5px solid #EAECF0; box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); cursor: default; } + .dataSourceItem.disabled:hover { background-color: #f9fafb; border: 0.5px solid #EAECF0; box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); } + .comingTag { @apply flex justify-center items-center bg-white; position: absolute; @@ -59,6 +62,7 @@ line-height: 18px; color: #444CE7; } + .datasetIcon { @apply flex mr-2 w-8 h-8 rounded-lg bg-center bg-no-repeat; background-color: #F5FAFF; @@ -66,15 +70,18 @@ background-size: 16px; border: 0.5px solid #D1E9FF; } + .dataSourceItem:active .datasetIcon, .dataSourceItem:hover .datasetIcon { background-color: #F5F8FF; border: 0.5px solid #E0EAFF; } + .datasetIcon.notion { background-image: url(../assets/notion.svg); background-size: 20px; } + .datasetIcon.web { background-image: url(../assets/web.svg); } @@ -90,29 +97,12 @@ background-color: #eaecf0; } -.OtherCreationOption { - @apply flex items-center cursor-pointer; - font-weight: 500; - font-size: 13px; - line-height: 18px; - color: #155EEF; -} -.OtherCreationOption::before { - content: ''; - display: block; - margin-right: 4px; - width: 16px; - height: 16px; - background: center no-repeat url(../assets/folder-plus.svg); - background-size: contain; -} - .notionConnectionTip { display: flex; flex-direction: column; align-items: flex-start; padding: 24px; - max-width: 640px; + width: 640px; background: #F9FAFB; border-radius: 16px; } @@ -138,6 +128,7 @@ line-height: 24px; color: #374151; } + .notionConnectionTip .title::after { content: ''; position: absolute; @@ -148,6 +139,7 @@ background: center no-repeat url(../assets/Icon-3-dots.svg); background-size: contain; } + .notionConnectionTip .tip { margin-bottom: 20px; font-style: normal; @@ -155,4 +147,4 @@ font-size: 13px; line-height: 18px; color: #6B7280; -} +} \ No newline at end of file diff --git a/web/app/components/datasets/create/step-one/index.tsx b/web/app/components/datasets/create/step-one/index.tsx index 643932e9ae..2cca003b39 100644 --- a/web/app/components/datasets/create/step-one/index.tsx +++ b/web/app/components/datasets/create/step-one/index.tsx @@ -1,6 +1,7 @@ 'use client' import React, { useMemo, useState } from 'react' import { useTranslation } from 'react-i18next' +import { RiArrowRightLine, RiFolder6Line } from '@remixicon/react' import FilePreview from '../file-preview' import FileUploader from '../file-uploader' import NotionPagePreview from '../notion-page-preview' @@ -17,6 +18,7 @@ import { NotionPageSelector } from '@/app/components/base/notion-page-selector' import { useDatasetDetailContext } from '@/context/dataset-detail' import { useProviderContext } from '@/context/provider-context' import VectorSpaceFull from '@/app/components/billing/vector-space-full' +import classNames from '@/utils/classnames' type IStepOneProps = { datasetId?: string @@ -120,143 +122,174 @@ const StepOne = ({ return true if (isShowVectorSpaceFull) return true - return false - }, [files]) + }, [files, isShowVectorSpaceFull]) + return (
-
- { - shouldShowDataSourceTypeList && ( -
{t('datasetCreation.steps.one')}
- ) - } -
- { - shouldShowDataSourceTypeList && ( -
-
{ - if (dataSourceTypeDisable) - return - changeType(DataSourceType.FILE) - hideFilePreview() - hideNotionPagePreview() - }} - > - - {t('datasetCreation.stepOne.dataSourceType.file')} -
-
{ - if (dataSourceTypeDisable) - return - changeType(DataSourceType.NOTION) - hideFilePreview() - hideNotionPagePreview() - }} - > - - {t('datasetCreation.stepOne.dataSourceType.notion')} -
-
changeType(DataSourceType.WEB)} - > - - {t('datasetCreation.stepOne.dataSourceType.web')} -
-
- ) - } - {dataSourceType === DataSourceType.FILE && ( - <> - - {isShowVectorSpaceFull && ( -
- -
- )} - - - )} - {dataSourceType === DataSourceType.NOTION && ( - <> - {!hasConnection && } - {hasConnection && ( - <> -
- page.page_id)} - onSelect={updateNotionPages} - onPreview={updateCurrentPage} - /> +
+
+
+ { + shouldShowDataSourceTypeList && ( +
{t('datasetCreation.steps.one')}
+ ) + } + { + shouldShowDataSourceTypeList && ( +
+
{ + if (dataSourceTypeDisable) + return + changeType(DataSourceType.FILE) + hideFilePreview() + hideNotionPagePreview() + }} + > + + {t('datasetCreation.stepOne.dataSourceType.file')} +
+
{ + if (dataSourceTypeDisable) + return + changeType(DataSourceType.NOTION) + hideFilePreview() + hideNotionPagePreview() + }} + > + + {t('datasetCreation.stepOne.dataSourceType.notion')} +
+
changeType(DataSourceType.WEB)} + > + + {t('datasetCreation.stepOne.dataSourceType.web')}
- {isShowVectorSpaceFull && ( -
- -
- )} - - - )} - - )} - {dataSourceType === DataSourceType.WEB && ( - <> -
- -
- {isShowVectorSpaceFull && ( -
-
- )} - - - )} - {!datasetId && ( - <> -
-
{t('datasetCreation.stepOne.emptyDatasetCreation')}
- - )} + ) + } + {dataSourceType === DataSourceType.FILE && ( + <> + + {isShowVectorSpaceFull && ( +
+ +
+ )} +
+ {/* */} + +
+ + )} + {dataSourceType === DataSourceType.NOTION && ( + <> + {!hasConnection && } + {hasConnection && ( + <> +
+ page.page_id)} + onSelect={updateNotionPages} + onPreview={updateCurrentPage} + /> +
+ {isShowVectorSpaceFull && ( +
+ +
+ )} +
+ {/* */} + +
+ + )} + + )} + {dataSourceType === DataSourceType.WEB && ( + <> +
+ +
+ {isShowVectorSpaceFull && ( +
+ +
+ )} +
+ {/* */} + +
+ + )} + {!datasetId && ( + <> +
+ + + {t('datasetCreation.stepOne.emptyDatasetCreation')} + + + )} +
+
-
- {currentFile && } - {currentNotionPage && } - {currentWebsite && } +
+ {currentFile && } + {currentNotionPage && } + {currentWebsite && } +
) } diff --git a/web/app/components/datasets/create/step-three/index.tsx b/web/app/components/datasets/create/step-three/index.tsx index 804a196ed5..8d979616d1 100644 --- a/web/app/components/datasets/create/step-three/index.tsx +++ b/web/app/components/datasets/create/step-three/index.tsx @@ -1,45 +1,51 @@ 'use client' import React from 'react' import { useTranslation } from 'react-i18next' +import { RiBookOpenLine } from '@remixicon/react' import EmbeddingProcess from '../embedding-process' -import s from './index.module.css' -import cn from '@/utils/classnames' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import type { FullDocumentDetail, createDocumentResponse } from '@/models/datasets' +import AppIcon from '@/app/components/base/app-icon' type StepThreeProps = { datasetId?: string datasetName?: string indexingType?: string + retrievalMethod?: string creationCache?: createDocumentResponse } -const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: StepThreeProps) => { +const StepThree = ({ datasetId, datasetName, indexingType, creationCache, retrievalMethod }: StepThreeProps) => { const { t } = useTranslation() const media = useBreakpoints() const isMobile = media === MediaType.mobile return ( -
-
-
+
+
+
{!datasetId && ( <> -
-
{t('datasetCreation.stepThree.creationTitle')}
-
{t('datasetCreation.stepThree.creationContent')}
-
{t('datasetCreation.stepThree.label')}
-
{datasetName || creationCache?.dataset?.name}
+
+
{t('datasetCreation.stepThree.creationTitle')}
+
{t('datasetCreation.stepThree.creationContent')}
+
+ +
+
{t('datasetCreation.stepThree.label')}
+
{datasetName || creationCache?.dataset?.name}
+
+
-
+
)} {datasetId && ( -
-
{t('datasetCreation.stepThree.additionTitle')}
-
{`${t('datasetCreation.stepThree.additionP1')} ${datasetName || creationCache?.dataset?.name} ${t('datasetCreation.stepThree.additionP2')}`}
+
+
{t('datasetCreation.stepThree.additionTitle')}
+
{`${t('datasetCreation.stepThree.additionP1')} ${datasetName || creationCache?.dataset?.name} ${t('datasetCreation.stepThree.additionP2')}`}
)}
- {!isMobile &&
-
- -
{t('datasetCreation.stepThree.sideTipTitle')}
-
{t('datasetCreation.stepThree.sideTipContent')}
+ {!isMobile && ( +
+
+
+ +
+
{t('datasetCreation.stepThree.sideTipTitle')}
+
{t('datasetCreation.stepThree.sideTipContent')}
+
-
} + )}
) } diff --git a/web/app/components/datasets/create/step-two/index.module.css b/web/app/components/datasets/create/step-two/index.module.css index f89d6d67ea..178cbeba85 100644 --- a/web/app/components/datasets/create/step-two/index.module.css +++ b/web/app/components/datasets/create/step-two/index.module.css @@ -13,18 +13,6 @@ z-index: 10; } -.form { - @apply px-16 pb-8; -} - -.form .label { - @apply pt-6 pb-2 flex items-center; - font-weight: 500; - font-size: 16px; - line-height: 24px; - color: #344054; -} - .segmentationItem { min-height: 68px; } @@ -75,6 +63,10 @@ cursor: pointer; } +.disabled { + cursor: not-allowed !important; +} + .indexItem.disabled:hover { background-color: #fcfcfd; border-color: #f2f4f7; @@ -87,8 +79,7 @@ } .radioItem { - @apply relative mb-2 rounded-xl border border-gray-100 cursor-pointer; - background-color: #fcfcfd; + @apply relative mb-2 rounded-xl border border-components-option-card-option-border cursor-pointer bg-components-option-card-option-bg; } .radioItem.segmentationItem.custom { @@ -146,7 +137,7 @@ } .typeIcon.economical { - background-image: url(../assets/piggy-bank-01.svg); + background-image: url(../assets/piggy-bank-mod.svg); } .radioItem .radio { @@ -247,7 +238,7 @@ } .ruleItem { - @apply flex items-center; + @apply flex items-center py-1.5; } .formFooter { @@ -394,19 +385,6 @@ max-width: 524px; } -.previewHeader { - position: sticky; - top: 0; - left: 0; - padding-top: 42px; - background-color: #fff; - font-weight: 600; - font-size: 18px; - line-height: 28px; - color: #101828; - z-index: 10; -} - /* * `fixed` must under `previewHeader` because of style override would not work */ @@ -432,4 +410,4 @@ font-size: 12px; line-height: 18px; } -} \ No newline at end of file +} diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index f915c68fef..0d7202967a 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -1,65 +1,80 @@ 'use client' -import React, { useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react' +import type { FC, PropsWithChildren } from 'react' +import React, { useCallback, useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' -import { useBoolean } from 'ahooks' -import { XMarkIcon } from '@heroicons/react/20/solid' -import { RocketLaunchIcon } from '@heroicons/react/24/outline' import { - RiCloseLine, + RiAlertFill, + RiArrowLeftLine, + RiSearchEyeLine, } from '@remixicon/react' import Link from 'next/link' -import { groupBy } from 'lodash-es' -import PreviewItem, { PreviewType } from './preview-item' -import LanguageSelect from './language-select' +import Image from 'next/image' +import { useHover } from 'ahooks' +import SettingCog from '../assets/setting-gear-mod.svg' +import OrangeEffect from '../assets/option-card-effect-orange.svg' +import FamilyMod from '../assets/family-mod.svg' +import Note from '../assets/note-mod.svg' +import FileList from '../assets/file-list-3-fill.svg' +import { indexMethodIcon } from '../icons' +import { PreviewContainer } from '../../preview/container' +import { ChunkContainer, QAPreview } from '../../chunk' +import { PreviewHeader } from '../../preview/header' +import { FormattedText } from '../../formatted-text/formatted' +import { PreviewSlice } from '../../formatted-text/flavours/preview-slice' +import PreviewDocumentPicker from '../../common/document-picker/preview-document-picker' import s from './index.module.css' import unescape from './unescape' import escape from './escape' +import { OptionCard } from './option-card' +import LanguageSelect from './language-select' +import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' import cn from '@/utils/classnames' -import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' -import { - createDocument, - createFirstDocument, - fetchFileIndexingEstimate as didFetchFileIndexingEstimate, - fetchDefaultProcessRule, -} from '@/service/datasets' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DocumentItem, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' + import Button from '@/app/components/base/button' -import Input from '@/app/components/base/input' -import Loading from '@/app/components/base/loading' import FloatRightContainer from '@/app/components/base/float-right-container' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' import { type RetrievalConfig } from '@/types/app' import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' import Toast from '@/app/components/base/toast' -import { formatNumber } from '@/utils/format' import type { NotionPage } from '@/models/common' import { DataSourceProvider } from '@/models/common' -import { DataSourceType, DocForm } from '@/models/datasets' -import NotionIcon from '@/app/components/base/notion-icon' -import Switch from '@/app/components/base/switch' -import { MessageChatSquare } from '@/app/components/base/icons/src/public/common' +import { ChunkingMode, DataSourceType, RerankingModeEnum } from '@/models/datasets' import { useDatasetDetailContext } from '@/context/dataset-detail' import I18n from '@/context/i18n' -import { IS_CE_EDITION } from '@/config' import { RETRIEVE_METHOD } from '@/types/app' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' -import Tooltip from '@/app/components/base/tooltip' import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { LanguagesSupported } from '@/i18n/language' import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' -import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel' +import Checkbox from '@/app/components/base/checkbox' +import RadioCard from '@/app/components/base/radio-card' +import { IS_CE_EDITION } from '@/config' +import Divider from '@/app/components/base/divider' +import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument, useFetchDefaultProcessRule, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/knowledge/use-create-dataset' +import Badge from '@/app/components/base/badge' +import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton' +import Tooltip from '@/app/components/base/tooltip' +import CustomDialog from '@/app/components/base/dialog' +import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem' +import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback' + +const TextLabel: FC = (props) => { + return +} -type ValueOf = T[keyof T] type StepTwoProps = { isSetting?: boolean documentDetail?: FullDocumentDetail isAPIKeySet: boolean onSetting: () => void datasetId?: string - indexingType?: ValueOf + indexingType?: IndexingType + retrievalMethod?: string dataSourceType: DataSourceType files: CustomFile[] notionPages?: NotionPage[] @@ -69,21 +84,48 @@ type StepTwoProps = { websiteCrawlJobId?: string onStepChange?: (delta: number) => void updateIndexingTypeCache?: (type: string) => void + updateRetrievalMethodCache?: (method: string) => void updateResultCache?: (res: createDocumentResponse) => void onSave?: () => void onCancel?: () => void } -enum SegmentType { +export enum SegmentType { AUTO = 'automatic', CUSTOM = 'custom', } -enum IndexingType { +export enum IndexingType { QUALIFIED = 'high_quality', ECONOMICAL = 'economy', } const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' +const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500 +const DEFAULT_OVERLAP = 50 + +type ParentChildConfig = { + chunkForContext: ParentMode + parent: { + delimiter: string + maxLength: number + } + child: { + delimiter: string + maxLength: number + } +} + +const defaultParentChildConfig: ParentChildConfig = { + chunkForContext: 'paragraph', + parent: { + delimiter: '\\n\\n', + maxLength: 500, + }, + child: { + delimiter: '\\n', + maxLength: 200, + }, +} const StepTwo = ({ isSetting, @@ -104,6 +146,7 @@ const StepTwo = ({ updateResultCache, onSave, onCancel, + updateRetrievalMethodCache, }: StepTwoProps) => { const { t } = useTranslation() const { locale } = useContext(I18n) @@ -111,66 +154,166 @@ const StepTwo = ({ const isMobile = media === MediaType.mobile const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext() + + const isInUpload = Boolean(currentDataset) + const isUploadInEmptyDataset = isInUpload && !currentDataset?.doc_form + const isNotUploadInEmptyDataset = !isUploadInEmptyDataset + const isInInit = !isInUpload && !isSetting + const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type - const scrollRef = useRef(null) - const [scrolled, setScrolled] = useState(false) - const previewScrollRef = useRef(null) - const [previewScrolled, setPreviewScrolled] = useState(false) - const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO) + const [segmentationType, setSegmentationType] = useState(SegmentType.CUSTOM) const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) - const setSegmentIdentifier = useCallback((value: string) => { - doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER) + const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => { + doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER)) }, []) - const [maxChunkLength, setMaxChunkLength] = useState(4000) // default chunk length + const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000) - const [overlap, setOverlap] = useState(50) + const [overlap, setOverlap] = useState(DEFAULT_OVERLAP) const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() const hasSetIndexType = !!indexingType - const [indexType, setIndexType] = useState>( + const [indexType, setIndexType] = useState( (indexingType || isAPIKeySet) ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL, ) - const [isLanguageSelectDisabled, setIsLanguageSelectDisabled] = useState(false) - const [docForm, setDocForm] = useState( - (datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT, + + const [previewFile, setPreviewFile] = useState( + (datasetId && documentDetail) + ? documentDetail.file + : files[0], ) + const [previewNotionPage, setPreviewNotionPage] = useState( + (datasetId && documentDetail) + ? documentDetail.notion_page + : notionPages[0], + ) + + const [previewWebsitePage, setPreviewWebsitePage] = useState( + (datasetId && documentDetail) + ? documentDetail.website_page + : websitePages[0], + ) + + // QA Related + const [isLanguageSelectDisabled, _setIsLanguageSelectDisabled] = useState(false) + const [isQAConfirmDialogOpen, setIsQAConfirmDialogOpen] = useState(false) + const [docForm, setDocForm] = useState( + (datasetId && documentDetail) ? documentDetail.doc_form as ChunkingMode : ChunkingMode.text, + ) + const handleChangeDocform = (value: ChunkingMode) => { + if (value === ChunkingMode.qa && indexType === IndexingType.ECONOMICAL) { + setIsQAConfirmDialogOpen(true) + return + } + if (value === ChunkingMode.parentChild && indexType === IndexingType.ECONOMICAL) + setIndexType(IndexingType.QUALIFIED) + setDocForm(value) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + currentEstimateMutation.reset() + } + const [docLanguage, setDocLanguage] = useState( (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'), ) - const [QATipHide, setQATipHide] = useState(false) - const [previewSwitched, setPreviewSwitched] = useState(false) - const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean() - const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState(null) - const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState(null) - const fileIndexingEstimate = (() => { - return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate - })() - const [isCreating, setIsCreating] = useState(false) + const [parentChildConfig, setParentChildConfig] = useState(defaultParentChildConfig) - const scrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setScrolled(true) + const getIndexing_technique = () => indexingType || indexType + const currentDocForm = currentDataset?.doc_form || docForm - else - setScrolled(false) + const getProcessRule = (): ProcessRule => { + if (currentDocForm === ChunkingMode.parentChild) { + return { + rules: { + pre_processing_rules: rules, + segmentation: { + separator: unescape( + parentChildConfig.parent.delimiter, + ), + max_tokens: parentChildConfig.parent.maxLength, + }, + parent_mode: parentChildConfig.chunkForContext, + subchunk_segmentation: { + separator: unescape(parentChildConfig.child.delimiter), + max_tokens: parentChildConfig.child.maxLength, + }, + }, + mode: 'hierarchical', + } as ProcessRule + } + return { + rules: { + pre_processing_rules: rules, + segmentation: { + separator: unescape(segmentIdentifier), + max_tokens: maxChunkLength, + chunk_overlap: overlap, + }, + }, // api will check this. It will be removed after api refactored. + mode: segmentationType, + } as ProcessRule } - const previewScrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setPreviewScrolled(true) + const fileIndexingEstimateQuery = useFetchFileIndexingEstimateForFile({ + docForm: currentDocForm, + docLanguage, + dataSourceType: DataSourceType.FILE, + files: previewFile + ? [files.find(file => file.name === previewFile.name)!] + : files, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId!, + }) + const notionIndexingEstimateQuery = useFetchFileIndexingEstimateForNotion({ + docForm: currentDocForm, + docLanguage, + dataSourceType: DataSourceType.NOTION, + notionPages: [previewNotionPage], + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) - else - setPreviewScrolled(false) - } - const getFileName = (name: string) => { - const arr = name.split('.') - return arr.slice(0, -1).join('.') - } + const websiteIndexingEstimateQuery = useFetchFileIndexingEstimateForWeb({ + docForm: currentDocForm, + docLanguage, + dataSourceType: DataSourceType.WEB, + websitePages: [previewWebsitePage], + crawlOptions, + websiteCrawlProvider, + websiteCrawlJobId, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) + + const currentEstimateMutation = dataSourceType === DataSourceType.FILE + ? fileIndexingEstimateQuery + : dataSourceType === DataSourceType.NOTION + ? notionIndexingEstimateQuery + : websiteIndexingEstimateQuery + + const fetchEstimate = useCallback(() => { + if (dataSourceType === DataSourceType.FILE) + fileIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.NOTION) + notionIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.WEB) + websiteIndexingEstimateQuery.mutate() + }, [dataSourceType, fileIndexingEstimateQuery, notionIndexingEstimateQuery, websiteIndexingEstimateQuery]) + + const estimate + = dataSourceType === DataSourceType.FILE + ? fileIndexingEstimateQuery.data + : dataSourceType === DataSourceType.NOTION + ? notionIndexingEstimateQuery.data + : websiteIndexingEstimateQuery.data const getRuleName = (key: string) => { if (key === 'remove_extra_spaces') @@ -198,128 +341,20 @@ const StepTwo = ({ if (defaultConfig) { setSegmentIdentifier(defaultConfig.segmentation.separator) setMaxChunkLength(defaultConfig.segmentation.max_tokens) - setOverlap(defaultConfig.segmentation.chunk_overlap) + setOverlap(defaultConfig.segmentation.chunk_overlap!) setRules(defaultConfig.pre_processing_rules) } + setParentChildConfig(defaultParentChildConfig) } - const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT, language?: string) => { - // eslint-disable-next-line @typescript-eslint/no-use-before-define - const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm, language)!) - if (segmentationType === SegmentType.CUSTOM) - setCustomFileIndexingEstimate(res) - else - setAutomaticFileIndexingEstimate(res) - } - - const confirmChangeCustomConfig = () => { - if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) { - Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) }) + const updatePreview = () => { + if (segmentationType === SegmentType.CUSTOM && maxChunkLength > 4000) { + Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) return } - setCustomFileIndexingEstimate(null) - setShowPreview() - fetchFileIndexingEstimate() - setPreviewSwitched(false) + fetchEstimate() } - const getIndexing_technique = () => indexingType || indexType - - const getProcessRule = () => { - const processRule: ProcessRule = { - rules: {} as any, // api will check this. It will be removed after api refactored. - mode: segmentationType, - } - if (segmentationType === SegmentType.CUSTOM) { - const ruleObj = { - pre_processing_rules: rules, - segmentation: { - separator: unescape(segmentIdentifier), - max_tokens: maxChunkLength, - chunk_overlap: overlap, - }, - } - processRule.rules = ruleObj - } - return processRule - } - - const getNotionInfo = () => { - const workspacesMap = groupBy(notionPages, 'workspace_id') - const workspaces = Object.keys(workspacesMap).map((workspaceId) => { - return { - workspaceId, - pages: workspacesMap[workspaceId], - } - }) - return workspaces.map((workspace) => { - return { - workspace_id: workspace.workspaceId, - pages: workspace.pages.map((page) => { - const { page_id, page_name, page_icon, type } = page - return { - page_id, - page_name, - page_icon, - type, - } - }), - } - }) as NotionInfo[] - } - - const getWebsiteInfo = () => { - return { - provider: websiteCrawlProvider, - job_id: websiteCrawlJobId, - urls: websitePages.map(page => page.source_url), - only_main_content: crawlOptions?.only_main_content, - } - } - - const getFileIndexingEstimateParams = (docForm: DocForm, language?: string): IndexingEstimateParams | undefined => { - if (dataSourceType === DataSourceType.FILE) { - return { - info_list: { - data_source_type: dataSourceType, - file_info_list: { - file_ids: files.map(file => file.id) as string[], - }, - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.NOTION) { - return { - info_list: { - data_source_type: dataSourceType, - notion_info_list: getNotionInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.WEB) { - return { - info_list: { - data_source_type: dataSourceType, - website_info_list: getWebsiteInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - } const { modelList: rerankModelList, defaultModel: rerankDefaultModel, @@ -351,13 +386,14 @@ const StepTwo = ({ if (isSetting) { params = { original_document_id: documentDetail?.id, - doc_form: docForm, + doc_form: currentDocForm, doc_language: docLanguage, process_rule: getProcessRule(), // eslint-disable-next-line @typescript-eslint/no-use-before-define retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page. embedding_model: embeddingModel.model, // Readonly embedding_model_provider: embeddingModel.provider, // Readonly + indexing_technique: getIndexing_technique(), } as CreateDocumentReq } else { // create @@ -377,8 +413,12 @@ const StepTwo = ({ } const postRetrievalConfig = ensureRerankModelSelected({ rerankDefaultModel: rerankDefaultModel!, - // eslint-disable-next-line @typescript-eslint/no-use-before-define - retrievalConfig, + retrievalConfig: { + // eslint-disable-next-line @typescript-eslint/no-use-before-define + ...retrievalConfig, + // eslint-disable-next-line @typescript-eslint/no-use-before-define + reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel, + }, indexMethod: indexMethod as string, }) params = { @@ -390,7 +430,7 @@ const StepTwo = ({ }, indexing_technique: getIndexing_technique(), process_rule: getProcessRule(), - doc_form: docForm, + doc_form: currentDocForm, doc_language: docLanguage, retrieval_model: postRetrievalConfig, @@ -403,29 +443,36 @@ const StepTwo = ({ } } if (dataSourceType === DataSourceType.NOTION) - params.data_source.info_list.notion_info_list = getNotionInfo() + params.data_source.info_list.notion_info_list = getNotionInfo(notionPages) - if (dataSourceType === DataSourceType.WEB) - params.data_source.info_list.website_info_list = getWebsiteInfo() + if (dataSourceType === DataSourceType.WEB) { + params.data_source.info_list.website_info_list = getWebsiteInfo({ + websiteCrawlProvider, + websiteCrawlJobId, + websitePages, + }) + } } return params } - const getRules = async () => { - try { - const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) - const separator = res.rules.segmentation.separator + const fetchDefaultProcessRuleMutation = useFetchDefaultProcessRule({ + onSuccess(data) { + const separator = data.rules.segmentation.separator setSegmentIdentifier(separator) - setMaxChunkLength(res.rules.segmentation.max_tokens) - setLimitMaxChunkLength(res.limits.indexing_max_segmentation_tokens_length) - setOverlap(res.rules.segmentation.chunk_overlap) - setRules(res.rules.pre_processing_rules) - setDefaultConfig(res.rules) - } - catch (err) { - console.log(err) - } - } + setMaxChunkLength(data.rules.segmentation.max_tokens) + setOverlap(data.rules.segmentation.chunk_overlap!) + setRules(data.rules.pre_processing_rules) + setDefaultConfig(data.rules) + setLimitMaxChunkLength(data.limits.indexing_max_segmentation_tokens_length) + }, + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) const getRulesFromDetail = () => { if (documentDetail) { @@ -435,7 +482,7 @@ const StepTwo = ({ const overlap = rules.segmentation.chunk_overlap setSegmentIdentifier(separator) setMaxChunkLength(max) - setOverlap(overlap) + setOverlap(overlap!) setRules(rules.pre_processing_rules) setDefaultConfig(rules) } @@ -443,119 +490,81 @@ const StepTwo = ({ const getDefaultMode = () => { if (documentDetail) + // @ts-expect-error fix after api refactored setSegmentationType(documentDetail.dataset_process_rule.mode) } - const createHandle = async () => { - if (isCreating) - return - setIsCreating(true) - try { - let res - const params = getCreationParams() - if (!params) - return false - - setIsCreating(true) - if (!datasetId) { - res = await createFirstDocument({ - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - } - else { - res = await createDocument({ - datasetId, - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - } - if (mutateDatasetRes) - mutateDatasetRes() - onStepChange && onStepChange(+1) - isSetting && onSave && onSave() - } - catch (err) { + const createFirstDocumentMutation = useCreateFirstDocument({ + onError(error) { Toast.notify({ type: 'error', - message: `${err}`, + message: `${error}`, + }) + }, + }) + const createDocumentMutation = useCreateDocument(datasetId!, { + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) + + const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending + + const createHandle = async () => { + const params = getCreationParams() + if (!params) + return false + + if (!datasetId) { + await createFirstDocumentMutation.mutateAsync( + params, + { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) + }, + }, + ) + } + else { + await createDocumentMutation.mutateAsync(params, { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + }, }) } - finally { - setIsCreating(false) - } - } - - const handleSwitch = (state: boolean) => { - if (state) - setDocForm(DocForm.QA) - else - setDocForm(DocForm.TEXT) - } - - const previewSwitch = async (language?: string) => { - setPreviewSwitched(true) - setIsLanguageSelectDisabled(true) - if (segmentationType === SegmentType.AUTO) - setAutomaticFileIndexingEstimate(null) - else - setCustomFileIndexingEstimate(null) - try { - await fetchFileIndexingEstimate(DocForm.QA, language) - } - finally { - setIsLanguageSelectDisabled(false) - } - } - - const handleSelect = (language: string) => { - setDocLanguage(language) - // Switch language, re-cutter - if (docForm === DocForm.QA && previewSwitched) - previewSwitch(language) + if (mutateDatasetRes) + mutateDatasetRes() + onStepChange && onStepChange(+1) + isSetting && onSave && onSave() } const changeToEconomicalType = () => { - if (!hasSetIndexType) { + if (docForm !== ChunkingMode.text) + return + + if (!hasSetIndexType) setIndexType(IndexingType.ECONOMICAL) - setDocForm(DocForm.TEXT) - } } useEffect(() => { // fetch rules if (!isSetting) { - getRules() + fetchDefaultProcessRuleMutation.mutate('/datasets/process-rule') } else { getRulesFromDetail() getDefaultMode() } + // eslint-disable-next-line react-hooks/exhaustive-deps }, []) - useEffect(() => { - scrollRef.current?.addEventListener('scroll', scrollHandle) - return () => { - scrollRef.current?.removeEventListener('scroll', scrollHandle) - } - }, []) - - useLayoutEffect(() => { - if (showPreview) { - previewScrollRef.current?.addEventListener('scroll', previewScrollHandle) - return () => { - previewScrollRef.current?.removeEventListener('scroll', previewScrollHandle) - } - } - }, [showPreview]) - - useEffect(() => { - if (indexingType === IndexingType.ECONOMICAL && docForm === DocForm.QA) - setDocForm(DocForm.TEXT) - }, [indexingType, docForm]) - useEffect(() => { // get indexing type by props if (indexingType) @@ -565,20 +574,6 @@ const StepTwo = ({ setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL) }, [isAPIKeySet, indexingType, datasetId]) - useEffect(() => { - if (segmentationType === SegmentType.AUTO) { - setAutomaticFileIndexingEstimate(null) - !isMobile && setShowPreview() - fetchFileIndexingEstimate() - setPreviewSwitched(false) - } - else { - hidePreview() - setCustomFileIndexingEstimate(null) - setPreviewSwitched(false) - } - }, [segmentationType, indexType]) - const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || { search_method: RETRIEVE_METHOD.semantic, reranking_enable: false, @@ -591,433 +586,589 @@ const StepTwo = ({ score_threshold: 0.5, } as RetrievalConfig) + const economyDomRef = useRef(null) + const isHoveringEconomy = useHover(economyDomRef) + return (
-
-
- {t('datasetCreation.steps.two')} - {(isMobile || !showPreview) && ( - - )} -
-
-
{t('datasetCreation.stepTwo.segmentation')}
-
-
setSegmentationType(SegmentType.AUTO)} - > - - -
-
{t('datasetCreation.stepTwo.auto')}
-
{t('datasetCreation.stepTwo.autoDescription')}
-
-
-
setSegmentationType(SegmentType.CUSTOM)} - > - - -
-
{t('datasetCreation.stepTwo.custom')}
-
{t('datasetCreation.stepTwo.customDescription')}
-
- {segmentationType === SegmentType.CUSTOM && ( -
-
-
-
- {t('datasetCreation.stepTwo.separator')} - - {t('datasetCreation.stepTwo.separatorTip')} -
- } - /> -
- setSegmentIdentifier(e.target.value)} - /> -
-
-
-
-
{t('datasetCreation.stepTwo.maxLength')}
- setMaxChunkLength(parseInt(e.target.value.replace(/^0+/, ''), 10))} - /> -
-
-
-
-
- {t('datasetCreation.stepTwo.overlap')} - - {t('datasetCreation.stepTwo.overlapTip')} -
- } - /> -
- setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} - /> -
-
-
-
-
{t('datasetCreation.stepTwo.rules')}
- {rules.map(rule => ( -
- ruleChangeHandle(rule.id)} className="w-4 h-4 rounded border-gray-300 text-blue-700 focus:ring-blue-700" /> - -
- ))} -
-
-
- - -
-
- )} -
-
-
{t('datasetCreation.stepTwo.indexMode')}
-
-
- {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && ( -
{ - if (isAPIKeySet) - setIndexType(IndexingType.QUALIFIED) - }} - > - - {!hasSetIndexType && } -
-
- {t('datasetCreation.stepTwo.qualified')} - {!hasSetIndexType && {t('datasetCreation.stepTwo.recommend')}} -
-
{t('datasetCreation.stepTwo.qualifiedTip')}
-
- {!isAPIKeySet && ( -
- {t('datasetCreation.stepTwo.warning')}  - {t('datasetCreation.stepTwo.click')} -
- )} -
- )} - - {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && ( -
- - {!hasSetIndexType && } -
-
{t('datasetCreation.stepTwo.economical')}
-
{t('datasetCreation.stepTwo.economicalTip')}
-
-
- )} -
- {hasSetIndexType && indexType === IndexingType.ECONOMICAL && ( -
- {t('datasetCreation.stepTwo.indexSettingTip')} - {t('datasetCreation.stepTwo.datasetSettingLink')} -
- )} - {IS_CE_EDITION && indexType === IndexingType.QUALIFIED && ( -
-
-
- -
-
-
{t('datasetCreation.stepTwo.QATitle')}
-
- {t('datasetCreation.stepTwo.QALanguage')} - -
-
-
- -
-
- {docForm === DocForm.QA && !QATipHide && ( -
- {t('datasetCreation.stepTwo.QATip')} - setQATipHide(true)} /> -
- )} -
- )} - {/* Embedding model */} - {indexType === IndexingType.QUALIFIED && ( -
-
{t('datasetSettings.form.embeddingModel')}
- { - setEmbeddingModel(model) - }} +
+
{t('datasetCreation.stepTwo.segmentation')}
+ {((isInUpload && [ChunkingMode.text, ChunkingMode.qa].includes(currentDataset!.doc_form)) + || isUploadInEmptyDataset + || isInInit) + && } + activeHeaderClassName='bg-dataset-option-card-blue-gradient' + description={t('datasetCreation.stepTwo.generalTip')} + isActive={ + [ChunkingMode.text, ChunkingMode.qa].includes(currentDocForm) + } + onSwitched={() => + handleChangeDocform(ChunkingMode.text) + } + actions={ + <> + + + + } + noHighlight={isInUpload && isNotUploadInEmptyDataset} + > +
+
+ setSegmentIdentifier(e.target.value, true)} + /> + + - {!!datasetId && ( -
- {t('datasetCreation.stepTwo.indexSettingTip')} - {t('datasetCreation.stepTwo.datasetSettingLink')} -
- )}
- )} - {/* Retrieval Method Config */} -
- {!datasetId - ? ( -
-
{t('datasetSettings.form.retrievalSetting.title')}
-
- {t('datasetSettings.form.retrievalSetting.learnMore')} - {t('datasetSettings.form.retrievalSetting.longDescription')} +
+
+
+ {t('datasetCreation.stepTwo.rules')} +
+ +
+
+ {rules.map(rule => ( +
{ + ruleChangeHandle(rule.id) + }}> + +
-
- ) - : ( -
-
{t('datasetSettings.form.retrievalSetting.title')}
-
- )} - -
- { - getIndexing_technique() === IndexingType.QUALIFIED - ? ( - + +
+
{ + if (currentDataset?.doc_form) + return + if (docForm === ChunkingMode.qa) + handleChangeDocform(ChunkingMode.text) + else + handleChangeDocform(ChunkingMode.qa) + }}> + + +
+ - ) - : ( - - ) - } + +
+ {currentDocForm === ChunkingMode.qa && ( +
+ + + {t('datasetCreation.stepTwo.QATip')} + +
+ )} + } +
- -
-
- {dataSourceType === DataSourceType.FILE && ( - <> -
{t('datasetCreation.stepTwo.fileSource')}
-
- - {getFileName(files[0].name || '')} - {files.length > 1 && ( - - {t('datasetCreation.stepTwo.other')} - {files.length - 1} - {t('datasetCreation.stepTwo.fileUnit')} - - )} -
- - )} - {dataSourceType === DataSourceType.NOTION && ( - <> -
{t('datasetCreation.stepTwo.notionSource')}
-
- } + { + ( + (isInUpload && currentDataset!.doc_form === ChunkingMode.parentChild) + || isUploadInEmptyDataset + || isInInit + ) + && } + effectImg={OrangeEffect.src} + activeHeaderClassName='bg-dataset-option-card-orange-gradient' + description={t('datasetCreation.stepTwo.parentChildTip')} + isActive={currentDocForm === ChunkingMode.parentChild} + onSwitched={() => handleChangeDocform(ChunkingMode.parentChild)} + actions={ + <> + + + + } + noHighlight={isInUpload && isNotUploadInEmptyDataset} + > +
+
+
+
+ {t('datasetCreation.stepTwo.parentChunkForContext')} +
+ +
+ } + title={t('datasetCreation.stepTwo.paragraph')} + description={t('datasetCreation.stepTwo.paragraphTip')} + isChosen={parentChildConfig.chunkForContext === 'paragraph'} + onChosen={() => setParentChildConfig( + { + ...parentChildConfig, + chunkForContext: 'paragraph', + }, + )} + chosenConfig={ +
+ setParentChildConfig({ + ...parentChildConfig, + parent: { + ...parentChildConfig.parent, + delimiter: e.target.value ? escape(e.target.value) : '', + }, + })} + /> + setParentChildConfig({ + ...parentChildConfig, + parent: { + ...parentChildConfig.parent, + maxLength: value, + }, + })} /> - {notionPages[0]?.page_name} - {notionPages.length > 1 && ( - - {t('datasetCreation.stepTwo.other')} - {notionPages.length - 1} - {t('datasetCreation.stepTwo.notionUnit')} - - )}
- - )} - {dataSourceType === DataSourceType.WEB && ( - <> -
{t('datasetCreation.stepTwo.websiteSource')}
-
- - {websitePages[0].source_url} - {websitePages.length > 1 && ( - - {t('datasetCreation.stepTwo.other')} - {websitePages.length - 1} - {t('datasetCreation.stepTwo.webpageUnit')} - - )} -
- - )} -
-
-
-
{t('datasetCreation.stepTwo.estimateSegment')}
-
- { - fileIndexingEstimate - ? ( -
{formatNumber(fileIndexingEstimate.total_segments)}
- ) - : ( -
{t('datasetCreation.stepTwo.calculating')}
- ) } + /> + } + title={t('datasetCreation.stepTwo.fullDoc')} + description={t('datasetCreation.stepTwo.fullDocTip')} + onChosen={() => setParentChildConfig( + { + ...parentChildConfig, + chunkForContext: 'full-doc', + }, + )} + isChosen={parentChildConfig.chunkForContext === 'full-doc'} + /> +
+ +
+
+
+ {t('datasetCreation.stepTwo.childChunkForRetrieval')} +
+ +
+
+ setParentChildConfig({ + ...parentChildConfig, + child: { + ...parentChildConfig.child, + delimiter: e.target.value ? escape(e.target.value) : '', + }, + })} + /> + setParentChildConfig({ + ...parentChildConfig, + child: { + ...parentChildConfig.child, + maxLength: value, + }, + })} + /> +
+
+
+
+
+ {t('datasetCreation.stepTwo.rules')} +
+ +
+
+ {rules.map(rule => ( +
{ + ruleChangeHandle(rule.id) + }}> + + +
+ ))}
- {!isSetting - ? ( -
- -
- + } + +
{t('datasetCreation.stepTwo.indexMode')}
+
+ {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && ( + + {t('datasetCreation.stepTwo.qualified')} + + {t('datasetCreation.stepTwo.recommend')} + + + {!hasSetIndexType && } + +
} + description={t('datasetCreation.stepTwo.qualifiedTip')} + icon={} + isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED} + disabled={!isAPIKeySet || hasSetIndexType} + onSwitched={() => { + if (isAPIKeySet) + setIndexType(IndexingType.QUALIFIED) + }} + /> + )} + + {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && ( + <> + setIsQAConfirmDialogOpen(false)} className='w-[432px]'> +
+

+ {t('datasetCreation.stepTwo.qaSwitchHighQualityTipTitle')} +

+

+ {t('datasetCreation.stepTwo.qaSwitchHighQualityTipContent')} +

+
+
+ +
- ) - : ( -
- - -
- )} -
+ + + + } + isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL} + disabled={!isAPIKeySet || hasSetIndexType || docForm !== ChunkingMode.text} + ref={economyDomRef} + onSwitched={() => { + if (isAPIKeySet && docForm === ChunkingMode.text) + setIndexType(IndexingType.ECONOMICAL) + }} + /> + + +
+ { + docForm === ChunkingMode.qa + ? t('datasetCreation.stepTwo.notAvailableForQA') + : t('datasetCreation.stepTwo.notAvailableForParentChild') + } +
+
+
+ )}
-
- - {showPreview &&
-
-
-
-
{t('datasetCreation.stepTwo.previewTitle')}
- {docForm === DocForm.QA && !previewSwitched && ( - - )} -
-
- -
-
- {docForm === DocForm.QA && !previewSwitched && ( -
- {t('datasetCreation.stepTwo.previewSwitchTipStart')} - {t('datasetCreation.stepTwo.previewSwitchTipEnd')} -
- )} -
-
- {previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && ( - <> - {fileIndexingEstimate?.qa_preview.map((item, index) => ( - - ))} - - )} - {(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && ( - <> - {fileIndexingEstimate?.preview.map((item, index) => ( - - ))} - - )} - {previewSwitched && docForm === DocForm.QA && !fileIndexingEstimate?.qa_preview && ( -
- -
- )} - {!previewSwitched && !fileIndexingEstimate?.preview && ( -
- -
- )} -
-
} - {!showPreview && ( -
-
- -
{t('datasetCreation.stepTwo.sideTipTitle')}
-
-

{t('datasetCreation.stepTwo.sideTipP1')}

-

{t('datasetCreation.stepTwo.sideTipP2')}

-

{t('datasetCreation.stepTwo.sideTipP3')}

-

{t('datasetCreation.stepTwo.sideTipP4')}

-
+ {!hasSetIndexType && indexType === IndexingType.QUALIFIED && ( +
+
+
+
+ {t('datasetCreation.stepTwo.highQualityTip')}
)} + {hasSetIndexType && indexType === IndexingType.ECONOMICAL && ( +
+ {t('datasetCreation.stepTwo.indexSettingTip')} + {t('datasetCreation.stepTwo.datasetSettingLink')} +
+ )} + {/* Embedding model */} + {indexType === IndexingType.QUALIFIED && ( +
+
{t('datasetSettings.form.embeddingModel')}
+ { + setEmbeddingModel(model) + }} + /> + {!!datasetId && ( +
+ {t('datasetCreation.stepTwo.indexSettingTip')} + {t('datasetCreation.stepTwo.datasetSettingLink')} +
+ )} +
+ )} + + {/* Retrieval Method Config */} +
+ {!datasetId + ? ( +
+
{t('datasetSettings.form.retrievalSetting.title')}
+
+ {t('datasetSettings.form.retrievalSetting.learnMore')} + {t('datasetSettings.form.retrievalSetting.longDescription')} +
+
+ ) + : ( +
+
{t('datasetSettings.form.retrievalSetting.title')}
+
+ )} + +
+ { + getIndexing_technique() === IndexingType.QUALIFIED + ? ( + + ) + : ( + + ) + } +
+
+ + {!isSetting + ? ( +
+ + +
+ ) + : ( +
+ + +
+ )} +
+ { }} footer={null}> + +
+ {dataSourceType === DataSourceType.FILE + && >} + onChange={(selected) => { + currentEstimateMutation.reset() + setPreviewFile(selected) + currentEstimateMutation.mutate() + }} + // when it is from setting, it just has one file + value={isSetting ? (files[0]! as Required) : previewFile} + /> + } + {dataSourceType === DataSourceType.NOTION + && ({ + id: page.page_id, + name: page.page_name, + extension: 'md', + })) + } + onChange={(selected) => { + currentEstimateMutation.reset() + const selectedPage = notionPages.find(page => page.page_id === selected.id) + setPreviewNotionPage(selectedPage!) + currentEstimateMutation.mutate() + }} + value={{ + id: previewNotionPage?.page_id || '', + name: previewNotionPage?.page_name || '', + extension: 'md', + }} + /> + } + {dataSourceType === DataSourceType.WEB + && ({ + id: page.source_url, + name: page.title, + extension: 'md', + })) + } + onChange={(selected) => { + currentEstimateMutation.reset() + const selectedPage = websitePages.find(page => page.source_url === selected.id) + setPreviewWebsitePage(selectedPage!) + currentEstimateMutation.mutate() + }} + value={ + { + id: previewWebsitePage?.source_url || '', + name: previewWebsitePage?.title || '', + extension: 'md', + } + } + /> + } + { + currentDocForm !== ChunkingMode.qa + && + } +
+ } + className={cn('flex shrink-0 w-1/2 p-4 pr-0 relative h-full', isMobile && 'w-full max-w-[524px]')} + mainClassName='space-y-6' + > + {currentDocForm === ChunkingMode.qa && estimate?.qa_preview && ( + estimate?.qa_preview.map((item, index) => ( + + + + )) + )} + {currentDocForm === ChunkingMode.text && estimate?.preview && ( + estimate?.preview.map((item, index) => ( + + {item.content} + + )) + )} + {currentDocForm === ChunkingMode.parentChild && currentEstimateMutation.data?.preview && ( + estimate?.preview?.map((item, index) => { + const indexForLabel = index + 1 + return ( + + + {item.child_chunks.map((child, index) => { + const indexForLabel = index + 1 + return ( + + ) + })} + + + ) + }) + )} + {currentEstimateMutation.isIdle && ( +
+
+ +

+ {t('datasetCreation.stepTwo.previewChunkTip')} +

+
+
+ )} + {currentEstimateMutation.isPending && ( +
+ {Array.from({ length: 10 }, (_, i) => ( + + + + + + + + + + + ))} +
+ )} +
) diff --git a/web/app/components/datasets/create/step-two/inputs.tsx b/web/app/components/datasets/create/step-two/inputs.tsx new file mode 100644 index 0000000000..4231f6242d --- /dev/null +++ b/web/app/components/datasets/create/step-two/inputs.tsx @@ -0,0 +1,77 @@ +import type { FC, PropsWithChildren, ReactNode } from 'react' +import { useTranslation } from 'react-i18next' +import type { InputProps } from '@/app/components/base/input' +import Input from '@/app/components/base/input' +import Tooltip from '@/app/components/base/tooltip' +import type { InputNumberProps } from '@/app/components/base/input-number' +import { InputNumber } from '@/app/components/base/input-number' + +const TextLabel: FC = (props) => { + return +} + +const FormField: FC> = (props) => { + return
+ {props.label} + {props.children} +
+} + +export const DelimiterInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.separator')} + + {props.tooltip || t('datasetCreation.stepTwo.separatorTip')} +
+ } + /> +
}> + + +} + +export const MaxLengthInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.maxLength')} +
}> + + +} + +export const OverlapInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.overlap')} + + {t('datasetCreation.stepTwo.overlapTip')} +
+ } + /> +
}> + + +} diff --git a/web/app/components/datasets/create/step-two/language-select/index.tsx b/web/app/components/datasets/create/step-two/language-select/index.tsx index 41f3e0abb5..9cbf1a40d1 100644 --- a/web/app/components/datasets/create/step-two/language-select/index.tsx +++ b/web/app/components/datasets/create/step-two/language-select/index.tsx @@ -1,7 +1,7 @@ 'use client' import type { FC } from 'react' import React from 'react' -import { RiArrowDownSLine } from '@remixicon/react' +import { RiArrowDownSLine, RiCheckLine } from '@remixicon/react' import cn from '@/utils/classnames' import Popover from '@/app/components/base/popover' import { languages } from '@/i18n/language' @@ -22,25 +22,40 @@ const LanguageSelect: FC = ({ manualClose trigger='click' disabled={disabled} + popupClassName='z-20' htmlContent={ -
+
{languages.filter(language => language.supported).map(({ prompt_name }) => (
onSelect(prompt_name)}>{prompt_name} + className='w-full py-2 px-3 inline-flex items-center justify-between hover:bg-state-base-hover rounded-lg cursor-pointer' + onClick={() => onSelect(prompt_name)} + > + {prompt_name} + {(currentLanguage === prompt_name) && }
))}
} btnElement={ -
- {currentLanguage} - +
+ + {currentLanguage} + +
} - btnClassName={open => cn('!border-0 !px-0 !py-0 !bg-inherit !hover:bg-inherit', open ? 'text-blue-600' : 'text-gray-500')} - className='!w-[120px] h-fit !z-20 !translate-x-0 !left-[-16px]' + btnClassName={() => cn( + '!border-0 rounded-md !px-1.5 !py-1 !mx-1 !bg-components-button-tertiary-bg !hover:bg-components-button-tertiary-bg', + disabled ? 'bg-components-button-tertiary-bg-disabled' : '', + )} + className='!w-[140px] h-fit !z-20 !translate-x-0 !left-1' /> ) } diff --git a/web/app/components/datasets/create/step-two/option-card.tsx b/web/app/components/datasets/create/step-two/option-card.tsx new file mode 100644 index 0000000000..d0efdaabb1 --- /dev/null +++ b/web/app/components/datasets/create/step-two/option-card.tsx @@ -0,0 +1,98 @@ +import { type ComponentProps, type FC, type ReactNode, forwardRef } from 'react' +import Image from 'next/image' +import classNames from '@/utils/classnames' + +const TriangleArrow: FC> = props => ( + + + +) + +type OptionCardHeaderProps = { + icon: ReactNode + title: ReactNode + description: string + isActive?: boolean + activeClassName?: string + effectImg?: string +} + +export const OptionCardHeader: FC = (props) => { + const { icon, title, description, isActive, activeClassName, effectImg } = props + return
+
+ {isActive && effectImg && } +
+
+ {icon} +
+
+
+ +
+
{title}
+
{description}
+
+
+} + +type OptionCardProps = { + icon: ReactNode + className?: string + activeHeaderClassName?: string + title: ReactNode + description: string + isActive?: boolean + actions?: ReactNode + effectImg?: string + onSwitched?: () => void + noHighlight?: boolean + disabled?: boolean +} & Omit, 'title' | 'onClick'> + +export const OptionCard: FC = forwardRef((props, ref) => { + const { icon, className, title, description, isActive, children, actions, activeHeaderClassName, style, effectImg, onSwitched, noHighlight, disabled, ...rest } = props + return
{ + if (!isActive && !disabled) + onSwitched?.() + }} + {...rest} + ref={ref} + > + + {/** Body */} + {isActive && (children || actions) &&
+ {children} + {actions &&
+ {actions} +
+ } +
} +
+}) + +OptionCard.displayName = 'OptionCard' diff --git a/web/app/components/datasets/create/stepper/index.tsx b/web/app/components/datasets/create/stepper/index.tsx new file mode 100644 index 0000000000..317c1a76ee --- /dev/null +++ b/web/app/components/datasets/create/stepper/index.tsx @@ -0,0 +1,27 @@ +import { type FC, Fragment } from 'react' +import type { Step } from './step' +import { StepperStep } from './step' + +export type StepperProps = { + steps: Step[] + activeIndex: number +} + +export const Stepper: FC = (props) => { + const { steps, activeIndex } = props + return
+ {steps.map((step, index) => { + const isLast = index === steps.length - 1 + return ( + + + {!isLast &&
} + + ) + })} +
+} diff --git a/web/app/components/datasets/create/stepper/step.tsx b/web/app/components/datasets/create/stepper/step.tsx new file mode 100644 index 0000000000..c230de1a6e --- /dev/null +++ b/web/app/components/datasets/create/stepper/step.tsx @@ -0,0 +1,46 @@ +import type { FC } from 'react' +import classNames from '@/utils/classnames' + +export type Step = { + name: string +} + +export type StepperStepProps = Step & { + index: number + activeIndex: number +} + +export const StepperStep: FC = (props) => { + const { name, activeIndex, index } = props + const isActive = index === activeIndex + const isDisabled = activeIndex < index + const label = isActive ? `STEP ${index + 1}` : `${index + 1}` + return
+
+
+ {label} +
+
+
{name}
+
+} diff --git a/web/app/components/datasets/create/top-bar/index.tsx b/web/app/components/datasets/create/top-bar/index.tsx new file mode 100644 index 0000000000..20ba7158db --- /dev/null +++ b/web/app/components/datasets/create/top-bar/index.tsx @@ -0,0 +1,41 @@ +import type { FC } from 'react' +import { RiArrowLeftLine } from '@remixicon/react' +import Link from 'next/link' +import { useTranslation } from 'react-i18next' +import { Stepper, type StepperProps } from '../stepper' +import classNames from '@/utils/classnames' + +export type TopbarProps = Pick & { + className?: string +} + +const STEP_T_MAP: Record = { + 1: 'datasetCreation.steps.one', + 2: 'datasetCreation.steps.two', + 3: 'datasetCreation.steps.three', +} + +export const Topbar: FC = (props) => { + const { className, ...rest } = props + const { t } = useTranslation() + return
+ +
+ +
+

+ {t('datasetCreation.steps.header.creation')} +

+ +
+ ({ + name: t(STEP_T_MAP[i + 1]), + }))} + {...rest} + /> +
+
+} diff --git a/web/app/components/datasets/create/website/base/error-message.tsx b/web/app/components/datasets/create/website/base/error-message.tsx index aa337ec4bf..f061c4624e 100644 --- a/web/app/components/datasets/create/website/base/error-message.tsx +++ b/web/app/components/datasets/create/website/base/error-message.tsx @@ -18,7 +18,7 @@ const ErrorMessage: FC = ({ return (
- +
{title}
{errorMsg && ( diff --git a/web/app/components/datasets/create/website/jina-reader/index.tsx b/web/app/components/datasets/create/website/jina-reader/index.tsx index 51d77d7121..1c133f935c 100644 --- a/web/app/components/datasets/create/website/jina-reader/index.tsx +++ b/web/app/components/datasets/create/website/jina-reader/index.tsx @@ -94,7 +94,6 @@ const JinaReader: FC = ({ const waitForCrawlFinished = useCallback(async (jobId: string) => { try { const res = await checkJinaReaderTaskStatus(jobId) as any - console.log('res', res) if (res.status === 'completed') { return { isError: false, diff --git a/web/app/components/datasets/create/website/preview.tsx b/web/app/components/datasets/create/website/preview.tsx index 65abe83ed7..5180a83442 100644 --- a/web/app/components/datasets/create/website/preview.tsx +++ b/web/app/components/datasets/create/website/preview.tsx @@ -18,7 +18,7 @@ const WebsitePreview = ({ const { t } = useTranslation() return ( -
+
{t('datasetCreation.stepOne.pagePreview')} @@ -32,7 +32,7 @@ const WebsitePreview = ({
{payload.source_url}
-
{payload.markdown}
+
{payload.markdown}
) diff --git a/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx b/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx index 36216aa7c8..6602244a48 100644 --- a/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx +++ b/web/app/components/datasets/documents/detail/batch-modal/csv-downloader.tsx @@ -7,7 +7,7 @@ import { import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import { Download02 as DownloadIcon } from '@/app/components/base/icons/src/vender/solid/general' -import { DocForm } from '@/models/datasets' +import { ChunkingMode } from '@/models/datasets' import I18n from '@/context/i18n' import { LanguagesSupported } from '@/i18n/language' @@ -32,18 +32,18 @@ const CSV_TEMPLATE_CN = [ ['内容 2'], ] -const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => { +const CSVDownload: FC<{ docForm: ChunkingMode }> = ({ docForm }) => { const { t } = useTranslation() const { locale } = useContext(I18n) const { CSVDownloader, Type } = useCSVDownloader() const getTemplate = () => { if (locale === LanguagesSupported[1]) { - if (docForm === DocForm.QA) + if (docForm === ChunkingMode.qa) return CSV_TEMPLATE_QA_CN return CSV_TEMPLATE_CN } - if (docForm === DocForm.QA) + if (docForm === ChunkingMode.qa) return CSV_TEMPLATE_QA_EN return CSV_TEMPLATE_EN } @@ -52,7 +52,7 @@ const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => {
{t('share.generation.csvStructureTitle')}
- {docForm === DocForm.QA && ( + {docForm === ChunkingMode.qa && ( @@ -72,7 +72,7 @@ const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => {
)} - {docForm === DocForm.TEXT && ( + {docForm === ChunkingMode.text && ( @@ -97,7 +97,7 @@ const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => { bom={true} data={getTemplate()} > -
+
{t('datasetDocuments.list.batchModal.template')}
diff --git a/web/app/components/datasets/documents/detail/batch-modal/index.tsx b/web/app/components/datasets/documents/detail/batch-modal/index.tsx index 139a364cb4..c666ba6715 100644 --- a/web/app/components/datasets/documents/detail/batch-modal/index.tsx +++ b/web/app/components/datasets/documents/detail/batch-modal/index.tsx @@ -7,11 +7,11 @@ import CSVUploader from './csv-uploader' import CSVDownloader from './csv-downloader' import Button from '@/app/components/base/button' import Modal from '@/app/components/base/modal' -import type { DocForm } from '@/models/datasets' +import type { ChunkingMode } from '@/models/datasets' export type IBatchModalProps = { isShow: boolean - docForm: DocForm + docForm: ChunkingMode onCancel: () => void onConfirm: (file: File) => void } diff --git a/web/app/components/datasets/documents/detail/completed/InfiniteVirtualList.tsx b/web/app/components/datasets/documents/detail/completed/InfiniteVirtualList.tsx deleted file mode 100644 index 7b510bcf21..0000000000 --- a/web/app/components/datasets/documents/detail/completed/InfiniteVirtualList.tsx +++ /dev/null @@ -1,98 +0,0 @@ -import type { CSSProperties, FC } from 'react' -import React from 'react' -import { FixedSizeList as List } from 'react-window' -import InfiniteLoader from 'react-window-infinite-loader' -import SegmentCard from './SegmentCard' -import s from './style.module.css' -import type { SegmentDetailModel } from '@/models/datasets' - -type IInfiniteVirtualListProps = { - hasNextPage?: boolean // Are there more items to load? (This information comes from the most recent API request.) - isNextPageLoading: boolean // Are we currently loading a page of items? (This may be an in-flight flag in your Redux store for example.) - items: Array // Array of items loaded so far. - loadNextPage: () => Promise // Callback function responsible for loading the next page of items. - onClick: (detail: SegmentDetailModel) => void - onChangeSwitch: (segId: string, enabled: boolean) => Promise - onDelete: (segId: string) => Promise - archived?: boolean - embeddingAvailable: boolean -} - -const InfiniteVirtualList: FC = ({ - hasNextPage, - isNextPageLoading, - items, - loadNextPage, - onClick: onClickCard, - onChangeSwitch, - onDelete, - archived, - embeddingAvailable, -}) => { - // If there are more items to be loaded then add an extra row to hold a loading indicator. - const itemCount = hasNextPage ? items.length + 1 : items.length - - // Only load 1 page of items at a time. - // Pass an empty callback to InfiniteLoader in case it asks us to load more than once. - const loadMoreItems = isNextPageLoading ? () => { } : loadNextPage - - // Every row is loaded except for our loading indicator row. - const isItemLoaded = (index: number) => !hasNextPage || index < items.length - - // Render an item or a loading indicator. - const Item = ({ index, style }: { index: number; style: CSSProperties }) => { - let content - if (!isItemLoaded(index)) { - content = ( - <> - {[1, 2, 3].map(v => ( - - ))} - - ) - } - else { - content = items[index].map(segItem => ( - onClickCard(segItem)} - onChangeSwitch={onChangeSwitch} - onDelete={onDelete} - loading={false} - archived={archived} - embeddingAvailable={embeddingAvailable} - /> - )) - } - - return ( -
- {content} -
- ) - } - - return ( - - {({ onItemsRendered, ref }) => ( - - {Item} - - )} - - ) -} -export default InfiniteVirtualList diff --git a/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx b/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx index 5b76acc936..264d62b68a 100644 --- a/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx +++ b/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx @@ -6,9 +6,9 @@ import { RiDeleteBinLine, } from '@remixicon/react' import { StatusItem } from '../../list' -import { DocumentTitle } from '../index' +import style from '../../style.module.css' import s from './style.module.css' -import { SegmentIndexTag } from './index' +import { SegmentIndexTag } from './common/segment-index-tag' import cn from '@/utils/classnames' import Confirm from '@/app/components/base/confirm' import Switch from '@/app/components/base/switch' @@ -31,6 +31,22 @@ const ProgressBar: FC<{ percent: number; loading: boolean }> = ({ percent, loadi ) } +type DocumentTitleProps = { + extension?: string + name?: string + iconCls?: string + textCls?: string + wrapperCls?: string +} + +const DocumentTitle: FC = ({ extension, name, iconCls, textCls, wrapperCls }) => { + const localExtension = extension?.toLowerCase() || name?.split('.')?.pop()?.toLowerCase() + return
+
+ {name || '--'} +
+} + export type UsageScene = 'doc' | 'hitTesting' type ISegmentCardProps = { diff --git a/web/app/components/datasets/documents/detail/completed/child-segment-detail.tsx b/web/app/components/datasets/documents/detail/completed/child-segment-detail.tsx new file mode 100644 index 0000000000..085bfddc16 --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/child-segment-detail.tsx @@ -0,0 +1,134 @@ +import React, { type FC, useMemo, useState } from 'react' +import { useTranslation } from 'react-i18next' +import { + RiCloseLine, + RiExpandDiagonalLine, +} from '@remixicon/react' +import ActionButtons from './common/action-buttons' +import ChunkContent from './common/chunk-content' +import Dot from './common/dot' +import { SegmentIndexTag } from './common/segment-index-tag' +import { useSegmentListContext } from './index' +import type { ChildChunkDetail, ChunkingMode } from '@/models/datasets' +import { useEventEmitterContextContext } from '@/context/event-emitter' +import { formatNumber } from '@/utils/format' +import classNames from '@/utils/classnames' +import Divider from '@/app/components/base/divider' +import { formatTime } from '@/utils/time' + +type IChildSegmentDetailProps = { + chunkId: string + childChunkInfo?: Partial & { id: string } + onUpdate: (segmentId: string, childChunkId: string, content: string) => void + onCancel: () => void + docForm: ChunkingMode +} + +/** + * Show all the contents of the segment + */ +const ChildSegmentDetail: FC = ({ + chunkId, + childChunkInfo, + onUpdate, + onCancel, + docForm, +}) => { + const { t } = useTranslation() + const [content, setContent] = useState(childChunkInfo?.content || '') + const { eventEmitter } = useEventEmitterContextContext() + const [loading, setLoading] = useState(false) + const fullScreen = useSegmentListContext(s => s.fullScreen) + const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) + + eventEmitter?.useSubscription((v) => { + if (v === 'update-child-segment') + setLoading(true) + if (v === 'update-child-segment-done') + setLoading(false) + }) + + const handleCancel = () => { + onCancel() + setContent(childChunkInfo?.content || '') + } + + const handleSave = () => { + onUpdate(chunkId, childChunkInfo?.id || '', content) + } + + const wordCountText = useMemo(() => { + const count = content.length + return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}` + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [content.length]) + + const EditTimeText = useMemo(() => { + const timeText = formatTime({ + date: (childChunkInfo?.updated_at ?? 0) * 1000, + dateFormat: 'MM/DD/YYYY h:mm:ss', + }) + return `${t('datasetDocuments.segment.editedAt')} ${timeText}` + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [childChunkInfo?.updated_at]) + + return ( +
+
+
+
{t('datasetDocuments.segment.editChildChunk')}
+
+ + + {wordCountText} + + + {EditTimeText} + +
+
+
+ {fullScreen && ( + <> + + + + )} +
+ +
+
+ +
+
+
+
+
+ setContent(content)} + isEditMode={true} + /> +
+
+ {!fullScreen && ( +
+ +
+ )} +
+ ) +} + +export default React.memo(ChildSegmentDetail) diff --git a/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx b/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx new file mode 100644 index 0000000000..1615ea98cf --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx @@ -0,0 +1,195 @@ +import { type FC, useMemo, useState } from 'react' +import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import { EditSlice } from '../../../formatted-text/flavours/edit-slice' +import { useDocumentContext } from '../index' +import { FormattedText } from '../../../formatted-text/formatted' +import Empty from './common/empty' +import FullDocListSkeleton from './skeleton/full-doc-list-skeleton' +import { useSegmentListContext } from './index' +import type { ChildChunkDetail } from '@/models/datasets' +import Input from '@/app/components/base/input' +import classNames from '@/utils/classnames' +import Divider from '@/app/components/base/divider' +import { formatNumber } from '@/utils/format' + +type IChildSegmentCardProps = { + childChunks: ChildChunkDetail[] + parentChunkId: string + handleInputChange?: (value: string) => void + handleAddNewChildChunk?: (parentChunkId: string) => void + enabled: boolean + onDelete?: (segId: string, childChunkId: string) => Promise + onClickSlice?: (childChunk: ChildChunkDetail) => void + total?: number + inputValue?: string + onClearFilter?: () => void + isLoading?: boolean + focused?: boolean +} + +const ChildSegmentList: FC = ({ + childChunks, + parentChunkId, + handleInputChange, + handleAddNewChildChunk, + enabled, + onDelete, + onClickSlice, + total, + inputValue, + onClearFilter, + isLoading, + focused = false, +}) => { + const { t } = useTranslation() + const parentMode = useDocumentContext(s => s.parentMode) + const currChildChunk = useSegmentListContext(s => s.currChildChunk) + + const [collapsed, setCollapsed] = useState(true) + + const toggleCollapse = () => { + setCollapsed(!collapsed) + } + + const isParagraphMode = useMemo(() => { + return parentMode === 'paragraph' + }, [parentMode]) + + const isFullDocMode = useMemo(() => { + return parentMode === 'full-doc' + }, [parentMode]) + + const contentOpacity = useMemo(() => { + return (enabled || focused) ? '' : 'opacity-50 group-hover/card:opacity-100' + }, [enabled, focused]) + + const totalText = useMemo(() => { + const isSearch = inputValue !== '' && isFullDocMode + if (!isSearch) { + const text = isFullDocMode + ? !total + ? '--' + : formatNumber(total) + : formatNumber(childChunks.length) + const count = isFullDocMode + ? text === '--' + ? 0 + : total + : childChunks.length + return `${text} ${t('datasetDocuments.segment.childChunks', { count })}` + } + else { + const text = !total ? '--' : formatNumber(total) + const count = text === '--' ? 0 : total + return `${count} ${t('datasetDocuments.segment.searchResults', { count })}` + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isFullDocMode, total, childChunks.length, inputValue]) + + return ( +
+ {isFullDocMode ? : null} +
+
{ + event.stopPropagation() + toggleCollapse() + }} + > + { + isParagraphMode + ? collapsed + ? ( + + ) + : () + : null + } + {totalText} + · + +
+ {isFullDocMode + ? handleInputChange?.(e.target.value)} + onClear={() => handleInputChange?.('')} + /> + : null} +
+ {isLoading ? : null} + {((isFullDocMode && !isLoading) || !collapsed) + ?
+ {isParagraphMode && ( +
+ +
+ )} + {childChunks.length > 0 + ? + {childChunks.map((childChunk) => { + const edited = childChunk.updated_at !== childChunk.created_at + const focused = currChildChunk?.childChunkInfo?.id === childChunk.id + return onDelete?.(childChunk.segment_id, childChunk.id)} + labelClassName={focused ? 'bg-state-accent-solid text-text-primary-on-surface' : ''} + labelInnerClassName={'text-[10px] font-semibold align-bottom leading-6'} + contentClassName={classNames('!leading-6', focused ? 'bg-state-accent-hover-alt text-text-primary' : '')} + showDivider={false} + onClick={(e) => { + e.stopPropagation() + onClickSlice?.(childChunk) + }} + offsetOptions={({ rects }) => { + return { + mainAxis: isFullDocMode ? -rects.floating.width : 12 - rects.floating.width, + crossAxis: (20 - rects.floating.height) / 2, + } + }} + /> + })} + + : inputValue !== '' + ?
+ +
+ : null + } +
+ : null} +
+ ) +} + +export default ChildSegmentList diff --git a/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx b/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx new file mode 100644 index 0000000000..1238d98a9c --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/action-buttons.tsx @@ -0,0 +1,86 @@ +import React, { type FC, useMemo } from 'react' +import { useTranslation } from 'react-i18next' +import { useKeyPress } from 'ahooks' +import { useDocumentContext } from '../../index' +import Button from '@/app/components/base/button' +import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '@/app/components/workflow/utils' + +type IActionButtonsProps = { + handleCancel: () => void + handleSave: () => void + loading: boolean + actionType?: 'edit' | 'add' + handleRegeneration?: () => void + isChildChunk?: boolean +} + +const ActionButtons: FC = ({ + handleCancel, + handleSave, + loading, + actionType = 'edit', + handleRegeneration, + isChildChunk = false, +}) => { + const { t } = useTranslation() + const mode = useDocumentContext(s => s.mode) + const parentMode = useDocumentContext(s => s.parentMode) + + useKeyPress(['esc'], (e) => { + e.preventDefault() + handleCancel() + }) + + useKeyPress(`${getKeyboardKeyCodeBySystem('ctrl')}.s`, (e) => { + e.preventDefault() + if (loading) + return + handleSave() + } + , { exactMatch: true, useCapture: true }) + + const isParentChildParagraphMode = useMemo(() => { + return mode === 'hierarchical' && parentMode === 'paragraph' + }, [mode, parentMode]) + + return ( +
+ + {(isParentChildParagraphMode && actionType === 'edit' && !isChildChunk) + ? + : null + } + +
+ ) +} + +ActionButtons.displayName = 'ActionButtons' + +export default React.memo(ActionButtons) diff --git a/web/app/components/datasets/documents/detail/completed/common/add-another.tsx b/web/app/components/datasets/documents/detail/completed/common/add-another.tsx new file mode 100644 index 0000000000..444560e55f --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/add-another.tsx @@ -0,0 +1,32 @@ +import React, { type FC } from 'react' +import { useTranslation } from 'react-i18next' +import classNames from '@/utils/classnames' +import Checkbox from '@/app/components/base/checkbox' + +type AddAnotherProps = { + className?: string + isChecked: boolean + onCheck: () => void +} + +const AddAnother: FC = ({ + className, + isChecked, + onCheck, +}) => { + const { t } = useTranslation() + + return ( +
+ + {t('datasetDocuments.segment.addAnother')} +
+ ) +} + +export default React.memo(AddAnother) diff --git a/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx b/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx new file mode 100644 index 0000000000..3dd3689b64 --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx @@ -0,0 +1,103 @@ +import React, { type FC } from 'react' +import { RiArchive2Line, RiCheckboxCircleLine, RiCloseCircleLine, RiDeleteBinLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' +import { useBoolean } from 'ahooks' +import Divider from '@/app/components/base/divider' +import classNames from '@/utils/classnames' +import Confirm from '@/app/components/base/confirm' + +const i18nPrefix = 'dataset.batchAction' +type IBatchActionProps = { + className?: string + selectedIds: string[] + onBatchEnable: () => void + onBatchDisable: () => void + onBatchDelete: () => Promise + onArchive?: () => void + onCancel: () => void +} + +const BatchAction: FC = ({ + className, + selectedIds, + onBatchEnable, + onBatchDisable, + onArchive, + onBatchDelete, + onCancel, +}) => { + const { t } = useTranslation() + const [isShowDeleteConfirm, { + setTrue: showDeleteConfirm, + setFalse: hideDeleteConfirm, + }] = useBoolean(false) + const [isDeleting, { + setTrue: setIsDeleting, + }] = useBoolean(false) + + const handleBatchDelete = async () => { + setIsDeleting() + await onBatchDelete() + hideDeleteConfirm() + } + return ( +
+
+
+ + {selectedIds.length} + + {t(`${i18nPrefix}.selected`)} +
+ +
+ + +
+
+ + +
+ {onArchive && ( +
+ + +
+ )} +
+ + +
+ + + +
+ { + isShowDeleteConfirm && ( + + ) + } +
+ ) +} + +export default React.memo(BatchAction) diff --git a/web/app/components/datasets/documents/detail/completed/common/chunk-content.tsx b/web/app/components/datasets/documents/detail/completed/common/chunk-content.tsx new file mode 100644 index 0000000000..e6403fa12f --- /dev/null +++ b/web/app/components/datasets/documents/detail/completed/common/chunk-content.tsx @@ -0,0 +1,192 @@ +import React, { useEffect, useRef, useState } from 'react' +import type { ComponentProps, FC } from 'react' +import { useTranslation } from 'react-i18next' +import { ChunkingMode } from '@/models/datasets' +import classNames from '@/utils/classnames' + +type IContentProps = ComponentProps<'textarea'> + +const Textarea: FC = React.memo(({ + value, + placeholder, + className, + disabled, + ...rest +}) => { + return ( +