Merge branch 'main' into 4-27-app-deploy

Stephen Zhou 2026-05-12 17:48:47 +08:00 committed by GitHub
commit 86e88ef6c8
77 changed files with 1490 additions and 1614 deletions

View File

@ -181,7 +181,6 @@ def initialize_extensions(app: DifyApp):
ext_import_modules,
ext_orjson,
ext_forward_refs,
ext_set_secretkey,
ext_compress,
ext_code_based_extension,
ext_database,
@ -189,6 +188,7 @@ def initialize_extensions(app: DifyApp):
ext_migrate,
ext_redis,
ext_storage,
ext_set_secretkey,
ext_logstore, # Initialize logstore after storage, before celery
ext_celery,
ext_login,

View File

@ -23,9 +23,9 @@ class SecurityConfig(BaseSettings):
"""
SECRET_KEY: str = Field(
description="Secret key for secure session cookie signing."
"Make sure you are changing this key for your deployment with a strong key."
"Generate a strong key using `openssl rand -base64 42` or set via the `SECRET_KEY` environment variable.",
description="Secret key for secure session cookie signing. "
"Leave empty to let Dify generate a persistent key in the storage directory, "
"or set a strong value via the `SECRET_KEY` environment variable.",
default="",
)

api/configs/secret_key.py (new file, 38 lines)
View File

@ -0,0 +1,38 @@
"""SECRET_KEY persistence helpers for runtime setup."""
from __future__ import annotations
import secrets
from extensions.ext_storage import storage
GENERATED_SECRET_KEY_FILENAME = ".dify_secret_key"
def resolve_secret_key(secret_key: str) -> str:
"""Return an explicit SECRET_KEY or a generated key persisted in storage."""
if secret_key:
return secret_key
return _load_or_create_secret_key()
def _load_or_create_secret_key() -> str:
try:
persisted_key = storage.load_once(GENERATED_SECRET_KEY_FILENAME).decode("utf-8").strip()
if persisted_key:
return persisted_key
except FileNotFoundError:
pass
generated_key = secrets.token_urlsafe(48)
try:
storage.save(GENERATED_SECRET_KEY_FILENAME, f"{generated_key}\n".encode())
except Exception as exc:
raise ValueError(
f"SECRET_KEY is not set and could not be generated at {GENERATED_SECRET_KEY_FILENAME}. "
"Set SECRET_KEY explicitly or make storage writable."
) from exc
return generated_key
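For orientation, a minimal standalone sketch of the resolve-or-generate flow this file implements. The dict-backed store is a stand-in for the storage extension, and `resolve` is an illustrative name, not the module's API:

import secrets

_store: dict[str, bytes] = {}  # in-memory stand-in for the storage extension


def resolve(secret_key: str, filename: str = ".dify_secret_key") -> str:
    if secret_key:  # an explicit SECRET_KEY always wins
        return secret_key
    if filename in _store:  # reuse the previously persisted key
        return _store[filename].decode().strip()
    generated = secrets.token_urlsafe(48)
    _store[filename] = f"{generated}\n".encode()
    return generated


first = resolve("")
assert resolve("") == first  # the generated key is stable across calls against the same storage
assert resolve("explicit") == "explicit"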

View File

@ -106,7 +106,7 @@ app_detail_fields_with_site_copy["tags"] = fields.List(fields.Nested(tag_model))
app_detail_fields_with_site_copy["site"] = fields.Nested(site_model)
app_detail_with_site_model = get_or_create_model("TrialAppDetailWithSite", app_detail_fields_with_site_copy)
simple_account_model = get_or_create_model("SimpleAccount", simple_account_fields)
simple_account_model = get_or_create_model("TrialSimpleAccount", simple_account_fields)
conversation_variable_model = get_or_create_model("TrialConversationVariable", conversation_variable_fields)
pipeline_variable_model = get_or_create_model("TrialPipelineVariable", pipeline_variable_fields)

View File

@ -136,7 +136,7 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[
if not dataset:
raise ValueError("Dataset does not exist.")
if not dataset.indexing_technique and not args["indexing_technique"]:
if not dataset.indexing_technique and not args.get("indexing_technique"):
raise ValueError("indexing_technique is required.")
embedding_model_provider = payload.embedding_model_provider

View File

@ -128,7 +128,7 @@ class DifyWorkflowFileRuntime(WorkflowFileRuntimeProtocol):
@staticmethod
def _secret_key() -> bytes:
return dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
return dify_config.SECRET_KEY.encode()
def _sign_query(self, *, payload: str) -> dict[str, str]:
timestamp = str(int(time.time()))

View File

@ -35,8 +35,11 @@ class DatasourceFileManager:
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
sign = hmac.new(
dify_config.SECRET_KEY.encode(),
data_to_sign.encode(),
hashlib.sha256,
).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
@ -47,8 +50,11 @@ class DatasourceFileManager:
verify signature
"""
data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
recalculated_sign = hmac.new(
dify_config.SECRET_KEY.encode(),
data_to_sign.encode(),
hashlib.sha256,
).digest()
recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
# verify signature
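Once the secret key is guaranteed non-empty at startup, the `... if dify_config.SECRET_KEY else b""` fallback becomes dead code, which is why these hunks drop it. As a hedged illustration of the sign/verify roundtrip using plain hmac/hashlib rather than the Dify classes (note this sketch uses hmac.compare_digest, a constant-time variant of the `!=` check in the diff):

import base64
import hashlib
import hmac
import os
import time

SECRET = b"example-secret"  # stand-in for dify_config.SECRET_KEY.encode()


def sign(file_id: str) -> tuple[str, str, str]:
    timestamp = str(int(time.time()))
    nonce = os.urandom(16).hex()
    data = f"file-preview|{file_id}|{timestamp}|{nonce}"
    digest = hmac.new(SECRET, data.encode(), hashlib.sha256).digest()
    return timestamp, nonce, base64.urlsafe_b64encode(digest).decode()


def verify(file_id: str, timestamp: str, nonce: str, encoded_sign: str) -> bool:
    data = f"file-preview|{file_id}|{timestamp}|{nonce}"
    digest = hmac.new(SECRET, data.encode(), hashlib.sha256).digest()
    return hmac.compare_digest(base64.urlsafe_b64encode(digest).decode(), encoded_sign)


ts, nonce, sig = sign("file_id_123")
assert verify("file_id_123", ts, nonce, sig)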

View File

@ -245,6 +245,7 @@ class Jieba(BaseKeyword):
segment = pre_segment_data["segment"]
if pre_segment_data["keywords"]:
segment.keywords = pre_segment_data["keywords"]
assert segment.index_node_id
keyword_table = self._add_text_to_keyword_table(
keyword_table or {}, segment.index_node_id, pre_segment_data["keywords"]
)
@ -253,6 +254,7 @@ class Jieba(BaseKeyword):
keywords = keyword_table_handler.extract_keywords(segment.content, keyword_number)
segment.keywords = list(keywords)
assert segment.index_node_id
keyword_table = self._add_text_to_keyword_table(
keyword_table or {}, segment.index_node_id, list(keywords)
)

View File

@ -1,5 +1,6 @@
import concurrent.futures
import logging
from collections.abc import Sequence
from concurrent.futures import ThreadPoolExecutor
from typing import Any, NotRequired, TypedDict
@ -526,7 +527,7 @@ class RetrievalService:
index_node_ids = [i for i in index_node_ids if i]
segment_ids: list[str] = []
index_node_segments: list[DocumentSegment] = []
index_node_segments: Sequence[DocumentSegment] = []
segments: list[DocumentSegment] = []
attachment_map: dict[str, list[AttachmentInfoDict]] = {}
child_chunk_map: dict[str, list[ChildChunk]] = {}
@ -568,8 +569,9 @@ class RetrievalService:
DocumentSegment.status == "completed",
DocumentSegment.index_node_id.in_(index_node_ids),
)
index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore
index_node_segments = session.execute(document_segment_stmt).scalars().all()
for index_node_segment in index_node_segments:
assert index_node_segment.index_node_id
doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id]
if segment_ids:
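The Sequence annotation matches SQLAlchemy 2.0, where ScalarResult.all() is typed as returning Sequence[T] rather than list[T], which is what lets the type: ignore disappear. A small runnable sketch of the pattern, with a minimal stand-in model:

from collections.abc import Sequence

from sqlalchemy import String, create_engine, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Segment(Base):  # minimal stand-in for DocumentSegment
    __tablename__ = "segments"
    id: Mapped[int] = mapped_column(primary_key=True)
    status: Mapped[str] = mapped_column(String(32))


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Segment(status="completed"))
    session.commit()
    rows: Sequence[Segment] = (
        session.execute(select(Segment).where(Segment.status == "completed")).scalars().all()
    )
    assert len(rows) == 1  # .all() is typed Sequence[Segment]; no type: ignore needed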

View File

@ -50,6 +50,7 @@ class DatasetDocumentStore:
output = {}
for document_segment in document_segments:
assert document_segment.index_node_id
doc_id = document_segment.index_node_id
output[doc_id] = Document(
page_content=document_segment.content,
@ -103,7 +104,7 @@ class DatasetDocumentStore:
if not segment_document:
max_position += 1
assert self._document_id
segment_document = DocumentSegment(
tenant_id=self._dataset.tenant_id,
dataset_id=self._dataset.id,

View File

@ -84,7 +84,7 @@ class IndexProcessor:
select(DocumentSegment).where(DocumentSegment.document_id == original_document_id)
).all()
if segments:
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
indexing_start_at = time.perf_counter()
# delete from vector index
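Both narrowing idioms that recur across these hunks (the `if segment.index_node_id` filter and the bare `assert segment.index_node_id`) serve the type checker once index_node_id is declared `Mapped[str | None]`. A quick illustration with a stand-in dataclass:

from dataclasses import dataclass


@dataclass
class Seg:  # stand-in for a segment whose index_node_id column is nullable
    index_node_id: str | None


segments = [Seg("node-1"), Seg(None), Seg("node-2")]

# Comprehension filter: drops None entries and narrows the element type to str.
index_node_ids: list[str] = [s.index_node_id for s in segments if s.index_node_id]
assert index_node_ids == ["node-1", "node-2"]

# Assert narrowing: promises a single value is non-None before it is used as a str.
seg = segments[0]
assert seg.index_node_id
node_id: str = seg.index_node_id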

View File

@ -8,6 +8,10 @@ import urllib.parse
from configs import dify_config
def _secret_key() -> bytes:
return dify_config.SECRET_KEY.encode()
def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True) -> str:
"""
sign file to get a temporary url for plugin access
@ -19,8 +23,7 @@ def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True)
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
@ -39,8 +42,7 @@ def sign_upload_file_preview_url(upload_file_id: str, extension: str) -> str:
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
@ -51,8 +53,7 @@ def verify_tool_file_signature(file_id: str, timestamp: str, nonce: str, sign: s
verify signature
"""
data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
recalculated_sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest()
recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
# verify signature
@ -71,8 +72,7 @@ def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str,
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
query = urllib.parse.urlencode(
{
@ -92,8 +92,7 @@ def verify_plugin_file_signature(
"""Verify the signature used by the plugin-facing file upload endpoint."""
data_to_sign = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
recalculated_sign = hmac.new(_secret_key(), data_to_sign.encode(), hashlib.sha256).digest()
recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
if sign != recalculated_encoded_sign:

View File

@ -51,8 +51,11 @@ class ToolFileManager:
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
sign = hmac.new(
dify_config.SECRET_KEY.encode(),
data_to_sign.encode(),
hashlib.sha256,
).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
@ -63,8 +66,11 @@ class ToolFileManager:
verify signature
"""
data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
recalculated_sign = hmac.new(
dify_config.SECRET_KEY.encode(),
data_to_sign.encode(),
hashlib.sha256,
).digest()
recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
# verify signature

View File

@ -5,6 +5,7 @@ import threading
from flask import Response
from configs import dify_config
from controllers.console.admin import admin_required
from dify_app import DifyApp
@ -25,6 +26,7 @@ def init_app(app: DifyApp):
)
@app.route("/threads")
@admin_required
def threads(): # pyright: ignore[reportUnusedFunction]
num_threads = threading.active_count()
threads = threading.enumerate()
@ -50,6 +52,7 @@ def init_app(app: DifyApp):
}
@app.route("/db-pool-stat")
@admin_required
def pool_stat(): # pyright: ignore[reportUnusedFunction]
from extensions.ext_database import db
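admin_required comes from controllers.console.admin and its internals are not shown in this diff; the general shape of such a route guard, as a hypothetical reimplementation (is_admin is a placeholder, not the project's real check), is:

from functools import wraps

from flask import Flask, abort

app = Flask(__name__)


def is_admin() -> bool:
    return False  # placeholder; the real permission check lives behind admin_required


def admin_guard(view):  # illustrative stand-in for admin_required
    @wraps(view)
    def wrapper(*args, **kwargs):
        if not is_admin():
            abort(403)  # non-admins never reach the debug view
        return view(*args, **kwargs)

    return wrapper


@app.route("/threads")
@admin_guard
def threads():
    return {"count": 0}


with app.test_client() as client:
    assert client.get("/threads").status_code == 403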

View File

@ -1,6 +1,13 @@
from configs import dify_config
from configs.secret_key import resolve_secret_key
from dify_app import DifyApp
def init_app(app: DifyApp):
app.secret_key = dify_config.SECRET_KEY
def init_app(app: DifyApp) -> None:
"""Resolve SECRET_KEY after config loading and before session/login setup."""
secret_key = dify_config.SECRET_KEY
if not secret_key:
secret_key = resolve_secret_key(secret_key)
dify_config.SECRET_KEY = secret_key
app.config["SECRET_KEY"] = secret_key
app.secret_key = secret_key
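The reordering in the first hunk (ext_set_secretkey now runs after ext_storage) matters because the generated key may live in storage, while sessions need app.secret_key assigned before the first cookie is signed. A minimal sketch of that dependency:

from flask import Flask, session

app = Flask(__name__)
app.secret_key = "resolved-key"  # must be assigned before any session access

@app.route("/visit")
def visit():
    session["seen"] = True  # cookie signing reads app.secret_key at request time
    return "ok"

with app.test_client() as client:
    assert client.get("/visit").status_code == 200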

View File

@ -8,7 +8,6 @@ import os
import pickle
import re
import time
from collections.abc import Sequence
from datetime import datetime
from json import JSONDecodeError
from typing import Any, ClassVar, TypedDict, cast
@ -831,7 +830,7 @@ class Document(Base):
)
class DocumentSegment(Base):
class DocumentSegment(TypeBase):
__tablename__ = "document_segments"
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="document_segment_pkey"),
@ -844,35 +843,40 @@ class DocumentSegment(Base):
)
# initial fields
id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4()))
tenant_id = mapped_column(StringUUID, nullable=False)
dataset_id = mapped_column(StringUUID, nullable=False)
document_id = mapped_column(StringUUID, nullable=False)
id: Mapped[str] = mapped_column(StringUUID, nullable=False, default_factory=lambda: str(uuid4()), init=False)
tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
document_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
position: Mapped[int]
content = mapped_column(LongText, nullable=False)
answer = mapped_column(LongText, nullable=True)
content: Mapped[str] = mapped_column(LongText, nullable=False)
word_count: Mapped[int]
tokens: Mapped[int]
# indexing fields
keywords = mapped_column(sa.JSON, nullable=True)
index_node_id = mapped_column(String(255), nullable=True)
index_node_hash = mapped_column(String(255), nullable=True)
created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
# basic fields
# indexing fields
index_node_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None)
index_node_hash: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None)
enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"), default=True)
answer: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
keywords: Mapped[Any] = mapped_column(sa.JSON, nullable=True, default=None)
disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None)
disabled_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None)
status: Mapped[SegmentStatus] = mapped_column(
EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'"), default=SegmentStatus.WAITING
)
created_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp(), init=False
)
updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None)
updated_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp(), init=False
)
indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None)
completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None)
error: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None)
hit_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0)
enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))
disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
disabled_by = mapped_column(StringUUID, nullable=True)
status: Mapped[str] = mapped_column(EnumText(SegmentStatus, length=255), server_default=sa.text("'waiting'"))
created_by = mapped_column(StringUUID, nullable=False)
created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
updated_by = mapped_column(StringUUID, nullable=True)
updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
error = mapped_column(LongText, nullable=True)
stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
@property
def dataset(self):
@ -899,7 +903,7 @@ class DocumentSegment(Base):
)
@property
def child_chunks(self) -> Sequence[Any]:
def child_chunks(self):
if not self.document:
return []
process_rule = self.document.dataset_process_rule
@ -914,7 +918,7 @@ class DocumentSegment(Base):
return child_chunks or []
return []
def get_child_chunks(self) -> Sequence[Any]:
def get_child_chunks(self):
if not self.document:
return []
process_rule = self.document.dataset_process_rule
@ -945,7 +949,7 @@ class DocumentSegment(Base):
nonce = os.urandom(16).hex()
timestamp = str(int(time.time()))
data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
secret_key = dify_config.SECRET_KEY.encode()
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
@ -962,7 +966,7 @@ class DocumentSegment(Base):
nonce = os.urandom(16).hex()
timestamp = str(int(time.time()))
data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
secret_key = dify_config.SECRET_KEY.encode()
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
@ -981,7 +985,7 @@ class DocumentSegment(Base):
nonce = os.urandom(16).hex()
timestamp = str(int(time.time()))
data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
secret_key = dify_config.SECRET_KEY.encode()
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
@ -1019,7 +1023,7 @@ class DocumentSegment(Base):
nonce = os.urandom(16).hex()
timestamp = str(int(time.time()))
data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
secret_key = dify_config.SECRET_KEY.encode()
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
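Switching the base class to TypeBase earlier in this file appears to move DocumentSegment onto SQLAlchemy's dataclass-mapped models, which explains the new default=/default_factory=/init=False arguments: fields with defaults must follow required fields, and server-generated columns are excluded from __init__. A minimal sketch, assuming TypeBase behaves like MappedAsDataclass:

from datetime import datetime

from sqlalchemy import DateTime, String, func
from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column


class Base(MappedAsDataclass, DeclarativeBase):
    pass


class Segment(Base):
    __tablename__ = "segments"
    id: Mapped[int] = mapped_column(primary_key=True, init=False)  # excluded from __init__
    content: Mapped[str] = mapped_column(String(255))  # required constructor argument
    index_node_id: Mapped[str | None] = mapped_column(String(255), default=None)
    created_at: Mapped[datetime] = mapped_column(
        DateTime, server_default=func.current_timestamp(), init=False  # DB fills this in
    )


seg = Segment(content="hello")  # id/created_at omitted; defaults applied by the dataclass
assert seg.index_node_id is None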

View File

@ -13786,6 +13786,14 @@ Tag type
| unit | string | | No |
| variable | string | | No |
#### TrialSimpleAccount
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| email | string | | No |
| id | string | | No |
| name | string | | No |
#### TrialSite
| Name | Type | Description | Required |
@ -13829,7 +13837,7 @@ Tag type
| ---- | ---- | ----------- | -------- |
| conversation_variables | [ [TrialConversationVariable](#trialconversationvariable) ] | | No |
| created_at | object | | No |
| created_by | [SimpleAccount](#simpleaccount) | | No |
| created_by | [TrialSimpleAccount](#trialsimpleaccount) | | No |
| environment_variables | [ object ] | | No |
| features | object | | No |
| graph | object | | No |
@ -13840,7 +13848,7 @@ Tag type
| rag_pipeline_variables | [ [TrialPipelineVariable](#trialpipelinevariable) ] | | No |
| tool_published | boolean | | No |
| updated_at | object | | No |
| updated_by | [SimpleAccount](#simpleaccount) | | No |
| updated_by | [TrialSimpleAccount](#trialsimpleaccount) | | No |
| version | string | | No |
#### TrialWorkflowPartial

View File

@ -1,6 +1,6 @@
[project]
name = "dify-api"
version = "1.14.0"
version = "1.14.1"
requires-python = "~=3.12.0"
dependencies = [

View File

@ -7,9 +7,10 @@ import time
import uuid
from collections import Counter
from collections.abc import Sequence
from typing import Any, Literal, TypedDict, cast
from typing import Annotated, Any, Literal, TypedDict, cast
import sqlalchemy as sa
from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator
from redis.exceptions import LockNotOwnedError
from sqlalchemy import delete, exists, func, select, update
from sqlalchemy.orm import Session, sessionmaker
@ -117,6 +118,86 @@ class AutoDisableLogsDict(TypedDict):
count: int
class _EstimatePreProcessingRule(BaseModel):
id: str = Field(min_length=1)
enabled: bool
@field_validator("id")
@classmethod
def _validate_id(cls, v: str) -> str:
if v not in DatasetProcessRule.PRE_PROCESSING_RULES:
raise ValueError("Process rule pre_processing_rules id is invalid")
return v
class _EstimateSegmentation(BaseModel):
separator: str = Field(min_length=1)
max_tokens: int = Field(gt=0)
class _EstimateRules(BaseModel):
pre_processing_rules: list[_EstimatePreProcessingRule]
segmentation: _EstimateSegmentation
@field_validator("pre_processing_rules")
@classmethod
def _deduplicate(cls, v: list[_EstimatePreProcessingRule]) -> list[_EstimatePreProcessingRule]:
seen: dict[str, _EstimatePreProcessingRule] = {}
for rule in v:
seen[rule.id] = rule
return list(seen.values())
class _SummaryIndexSettingDisabled(BaseModel):
enable: Literal[False] = False
class _SummaryIndexSettingEnabled(BaseModel):
enable: Literal[True]
model_name: str = Field(min_length=1)
model_provider_name: str = Field(min_length=1)
_SummaryIndexSetting = Annotated[
_SummaryIndexSettingDisabled | _SummaryIndexSettingEnabled,
Field(discriminator="enable"),
]
class _AutomaticProcessRule(BaseModel):
model_config = ConfigDict(extra="allow")
mode: Literal[ProcessRuleMode.AUTOMATIC]
summary_index_setting: _SummaryIndexSetting | None = None
class _CustomProcessRule(BaseModel):
model_config = ConfigDict(extra="allow")
mode: Literal[ProcessRuleMode.CUSTOM]
rules: _EstimateRules
summary_index_setting: _SummaryIndexSetting | None = None
class _HierarchicalProcessRule(BaseModel):
model_config = ConfigDict(extra="allow")
mode: Literal[ProcessRuleMode.HIERARCHICAL]
rules: _EstimateRules
summary_index_setting: _SummaryIndexSetting | None = None
_EstimateProcessRule = Annotated[
_AutomaticProcessRule | _CustomProcessRule | _HierarchicalProcessRule,
Field(discriminator="mode"),
]
class _EstimateArgs(BaseModel):
info_list: dict[str, Any]
process_rule: _EstimateProcessRule
class DatasetService:
@staticmethod
def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False):
@ -2851,94 +2932,16 @@ class DocumentService:
@classmethod
def estimate_args_validate(cls, args: dict[str, Any]):
if "info_list" not in args or not args["info_list"]:
raise ValueError("Data source info is required")
if not isinstance(args["info_list"], dict):
raise ValueError("Data info is invalid")
if "process_rule" not in args or not args["process_rule"]:
raise ValueError("Process rule is required")
if not isinstance(args["process_rule"], dict):
raise ValueError("Process rule is invalid")
if "mode" not in args["process_rule"] or not args["process_rule"]["mode"]:
raise ValueError("Process rule mode is required")
if args["process_rule"]["mode"] not in DatasetProcessRule.MODES:
raise ValueError("Process rule mode is invalid")
if args["process_rule"]["mode"] == ProcessRuleMode.AUTOMATIC:
args["process_rule"]["rules"] = {}
else:
if "rules" not in args["process_rule"] or not args["process_rule"]["rules"]:
raise ValueError("Process rule rules is required")
if not isinstance(args["process_rule"]["rules"], dict):
raise ValueError("Process rule rules is invalid")
if (
"pre_processing_rules" not in args["process_rule"]["rules"]
or args["process_rule"]["rules"]["pre_processing_rules"] is None
):
raise ValueError("Process rule pre_processing_rules is required")
if not isinstance(args["process_rule"]["rules"]["pre_processing_rules"], list):
raise ValueError("Process rule pre_processing_rules is invalid")
unique_pre_processing_rule_dicts = {}
for pre_processing_rule in args["process_rule"]["rules"]["pre_processing_rules"]:
if "id" not in pre_processing_rule or not pre_processing_rule["id"]:
raise ValueError("Process rule pre_processing_rules id is required")
if pre_processing_rule["id"] not in DatasetProcessRule.PRE_PROCESSING_RULES:
raise ValueError("Process rule pre_processing_rules id is invalid")
if "enabled" not in pre_processing_rule or pre_processing_rule["enabled"] is None:
raise ValueError("Process rule pre_processing_rules enabled is required")
if not isinstance(pre_processing_rule["enabled"], bool):
raise ValueError("Process rule pre_processing_rules enabled is invalid")
unique_pre_processing_rule_dicts[pre_processing_rule["id"]] = pre_processing_rule
args["process_rule"]["rules"]["pre_processing_rules"] = list(unique_pre_processing_rule_dicts.values())
if (
"segmentation" not in args["process_rule"]["rules"]
or args["process_rule"]["rules"]["segmentation"] is None
):
raise ValueError("Process rule segmentation is required")
if not isinstance(args["process_rule"]["rules"]["segmentation"], dict):
raise ValueError("Process rule segmentation is invalid")
if (
"separator" not in args["process_rule"]["rules"]["segmentation"]
or not args["process_rule"]["rules"]["segmentation"]["separator"]
):
raise ValueError("Process rule segmentation separator is required")
if not isinstance(args["process_rule"]["rules"]["segmentation"]["separator"], str):
raise ValueError("Process rule segmentation separator is invalid")
if (
"max_tokens" not in args["process_rule"]["rules"]["segmentation"]
or not args["process_rule"]["rules"]["segmentation"]["max_tokens"]
):
raise ValueError("Process rule segmentation max_tokens is required")
if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int):
raise ValueError("Process rule segmentation max_tokens is invalid")
# valid summary index setting
summary_index_setting = args["process_rule"].get("summary_index_setting")
if summary_index_setting and summary_index_setting.get("enable"):
if "model_name" not in summary_index_setting or not summary_index_setting["model_name"]:
raise ValueError("Summary index model name is required")
if "model_provider_name" not in summary_index_setting or not summary_index_setting["model_provider_name"]:
raise ValueError("Summary index model provider name is required")
try:
validated = _EstimateArgs.model_validate(args)
except ValidationError as e:
first = e.errors()[0]
original = first.get("ctx", {}).get("error")
raise ValueError(str(original) if isinstance(original, ValueError) else first["msg"]) from e
process_rule_dict = validated.process_rule.model_dump(exclude_none=True)
if validated.process_rule.mode == ProcessRuleMode.AUTOMATIC:
process_rule_dict["rules"] = {}
args["process_rule"] = process_rule_dict
@staticmethod
def batch_update_document_status(
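The rewritten validator delegates all of the old hand-rolled checks to the pydantic models above. A hedged usage sketch of the discriminated-union behavior, with simplified stand-in models rather than the private classes themselves:

from typing import Annotated, Literal

from pydantic import BaseModel, Field, ValidationError


class AutomaticRule(BaseModel):
    mode: Literal["automatic"]


class CustomRule(BaseModel):
    mode: Literal["custom"]
    rules: dict


ProcessRule = Annotated[AutomaticRule | CustomRule, Field(discriminator="mode")]


class EstimateArgs(BaseModel):
    info_list: dict
    process_rule: ProcessRule


EstimateArgs.model_validate({"info_list": {"k": "v"}, "process_rule": {"mode": "automatic"}})

try:
    # "custom" without rules fails on exactly the missing branch, not on every union member
    EstimateArgs.model_validate({"info_list": {}, "process_rule": {"mode": "custom"}})
except ValidationError as exc:
    assert exc.errors()[0]["loc"][-1] == "rules"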

View File

@ -111,6 +111,7 @@ class VectorService:
"dataset_id": segment.dataset_id,
},
)
assert segment.index_node_id
if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
# update vector index
vector = Vector(dataset=dataset)
@ -138,6 +139,7 @@ class VectorService:
regenerate: bool = False,
):
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
assert segment.index_node_id
if regenerate:
# delete child chunks
index_processor.clean(dataset, [segment.index_node_id], with_keywords=True, delete_child_chunks=True)

View File

@ -50,7 +50,7 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form
).all()
if segments:
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
segment_ids = [segment.id for segment in segments]
# Collect image file IDs from segment content

View File

@ -19,6 +19,7 @@ from graphon.model_runtime.entities.model_entities import ModelType
from libs import helper
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document, DocumentSegment
from models.enums import SegmentStatus
from models.model import UploadFile
from services.vector_service import VectorService
@ -156,7 +157,7 @@ def batch_create_segment_to_index_task(
tokens=tokens,
created_by=user_id,
indexing_at=naive_utc_now(),
status="completed",
status=SegmentStatus.COMPLETED,
completed_at=naive_utc_now(),
)
if document_config["doc_form"] == IndexStructureType.QA_INDEX:

View File

@ -53,7 +53,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
binding_ids = [binding.id for binding, _ in attachments_with_bindings]
total_attachment_files.extend([attachment_file.key for _, attachment_file in attachments_with_bindings])
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
segment_contents = [segment.content for segment in segments]
except Exception:
logger.exception("Cleaned document when document deleted failed")

View File

@ -38,7 +38,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str):
for document_id in document_ids:
segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all()
total_index_node_ids.extend([segment.index_node_id for segment in segments])
total_index_node_ids.extend([segment.index_node_id for segment in segments if segment.index_node_id])
# Wrap vector / keyword index cleanup in try/except so that a transient
# failure here (e.g. billing API hiccup propagated via FeatureService when

View File

@ -9,6 +9,7 @@ from core.db.session_factory import session_factory
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from extensions.ext_redis import redis_client
from models.dataset import DocumentSegment
from models.enums import SegmentStatus
logger = logging.getLogger(__name__)
@ -30,7 +31,7 @@ def disable_segment_from_index_task(segment_id: str):
logger.info(click.style(f"Segment not found: {segment_id}", fg="red"))
return
if segment.status != "completed":
if segment.status != SegmentStatus.COMPLETED:
logger.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red"))
return
@ -59,6 +60,7 @@ def disable_segment_from_index_task(segment_id: str):
index_type = dataset_document.doc_form
index_processor = IndexProcessorFactory(index_type).init_index_processor()
assert segment.index_node_id
index_processor.clean(dataset, [segment.index_node_id])
# Disable summary index for this segment

View File

@ -55,7 +55,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
return
try:
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
if dataset.is_multimodal:
segment_ids = [segment.id for segment in segments]
segment_attachment_bindings = session.scalars(

View File

@ -69,7 +69,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
index_type = document.doc_form
segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all()
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
# Get credentials from datasource provider
datasource_provider_service = DatasourceProviderService()

View File

@ -45,7 +45,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
index_type = document.doc_form
segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all()
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
clean_success = False
try:

View File

@ -137,7 +137,7 @@ def _duplicate_document_indexing_task(dataset_id: str, document_ids: Sequence[st
select(DocumentSegment).where(DocumentSegment.document_id == document.id)
).all()
if segments:
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
# delete from vector index
index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True)

View File

@ -61,7 +61,7 @@ def remove_document_from_index_task(document_id: str):
except Exception as e:
logger.warning("Failed to disable summaries for document %s: %s", document.id, str(e))
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
if index_node_ids:
try:
index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False)

View File

@ -85,7 +85,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
select(DocumentSegment).where(DocumentSegment.document_id == document_id)
).all()
if segments:
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
# delete from vector index
index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True)

View File

@ -70,7 +70,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str):
segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document_id)).all()
if segments:
index_node_ids = [segment.index_node_id for segment in segments]
index_node_ids = [segment.index_node_id for segment in segments if segment.index_node_id]
# delete from vector index
index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True)

View File

@ -13,9 +13,9 @@ from uuid import uuid4
from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole, TenantStatus
from models.dataset import Dataset, DatasetPermissionEnum, Document, DocumentSegment
from models.enums import DataSourceType, DocumentCreatedFrom
from models.enums import DataSourceType, DocumentCreatedFrom, SegmentStatus
from services.dataset_service import SegmentService
@ -35,13 +35,13 @@ class SegmentServiceTestDataFactory:
email=f"{uuid4()}@example.com",
name=f"user-{uuid4()}",
interface_language="en-US",
status="active",
status=AccountStatus.ACTIVE,
)
db_session_with_containers.add(account)
db_session_with_containers.commit()
if tenant is None:
tenant = Tenant(name=f"tenant-{uuid4()}", status="normal")
tenant = Tenant(name=f"tenant-{uuid4()}", status=TenantStatus.NORMAL)
db_session_with_containers.add(tenant)
db_session_with_containers.commit()
@ -103,7 +103,7 @@ class SegmentServiceTestDataFactory:
created_by: str,
position: int = 1,
content: str = "Test content",
status: str = "completed",
status: SegmentStatus = SegmentStatus.COMPLETED,
word_count: int = 10,
tokens: int = 15,
) -> DocumentSegment:
@ -203,7 +203,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=1,
status="completed",
status=SegmentStatus.COMPLETED,
)
SegmentServiceTestDataFactory.create_segment(
db_session_with_containers,
@ -212,7 +212,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=2,
status="indexing",
status=SegmentStatus.INDEXING,
)
SegmentServiceTestDataFactory.create_segment(
db_session_with_containers,
@ -221,7 +221,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=3,
status="waiting",
status=SegmentStatus.WAITING,
)
# Act
@ -257,7 +257,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=1,
status="completed",
status=SegmentStatus.COMPLETED,
)
SegmentServiceTestDataFactory.create_segment(
db_session_with_containers,
@ -266,7 +266,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=2,
status="indexing",
status=SegmentStatus.INDEXING,
)
# Act
@ -415,7 +415,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=1,
status="completed",
status=SegmentStatus.COMPLETED,
content="This is important information",
)
SegmentServiceTestDataFactory.create_segment(
@ -425,7 +425,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=2,
status="indexing",
status=SegmentStatus.INDEXING,
content="This is also important",
)
SegmentServiceTestDataFactory.create_segment(
@ -435,7 +435,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=3,
status="completed",
status=SegmentStatus.COMPLETED,
content="This is irrelevant",
)
@ -477,7 +477,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=1,
status="completed",
status=SegmentStatus.COMPLETED,
)
SegmentServiceTestDataFactory.create_segment(
db_session_with_containers,
@ -486,7 +486,7 @@ class TestSegmentServiceGetSegments:
document_id=document.id,
created_by=owner.id,
position=2,
status="waiting",
status=SegmentStatus.WAITING,
)
# Act

View File

@ -128,7 +128,6 @@ class TestAddDocumentToIndexTask:
for i in range(3):
segment = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
@ -451,7 +450,6 @@ class TestAddDocumentToIndexTask:
segments = []
for i in range(3):
segment = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
@ -630,7 +628,6 @@ class TestAddDocumentToIndexTask:
# Segment 1: Should be processed (enabled=False, status=SegmentStatus.COMPLETED)
segment1 = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
@ -650,7 +647,6 @@ class TestAddDocumentToIndexTask:
# Segment 2: Should be processed (enabled=True, status=SegmentStatus.COMPLETED)
# Note: Implementation doesn't filter by enabled status, only by status=SegmentStatus.COMPLETED
segment2 = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
@ -669,7 +665,6 @@ class TestAddDocumentToIndexTask:
# Segment 3: Should NOT be processed (enabled=False, status="processing")
segment3 = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
@ -688,7 +683,6 @@ class TestAddDocumentToIndexTask:
# Segment 4: Should be processed (enabled=False, status=SegmentStatus.COMPLETED)
segment4 = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,

View File

@ -177,7 +177,6 @@ class TestBatchCleanDocumentTask:
fake = Faker()
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=account.current_tenant.id,
dataset_id=document.dataset_id,
document_id=document.id,
@ -290,10 +289,9 @@ class TestBatchCleanDocumentTask:
account = self._create_test_account(db_session_with_containers)
dataset = self._create_test_dataset(db_session_with_containers, account)
document = self._create_test_document(db_session_with_containers, dataset, account)
assert account.current_tenant
# Create segment with simple content (no image references)
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=account.current_tenant.id,
dataset_id=document.dataset_id,
document_id=document.id,
@ -692,9 +690,9 @@ class TestBatchCleanDocumentTask:
# Create multiple segments for the document
segments = []
assert account.current_tenant
for i in range(3):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=account.current_tenant.id,
dataset_id=document.dataset_id,
document_id=document.id,

View File

@ -220,7 +220,6 @@ class TestCleanDatasetTask:
DocumentSegment: Created document segment instance
"""
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -232,8 +231,6 @@ class TestCleanDatasetTask:
status=SegmentStatus.COMPLETED,
index_node_id=str(uuid.uuid4()),
index_node_hash="test_hash",
created_at=datetime.now(),
updated_at=datetime.now(),
)
db_session_with_containers.add(segment)
@ -614,7 +611,6 @@ class TestCleanDatasetTask:
"""
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -626,8 +622,6 @@ class TestCleanDatasetTask:
status=SegmentStatus.COMPLETED,
index_node_id=str(uuid.uuid4()),
index_node_hash="test_hash",
created_at=datetime.now(),
updated_at=datetime.now(),
)
db_session_with_containers.add(segment)
@ -729,8 +723,6 @@ class TestCleanDatasetTask:
type=DatasetMetadataType.STRING,
created_by=account.id,
)
metadata.id = str(uuid.uuid4())
metadata.created_at = datetime.now()
metadata_items.append(metadata)
# Create binding for each metadata item
@ -741,8 +733,6 @@ class TestCleanDatasetTask:
document_id=documents[i % len(documents)].id,
created_by=account.id,
)
binding.id = str(uuid.uuid4())
binding.created_at = datetime.now()
bindings.append(binding)
db_session_with_containers.add_all(metadata_items)
@ -946,7 +936,6 @@ class TestCleanDatasetTask:
long_content = "Very long content " * 100 # Long content within reasonable limits
segment_content = f"Segment with special chars: {special_content}\n{long_content}"
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -958,8 +947,6 @@ class TestCleanDatasetTask:
status=SegmentStatus.COMPLETED,
index_node_id=str(uuid.uuid4()),
index_node_hash="test_hash_" + "x" * 50, # Long hash within limits
created_at=datetime.now(),
updated_at=datetime.now(),
)
db_session_with_containers.add(segment)
db_session_with_containers.commit()

View File

@ -132,11 +132,10 @@ class TestCleanNotionDocumentTask:
db_session_with_containers.add(document)
db_session_with_containers.flush()
document_ids.append(document.id)
assert tenant
# Create segments for each document
for j in range(2):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -297,10 +296,9 @@ class TestCleanNotionDocumentTask:
)
db_session_with_containers.add(document)
db_session_with_containers.flush()
assert tenant
# Create test segment
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -379,12 +377,11 @@ class TestCleanNotionDocumentTask:
)
db_session_with_containers.add(document)
db_session_with_containers.flush()
assert tenant
# Create segments without index_node_ids
segments = []
for i in range(3):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -468,11 +465,10 @@ class TestCleanNotionDocumentTask:
db_session_with_containers.add(document)
db_session_with_containers.flush()
documents.append(document)
assert tenant
# Create segments for each document
for j in range(2):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -569,10 +565,9 @@ class TestCleanNotionDocumentTask:
segment_statuses = [SegmentStatus.WAITING, SegmentStatus.INDEXING, SegmentStatus.COMPLETED, SegmentStatus.ERROR]
segments = []
index_node_ids = []
assert tenant
for i, status in enumerate(segment_statuses):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -665,10 +660,9 @@ class TestCleanNotionDocumentTask:
)
db_session_with_containers.add(document)
db_session_with_containers.flush()
assert tenant
# Create segment
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -765,12 +759,11 @@ class TestCleanNotionDocumentTask:
db_session_with_containers.add(document)
db_session_with_containers.flush()
documents.append(document)
assert tenant
# Create multiple segments for each document
num_segments_per_doc = 5
for j in range(num_segments_per_doc):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -875,7 +868,6 @@ class TestCleanNotionDocumentTask:
# Create segments for each document
for j in range(3):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=account.current_tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -984,11 +976,10 @@ class TestCleanNotionDocumentTask:
db_session_with_containers.add(document)
db_session_with_containers.flush()
documents.append(document)
assert tenant
# Create segments for each document
for j in range(2):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -1093,10 +1084,9 @@ class TestCleanNotionDocumentTask:
# Create segments with metadata
segments = []
index_node_ids = []
assert tenant
for i in range(3):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,

View File

@ -90,7 +90,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -150,7 +149,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -202,7 +200,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -253,7 +250,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset with parent-child index
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -305,7 +301,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -371,7 +366,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset without documents
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -403,7 +397,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -461,7 +454,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset without documents
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -494,7 +486,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -546,7 +537,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -592,7 +582,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset with custom index type
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -624,7 +613,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -670,7 +658,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset without doc_form (should use default)
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -702,7 +689,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -748,7 +734,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -806,7 +791,6 @@ class TestDealDatasetVectorIndexTask:
for i, document in enumerate(documents):
for j in range(2):
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -832,6 +816,7 @@ class TestDealDatasetVectorIndexTask:
updated_document = db_session_with_containers.scalar(
select(Document).where(Document.id == document.id).limit(1)
)
assert updated_document
assert updated_document.indexing_status == IndexingStatus.COMPLETED
# Verify index processor load was called multiple times
@ -853,7 +838,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -905,7 +889,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
@ -952,7 +935,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -1024,7 +1006,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments for enabled document only
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=enabled_document.id,
@ -1075,7 +1056,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -1147,7 +1127,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments for active document only
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=active_document.id,
@ -1198,7 +1177,6 @@ class TestDealDatasetVectorIndexTask:
# Create dataset
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
@ -1270,7 +1248,6 @@ class TestDealDatasetVectorIndexTask:
# Create segments for completed document only
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=completed_document.id,

View File

@ -209,26 +209,25 @@ class TestDeleteSegmentFromIndexTask:
segments = []
for i in range(count):
segment = DocumentSegment()
segment.id = fake.uuid4()
segment.tenant_id = document.tenant_id
segment.dataset_id = document.dataset_id
segment.document_id = document.id
segment.position = i + 1
segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}"
segment.answer = f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}"
segment.word_count = fake.random_int(min=10, max=100)
segment.tokens = fake.random_int(min=5, max=50)
segment.keywords = [fake.word() for _ in range(3)]
segment.index_node_id = f"node_{fake.uuid4()}"
segment.index_node_hash = fake.sha256()
segment.hit_count = 0
segment.enabled = True
segment.status = SegmentStatus.COMPLETED
segment.created_by = account.id
segment.created_at = fake.date_time_this_year()
segment.updated_by = account.id
segment.updated_at = segment.created_at
created_at = fake.date_time_this_year()
segment = DocumentSegment(
tenant_id=document.tenant_id,
dataset_id=document.dataset_id,
document_id=document.id,
position=i + 1,
content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}",
answer=f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}",
word_count=fake.random_int(min=10, max=100),
tokens=fake.random_int(min=5, max=50),
keywords=[fake.word() for _ in range(3)],
index_node_id=f"node_{fake.uuid4()}",
index_node_hash=fake.sha256(),
hit_count=0,
enabled=True,
status=SegmentStatus.COMPLETED,
created_by=account.id,
updated_by=account.id,
)
db_session_with_containers.add(segment)
segments.append(segment)

View File

@ -159,7 +159,7 @@ class TestDisableSegmentFromIndexTask:
dataset: Dataset,
tenant: Tenant,
account: Account,
status: str = "completed",
status: SegmentStatus = SegmentStatus.COMPLETED,
enabled: bool = True,
) -> DocumentSegment:
"""

View File

@ -185,30 +185,31 @@ class TestDisableSegmentsFromIndexTask:
segments = []
for i in range(count):
segment = DocumentSegment()
segment.id = fake.uuid4()
segment.tenant_id = dataset.tenant_id
segment.dataset_id = dataset.id
segment.document_id = document.id
segment.position = i + 1
segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}"
segment.answer = f"Test answer {i + 1}" if i % 2 == 0 else None
segment.word_count = fake.random_int(min=10, max=100)
segment.tokens = fake.random_int(min=5, max=50)
segment.keywords = [fake.word() for _ in range(3)]
segment.index_node_id = f"node_{segment.id}"
segment.index_node_hash = fake.sha256()
segment.hit_count = 0
segment.enabled = True
segment.disabled_at = None
segment.disabled_by = None
segment.status = SegmentStatus.COMPLETED
segment.created_by = account.id
segment.updated_by = account.id
segment.indexing_at = fake.date_time_this_year()
segment.completed_at = fake.date_time_this_year()
segment.error = None
segment.stopped_at = None
id = fake.uuid4()
segment = DocumentSegment(
tenant_id=dataset.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
position=i + 1,
content=f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}",
answer=f"Test answer {i + 1}" if i % 2 == 0 else None,
word_count=fake.random_int(min=10, max=100),
tokens=fake.random_int(min=5, max=50),
keywords=[fake.word() for _ in range(3)],
index_node_id=f"node_{id}",
index_node_hash=fake.sha256(),
hit_count=0,
enabled=True,
disabled_at=None,
disabled_by=None,
status=SegmentStatus.COMPLETED,
created_by=account.id,
updated_by=account.id,
indexing_at=fake.date_time_this_year(),
completed_at=fake.date_time_this_year(),
error=None,
stopped_at=None,
)
segments.append(segment)

View File

@ -175,7 +175,6 @@ class TestDuplicateDocumentIndexingTasks:
for document in documents:
for i in range(segments_per_doc):
segment = DocumentSegment(
id=fake.uuid4(),
tenant_id=dataset.tenant_id,
dataset_id=dataset.id,
document_id=document.id,

View File

@ -139,7 +139,6 @@ class TestEnableSegmentsToIndexTask:
for i in range(count):
text = fake.text(max_nb_chars=200)
segment = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,

View File

@ -8,6 +8,47 @@ from yarl import URL
from configs.app_config import DifyConfig
def _set_basic_config_env(monkeypatch: pytest.MonkeyPatch) -> None:
os.environ.clear()
monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
monkeypatch.setenv("DB_TYPE", "postgresql")
monkeypatch.setenv("DB_USERNAME", "postgres")
monkeypatch.setenv("DB_PASSWORD", "postgres")
monkeypatch.setenv("DB_HOST", "localhost")
monkeypatch.setenv("DB_PORT", "5432")
monkeypatch.setenv("DB_DATABASE", "dify")
def test_dify_config_keeps_secret_key_empty_when_missing(
monkeypatch: pytest.MonkeyPatch,
tmp_path,
) -> None:
_set_basic_config_env(monkeypatch)
monkeypatch.delenv("SECRET_KEY", raising=False)
monkeypatch.setenv("OPENDAL_FS_ROOT", str(tmp_path))
config = DifyConfig(_env_file=None)
assert config.SECRET_KEY == ""
assert not hasattr(config, "OPENDAL_FS_ROOT")
assert not (tmp_path / ".dify_secret_key").exists()
def test_dify_config_preserves_explicit_secret_key(
monkeypatch: pytest.MonkeyPatch,
tmp_path,
) -> None:
_set_basic_config_env(monkeypatch)
monkeypatch.setenv("SECRET_KEY", "explicit")
monkeypatch.setenv("OPENDAL_FS_ROOT", str(tmp_path))
config = DifyConfig(_env_file=None)
assert config.SECRET_KEY == "explicit"
assert not (tmp_path / ".dify_secret_key").exists()
def test_dify_config(monkeypatch: pytest.MonkeyPatch):
# clear system environment variables
os.environ.clear()

View File

@ -88,6 +88,11 @@ def valid_parameters():
}
def test_trial_workflow_uses_trial_scoped_simple_account_model():
assert module.simple_account_model.name == "TrialSimpleAccount"
assert hasattr(module.simple_account_model, "items")
class TestTrialAppWorkflowRunApi:
def test_not_workflow_app(self, app: Flask):
api = module.TrialAppWorkflowRunApi()

View File

@ -1057,8 +1057,8 @@ class TestDocumentAddByTextApi:
"""Test error when both dataset and payload lack indexing_technique.
When ``indexing_technique`` is ``None`` in the payload, ``model_dump(exclude_none=True)``
omits the key. The production code accesses ``args["indexing_technique"]`` which raises
``KeyError`` before the ``ValueError`` guard can fire.
omits the key. The service API should still raise the same validation error as other
document creation paths instead of leaking a ``KeyError`` from the dumped payload dict.
"""
# Arrange — neutralise billing decorators
self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id)
@@ -1074,7 +1074,7 @@ class TestDocumentAddByTextApi:
headers={"Authorization": "Bearer test_token"},
):
api = DocumentAddByTextApi()
with pytest.raises(KeyError):
with pytest.raises(ValueError, match="indexing_technique is required."):
api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id)
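The sketch below (using a hypothetical DocPayload model, not the real service schema) illustrates why the assertion changed: with pydantic's model_dump(exclude_none=True), an unset field is omitted from the dumped dict entirely, so bracket access raises KeyError while .get() returns None and lets the ValueError guard fire.

from pydantic import BaseModel

class DocPayload(BaseModel):  # hypothetical stand-in for the request payload
    text: str
    indexing_technique: str | None = None

# exclude_none=True drops the key entirely, not just its value.
args = DocPayload(text="hello").model_dump(exclude_none=True)
assert "indexing_technique" not in args           # bracket access would raise KeyError
assert args.get("indexing_technique") is None     # safe access reaches the ValueError guard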

View File

@@ -34,20 +34,6 @@ class TestDatasourceFileManager:
assert f"nonce={mock_urandom.return_value.hex()}" in signed_url
assert "sign=" in signed_url
@patch("core.datasource.datasource_file_manager.time.time")
@patch("core.datasource.datasource_file_manager.os.urandom")
@patch("core.datasource.datasource_file_manager.dify_config")
def test_sign_file_empty_secret(self, mock_config, mock_urandom, mock_time):
# Setup
mock_config.FILES_URL = "http://localhost:5001"
mock_config.SECRET_KEY = None # Empty secret
mock_time.return_value = 1700000000
mock_urandom.return_value = b"1234567890abcdef"
# Execute
signed_url = DatasourceFileManager.sign_file("file_id", ".png")
assert "sign=" in signed_url
@patch("core.datasource.datasource_file_manager.time.time")
@patch("core.datasource.datasource_file_manager.dify_config")
def test_verify_file(self, mock_config, mock_time):
@@ -76,25 +62,6 @@ class TestDatasourceFileManager:
mock_time.return_value = 1700000500 # 700 seconds after timestamp (300 is timeout)
assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is False
@patch("core.datasource.datasource_file_manager.time.time")
@patch("core.datasource.datasource_file_manager.dify_config")
def test_verify_file_empty_secret(self, mock_config, mock_time):
# Setup
mock_config.SECRET_KEY = "" # Empty string secret
mock_config.FILES_ACCESS_TIMEOUT = 300
mock_time.return_value = 1700000000
datasource_file_id = "file_id_123"
timestamp = "1699999800"
nonce = "some_nonce"
# Calculate with empty secret
data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
sign = hmac.new(b"", data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is True
@patch("core.datasource.datasource_file_manager.db")
@patch("core.datasource.datasource_file_manager.storage")
@patch("core.datasource.datasource_file_manager.uuid4")

View File

@@ -0,0 +1,74 @@
from __future__ import annotations
import pytest
from flask import Flask
from extensions import ext_set_secretkey
class InMemoryStorage:
def __init__(self, files: dict[str, bytes] | None = None) -> None:
self.files = files or {}
self.saved_files: list[tuple[str, bytes]] = []
def load_once(self, filename: str) -> bytes:
try:
return self.files[filename]
except KeyError:
raise FileNotFoundError(filename)
def save(self, filename: str, data: bytes) -> None:
self.files[filename] = data
self.saved_files.append((filename, data))
def test_init_app_uses_configured_secret_key(monkeypatch: pytest.MonkeyPatch) -> None:
secret_key = "configured-secret-key"
storage = InMemoryStorage()
monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", secret_key)
monkeypatch.setattr("configs.secret_key.storage", storage)
app = Flask(__name__)
app.config["SECRET_KEY"] = secret_key
ext_set_secretkey.init_app(app)
assert app.secret_key == secret_key
assert app.config["SECRET_KEY"] == secret_key
assert storage.saved_files == []
def test_init_app_generates_and_persists_secret_key_when_missing(
monkeypatch: pytest.MonkeyPatch,
) -> None:
storage = InMemoryStorage()
monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", "")
monkeypatch.setattr("configs.secret_key.storage", storage)
app = Flask(__name__)
app.config["SECRET_KEY"] = ""
ext_set_secretkey.init_app(app)
persisted_key = storage.files[".dify_secret_key"].decode("utf-8").strip()
assert persisted_key
assert storage.saved_files == [(".dify_secret_key", f"{persisted_key}\n".encode())]
assert persisted_key == ext_set_secretkey.dify_config.SECRET_KEY
assert persisted_key == app.config["SECRET_KEY"]
assert persisted_key == app.secret_key
def test_init_app_reuses_persisted_secret_key_when_missing(
monkeypatch: pytest.MonkeyPatch,
) -> None:
persisted_key = "persisted-secret-key"
storage = InMemoryStorage({".dify_secret_key": f"{persisted_key}\n".encode()})
monkeypatch.setattr("extensions.ext_set_secretkey.dify_config.SECRET_KEY", "")
monkeypatch.setattr("configs.secret_key.storage", storage)
app = Flask(__name__)
app.config["SECRET_KEY"] = ""
ext_set_secretkey.init_app(app)
assert persisted_key == ext_set_secretkey.dify_config.SECRET_KEY
assert persisted_key == app.config["SECRET_KEY"]
assert persisted_key == app.secret_key
assert storage.saved_files == []

View File

@@ -143,28 +143,13 @@ class TestPassportService:
assert str(exc_info.value) == "401 Unauthorized: Token has expired."
# Configuration tests
def test_should_handle_empty_secret_key(self):
"""Test behavior when SECRET_KEY is empty"""
def test_should_use_configured_secret_key_without_policy_validation(self):
"""Test that policy decisions are owned by config, not PassportService."""
with patch("libs.passport.dify_config") as mock_config:
mock_config.SECRET_KEY = ""
mock_config.SECRET_KEY = "configured"
service = PassportService()
# Empty secret key should still work but is insecure
payload = {"test": "data"}
token = service.issue(payload)
decoded = service.verify(token)
assert decoded == payload
def test_should_handle_none_secret_key(self):
"""Test behavior when SECRET_KEY is None"""
with patch("libs.passport.dify_config") as mock_config:
mock_config.SECRET_KEY = None
service = PassportService()
payload = {"test": "data"}
# JWT library will raise TypeError when secret is None
with pytest.raises((TypeError, jwt.exceptions.InvalidKeyError)):
service.issue(payload)
assert service.sk == "configured"
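For context, a minimal sketch of the issue/verify round-trip these tests rely on, written directly against PyJWT (HS256 is assumed for illustration; the diff does not show which algorithm PassportService actually uses):

import jwt  # PyJWT, which the removed test referenced via jwt.exceptions

secret = "configured"
# Issue a token with the configured key, then verify it with the same key.
token = jwt.encode({"test": "data"}, secret, algorithm="HS256")
assert jwt.decode(token, secret, algorithms=["HS256"]) == {"test": "data"}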
# Boundary condition tests
def test_should_handle_large_payload(self, passport_service):

View File

@@ -1297,7 +1297,7 @@ class TestDocumentServiceEstimateValidation:
"""Unit tests for estimate_args_validate branches."""
def test_estimate_args_validate_rejects_missing_info_list(self):
with pytest.raises(ValueError, match="Data source info is required"):
with pytest.raises(ValueError, match="Field required"):
DocumentService.estimate_args_validate({})
def test_estimate_args_validate_sets_empty_rules_for_automatic_mode(self):
@@ -1357,7 +1357,7 @@ class TestDocumentServiceEstimateValidation:
},
}
with pytest.raises(ValueError, match="Summary index model provider name is required"):
with pytest.raises(ValueError, match="Field required"):
DocumentService.estimate_args_validate(args)

View File

@@ -282,7 +282,6 @@ class TestSegmentServiceQueries:
def test_get_segment_by_id_returns_only_document_segment_instances(self):
segment = DocumentSegment(
id="segment-1",
tenant_id="tenant-1",
dataset_id="dataset-1",
document_id="doc-1",
@@ -292,7 +291,7 @@
tokens=2,
created_by="user-1",
)
segment.id = "segment-1"
with patch("services.dataset_service.db") as mock_db:
mock_db.session.scalar.return_value = segment
result = SegmentService.get_segment_by_id("segment-1", "tenant-1")
@@ -307,7 +306,6 @@
def test_get_segments_by_document_and_dataset_returns_scalars_result(self):
segment = DocumentSegment(
id="segment-1",
tenant_id="tenant-1",
dataset_id="dataset-1",
document_id="doc-1",
@@ -318,6 +316,7 @@
created_by="user-1",
)
segment.id = "segment-1"
with patch("services.dataset_service.db") as mock_db:
mock_db.session.scalars.return_value.all.return_value = [segment]
@@ -461,6 +460,7 @@
vector_service.create_segments_vector.side_effect = RuntimeError("vector failed")
result = SegmentService.multi_create_segment(segments, document, dataset)
assert result
assert len(result) == 2
assert [segment.position for segment in result] == [2, 3]

8
api/uv.lock generated
View File

@@ -1292,7 +1292,7 @@ wheels = [
[[package]]
name = "dify-api"
version = "1.14.0"
version = "1.14.1"
source = { virtual = "." }
dependencies = [
{ name = "aliyun-log-python-sdk" },
@@ -7147,11 +7147,11 @@ wheels = [
[[package]]
name = "urllib3"
version = "2.6.3"
version = "2.7.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
{ url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" },
]
[[package]]

View File

@@ -28,7 +28,8 @@ LANG=C.UTF-8
LC_ALL=C.UTF-8
PYTHONIOENCODING=utf-8
UV_CACHE_DIR=/tmp/.uv-cache
SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U
# Leave empty to auto-generate a persistent key in the storage directory.
SECRET_KEY=
INIT_PASSWORD=
DEPLOY_ENV=PRODUCTION
CHECK_UPDATE_URL=https://updates.dify.ai

View File

@@ -87,7 +87,7 @@ The root `.env.example` file contains the essential startup settings. Optional a
1. **Server Configuration**:
- `LOG_LEVEL`, `DEBUG`, `FLASK_DEBUG`: Logging and debug settings.
- `SECRET_KEY`: A key for encrypting session cookies and other sensitive data.
- `SECRET_KEY`: A key for signing sessions, JWTs, and file URLs. Leave it empty to let Dify generate a persistent key in the storage directory, or set a strong, unique value yourself.
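For deployments that prefer an explicit key, any high-entropy value works; one way to produce one from Python's standard library (48 bytes is an illustrative length, not a requirement):

import secrets

# Print a URL-safe random key; paste the output into your .env file.
print(f"SECRET_KEY={secrets.token_urlsafe(48)}")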
1. **Database Configuration**:

View File

@@ -220,7 +220,7 @@ services:
# API service
api:
<<: *shared-api-worker-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
environment:
MODE: api
SENTRY_DSN: ${API_SENTRY_DSN:-}
@@ -264,7 +264,7 @@
# WebSocket service for workflow collaboration.
api_websocket:
<<: *shared-api-worker-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
profiles:
- collaboration
environment:
@@ -290,7 +290,7 @@
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
<<: *shared-worker-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
environment:
MODE: worker
SENTRY_DSN: ${API_SENTRY_DSN:-}
@@ -333,7 +333,7 @@
# Celery beat for scheduling periodic tasks.
worker_beat:
<<: *shared-worker-beat-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
environment:
MODE: beat
depends_on:
@@ -366,7 +366,7 @@
# Frontend web application.
web:
image: langgenius/dify-web:1.14.0
image: langgenius/dify-web:1.14.1
restart: always
env_file:
- path: ./envs/core-services/web.env

View File

@@ -226,7 +226,7 @@ services:
# API service
api:
<<: *shared-api-worker-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
environment:
MODE: api
SENTRY_DSN: ${API_SENTRY_DSN:-}
@@ -270,7 +270,7 @@
# WebSocket service for workflow collaboration.
api_websocket:
<<: *shared-api-worker-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
profiles:
- collaboration
environment:
@@ -296,7 +296,7 @@
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
<<: *shared-worker-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
environment:
MODE: worker
SENTRY_DSN: ${API_SENTRY_DSN:-}
@@ -339,7 +339,7 @@
# Celery beat for scheduling periodic tasks.
worker_beat:
<<: *shared-worker-beat-config
image: langgenius/dify-api:1.14.0
image: langgenius/dify-api:1.14.1
environment:
MODE: beat
depends_on:
@@ -372,7 +372,7 @@
# Frontend web application.
web:
image: langgenius/dify-web:1.14.0
image: langgenius/dify-web:1.14.1
restart: always
env_file:
- path: ./envs/core-services/web.env

View File

@@ -36,5 +36,6 @@ TIDB_PUBLIC_KEY=dify
TIDB_PRIVATE_KEY=dify
VIKINGDB_ACCESS_KEY=your-ak
VIKINGDB_SECRET_KEY=your-sk
SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U
# Leave empty to auto-generate a persistent key in the storage directory.
SECRET_KEY=
INIT_PASSWORD=

View File

@@ -246,11 +246,6 @@
"count": 1
}
},
"web/app/components/app/app-access-control/add-member-or-group-pop.tsx": {
"no-restricted-imports": {
"count": 1
}
},
"web/app/components/app/app-publisher/features-wrapper.tsx": {
"ts/no-explicit-any": {
"count": 4

View File

@@ -9,6 +9,7 @@ Shared design tokens, the `cn()` utility, CSS-first Tailwind styles, and headles
- No imports from `web/`. No dependencies on next / i18next / ky / jotai / zustand.
- One component per folder: `src/<name>/index.tsx`, optional `index.stories.tsx` and `__tests__/index.spec.tsx`. Add a matching `./<name>` subpath to `package.json#exports`.
- Props pattern: `Omit<BaseXxx.Root.Props, 'className' | ...> & VariantProps<typeof xxxVariants> & { /* custom */ }`.
- Use plain `Omit<...>` only for non-union Base UI props. When a prop changes the valid shape of related props (for example `value` / `defaultValue`, `multiple` / `value`, or `clearable` / `onChange`), model that relationship with an explicit discriminated union or a distributive helper instead of flattening the props.
- When a component accepts a prop typed from a shared internal module, `export type` it from that component so consumers import it from the component subpath.
## Overlay Primitive Selection: Tooltip vs PreviewCard vs Popover

View File

@@ -79,6 +79,9 @@ vi.mock('@tanstack/react-query', async (importOriginal) => {
const actual = await importOriginal<typeof import('@tanstack/react-query')>()
return {
...actual,
useQuery: () => ({
data: [],
}),
useInfiniteQuery: () => ({
data: { pages: mockPages },
isLoading: mockIsLoading,

View File

@@ -0,0 +1,151 @@
import { render, screen, waitFor } from '@testing-library/react'
import { usePathname, useRouter } from '@/next/navigation'
import { useDatasetDetail, useDatasetRelatedApps } from '@/service/knowledge/use-dataset'
import DatasetDetailLayout from '../layout-main'
const mockReplace = vi.fn()
const mockSetAppSidebarExpand = vi.fn()
vi.mock('@/next/navigation', () => ({
usePathname: vi.fn(),
useRouter: vi.fn(),
}))
vi.mock('@/service/knowledge/use-dataset', () => ({
useDatasetDetail: vi.fn(),
useDatasetRelatedApps: vi.fn(),
}))
vi.mock('@/app/components/app/store', () => ({
useStore: (selector: (state: { setAppSidebarExpand: typeof mockSetAppSidebarExpand }) => unknown) => selector({
setAppSidebarExpand: mockSetAppSidebarExpand,
}),
}))
vi.mock('@/context/app-context', () => ({
useAppContext: () => ({
isCurrentWorkspaceDatasetOperator: false,
}),
}))
vi.mock('@/context/event-emitter', () => ({
useEventEmitterContextContext: () => ({
eventEmitter: undefined,
}),
}))
vi.mock('@/hooks/use-breakpoints', () => ({
default: () => 'desktop',
MediaType: {
mobile: 'mobile',
},
}))
vi.mock('@/hooks/use-document-title', () => ({
default: vi.fn(),
}))
vi.mock('@/app/components/app-sidebar', () => ({
default: () => <aside aria-label="dataset navigation" />,
}))
vi.mock('@/app/components/datasets/extra-info', () => ({
default: () => <div />,
}))
const mockUsePathname = vi.mocked(usePathname)
const mockUseRouter = vi.mocked(useRouter)
const mockUseDatasetDetail = vi.mocked(useDatasetDetail)
const mockUseDatasetRelatedApps = vi.mocked(useDatasetRelatedApps)
describe('DatasetDetailLayout', () => {
beforeEach(() => {
vi.clearAllMocks()
mockUsePathname.mockReturnValue('/datasets/dataset-1/pipeline')
mockUseRouter.mockReturnValue({
back: vi.fn(),
forward: vi.fn(),
refresh: vi.fn(),
push: vi.fn(),
replace: mockReplace,
prefetch: vi.fn(),
})
mockUseDatasetRelatedApps.mockReturnValue({ data: undefined } as ReturnType<typeof useDatasetRelatedApps>)
})
describe('Access Errors', () => {
it.each([403, 404])('should redirect to datasets page when dataset detail returns %s', async (status) => {
// Arrange
mockUseDatasetDetail.mockReturnValue({
data: undefined,
error: new Response(null, { status }),
refetch: vi.fn(),
} as unknown as ReturnType<typeof useDatasetDetail>)
// Act
render(
<DatasetDetailLayout datasetId="dataset-1">
<div>Pipeline content</div>
</DatasetDetailLayout>,
)
// Assert
await waitFor(() => {
expect(mockReplace).toHaveBeenCalledWith('/datasets')
})
expect(mockUseDatasetRelatedApps).toHaveBeenCalledWith('dataset-1', { enabled: false })
expect(screen.queryByText('Pipeline content')).not.toBeInTheDocument()
})
it('should redirect when the dataset detail error exposes status without being a Response', async () => {
// Arrange
mockUseDatasetDetail.mockReturnValue({
data: undefined,
error: { status: 403 },
refetch: vi.fn(),
} as unknown as ReturnType<typeof useDatasetDetail>)
// Act
render(
<DatasetDetailLayout datasetId="dataset-1">
<div>Pipeline content</div>
</DatasetDetailLayout>,
)
// Assert
await waitFor(() => {
expect(mockReplace).toHaveBeenCalledWith('/datasets')
})
expect(screen.queryByText('Pipeline content')).not.toBeInTheDocument()
})
})
describe('Rendering', () => {
it('should render children when dataset detail is available', () => {
// Arrange
mockUseDatasetDetail.mockReturnValue({
data: {
id: 'dataset-1',
name: 'Dataset 1',
provider: 'vendor',
runtime_mode: 'rag_pipeline',
is_published: true,
},
error: null,
refetch: vi.fn(),
} as unknown as ReturnType<typeof useDatasetDetail>)
// Act
render(
<DatasetDetailLayout datasetId="dataset-1">
<div>Pipeline content</div>
</DatasetDetailLayout>,
)
// Assert
expect(screen.getByText('Pipeline content')).toBeInTheDocument()
expect(mockUseDatasetRelatedApps).toHaveBeenCalledWith('dataset-1', { enabled: true })
expect(mockReplace).not.toHaveBeenCalled()
})
})
})

View File

@@ -23,7 +23,7 @@ import DatasetDetailContext from '@/context/dataset-detail'
import { useEventEmitterContextContext } from '@/context/event-emitter'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import useDocumentTitle from '@/hooks/use-document-title'
import { usePathname } from '@/next/navigation'
import { usePathname, useRouter } from '@/next/navigation'
import { useDatasetDetail, useDatasetRelatedApps } from '@/service/knowledge/use-dataset'
type IAppDetailLayoutProps = {
@@ -31,12 +31,26 @@
datasetId: string
}
const getResponseStatus = (error: unknown) => {
if (error instanceof Response)
return error.status
if (typeof error === 'object' && error && 'status' in error && typeof error.status === 'number')
return error.status
}
const shouldRedirectToDatasetList = (error: unknown) => {
const status = getResponseStatus(error)
return status === 403 || status === 404
}
const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
const {
children,
datasetId,
} = props
const { t } = useTranslation()
const router = useRouter()
const pathname = usePathname()
const hideSideBar = pathname.endsWith('documents/create') || pathname.endsWith('documents/create-from-pipeline')
const isPipelineCanvas = pathname.endsWith('/pipeline')
@@ -54,8 +68,9 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
const isMobile = media === MediaType.mobile
const { data: datasetRes, error, refetch: mutateDatasetRes } = useDatasetDetail(datasetId)
const shouldRedirect = shouldRedirectToDatasetList(error)
const { data: relatedApps } = useDatasetRelatedApps(datasetId)
const { data: relatedApps } = useDatasetRelatedApps(datasetId, { enabled: !!datasetRes && !shouldRedirect })
const isButtonDisabledWithPipeline = useMemo(() => {
if (!datasetRes)
@@ -115,9 +130,17 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
setAppSidebarExpand(isMobile ? mode : localeMode)
}, [isMobile, setAppSidebarExpand])
useEffect(() => {
if (shouldRedirect)
router.replace('/datasets')
}, [router, shouldRedirect])
if (!datasetRes && !error)
return <Loading type="app" />
if (shouldRedirect)
return <Loading type="app" />
return (
<div
className={cn(

View File

@@ -254,9 +254,7 @@ describe('AddMemberOrGroupDialog', () => {
await user.click(expandButton)
expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([baseGroup])
const memberLabel = screen.getByText(baseMember.name)
const memberCheckbox = memberLabel.parentElement?.previousElementSibling as HTMLElement
fireEvent.click(memberCheckbox)
await user.click(screen.getByRole('option', { name: /Member One/ }))
expect(useAccessControlStore.getState().specificMembers).toEqual([baseMember])
})
@@ -277,13 +275,13 @@ describe('AddMemberOrGroupDialog', () => {
await user.type(screen.getByPlaceholderText('app.accessControlDialog.operateGroupAndMember.searchPlaceholder'), 'Group')
expect(document.querySelector('.spin-animation')).toBeInTheDocument()
const groupCheckbox = screen.getByText(baseGroup.name).closest('div')?.previousElementSibling as HTMLElement
fireEvent.click(groupCheckbox)
fireEvent.click(groupCheckbox)
const groupOption = screen.getByRole('option', { name: /Group One/ })
fireEvent.click(groupOption)
fireEvent.click(groupOption)
const memberCheckbox = screen.getByText(baseMember.name).parentElement?.previousElementSibling as HTMLElement
fireEvent.click(memberCheckbox)
fireEvent.click(memberCheckbox)
const memberOption = screen.getByRole('option', { name: /Member One/ })
fireEvent.click(memberOption)
fireEvent.click(memberOption)
fireEvent.click(screen.getByText('app.accessControlDialog.operateGroupAndMember.expand'))
fireEvent.click(screen.getByText('app.accessControlDialog.operateGroupAndMember.allMembers'))
@@ -307,7 +305,7 @@ describe('AddMemberOrGroupDialog', () => {
await user.click(screen.getByText('common.operation.add'))
expect(screen.getByText('app.accessControlDialog.operateGroupAndMember.noResult')).toBeInTheDocument()
expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult')
})
})

View File

@@ -1,5 +1,5 @@
import type { AccessControlAccount, AccessControlGroup, Subject } from '@/models/access-control'
import { fireEvent, render, screen } from '@testing-library/react'
import { render, screen } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import useAccessControlStore from '@/context/access-control-store'
import { SubjectType } from '@/models/access-control'
@@ -106,8 +106,7 @@ describe('AddMemberOrGroupDialog', () => {
expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([baseGroup])
const memberCheckbox = screen.getByText(baseMember.name).parentElement?.previousElementSibling as HTMLElement
fireEvent.click(memberCheckbox)
await user.click(screen.getByRole('option', { name: /Member One/ }))
expect(useAccessControlStore.getState().specificMembers).toEqual([baseMember])
})
@@ -125,6 +124,31 @@
await user.click(screen.getByText('common.operation.add'))
expect(screen.getByText('app.accessControlDialog.operateGroupAndMember.noResult')).toBeInTheDocument()
expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult')
})
it('should keep breadcrumbs visible when the current group has no candidates', async () => {
useAccessControlStore.setState({
selectedGroupsForBreadcrumb: [baseGroup],
})
mockUseSearchForWhiteListCandidates.mockReturnValue({
isLoading: false,
isFetchingNextPage: false,
fetchNextPage: vi.fn(),
data: { pages: [{ currPage: 1, subjects: [], hasMore: false }] },
})
const user = userEvent.setup()
render(<AddMemberOrGroupDialog />)
await user.click(screen.getByText('common.operation.add'))
expect(screen.getByRole('button', { name: 'app.accessControlDialog.operateGroupAndMember.allMembers' })).toBeInTheDocument()
expect(screen.getByText(baseGroup.name)).toBeInTheDocument()
expect(screen.getByRole('status')).toHaveTextContent('app.accessControlDialog.operateGroupAndMember.noResult')
await user.click(screen.getByRole('button', { name: 'app.accessControlDialog.operateGroupAndMember.allMembers' }))
expect(useAccessControlStore.getState().selectedGroupsForBreadcrumb).toEqual([])
})
})

View File

@@ -1,110 +1,207 @@
'use client'
import type { ComboboxRootChangeEventDetails } from '@langgenius/dify-ui/combobox'
import type { AccessControlAccount, AccessControlGroup, Subject, SubjectAccount, SubjectGroup } from '@/models/access-control'
import { FloatingOverlay } from '@floating-ui/react'
import { Avatar } from '@langgenius/dify-ui/avatar'
import { Button } from '@langgenius/dify-ui/button'
import { cn } from '@langgenius/dify-ui/cn'
import { Popover, PopoverContent, PopoverTrigger } from '@langgenius/dify-ui/popover'
import {
Combobox,
ComboboxContent,
ComboboxEmpty,
ComboboxInput,
ComboboxInputGroup,
ComboboxItem,
ComboboxItemText,
ComboboxList,
ComboboxStatus,
ComboboxTrigger,
} from '@langgenius/dify-ui/combobox'
import { RiAddCircleFill, RiArrowRightSLine, RiOrganizationChart } from '@remixicon/react'
import { useDebounce } from 'ahooks'
import { useCallback, useEffect, useRef, useState } from 'react'
import { useEffect, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useSelector } from '@/context/app-context'
import { SubjectType } from '@/models/access-control'
import { useSearchForWhiteListCandidates } from '@/service/access-control'
import useAccessControlStore from '../../../../context/access-control-store'
import Checkbox from '../../base/checkbox'
import Input from '../../base/input'
import Loading from '../../base/loading'
export default function AddMemberOrGroupDialog() {
const { t } = useTranslation()
const [open, setOpen] = useState(false)
const [keyword, setKeyword] = useState('')
const scrollRootRef = useRef<HTMLDivElement>(null)
const anchorRef = useRef<HTMLDivElement>(null)
const specificGroups = useAccessControlStore(s => s.specificGroups)
const setSpecificGroups = useAccessControlStore(s => s.setSpecificGroups)
const specificMembers = useAccessControlStore(s => s.specificMembers)
const setSpecificMembers = useAccessControlStore(s => s.setSpecificMembers)
const selectedGroupsForBreadcrumb = useAccessControlStore(s => s.selectedGroupsForBreadcrumb)
const debouncedKeyword = useDebounce(keyword, { wait: 500 })
const lastAvailableGroup = selectedGroupsForBreadcrumb[selectedGroupsForBreadcrumb.length - 1]
const { isLoading, isFetchingNextPage, fetchNextPage, data } = useSearchForWhiteListCandidates({ keyword: debouncedKeyword, groupId: lastAvailableGroup?.id, resultsPerPage: 10 }, open)
const handleKeywordChange = (e: React.ChangeEvent<HTMLInputElement>) => {
setKeyword(e.target.value)
}
const pages = data?.pages ?? []
const subjects = pages.flatMap(page => page.subjects ?? [])
const selectedSubjects = [
...specificGroups.map(groupToSubject),
...specificMembers.map(memberToSubject),
]
const hasResults = pages.length > 0 && subjects.length > 0
const shouldShowBreadcrumb = hasResults || selectedGroupsForBreadcrumb.length > 0
const hasMore = pages[pages.length - 1]?.hasMore ?? false
const anchorRef = useRef<HTMLDivElement>(null)
useEffect(() => {
const hasMore = data?.pages?.[0]?.hasMore ?? false
let observer: IntersectionObserver | undefined
if (anchorRef.current) {
observer = new IntersectionObserver((entries) => {
if (entries[0]!.isIntersecting && !isLoading && hasMore)
fetchNextPage()
}, { rootMargin: '20px' })
}, { root: scrollRootRef.current, rootMargin: '20px' })
observer.observe(anchorRef.current)
}
return () => observer?.disconnect()
}, [isLoading, fetchNextPage, anchorRef, data])
}, [isLoading, fetchNextPage, hasMore])
const handleOpenChange = (nextOpen: boolean) => {
if (!nextOpen)
setKeyword('')
setOpen(nextOpen)
}
const handleInputValueChange = (inputValue: string, details: ComboboxRootChangeEventDetails) => {
if (details.reason !== 'item-press')
setKeyword(inputValue)
}
const handleValueChange = (nextSubjects: Subject[]) => {
const nextGroups: AccessControlGroup[] = []
const nextMembers: AccessControlAccount[] = []
for (const subject of nextSubjects) {
if (subject.subjectType === SubjectType.GROUP)
nextGroups.push((subject as SubjectGroup).groupData)
else
nextMembers.push((subject as SubjectAccount).accountData)
}
setSpecificGroups(nextGroups)
setSpecificMembers(nextMembers)
}
return (
<Popover open={open} onOpenChange={setOpen}>
<PopoverTrigger
render={(
<Button variant="ghost-accent" size="small" className="flex shrink-0 items-center gap-x-0.5">
<RiAddCircleFill className="h-4 w-4" />
<span>{t('operation.add', { ns: 'common' })}</span>
</Button>
)}
/>
{open && <FloatingOverlay />}
<PopoverContent
<Combobox<Subject, true>
multiple
open={open}
value={selectedSubjects}
inputValue={keyword}
items={subjects}
itemToStringLabel={getSubjectLabel}
itemToStringValue={getSubjectValue}
isItemEqualToValue={isSameSubject}
filter={null}
onOpenChange={handleOpenChange}
onInputValueChange={handleInputValueChange}
onValueChange={handleValueChange}
>
<ComboboxTrigger
aria-label={t('operation.add', { ns: 'common' })}
icon={false}
size="small"
className="flex h-6 w-auto shrink-0 items-center gap-x-0.5 rounded-md border-0 bg-transparent px-2 py-0 text-xs font-medium text-components-button-secondary-accent-text hover:bg-state-accent-hover focus-visible:bg-state-accent-hover focus-visible:ring-2 focus-visible:ring-state-accent-solid data-open:bg-state-accent-hover"
>
<RiAddCircleFill className="h-4 w-4" aria-hidden="true" />
<span>{t('operation.add', { ns: 'common' })}</span>
</ComboboxTrigger>
<ComboboxContent
placement="bottom-end"
alignOffset={300}
popupClassName="border-none bg-transparent shadow-none"
popupClassName="relative flex max-h-[400px] w-[400px] flex-col overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-0 shadow-lg backdrop-blur-[5px]"
>
<div className="relative flex max-h-[400px] w-[400px] flex-col overflow-y-auto rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur shadow-lg backdrop-blur-[5px]">
<div ref={scrollRootRef} className="min-h-0 overflow-y-auto">
<div className="sticky top-0 z-10 bg-components-panel-bg-blur p-2 pb-0.5 backdrop-blur-[5px]">
<Input value={keyword} onChange={handleKeywordChange} showLeftIcon placeholder={t('accessControlDialog.operateGroupAndMember.searchPlaceholder', { ns: 'app' }) as string} />
<ComboboxInputGroup className="h-8 min-h-8 px-2">
<span className="mr-0.5 i-ri-search-line size-4 shrink-0 text-text-tertiary" aria-hidden="true" />
<ComboboxInput
aria-label={t('accessControlDialog.operateGroupAndMember.searchPlaceholder', { ns: 'app' })}
placeholder={t('accessControlDialog.operateGroupAndMember.searchPlaceholder', { ns: 'app' })}
className="block h-4.5 grow px-1 py-0 text-[13px] text-text-primary"
/>
</ComboboxInputGroup>
</div>
{
isLoading
? <div className="p-1"><Loading /></div>
: (data?.pages?.length ?? 0) > 0
? (
<>
<div className="flex h-7 items-center px-2 py-0.5">
<SelectedGroupsBreadCrumb />
</div>
<div className="p-1">
{renderGroupOrMember(data?.pages ?? [])}
{isLoading
? (
<ComboboxStatus className="p-1">
<Loading />
</ComboboxStatus>
)
: (
<>
{shouldShowBreadcrumb && (
<div className="flex h-7 items-center px-2 py-0.5">
<SelectedGroupsBreadCrumb />
</div>
)}
{hasResults
? (
<>
<ComboboxList className="max-h-none p-1">
{(subject: Subject) => <SubjectItem key={getSubjectValue(subject)} subject={subject} />}
</ComboboxList>
{isFetchingNextPage && <Loading />}
</div>
<div ref={anchorRef} className="h-0"> </div>
</>
)
: (
<div className="flex h-7 items-center justify-center px-2 py-0.5">
<span className="system-xs-regular text-text-tertiary">{t('accessControlDialog.operateGroupAndMember.noResult', { ns: 'app' })}</span>
</div>
)
}
<div ref={anchorRef} className="h-0" />
</>
)
: (
<ComboboxEmpty className="flex h-7 items-center justify-center px-2 py-0.5">
{t('accessControlDialog.operateGroupAndMember.noResult', { ns: 'app' })}
</ComboboxEmpty>
)}
</>
)}
</div>
</PopoverContent>
</Popover>
</ComboboxContent>
</Combobox>
)
}
type GroupOrMemberData = { subjects: Subject[], currPage: number }[]
function renderGroupOrMember(data: GroupOrMemberData) {
return data?.map((page) => {
return (
<div key={`search_group_member_page_${page.currPage}`}>
{page.subjects?.map((item, index) => {
if (item.subjectType === SubjectType.GROUP)
return <GroupItem key={index} group={(item as SubjectGroup).groupData} />
return <MemberItem key={index} member={(item as SubjectAccount).accountData} />
})}
</div>
)
}) ?? null
function groupToSubject(group: AccessControlGroup): SubjectGroup {
return {
subjectId: group.id,
subjectType: SubjectType.GROUP,
groupData: group,
}
}
function memberToSubject(member: AccessControlAccount): SubjectAccount {
return {
subjectId: member.id,
subjectType: SubjectType.ACCOUNT,
accountData: member,
}
}
function getSubjectLabel(subject: Subject) {
if (subject.subjectType === SubjectType.GROUP)
return (subject as SubjectGroup).groupData.name
return (subject as SubjectAccount).accountData.name
}
function getSubjectValue(subject: Subject) {
return `${subject.subjectType}:${subject.subjectId}`
}
function isSameSubject(item: Subject, value: Subject) {
return item.subjectId === value.subjectId && item.subjectType === value.subjectType
}
function SubjectItem({ subject }: { subject: Subject }) {
if (subject.subjectType === SubjectType.GROUP)
return <GroupItem group={(subject as SubjectGroup).groupData} subject={subject} />
return <MemberItem member={(subject as SubjectAccount).accountData} subject={subject} />
}
function SelectedGroupsBreadCrumb() {
@@ -112,13 +209,13 @@ function SelectedGroupsBreadCrumb() {
const setSelectedGroupsForBreadcrumb = useAccessControlStore(s => s.setSelectedGroupsForBreadcrumb)
const { t } = useTranslation()
const handleBreadCrumbClick = useCallback((index: number) => {
const handleBreadCrumbClick = (index: number) => {
const newGroups = selectedGroupsForBreadcrumb.slice(0, index + 1)
setSelectedGroupsForBreadcrumb(newGroups)
}, [setSelectedGroupsForBreadcrumb, selectedGroupsForBreadcrumb])
const handleReset = useCallback(() => {
}
const handleReset = () => {
setSelectedGroupsForBreadcrumb([])
}, [setSelectedGroupsForBreadcrumb])
}
const hasBreadcrumb = selectedGroupsForBreadcrumb.length > 0
return (
@@ -162,104 +259,111 @@
type GroupItemProps = {
group: AccessControlGroup
subject: Subject
}
function GroupItem({ group }: GroupItemProps) {
function GroupItem({ group, subject }: GroupItemProps) {
const { t } = useTranslation()
const specificGroups = useAccessControlStore(s => s.specificGroups)
const setSpecificGroups = useAccessControlStore(s => s.setSpecificGroups)
const selectedGroupsForBreadcrumb = useAccessControlStore(s => s.selectedGroupsForBreadcrumb)
const setSelectedGroupsForBreadcrumb = useAccessControlStore(s => s.setSelectedGroupsForBreadcrumb)
const isChecked = specificGroups.some(g => g.id === group.id)
const handleCheckChange = useCallback(() => {
if (!isChecked) {
const newGroups = [...specificGroups, group]
setSpecificGroups(newGroups)
}
else {
const newGroups = specificGroups.filter(g => g.id !== group.id)
setSpecificGroups(newGroups)
}
}, [specificGroups, setSpecificGroups, group, isChecked])
const handleExpandClick = useCallback(() => {
const handleExpandClick = () => {
setSelectedGroupsForBreadcrumb([...selectedGroupsForBreadcrumb, group])
}, [selectedGroupsForBreadcrumb, setSelectedGroupsForBreadcrumb, group])
}
return (
<BaseItem>
<Checkbox checked={isChecked} className="h-4 w-4 shrink-0" onCheck={handleCheckChange} />
<div className="item-center flex grow">
<div className="mr-2 h-5 w-5 overflow-hidden rounded-full bg-components-icon-bg-blue-solid">
<div className="bg-access-app-icon-mask-bg flex h-full w-full items-center justify-center">
<RiOrganizationChart className="h-[14px] w-[14px] text-components-avatar-shape-fill-stop-0" />
<div className="flex items-center gap-2 rounded-lg hover:bg-state-base-hover">
<BaseItem subject={subject}>
<SelectionBox checked={isChecked} />
<ComboboxItemText className="flex grow items-center px-0">
<div className="mr-2 h-5 w-5 overflow-hidden rounded-full bg-components-icon-bg-blue-solid">
<div className="bg-access-app-icon-mask-bg flex h-full w-full items-center justify-center">
<RiOrganizationChart className="h-[14px] w-[14px] text-components-avatar-shape-fill-stop-0" aria-hidden="true" />
</div>
</div>
</div>
<p className="mr-1 system-sm-medium text-text-secondary">{group.name}</p>
<p className="system-xs-regular text-text-tertiary">{group.groupSize}</p>
</div>
<span className="mr-1 system-sm-medium text-text-secondary">{group.name}</span>
<span className="system-xs-regular text-text-tertiary">{group.groupSize}</span>
</ComboboxItemText>
</BaseItem>
<Button
size="small"
disabled={isChecked}
variant="ghost-accent"
className="flex shrink-0 items-center justify-between px-1.5 py-1"
className="mr-1 flex shrink-0 items-center justify-between px-1.5 py-1"
onPointerDown={event => event.preventDefault()}
onClick={handleExpandClick}
>
<span className="px-[3px]">{t('accessControlDialog.operateGroupAndMember.expand', { ns: 'app' })}</span>
<RiArrowRightSLine className="h-4 w-4" />
<RiArrowRightSLine className="h-4 w-4" aria-hidden="true" />
</Button>
</BaseItem>
</div>
)
}
type MemberItemProps = {
member: AccessControlAccount
subject: Subject
}
function MemberItem({ member }: MemberItemProps) {
function MemberItem({ member, subject }: MemberItemProps) {
const currentUser = useSelector(s => s.userProfile)
const { t } = useTranslation()
const specificMembers = useAccessControlStore(s => s.specificMembers)
const setSpecificMembers = useAccessControlStore(s => s.setSpecificMembers)
const isChecked = specificMembers.some(m => m.id === member.id)
const handleCheckChange = useCallback(() => {
if (!isChecked) {
const newMembers = [...specificMembers, member]
setSpecificMembers(newMembers)
}
else {
const newMembers = specificMembers.filter(m => m.id !== member.id)
setSpecificMembers(newMembers)
}
}, [specificMembers, setSpecificMembers, member, isChecked])
return (
<BaseItem className="pr-3">
<Checkbox checked={isChecked} className="h-4 w-4 shrink-0" onCheck={handleCheckChange} />
<div className="flex grow items-center">
<BaseItem subject={subject} className="pr-3">
<SelectionBox checked={isChecked} />
<ComboboxItemText className="flex grow items-center px-0">
<div className="mr-2 h-5 w-5 overflow-hidden rounded-full bg-components-icon-bg-blue-solid">
<div className="bg-access-app-icon-mask-bg flex h-full w-full items-center justify-center">
<Avatar size="xxs" avatar={null} name={member.name} />
</div>
</div>
<p className="mr-1 system-sm-medium text-text-secondary">{member.name}</p>
<span className="mr-1 system-sm-medium text-text-secondary">{member.name}</span>
{currentUser.email === member.email && (
<p className="system-xs-regular text-text-tertiary">
<span className="system-xs-regular text-text-tertiary">
(
{t('you', { ns: 'common' })}
)
</p>
</span>
)}
</div>
<p className="system-xs-regular text-text-quaternary">{member.email}</p>
</ComboboxItemText>
<span className="system-xs-regular text-text-quaternary">{member.email}</span>
</BaseItem>
)
}
type BaseItemProps = {
className?: string
subject: Subject
children: React.ReactNode
}
function BaseItem({ children, className }: BaseItemProps) {
function BaseItem({ children, className, subject }: BaseItemProps) {
return (
<div className={cn('flex cursor-pointer items-center space-x-2 p-1 pl-2 hover:rounded-lg hover:bg-state-base-hover', className)}>
<ComboboxItem
value={subject}
className={cn(
'mx-0 flex min-h-8 grow grid-cols-none items-center gap-2 rounded-lg p-1 pl-2',
className,
)}
>
{children}
</div>
</ComboboxItem>
)
}
function SelectionBox({ checked }: { checked: boolean }) {
return (
<span
aria-hidden="true"
className={cn(
'flex size-4 shrink-0 items-center justify-center rounded-sm shadow-xs shadow-shadow-shadow-3',
checked
? 'bg-components-checkbox-bg text-components-checkbox-icon'
: 'border border-components-checkbox-border bg-components-checkbox-bg-unchecked',
)}
>
{checked && <span className="i-ri-check-line size-3" />}
</span>
)
}

View File

@@ -1,6 +1,8 @@
import type { DocumentItem } from '@/models/datasets'
import { fireEvent, render, screen } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import type { SimpleDocumentDetail } from '@/models/datasets'
import { Combobox } from '@langgenius/dify-ui/combobox'
import { render, screen } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import { ChunkingMode, DataSourceType } from '@/models/datasets'
import DocumentList from '../document-list'
vi.mock('../../document-file-icon', () => ({
@@ -13,37 +15,92 @@
),
}))
const createDocument = (overrides: Partial<SimpleDocumentDetail> = {}): SimpleDocumentDetail => ({
id: 'doc-1',
batch: 'batch-1',
position: 1,
dataset_id: 'dataset-1',
data_source_type: DataSourceType.FILE,
data_source_info: {
upload_file: {
id: 'file-1',
name: 'report.pdf',
size: 1024,
extension: 'pdf',
mime_type: 'application/pdf',
created_by: 'user-1',
created_at: Date.now(),
},
job_id: 'job-1',
url: '',
},
dataset_process_rule_id: 'rule-1',
name: 'report',
created_from: 'web',
created_by: 'user-1',
created_at: Date.now(),
indexing_status: 'completed',
display_status: 'enabled',
doc_form: ChunkingMode.text,
doc_language: 'en',
enabled: true,
word_count: 1000,
archived: false,
updated_at: Date.now(),
hit_count: 0,
data_source_detail_dict: {
upload_file: {
name: 'report.pdf',
extension: 'pdf',
},
},
...overrides,
})
const renderDocumentList = (list: SimpleDocumentDetail[], onValueChange = vi.fn()) => ({
onValueChange,
...render(
<Combobox
open
items={list}
itemToStringLabel={document => document.name}
itemToStringValue={document => document.id}
onValueChange={onValueChange}
>
<DocumentList />
</Combobox>,
),
})
describe('DocumentList', () => {
const mockList = [
{ id: 'doc-1', name: 'report', extension: 'pdf' },
{ id: 'doc-2', name: 'data', extension: 'csv' },
] as DocumentItem[]
const onChange = vi.fn()
beforeEach(() => {
vi.clearAllMocks()
})
it('should render all documents', () => {
render(<DocumentList list={mockList} onChange={onChange} />)
expect(screen.getByText('report')).toBeInTheDocument()
expect(screen.getByText('data')).toBeInTheDocument()
})
it('should render documents as combobox options', () => {
renderDocumentList([
createDocument({ id: 'doc-1', name: 'report' }),
createDocument({ id: 'doc-2', name: 'data' }),
])
it('should render file icons', () => {
render(<DocumentList list={mockList} onChange={onChange} />)
expect(screen.getByRole('option', { name: /report/ })).toBeInTheDocument()
expect(screen.getByRole('option', { name: /data/ })).toBeInTheDocument()
expect(screen.getAllByTestId('file-icon')).toHaveLength(2)
})
it('should call onChange with document on click', () => {
render(<DocumentList list={mockList} onChange={onChange} />)
fireEvent.click(screen.getByText('report'))
expect(onChange).toHaveBeenCalledWith(mockList[0])
it('should keep item spacing symmetric with the search field', () => {
renderDocumentList([createDocument({ id: 'doc-1', name: 'report' })])
expect(screen.getByRole('option', { name: /report/ })).toHaveClass('px-3')
})
it('should render empty list without errors', () => {
const { container } = render(<DocumentList list={[]} onChange={onChange} />)
expect(container.firstChild).toBeInTheDocument()
it('should select a document through combobox value change', async () => {
const user = userEvent.setup()
const selectedDocument = createDocument({ id: 'doc-1', name: 'report' })
const { onValueChange } = renderDocumentList([selectedDocument])
await user.click(screen.getByRole('option', { name: /report/ }))
expect(onValueChange).toHaveBeenCalledWith(selectedDocument, expect.any(Object))
})
})

View File

@@ -1,43 +1,49 @@
'use client'
import type { FC } from 'react'
import type { DocumentItem } from '@/models/datasets'
import type { SimpleDocumentDetail } from '@/models/datasets'
import { cn } from '@langgenius/dify-ui/cn'
import * as React from 'react'
import { useCallback } from 'react'
import {
ComboboxItem,
ComboboxItemText,
ComboboxList,
} from '@langgenius/dify-ui/combobox'
import FileIcon from '../document-file-icon'
type Props = {
className?: string
list: DocumentItem[]
onChange: (value: DocumentItem) => void
}
const DocumentList: FC<Props> = ({
className,
list,
onChange,
}) => {
const handleChange = useCallback((item: DocumentItem) => {
return () => onChange(item)
}, [onChange])
function getDocumentExtension(document: SimpleDocumentDetail) {
const detailExtension = document.data_source_detail_dict?.upload_file?.extension
if (detailExtension)
return detailExtension
const dataSourceInfo = document.data_source_info
if (dataSourceInfo && 'upload_file' in dataSourceInfo)
return dataSourceInfo.upload_file.extension
return ''
}
export default function DocumentList({
className,
}: Props) {
return (
<div className={cn('max-h-[calc(100vh-120px)] overflow-auto', className)}>
{list.map((item) => {
const { id, name, extension } = item
<ComboboxList className={cn('max-h-[calc(100vh-120px)] p-0', className)}>
{(item: SimpleDocumentDetail) => {
const extension = getDocumentExtension(item)
return (
<div
key={id}
className="flex h-8 cursor-pointer items-center space-x-2 rounded-lg px-2 hover:bg-state-base-hover"
onClick={handleChange(item)}
<ComboboxItem
key={item.id}
value={item}
className="mx-0 flex h-8 grid-cols-none items-center gap-2 rounded-lg px-3 py-0"
>
<FileIcon name={item.name} extension={extension} size="lg" />
<div className="truncate text-sm text-text-secondary">{name}</div>
</div>
<ComboboxItemText className="min-w-0 px-0 system-sm-regular text-text-secondary">
{item.name}
</ComboboxItemText>
</ComboboxItem>
)
})}
</div>
}}
</ComboboxList>
)
}
export default React.memo(DocumentList)

View File

@@ -1,20 +1,22 @@
'use client'
import type { FC } from 'react'
import type { DocumentItem, ParentMode, SimpleDocumentDetail } from '@/models/datasets'
import type { ComboboxRootChangeEventDetails } from '@langgenius/dify-ui/combobox'
import type { ParentMode, SimpleDocumentDetail } from '@/models/datasets'
import { cn } from '@langgenius/dify-ui/cn'
import {
Popover,
PopoverContent,
PopoverTrigger,
} from '@langgenius/dify-ui/popover'
Combobox,
ComboboxContent,
ComboboxEmpty,
ComboboxInput,
ComboboxInputGroup,
ComboboxStatus,
ComboboxTrigger,
ComboboxValue,
} from '@langgenius/dify-ui/combobox'
import { RiArrowDownSLine } from '@remixicon/react'
import { useBoolean } from 'ahooks'
import * as React from 'react'
import { useCallback, useMemo, useState } from 'react'
import { useDeferredValue, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { GeneralChunk, ParentChildChunk } from '@/app/components/base/icons/src/vender/knowledge'
import Loading from '@/app/components/base/loading'
import SearchInput from '@/app/components/base/search-input'
import { ChunkingMode } from '@/models/datasets'
import { useDocumentList } from '@/service/knowledge/use-document'
import FileIcon from '../document-file-icon'
@@ -22,116 +24,177 @@ import DocumentList from './document-list'
type Props = {
datasetId: string
value: {
name?: string
extension?: string
chunkingMode?: ChunkingMode
parentMode?: ParentMode
}
value?: SimpleDocumentDetail | null
parentMode?: ParentMode
onChange: (value: SimpleDocumentDetail) => void
}
const DocumentPicker: FC<Props> = ({
function getDocumentLabel(document: SimpleDocumentDetail) {
return document.name
}
function getDocumentValue(document: SimpleDocumentDetail) {
return document.id
}
function isSameDocument(item: SimpleDocumentDetail, value: SimpleDocumentDetail) {
return item.id === value.id
}
function getDocumentExtension(document?: SimpleDocumentDetail | null) {
if (!document)
return ''
const detailExtension = document.data_source_detail_dict?.upload_file?.extension
if (detailExtension)
return detailExtension
const dataSourceInfo = document.data_source_info
if (dataSourceInfo && 'upload_file' in dataSourceInfo)
return dataSourceInfo.upload_file.extension
return ''
}
function DocumentPickerTriggerValue({
document,
parentMode,
}: {
document?: SimpleDocumentDetail | null
parentMode?: ParentMode
}) {
const { t } = useTranslation()
const isGeneralMode = document?.doc_form === ChunkingMode.text
const isParentChild = document?.doc_form === ChunkingMode.parentChild
const isQAMode = document?.doc_form === ChunkingMode.qa
const TypeIcon = isParentChild ? ParentChildChunk : GeneralChunk
const ArrowIcon = RiArrowDownSLine
const parentModeLabel = (() => {
if (!parentMode)
return '--'
return parentMode === 'paragraph' ? t('parentMode.paragraph', { ns: 'dataset' }) : t('parentMode.fullDoc', { ns: 'dataset' })
})()
return (
<span className="flex min-w-0 items-center gap-1.5">
<FileIcon name={document?.name} extension={getDocumentExtension(document)} size="xl" />
<span className="flex min-w-0 flex-col items-start">
<span className="flex max-w-full min-w-0 items-center gap-1">
<span className="max-w-[280px] min-w-0 truncate system-md-semibold text-text-primary">
{document?.name || '--'}
</span>
<ArrowIcon className="h-4 w-4 shrink-0 text-text-primary" aria-hidden="true" />
</span>
<span className="flex h-3 max-w-[300px] items-center gap-0.5 text-text-tertiary">
<TypeIcon className="h-3 w-3 shrink-0" />
<span className={cn('truncate system-2xs-medium-uppercase', isParentChild && 'mt-0.5')}>
{isGeneralMode && t('chunkingMode.general', { ns: 'dataset' })}
{isQAMode && t('chunkingMode.qa', { ns: 'dataset' })}
{isParentChild && `${t('chunkingMode.parentChild', { ns: 'dataset' })} · ${parentModeLabel}`}
</span>
</span>
</span>
</span>
)
}
export function DocumentPicker({
datasetId,
value,
parentMode,
onChange,
}) => {
}: Props) {
const { t } = useTranslation()
const {
name,
extension,
chunkingMode,
parentMode,
} = value
const [query, setQuery] = useState('')
const [searchValue, setSearchValue] = useState('')
const deferredSearchValue = useDeferredValue(searchValue)
const { data } = useDocumentList({
datasetId,
query: {
keyword: query,
keyword: deferredSearchValue,
page: 1,
limit: 20,
},
})
const documentsList = data?.data
const isGeneralMode = chunkingMode === ChunkingMode.text
const isParentChild = chunkingMode === ChunkingMode.parentChild
const isQAMode = chunkingMode === ChunkingMode.qa
const TypeIcon = isParentChild ? ParentChildChunk : GeneralChunk
const documentsList = data?.data ?? []
const [open, {
set: setOpen,
}] = useBoolean(false)
const ArrowIcon = RiArrowDownSLine
const handleInputValueChange = (inputValue: string, details: ComboboxRootChangeEventDetails) => {
if (details.reason !== 'item-press')
setSearchValue(inputValue)
}
const handleChange = useCallback(({ id }: DocumentItem) => {
onChange(documentsList?.find(item => item.id === id) as SimpleDocumentDetail)
setOpen(false)
}, [documentsList, onChange, setOpen])
const handleOpenChange = (nextOpen: boolean) => {
if (!nextOpen)
setSearchValue('')
}
const parentModeLabel = useMemo(() => {
if (!parentMode)
return '--'
return parentMode === 'paragraph' ? t('parentMode.paragraph', { ns: 'dataset' }) : t('parentMode.fullDoc', { ns: 'dataset' })
}, [parentMode, t])
const handleDocumentChange = (document: SimpleDocumentDetail | null) => {
if (!document)
return
onChange(document)
setSearchValue('')
}
return (
<Popover
open={open}
onOpenChange={setOpen}
<Combobox<SimpleDocumentDetail>
items={documentsList}
value={value ?? null}
inputValue={searchValue}
onOpenChange={handleOpenChange}
onInputValueChange={handleInputValueChange}
onValueChange={handleDocumentChange}
isItemEqualToValue={isSameDocument}
itemToStringLabel={getDocumentLabel}
itemToStringValue={getDocumentValue}
filter={null}
>
<PopoverTrigger
nativeButton={false}
render={(
<div className={cn('ml-1 flex cursor-pointer items-center rounded-lg px-2 py-0.5 select-none hover:bg-state-base-hover', open && 'bg-state-base-hover')}>
<FileIcon name={name} extension={extension} size="xl" />
<div className="mr-0.5 ml-1 flex flex-col items-start">
<div className="flex items-center space-x-0.5">
<span className={cn('system-md-semibold text-text-primary')}>
{' '}
{name || '--'}
</span>
<ArrowIcon className="h-4 w-4 text-text-primary" />
</div>
<div className="flex h-3 items-center space-x-0.5 text-text-tertiary">
<TypeIcon className="h-3 w-3" />
<span className={cn('system-2xs-medium-uppercase', isParentChild && 'mt-0.5' /* to icon problem cause not ver align */)}>
{isGeneralMode && t('chunkingMode.general', { ns: 'dataset' })}
{isQAMode && t('chunkingMode.qa', { ns: 'dataset' })}
{isParentChild && `${t('chunkingMode.parentChild', { ns: 'dataset' })} · ${parentModeLabel}`}
</span>
</div>
</div>
</div>
<ComboboxTrigger
aria-label={value?.name || t('operation.search', { ns: 'common' })}
icon={false}
className={cn(
'ml-1 flex h-auto w-auto rounded-lg border-0 bg-transparent px-2 py-1 hover:bg-state-base-hover focus-visible:bg-state-base-hover focus-visible:ring-1 focus-visible:ring-components-input-border-active data-open:bg-state-base-hover',
)}
/>
<PopoverContent
>
<ComboboxValue>
{(document: SimpleDocumentDetail | null) => (
<DocumentPickerTriggerValue document={document} parentMode={parentMode} />
)}
</ComboboxValue>
</ComboboxTrigger>
<ComboboxContent
placement="bottom-start"
sideOffset={0}
popupClassName="border-none bg-transparent shadow-none"
popupClassName="w-[360px] rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-2 shadow-lg backdrop-blur-[5px]"
>
<div className="w-[360px] rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-1 pt-2 shadow-lg backdrop-blur-[5px]">
<SearchInput value={query} onChange={setQuery} className="mx-1" />
{documentsList
? (
<DocumentList
className="mt-2"
list={documentsList.map(d => ({
id: d.id,
name: d.name,
extension: d.data_source_detail_dict?.upload_file?.extension || '',
}))}
onChange={handleChange}
/>
)
: (
<div className="mt-2 flex h-[100px] w-[360px] items-center justify-center">
<Loading />
</div>
)}
</div>
</PopoverContent>
</Popover>
<ComboboxInputGroup className="h-8 min-h-8 px-2">
<span className="mr-0.5 i-ri-search-line size-4 shrink-0 text-text-tertiary" aria-hidden="true" />
<ComboboxInput
aria-label={t('operation.search', { ns: 'common' })}
placeholder={t('operation.search', { ns: 'common' })}
className="block h-4.5 grow px-1 py-0 text-[13px] text-text-primary"
/>
</ComboboxInputGroup>
{data
? (
documentsList.length > 0
? (
<DocumentList
className="mt-2"
/>
)
: (
<ComboboxEmpty className="mt-2 flex h-[100px] w-full items-center justify-center">
{t('noData', { ns: 'common' })}
</ComboboxEmpty>
)
)
: (
<ComboboxStatus className="mt-2 flex h-[100px] w-full items-center justify-center">
<Loading />
</ComboboxStatus>
)}
</ComboboxContent>
</Combobox>
)
}
export default React.memo(DocumentPicker)

View File

@@ -14,7 +14,6 @@ import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import Loading from '@/app/components/base/loading'
import FileIcon from '../document-file-icon'
import DocumentList from './document-list'
type Props = {
className?: string
@@ -74,7 +73,7 @@ const PreviewDocumentPicker: FC<Props> = ({
{files?.length > 1 && <div className="flex h-8 items-center pl-2 system-xs-medium-uppercase text-text-tertiary">{t('preprocessDocument', { ns: 'dataset', num: files.length })}</div>}
{files?.length > 0
? (
<DocumentList
<PreviewDocumentList
list={files}
onChange={handleChange}
/>
@@ -90,3 +89,27 @@
)
}
export default React.memo(PreviewDocumentPicker)
function PreviewDocumentList({
list,
onChange,
}: {
list: DocumentItem[]
onChange: (value: DocumentItem) => void
}) {
return (
<div className="max-h-[calc(100vh-120px)] overflow-auto">
{list.map(item => (
<button
key={item.id}
type="button"
className="flex h-8 w-full cursor-pointer items-center gap-2 rounded-lg border-0 bg-transparent px-2 text-left hover:bg-state-base-hover"
onClick={() => onChange(item)}
>
<FileIcon name={item.name} extension={item.extension} size="lg" />
<span className="truncate text-sm text-text-secondary">{item.name}</span>
</button>
))}
</div>
)
}

View File

@ -1,6 +1,7 @@
import type { SimpleDocumentDetail } from '@/models/datasets'
import { render } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { ChunkingMode } from '@/models/datasets'
import { ChunkingMode, DataSourceType } from '@/models/datasets'
import { DocumentTitle } from '../document-title'
@ -11,13 +12,23 @@ vi.mock('@/next/navigation', () => ({
}),
}))
// Mock DocumentPicker
vi.mock('../../../common/document-picker', () => ({
default: ({ datasetId, value, onChange }: { datasetId: string, value: unknown, onChange: (doc: { id: string }) => void }) => (
DocumentPicker: ({
datasetId,
value,
parentMode,
onChange,
}: {
datasetId: string
value?: SimpleDocumentDetail | null
parentMode?: string
onChange: (doc: { id: string }) => void
}) => (
<div
data-testid="document-picker"
data-dataset-id={datasetId}
data-value={JSON.stringify(value)}
data-value-id={value?.id ?? ''}
data-parent-mode={parentMode ?? ''}
onClick={() => onChange({ id: 'new-doc-id' })}
>
Document Picker
@ -25,6 +36,42 @@ vi.mock('../../../common/document-picker', () => ({
),
}))
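// Fixture factory: builds a complete SimpleDocumentDetail so each test overrides only the fields it cares about.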
const createDocument = (overrides: Partial<SimpleDocumentDetail> = {}): SimpleDocumentDetail => ({
id: 'doc-1',
batch: 'batch-1',
position: 1,
dataset_id: 'dataset-1',
data_source_type: DataSourceType.FILE,
data_source_info: {
upload_file: {
id: 'file-1',
name: 'document.pdf',
size: 1024,
extension: 'pdf',
mime_type: 'application/pdf',
created_by: 'user-1',
created_at: Date.now(),
},
job_id: 'job-1',
url: '',
},
dataset_process_rule_id: 'rule-1',
name: 'Document 1',
created_from: 'web',
created_by: 'user-1',
created_at: Date.now(),
indexing_status: 'completed',
display_status: 'enabled',
doc_form: ChunkingMode.text,
doc_language: 'en',
enabled: true,
word_count: 1000,
archived: false,
updated_at: Date.now(),
hit_count: 0,
...overrides,
})
describe('DocumentTitle', () => {
beforeEach(() => {
vi.clearAllMocks()
@ -69,31 +116,26 @@ describe('DocumentTitle', () => {
expect(getByTestId('document-picker').getAttribute('data-dataset-id')).toBe('test-dataset-id')
})
it('should pass value props to DocumentPicker', () => {
it('should pass the selected document to DocumentPicker', () => {
const document = createDocument({ id: 'doc-current' })
const { getByTestId } = render(
<DocumentTitle
datasetId="dataset-1"
name="test-document"
extension="pdf"
chunkingMode={ChunkingMode.text}
parent_mode="paragraph"
document={document}
parentMode="paragraph"
/>,
)
const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}')
expect(value.name).toBe('test-document')
expect(value.extension).toBe('pdf')
expect(value.chunkingMode).toBe(ChunkingMode.text)
expect(value.parentMode).toBe('paragraph')
expect(getByTestId('document-picker')).toHaveAttribute('data-value-id', 'doc-current')
expect(getByTestId('document-picker')).toHaveAttribute('data-parent-mode', 'paragraph')
})
it('should default parentMode to paragraph when parent_mode is undefined', () => {
it('should pass no parent mode when it is undefined', () => {
const { getByTestId } = render(
<DocumentTitle datasetId="dataset-1" />,
)
const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}')
expect(value.parentMode).toBe('paragraph')
expect(getByTestId('document-picker')).toHaveAttribute('data-parent-mode', '')
})
it('should apply custom wrapperCls', () => {
@ -119,24 +161,23 @@ describe('DocumentTitle', () => {
})
describe('Edge Cases', () => {
it('should handle undefined optional props', () => {
it('should handle an empty document value', () => {
const { getByTestId } = render(
<DocumentTitle datasetId="dataset-1" />,
)
const value = JSON.parse(getByTestId('document-picker').getAttribute('data-value') || '{}')
expect(value.name).toBeUndefined()
expect(value.extension).toBeUndefined()
expect(getByTestId('document-picker')).toHaveAttribute('data-value-id', '')
})
it('should maintain structure when rerendered', () => {
const { rerender, getByTestId } = render(
<DocumentTitle datasetId="dataset-1" name="doc1" />,
<DocumentTitle datasetId="dataset-1" document={createDocument({ id: 'doc-1' })} />,
)
rerender(<DocumentTitle datasetId="dataset-2" name="doc2" />)
rerender(<DocumentTitle datasetId="dataset-2" document={createDocument({ id: 'doc-2' })} />)
expect(getByTestId('document-picker').getAttribute('data-dataset-id')).toBe('dataset-2')
expect(getByTestId('document-picker').getAttribute('data-value-id')).toBe('doc-2')
})
})
})

View File

@ -114,9 +114,20 @@ vi.mock('../batch-modal', () => ({
}))
vi.mock('../document-title', () => ({
DocumentTitle: ({ name, extension }: { name?: string, extension?: string }) => (
<div data-testid="document-title" data-extension={extension}>{name}</div>
),
DocumentTitle: ({
document,
}: {
document?: {
name?: string
data_source_detail_dict?: { upload_file?: { extension?: string } }
data_source_info?: { upload_file?: { extension?: string } }
} | null
}) => {
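// Prefer the extension from data_source_detail_dict, falling back to data_source_info.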
const extension = document?.data_source_detail_dict?.upload_file?.extension
?? document?.data_source_info?.upload_file?.extension
return <div data-testid="document-title" data-extension={extension}>{document?.name}</div>
},
}))
vi.mock('../segment-add', () => ({

View File

@ -1,39 +1,29 @@
import type { FC } from 'react'
import type { ChunkingMode, ParentMode } from '@/models/datasets'
import type { ParentMode, SimpleDocumentDetail } from '@/models/datasets'
import { cn } from '@langgenius/dify-ui/cn'
import { useRouter } from '@/next/navigation'
import DocumentPicker from '../../common/document-picker'
import { DocumentPicker } from '../../common/document-picker'
type DocumentTitleProps = {
datasetId: string
extension?: string
name?: string
chunkingMode?: ChunkingMode
parent_mode?: ParentMode
iconCls?: string
textCls?: string
document?: SimpleDocumentDetail | null
parentMode?: ParentMode
wrapperCls?: string
}
export const DocumentTitle: FC<DocumentTitleProps> = ({
export function DocumentTitle({
datasetId,
extension,
name,
chunkingMode,
parent_mode,
document,
parentMode,
wrapperCls,
}) => {
}: DocumentTitleProps) {
const router = useRouter()
return (
<div className={cn('flex flex-1 items-center justify-start', wrapperCls)}>
<DocumentPicker
datasetId={datasetId}
value={{
name,
extension,
chunkingMode,
parentMode: parent_mode || 'paragraph',
}}
value={document}
parentMode={parentMode}
onChange={(doc) => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}

View File

@ -1,6 +1,6 @@
'use client'
import type { FC } from 'react'
import type { DataSourceInfo, DocumentDisplayStatus, FileItem, FullDocumentDetail, LegacyDataSourceInfo } from '@/models/datasets'
import type { DocumentDisplayStatus, FileItem, FullDocumentDetail } from '@/models/datasets'
import type { SegmentImportStatus } from '@/types/dataset'
import { cn } from '@langgenius/dify-ui/cn'
import { toast } from '@langgenius/dify-ui/toast'
@ -38,10 +38,6 @@ const NON_TERMINAL_DISPLAY_STATUSES = new Set<typeof DisplayStatusList[number]>(
DisplayStatusList.filter(s => s === 'queuing' || s === 'indexing' || s === 'paused'),
)
const isLegacyDataSourceInfo = (info?: DataSourceInfo): info is LegacyDataSourceInfo => {
return !!info && 'upload_file' in info
}
const DocumentDetail: FC<DocumentDetailProps> = ({ datasetId, documentId }) => {
const router = useRouter()
const searchParams = useSearchParams()
@ -123,14 +119,6 @@ const DocumentDetail: FC<DocumentDetailProps> = ({ datasetId, documentId }) => {
const embedding = NON_TERMINAL_DISPLAY_STATUSES.has(documentDetail?.display_status as DocumentDisplayStatus)
const documentUploadFile = useMemo(() => {
if (!documentDetail?.data_source_info)
return undefined
if (isLegacyDataSourceInfo(documentDetail.data_source_info))
return documentDetail.data_source_info.upload_file
return undefined
}, [documentDetail?.data_source_info])
const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey)
const invalidDocumentList = useInvalidDocumentList(datasetId)
@ -212,11 +200,9 @@ const DocumentDetail: FC<DocumentDetailProps> = ({ datasetId, documentId }) => {
</button>
<DocumentTitle
datasetId={datasetId}
extension={documentUploadFile?.extension}
name={documentDetail?.name}
document={documentDetail}
wrapperCls="mr-2"
parent_mode={parentMode}
chunkingMode={documentDetail?.doc_form as ChunkingMode}
parentMode={parentMode}
/>
<div className="flex flex-wrap items-center">
{embeddingAvailable && documentDetail && !documentDetail.archived && !isFullDocMode && (

View File

@ -1,7 +1,7 @@
{
"name": "dify-web",
"type": "module",
"version": "1.14.0",
"version": "1.14.1",
"private": true,
"imports": {
"#i18n": {

View File

@ -0,0 +1,94 @@
import { useQuery } from '@tanstack/react-query'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { get } from '../base'
import { useDatasetDetail, useDatasetRelatedApps } from './use-dataset'
vi.mock('@tanstack/react-query', () => ({
keepPreviousData: Symbol('keepPreviousData'),
useInfiniteQuery: vi.fn(),
useMutation: vi.fn(),
useQuery: vi.fn(),
useQueryClient: vi.fn(),
}))
vi.mock('../base', () => ({
get: vi.fn(),
post: vi.fn(),
}))
vi.mock('../use-base', () => ({
useInvalid: vi.fn(),
}))
const mockUseQuery = vi.mocked(useQuery)
const mockGet = vi.mocked(get)
type QueryOptions = Parameters<typeof useQuery>[0]
type RetryFn = (failureCount: number, error: unknown) => boolean
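// Pull the options object (and its retry function) out of the most recent useQuery call.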
const getLastQueryOptions = () => {
return mockUseQuery.mock.calls.at(-1)?.[0] as QueryOptions
}
const getRetryFn = () => {
return getLastQueryOptions().retry as RetryFn
}
describe('knowledge dataset hooks', () => {
beforeEach(() => {
vi.clearAllMocks()
mockUseQuery.mockReturnValue({} as ReturnType<typeof useQuery>)
})
describe('useDatasetDetail', () => {
it('should not retry forbidden or missing dataset detail errors', () => {
// Arrange & Act
useDatasetDetail('dataset-1')
const retry = getRetryFn()
// Assert
expect(retry(0, new Response(null, { status: 403 }))).toBe(false)
expect(retry(0, new Response(null, { status: 404 }))).toBe(false)
})
it('should retry other dataset detail errors fewer than three times', () => {
// Arrange & Act
useDatasetDetail('dataset-1')
const retry = getRetryFn()
// Assert
expect(retry(2, new Error('temporary failure'))).toBe(true)
expect(retry(3, new Error('temporary failure'))).toBe(false)
})
it('should fetch dataset detail without silent mode', () => {
// Arrange
mockGet.mockResolvedValue({ id: 'dataset-1' })
// Act
useDatasetDetail('dataset-1')
const queryFn = getLastQueryOptions().queryFn as () => unknown
queryFn()
// Assert
expect(mockGet).toHaveBeenCalledWith('/datasets/dataset-1')
})
})
describe('useDatasetRelatedApps', () => {
it('should use explicit enabled option when provided', () => {
// Arrange & Act
useDatasetRelatedApps('dataset-1', { enabled: false })
// Assert
expect(getLastQueryOptions().enabled).toBe(false)
})
it('should enable related apps query when dataset id exists and no option is provided', () => {
// Arrange & Act
useDatasetRelatedApps('dataset-1')
// Assert
expect(getLastQueryOptions().enabled).toBe(true)
})
})
})

View File

@ -110,13 +110,20 @@ export const useDatasetDetail = (datasetId: string) => {
queryKey: [...datasetDetailQueryKeyPrefix, datasetId],
queryFn: () => get<DataSet>(`/datasets/${datasetId}`),
enabled: !!datasetId,
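// Don't retry forbidden or missing datasets; retry transient failures up to three times.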
retry: (failureCount, error) => {
if (error instanceof Response && [403, 404].includes(error.status))
return false
return failureCount < 3
},
})
}
export const useDatasetRelatedApps = (datasetId: string) => {
export const useDatasetRelatedApps = (datasetId: string, options?: { enabled?: boolean }) => {
return useQuery({
queryKey: [NAME_SPACE, 'related-apps', datasetId],
queryFn: () => get<RelatedAppResponse>(`/datasets/${datasetId}/related-apps`),
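// An explicit enabled option wins; otherwise gate the query on a non-empty dataset id.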
enabled: options?.enabled ?? !!datasetId,
})
}