Merge remote-tracking branch 'origin/main' into feat/trigger

lyzno1 2025-10-04 11:30:26 +08:00
commit 3370736e09
15 changed files with 1053 additions and 668 deletions

View File

@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/devcontainers/python:3.12-bullseye
+FROM mcr.microsoft.com/devcontainers/python:3.12-bookworm

 RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
     && apt-get -y install libgmp-dev libmpfr-dev libmpc-dev

View File

@@ -19,6 +19,7 @@ from core.ops.ops_trace_manager import OpsTraceManager
 from extensions.ext_database import db
 from fields.app_fields import app_detail_fields, app_detail_fields_with_site, app_pagination_fields
 from libs.login import login_required
+from libs.validators import validate_description_length
 from models import Account, App
 from services.app_dsl_service import AppDslService, ImportMode
 from services.app_service import AppService
@@ -28,12 +29,6 @@ from services.feature_service import FeatureService

 ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]

-def _validate_description_length(description):
-    if description and len(description) > 400:
-        raise ValueError("Description cannot exceed 400 characters.")
-    return description

 @console_ns.route("/apps")
 class AppListApi(Resource):
     @api.doc("list_apps")
@@ -138,7 +133,7 @@ class AppListApi(Resource):
         """Create app"""
         parser = reqparse.RequestParser()
         parser.add_argument("name", type=str, required=True, location="json")
-        parser.add_argument("description", type=_validate_description_length, location="json")
+        parser.add_argument("description", type=validate_description_length, location="json")
         parser.add_argument("mode", type=str, choices=ALLOW_CREATE_APP_MODES, location="json")
         parser.add_argument("icon_type", type=str, location="json")
         parser.add_argument("icon", type=str, location="json")
@@ -219,7 +214,7 @@ class AppApi(Resource):
         parser = reqparse.RequestParser()
         parser.add_argument("name", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("description", type=_validate_description_length, location="json")
+        parser.add_argument("description", type=validate_description_length, location="json")
         parser.add_argument("icon_type", type=str, location="json")
         parser.add_argument("icon", type=str, location="json")
         parser.add_argument("icon_background", type=str, location="json")
@@ -297,7 +292,7 @@ class AppCopyApi(Resource):
         parser = reqparse.RequestParser()
         parser.add_argument("name", type=str, location="json")
-        parser.add_argument("description", type=_validate_description_length, location="json")
+        parser.add_argument("description", type=validate_description_length, location="json")
         parser.add_argument("icon_type", type=str, location="json")
         parser.add_argument("icon", type=str, location="json")
         parser.add_argument("icon_background", type=str, location="json")

View File

@@ -31,6 +31,7 @@ from fields.app_fields import related_app_list
 from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fields
 from fields.document_fields import document_status_fields
 from libs.login import login_required
+from libs.validators import validate_description_length
 from models import ApiToken, Dataset, Document, DocumentSegment, UploadFile
 from models.account import Account
 from models.dataset import DatasetPermissionEnum
@@ -44,12 +45,6 @@ def _validate_name(name: str) -> str:
     return name

-def _validate_description_length(description):
-    if description and len(description) > 400:
-        raise ValueError("Description cannot exceed 400 characters.")
-    return description

 @console_ns.route("/datasets")
 class DatasetListApi(Resource):
     @api.doc("get_datasets")
@@ -149,7 +144,7 @@ class DatasetListApi(Resource):
         )
         parser.add_argument(
             "description",
-            type=_validate_description_length,
+            type=validate_description_length,
             nullable=True,
             required=False,
             default="",
@@ -290,7 +285,7 @@ class DatasetApi(Resource):
             help="type is required. Name must be between 1 to 40 characters.",
             type=_validate_name,
         )
-        parser.add_argument("description", location="json", store_missing=False, type=_validate_description_length)
+        parser.add_argument("description", location="json", store_missing=False, type=validate_description_length)
         parser.add_argument(
             "indexing_technique",
             type=str,

View File

@@ -17,6 +17,7 @@ from core.provider_manager import ProviderManager
 from fields.dataset_fields import dataset_detail_fields
 from fields.tag_fields import build_dataset_tag_fields
 from libs.login import current_user
+from libs.validators import validate_description_length
 from models.account import Account
 from models.dataset import Dataset, DatasetPermissionEnum
 from models.provider_ids import ModelProviderID
@@ -31,12 +32,6 @@ def _validate_name(name):
     return name

-def _validate_description_length(description):
-    if description and len(description) > 400:
-        raise ValueError("Description cannot exceed 400 characters.")
-    return description

 # Define parsers for dataset operations
 dataset_create_parser = reqparse.RequestParser()
 dataset_create_parser.add_argument(
@@ -48,7 +43,7 @@ dataset_create_parser.add_argument(
 )
 dataset_create_parser.add_argument(
     "description",
-    type=_validate_description_length,
+    type=validate_description_length,
     nullable=True,
     required=False,
     default="",
@@ -101,7 +96,7 @@ dataset_update_parser.add_argument(
     type=_validate_name,
 )
 dataset_update_parser.add_argument(
-    "description", location="json", store_missing=False, type=_validate_description_length
+    "description", location="json", store_missing=False, type=validate_description_length
 )
 dataset_update_parser.add_argument(
     "indexing_technique",

api/libs/validators.py Normal file
View File

@@ -0,0 +1,5 @@
+def validate_description_length(description: str | None) -> str | None:
+    """Validate description length."""
+    if description and len(description) > 400:
+        raise ValueError("Description cannot exceed 400 characters.")
+    return description
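This new shared helper replaces the three copies of _validate_description_length deleted above. For orientation, a minimal sketch of how it is consumed by the request parsers in the controllers (assuming Flask-RESTX's reqparse semantics, where a ValueError raised by a `type` callable is reported to the client as a 400 with the exception message; the parser below is illustrative rather than any specific endpoint):

from flask_restx import reqparse

from libs.validators import validate_description_length

parser = reqparse.RequestParser()
# reqparse invokes the `type` callable on the incoming value; the ValueError
# raised for descriptions longer than 400 characters surfaces as a 400 response.
parser.add_argument("description", type=validate_description_length, location="json")

args = parser.parse_args()  # aborts with HTTP 400 when validation fails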

View File

@ -1,8 +1,8 @@
{
"include": ["."],
"exclude": [
".venv",
"tests/",
".venv",
"migrations/",
"core/rag",
"extensions",

View File

@@ -11,8 +11,8 @@ from controllers.console.app import completion as completion_api
 from controllers.console.app import message as message_api
 from controllers.console.app import wraps
 from libs.datetime_utils import naive_utc_now
-from models import Account, App, Tenant
-from models.account import TenantAccountRole
+from models import App, Tenant
+from models.account import Account, TenantAccountJoin, TenantAccountRole
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService
@@ -31,9 +31,8 @@ class TestChatMessageApiPermissions:
         return app

     @pytest.fixture
-    def mock_account(self):
+    def mock_account(self, monkeypatch: pytest.MonkeyPatch):
         """Create a mock Account for testing."""
         account = Account()
         account.id = str(uuid.uuid4())
         account.name = "Test User"
@@ -42,12 +41,24 @@ class TestChatMessageApiPermissions:
         account.created_at = naive_utc_now()
         account.updated_at = naive_utc_now()

         # Create mock tenant
         tenant = Tenant()
         tenant.id = str(uuid.uuid4())
         tenant.name = "Test Tenant"
-        account._current_tenant = tenant
+
+        mock_session_instance = mock.Mock()
+        mock_tenant_join = TenantAccountJoin(role=TenantAccountRole.OWNER)
+        monkeypatch.setattr(mock_session_instance, "scalar", mock.Mock(return_value=mock_tenant_join))
+        mock_scalars_result = mock.Mock()
+        mock_scalars_result.one.return_value = tenant
+        monkeypatch.setattr(mock_session_instance, "scalars", mock.Mock(return_value=mock_scalars_result))
+        mock_session_context = mock.Mock()
+        mock_session_context.__enter__.return_value = mock_session_instance
+        monkeypatch.setattr("models.account.Session", lambda _, expire_on_commit: mock_session_context)
+        account.current_tenant = tenant

         return account
@pytest.mark.parametrize(
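The shape of this mock follows from what the current_tenant setter on Account evidently does: open a SQLAlchemy session, fetch the account's TenantAccountJoin via session.scalar(...), and load the tenant via session.scalars(...).one(), which is why the fixture patches models.account.Session with a context manager returning the mocked session. A hypothetical reconstruction of that lookup, inferred from the mock's shape rather than copied from the model (names and queries are assumptions):

from sqlalchemy import select
from sqlalchemy.orm import Session

def set_current_tenant(account: Account, tenant: Tenant) -> None:
    # Hypothetical sketch of what Account.current_tenant appears to do.
    with Session(db.engine, expire_on_commit=False) as session:
        # Satisfied in the fixture by mock_session_instance.scalar(...)
        join = session.scalar(
            select(TenantAccountJoin).where(
                TenantAccountJoin.account_id == account.id,
                TenantAccountJoin.tenant_id == tenant.id,
            )
        )
        # Satisfied in the fixture by mock_session_instance.scalars(...).one()
        current = session.scalars(select(Tenant).where(Tenant.id == tenant.id)).one()
    account.role = join.role
    account._current_tenant = current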

View File

@@ -18,124 +18,87 @@ class TestAppDescriptionValidationUnit:
     """Unit tests for description validation function"""

     def test_validate_description_length_function(self):
-        """Test the _validate_description_length function directly"""
-        from controllers.console.app.app import _validate_description_length
+        """Test the validate_description_length function directly"""
+        from libs.validators import validate_description_length

         # Test valid descriptions
-        assert _validate_description_length("") == ""
-        assert _validate_description_length("x" * 400) == "x" * 400
-        assert _validate_description_length(None) is None
+        assert validate_description_length("") == ""
+        assert validate_description_length("x" * 400) == "x" * 400
+        assert validate_description_length(None) is None

         # Test invalid descriptions
         with pytest.raises(ValueError) as exc_info:
-            _validate_description_length("x" * 401)
+            validate_description_length("x" * 401)
         assert "Description cannot exceed 400 characters." in str(exc_info.value)

         with pytest.raises(ValueError) as exc_info:
-            _validate_description_length("x" * 500)
+            validate_description_length("x" * 500)
         assert "Description cannot exceed 400 characters." in str(exc_info.value)

         with pytest.raises(ValueError) as exc_info:
-            _validate_description_length("x" * 1000)
+            validate_description_length("x" * 1000)
         assert "Description cannot exceed 400 characters." in str(exc_info.value)
-    def test_validation_consistency_with_dataset(self):
-        """Test that App and Dataset validation functions are consistent"""
-        from controllers.console.app.app import _validate_description_length as app_validate
-        from controllers.console.datasets.datasets import _validate_description_length as dataset_validate
-        from controllers.service_api.dataset.dataset import _validate_description_length as service_dataset_validate
-
-        # Test same valid inputs
-        valid_desc = "x" * 400
-        assert app_validate(valid_desc) == dataset_validate(valid_desc) == service_dataset_validate(valid_desc)
-        assert app_validate("") == dataset_validate("") == service_dataset_validate("")
-        assert app_validate(None) == dataset_validate(None) == service_dataset_validate(None)
-
-        # Test same invalid inputs produce same error
-        invalid_desc = "x" * 401
-        app_error = None
-        dataset_error = None
-        service_dataset_error = None
-
-        try:
-            app_validate(invalid_desc)
-        except ValueError as e:
-            app_error = str(e)
-
-        try:
-            dataset_validate(invalid_desc)
-        except ValueError as e:
-            dataset_error = str(e)
-
-        try:
-            service_dataset_validate(invalid_desc)
-        except ValueError as e:
-            service_dataset_error = str(e)
-
-        assert app_error == dataset_error == service_dataset_error
-        assert app_error == "Description cannot exceed 400 characters."
     def test_boundary_values(self):
         """Test boundary values for description validation"""
-        from controllers.console.app.app import _validate_description_length
+        from libs.validators import validate_description_length

         # Test exact boundary
         exactly_400 = "x" * 400
-        assert _validate_description_length(exactly_400) == exactly_400
+        assert validate_description_length(exactly_400) == exactly_400

         # Test just over boundary
         just_over_400 = "x" * 401
         with pytest.raises(ValueError):
-            _validate_description_length(just_over_400)
+            validate_description_length(just_over_400)

         # Test just under boundary
         just_under_400 = "x" * 399
-        assert _validate_description_length(just_under_400) == just_under_400
+        assert validate_description_length(just_under_400) == just_under_400

     def test_edge_cases(self):
         """Test edge cases for description validation"""
-        from controllers.console.app.app import _validate_description_length
+        from libs.validators import validate_description_length

         # Test None input
-        assert _validate_description_length(None) is None
+        assert validate_description_length(None) is None

         # Test empty string
-        assert _validate_description_length("") == ""
+        assert validate_description_length("") == ""

         # Test single character
-        assert _validate_description_length("a") == "a"
+        assert validate_description_length("a") == "a"

         # Test unicode characters
         unicode_desc = "测试" * 200  # 400 characters in Chinese
-        assert _validate_description_length(unicode_desc) == unicode_desc
+        assert validate_description_length(unicode_desc) == unicode_desc

         # Test unicode over limit
         unicode_over = "测试" * 201  # 402 characters
         with pytest.raises(ValueError):
-            _validate_description_length(unicode_over)
+            validate_description_length(unicode_over)

     def test_whitespace_handling(self):
         """Test how validation handles whitespace"""
-        from controllers.console.app.app import _validate_description_length
+        from libs.validators import validate_description_length

         # Test description with spaces
         spaces_400 = " " * 400
-        assert _validate_description_length(spaces_400) == spaces_400
+        assert validate_description_length(spaces_400) == spaces_400

         # Test description with spaces over limit
         spaces_401 = " " * 401
         with pytest.raises(ValueError):
-            _validate_description_length(spaces_401)
+            validate_description_length(spaces_401)

         # Test mixed content
         mixed_400 = "a" * 200 + " " * 200
-        assert _validate_description_length(mixed_400) == mixed_400
+        assert validate_description_length(mixed_400) == mixed_400

         # Test mixed over limit
         mixed_401 = "a" * 200 + " " * 201
         with pytest.raises(ValueError):
-            _validate_description_length(mixed_401)
+            validate_description_length(mixed_401)

 if __name__ == "__main__":

View File

@@ -9,8 +9,8 @@ from flask.testing import FlaskClient
 from controllers.console.app import model_config as model_config_api
 from controllers.console.app import wraps
 from libs.datetime_utils import naive_utc_now
-from models import Account, App, Tenant
-from models.account import TenantAccountRole
+from models import App, Tenant
+from models.account import Account, TenantAccountJoin, TenantAccountRole
 from models.model import AppMode
 from services.app_model_config_service import AppModelConfigService
@@ -30,9 +30,8 @@ class TestModelConfigResourcePermissions:
         return app

     @pytest.fixture
-    def mock_account(self):
+    def mock_account(self, monkeypatch: pytest.MonkeyPatch):
         """Create a mock Account for testing."""
         account = Account()
         account.id = str(uuid.uuid4())
         account.name = "Test User"
@@ -41,12 +40,24 @@ class TestModelConfigResourcePermissions:
         account.created_at = naive_utc_now()
         account.updated_at = naive_utc_now()

         # Create mock tenant
         tenant = Tenant()
         tenant.id = str(uuid.uuid4())
         tenant.name = "Test Tenant"
-        account._current_tenant = tenant
+
+        mock_session_instance = mock.Mock()
+        mock_tenant_join = TenantAccountJoin(role=TenantAccountRole.OWNER)
+        monkeypatch.setattr(mock_session_instance, "scalar", mock.Mock(return_value=mock_tenant_join))
+        mock_scalars_result = mock.Mock()
+        mock_scalars_result.one.return_value = tenant
+        monkeypatch.setattr(mock_session_instance, "scalars", mock.Mock(return_value=mock_scalars_result))
+        mock_session_context = mock.Mock()
+        mock_session_context.__enter__.return_value = mock_session_instance
+        monkeypatch.setattr("models.account.Session", lambda _, expire_on_commit: mock_session_context)
+        account.current_tenant = tenant

         return account
@pytest.mark.parametrize(

View File

@@ -0,0 +1,505 @@
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from core.rag.index_processor.constant.index_type import IndexType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from tasks.enable_segments_to_index_task import enable_segments_to_index_task
class TestEnableSegmentsToIndexTask:
"""Integration tests for enable_segments_to_index_task using testcontainers."""
@pytest.fixture
def mock_external_service_dependencies(self):
"""Mock setup for external service dependencies."""
with (
patch("tasks.enable_segments_to_index_task.IndexProcessorFactory") as mock_index_processor_factory,
):
# Setup mock index processor
mock_processor = MagicMock()
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
yield {
"index_processor_factory": mock_index_processor_factory,
"index_processor": mock_processor,
}
def _create_test_dataset_and_document(self, db_session_with_containers, mock_external_service_dependencies):
"""
Helper method to create a test dataset and document for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
mock_external_service_dependencies: Mock dependencies
Returns:
tuple: (dataset, document) - Created dataset and document instances
"""
fake = Faker()
# Create account and tenant
account = Account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
status="active",
)
db.session.add(account)
db.session.commit()
tenant = Tenant(
name=fake.company(),
status="normal",
)
db.session.add(tenant)
db.session.commit()
# Create tenant-account join
join = TenantAccountJoin(
tenant_id=tenant.id,
account_id=account.id,
role=TenantAccountRole.OWNER.value,
current=True,
)
db.session.add(join)
db.session.commit()
# Create dataset
dataset = Dataset(
id=fake.uuid4(),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
data_source_type="upload_file",
indexing_technique="high_quality",
created_by=account.id,
)
db.session.add(dataset)
db.session.commit()
# Create document
document = Document(
id=fake.uuid4(),
tenant_id=tenant.id,
dataset_id=dataset.id,
position=1,
data_source_type="upload_file",
batch="test_batch",
name=fake.file_name(),
created_from="upload_file",
created_by=account.id,
indexing_status="completed",
enabled=True,
doc_form=IndexType.PARAGRAPH_INDEX,
)
db.session.add(document)
db.session.commit()
# Refresh dataset to ensure doc_form property works correctly
db.session.refresh(dataset)
return dataset, document
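The closing db.session.refresh(dataset) here (and the refreshes after changing document.doc_form in later tests) implies that Dataset.doc_form is not a stored column but a property derived from the dataset's documents. A hypothetical sketch of such a property, for orientation only; the real model may differ:

# Hypothetical: Dataset.doc_form delegating to its first document, which would
# explain why the tests refresh the dataset after editing document.doc_form.
@property
def doc_form(self):
    document = db.session.query(Document).filter_by(dataset_id=self.id).first()
    return document.doc_form if document else None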
def _create_test_segments(
self, db_session_with_containers, document, dataset, count=3, enabled=False, status="completed"
):
"""
Helper method to create test document segments.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
document: Document instance
dataset: Dataset instance
count: Number of segments to create
enabled: Whether segments should be enabled
status: Status of the segments
Returns:
list: List of created DocumentSegment instances
"""
fake = Faker()
segments = []
for i in range(count):
text = fake.text(max_nb_chars=200)
segment = DocumentSegment(
id=fake.uuid4(),
tenant_id=document.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
position=i,
content=text,
word_count=len(text.split()),
tokens=len(text.split()) * 2,
index_node_id=f"node_{i}",
index_node_hash=f"hash_{i}",
enabled=enabled,
status=status,
created_by=document.created_by,
)
db.session.add(segment)
segments.append(segment)
db.session.commit()
return segments
def test_enable_segments_to_index_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful segments indexing with paragraph index type.
This test verifies:
- Proper dataset and document retrieval from database
- Correct segment processing and document creation
- Index processor integration
- Database state updates
- Redis cache key deletion
"""
# Arrange: Create test data
dataset, document = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies
)
segments = self._create_test_segments(db_session_with_containers, document, dataset)
# Set up Redis cache keys to simulate indexing in progress
segment_ids = [segment.id for segment in segments]
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
redis_client.set(indexing_cache_key, "processing", ex=300) # 5 minutes expiry
# Verify cache keys exist
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
assert redis_client.exists(indexing_cache_key) == 1
# Act: Execute the task
enable_segments_to_index_task(segment_ids, dataset.id, document.id)
# Assert: Verify the expected outcomes
# Verify index processor was called correctly
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with correct parameters
call_args = mock_external_service_dependencies["index_processor"].load.call_args
assert call_args is not None
documents = call_args[0][1] # Second argument should be documents list
assert len(documents) == 3
# Verify document structure
for i, doc in enumerate(documents):
assert doc.page_content == segments[i].content
assert doc.metadata["doc_id"] == segments[i].index_node_id
assert doc.metadata["doc_hash"] == segments[i].index_node_hash
assert doc.metadata["document_id"] == document.id
assert doc.metadata["dataset_id"] == dataset.id
# Verify Redis cache keys were deleted
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
assert redis_client.exists(indexing_cache_key) == 0
def test_enable_segments_to_index_with_different_index_type(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test segments indexing with different index types.
This test verifies:
- Proper handling of different index types
- Index processor factory integration
- Document processing with various configurations
- Redis cache key deletion
"""
# Arrange: Create test data with different index type
dataset, document = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies
)
# Update document to use different index type
document.doc_form = IndexType.QA_INDEX
db.session.commit()
# Refresh dataset to ensure doc_form property reflects the updated document
db.session.refresh(dataset)
# Create segments
segments = self._create_test_segments(db_session_with_containers, document, dataset)
# Set up Redis cache keys
segment_ids = [segment.id for segment in segments]
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
redis_client.set(indexing_cache_key, "processing", ex=300)
# Act: Execute the task
enable_segments_to_index_task(segment_ids, dataset.id, document.id)
# Assert: Verify different index type handling
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.QA_INDEX)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with correct parameters
call_args = mock_external_service_dependencies["index_processor"].load.call_args
assert call_args is not None
documents = call_args[0][1] # Second argument should be documents list
assert len(documents) == 3
# Verify Redis cache keys were deleted
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
assert redis_client.exists(indexing_cache_key) == 0
def test_enable_segments_to_index_dataset_not_found(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test handling of non-existent dataset.
This test verifies:
- Proper error handling for missing datasets
- Early return without processing
- Database session cleanup
- No unnecessary index processor calls
"""
# Arrange: Use non-existent dataset ID
fake = Faker()
non_existent_dataset_id = fake.uuid4()
non_existent_document_id = fake.uuid4()
segment_ids = [fake.uuid4()]
# Act: Execute the task with non-existent dataset
enable_segments_to_index_task(segment_ids, non_existent_dataset_id, non_existent_document_id)
# Assert: Verify no processing occurred
mock_external_service_dependencies["index_processor_factory"].assert_not_called()
mock_external_service_dependencies["index_processor"].load.assert_not_called()
def test_enable_segments_to_index_document_not_found(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test handling of non-existent document.
This test verifies:
- Proper error handling for missing documents
- Early return without processing
- Database session cleanup
- No unnecessary index processor calls
"""
# Arrange: Create dataset but use non-existent document ID
dataset, _ = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies
)
fake = Faker()
non_existent_document_id = fake.uuid4()
segment_ids = [fake.uuid4()]
# Act: Execute the task with non-existent document
enable_segments_to_index_task(segment_ids, dataset.id, non_existent_document_id)
# Assert: Verify no processing occurred
mock_external_service_dependencies["index_processor_factory"].assert_not_called()
mock_external_service_dependencies["index_processor"].load.assert_not_called()
def test_enable_segments_to_index_invalid_document_status(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test handling of document with invalid status.
This test verifies:
- Early return when document is disabled, archived, or not completed
- No index processing for documents not ready for indexing
- Proper database session cleanup
- No unnecessary external service calls
"""
# Arrange: Create test data with invalid document status
dataset, document = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies
)
# Test different invalid statuses
invalid_statuses = [
("disabled", {"enabled": False}),
("archived", {"archived": True}),
("not_completed", {"indexing_status": "processing"}),
]
for _, status_attrs in invalid_statuses:
# Reset document status
document.enabled = True
document.archived = False
document.indexing_status = "completed"
db.session.commit()
# Set invalid status
for attr, value in status_attrs.items():
setattr(document, attr, value)
db.session.commit()
# Create segments
segments = self._create_test_segments(db_session_with_containers, document, dataset)
segment_ids = [segment.id for segment in segments]
# Act: Execute the task
enable_segments_to_index_task(segment_ids, dataset.id, document.id)
# Assert: Verify no processing occurred
mock_external_service_dependencies["index_processor_factory"].assert_not_called()
mock_external_service_dependencies["index_processor"].load.assert_not_called()
# Clean up segments for next iteration
for segment in segments:
db.session.delete(segment)
db.session.commit()
def test_enable_segments_to_index_segments_not_found(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test handling when no segments are found.
This test verifies:
- Proper handling when segments don't exist
- Early return without processing
- Database session cleanup
- Index processor is created but load is not called
"""
# Arrange: Create test data
dataset, document = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies
)
# Use non-existent segment IDs
fake = Faker()
non_existent_segment_ids = [fake.uuid4() for _ in range(3)]
# Act: Execute the task with non-existent segments
enable_segments_to_index_task(non_existent_segment_ids, dataset.id, document.id)
# Assert: Verify index processor was created but load was not called
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
mock_external_service_dependencies["index_processor"].load.assert_not_called()
def test_enable_segments_to_index_with_parent_child_structure(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test segments indexing with parent-child structure.
This test verifies:
- Proper handling of PARENT_CHILD_INDEX type
- Child document creation from segments
- Correct document structure for parent-child indexing
- Index processor receives properly structured documents
- Redis cache key deletion
"""
# Arrange: Create test data with parent-child index type
dataset, document = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies
)
# Update document to use parent-child index type
document.doc_form = IndexType.PARENT_CHILD_INDEX
db.session.commit()
# Refresh dataset to ensure doc_form property reflects the updated document
db.session.refresh(dataset)
# Create segments with mock child chunks
segments = self._create_test_segments(db_session_with_containers, document, dataset)
# Set up Redis cache keys
segment_ids = [segment.id for segment in segments]
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
redis_client.set(indexing_cache_key, "processing", ex=300)
# Mock the get_child_chunks method for each segment
with patch.object(DocumentSegment, "get_child_chunks") as mock_get_child_chunks:
# Setup mock to return child chunks for each segment
mock_child_chunks = []
for i in range(2): # Each segment has 2 child chunks
mock_child = MagicMock()
mock_child.content = f"child_content_{i}"
mock_child.index_node_id = f"child_node_{i}"
mock_child.index_node_hash = f"child_hash_{i}"
mock_child_chunks.append(mock_child)
mock_get_child_chunks.return_value = mock_child_chunks
# Act: Execute the task
enable_segments_to_index_task(segment_ids, dataset.id, document.id)
# Assert: Verify parent-child index processing
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexType.PARENT_CHILD_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with correct parameters
call_args = mock_external_service_dependencies["index_processor"].load.call_args
assert call_args is not None
documents = call_args[0][1] # Second argument should be documents list
assert len(documents) == 3 # 3 segments
# Verify each document has children
for doc in documents:
assert hasattr(doc, "children")
assert len(doc.children) == 2 # Each document has 2 children
# Verify Redis cache keys were deleted
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
assert redis_client.exists(indexing_cache_key) == 0
def test_enable_segments_to_index_general_exception_handling(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test general exception handling during indexing process.
This test verifies:
- Exceptions are properly caught and handled
- Segment status is set to error
- Segments are disabled
- Error information is recorded
- Redis cache is still cleared
- Database session is properly closed
"""
# Arrange: Create test data
dataset, document = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies
)
segments = self._create_test_segments(db_session_with_containers, document, dataset)
# Set up Redis cache keys
segment_ids = [segment.id for segment in segments]
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
redis_client.set(indexing_cache_key, "processing", ex=300)
# Mock the index processor to raise an exception
mock_external_service_dependencies["index_processor"].load.side_effect = Exception("Index processing failed")
# Act: Execute the task
enable_segments_to_index_task(segment_ids, dataset.id, document.id)
# Assert: Verify error handling
for segment in segments:
db.session.refresh(segment)
assert segment.enabled is False
assert segment.status == "error"
assert segment.error is not None
assert "Index processing failed" in segment.error
assert segment.disabled_at is not None
# Verify Redis cache keys were still cleared despite error
for segment in segments:
indexing_cache_key = f"segment_{segment.id}_indexing"
assert redis_client.exists(indexing_cache_key) == 0
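Read together, these tests pin down the task's observable contract: missing or not-ready records cause an early return before the index processor factory is touched, missing segments create the processor but never call load, failures flip segments into an error state, and the per-segment Redis flags are cleared on every path. A rough control-flow sketch reconstructed from those assertions (not the task's actual source; build_documents is a hypothetical stand-in for the document-assembly step):

def enable_segments_to_index_task(segment_ids, dataset_id, document_id):
    # Reconstruction from the tests above; details are assumptions.
    dataset = db.session.query(Dataset).filter_by(id=dataset_id).first()
    document = db.session.query(Document).filter_by(id=document_id).first()
    if not dataset or not document:
        return  # the factory is never called for missing records
    if not document.enabled or document.archived or document.indexing_status != "completed":
        return  # invalid document status: early return
    index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
    segments = db.session.query(DocumentSegment).filter(DocumentSegment.id.in_(segment_ids)).all()
    try:
        if segments:
            documents = build_documents(segments, dataset, document)  # hypothetical helper
            index_processor.load(dataset, documents)
    except Exception as e:
        for segment in segments:
            segment.enabled = False
            segment.status = "error"
            segment.error = str(e)
            segment.disabled_at = naive_utc_now()
        db.session.commit()
    finally:
        # cleared on success and on failure alike
        for segment in segments:
            redis_client.delete(f"segment_{segment.id}_indexing")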

View File

@@ -1,174 +1,53 @@
 import pytest

-from controllers.console.app.app import _validate_description_length as app_validate
-from controllers.console.datasets.datasets import _validate_description_length as dataset_validate
-from controllers.service_api.dataset.dataset import _validate_description_length as service_dataset_validate
+from libs.validators import validate_description_length

 class TestDescriptionValidationUnit:
-    """Unit tests for description validation functions in App and Dataset APIs"""
+    """Unit tests for the centralized description validation function."""

-    def test_app_validate_description_length_valid(self):
-        """Test App validation function with valid descriptions"""
+    def test_validate_description_length_valid(self):
+        """Test validation function with valid descriptions."""
         # Empty string should be valid
-        assert app_validate("") == ""
+        assert validate_description_length("") == ""

         # None should be valid
-        assert app_validate(None) is None
+        assert validate_description_length(None) is None

         # Short description should be valid
         short_desc = "Short description"
-        assert app_validate(short_desc) == short_desc
+        assert validate_description_length(short_desc) == short_desc

         # Exactly 400 characters should be valid
         exactly_400 = "x" * 400
-        assert app_validate(exactly_400) == exactly_400
+        assert validate_description_length(exactly_400) == exactly_400

         # Just under limit should be valid
         just_under = "x" * 399
-        assert app_validate(just_under) == just_under
+        assert validate_description_length(just_under) == just_under

-    def test_app_validate_description_length_invalid(self):
-        """Test App validation function with invalid descriptions"""
+    def test_validate_description_length_invalid(self):
+        """Test validation function with invalid descriptions."""
         # 401 characters should fail
         just_over = "x" * 401
         with pytest.raises(ValueError) as exc_info:
-            app_validate(just_over)
+            validate_description_length(just_over)
         assert "Description cannot exceed 400 characters." in str(exc_info.value)

         # 500 characters should fail
         way_over = "x" * 500
         with pytest.raises(ValueError) as exc_info:
-            app_validate(way_over)
+            validate_description_length(way_over)
         assert "Description cannot exceed 400 characters." in str(exc_info.value)

         # 1000 characters should fail
         very_long = "x" * 1000
         with pytest.raises(ValueError) as exc_info:
-            app_validate(very_long)
+            validate_description_length(very_long)
         assert "Description cannot exceed 400 characters." in str(exc_info.value)
-    def test_dataset_validate_description_length_valid(self):
-        """Test Dataset validation function with valid descriptions"""
-        # Empty string should be valid
-        assert dataset_validate("") == ""
-
-        # Short description should be valid
-        short_desc = "Short description"
-        assert dataset_validate(short_desc) == short_desc
-
-        # Exactly 400 characters should be valid
-        exactly_400 = "x" * 400
-        assert dataset_validate(exactly_400) == exactly_400
-
-        # Just under limit should be valid
-        just_under = "x" * 399
-        assert dataset_validate(just_under) == just_under
-
-    def test_dataset_validate_description_length_invalid(self):
-        """Test Dataset validation function with invalid descriptions"""
-        # 401 characters should fail
-        just_over = "x" * 401
-        with pytest.raises(ValueError) as exc_info:
-            dataset_validate(just_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-
-        # 500 characters should fail
-        way_over = "x" * 500
-        with pytest.raises(ValueError) as exc_info:
-            dataset_validate(way_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-
-    def test_service_dataset_validate_description_length_valid(self):
-        """Test Service Dataset validation function with valid descriptions"""
-        # Empty string should be valid
-        assert service_dataset_validate("") == ""
-
-        # None should be valid
-        assert service_dataset_validate(None) is None
-
-        # Short description should be valid
-        short_desc = "Short description"
-        assert service_dataset_validate(short_desc) == short_desc
-
-        # Exactly 400 characters should be valid
-        exactly_400 = "x" * 400
-        assert service_dataset_validate(exactly_400) == exactly_400
-
-        # Just under limit should be valid
-        just_under = "x" * 399
-        assert service_dataset_validate(just_under) == just_under
-
-    def test_service_dataset_validate_description_length_invalid(self):
-        """Test Service Dataset validation function with invalid descriptions"""
-        # 401 characters should fail
-        just_over = "x" * 401
-        with pytest.raises(ValueError) as exc_info:
-            service_dataset_validate(just_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-
-        # 500 characters should fail
-        way_over = "x" * 500
-        with pytest.raises(ValueError) as exc_info:
-            service_dataset_validate(way_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-    def test_app_dataset_validation_consistency(self):
-        """Test that App and Dataset validation functions behave identically"""
-        test_cases = [
-            "",  # Empty string
-            "Short description",  # Normal description
-            "x" * 100,  # Medium description
-            "x" * 400,  # Exactly at limit
-        ]
-
-        # Test valid cases produce same results
-        for test_desc in test_cases:
-            assert app_validate(test_desc) == dataset_validate(test_desc) == service_dataset_validate(test_desc)
-
-        # Test invalid cases produce same errors
-        invalid_cases = [
-            "x" * 401,  # Just over limit
-            "x" * 500,  # Way over limit
-            "x" * 1000,  # Very long
-        ]
-
-        for invalid_desc in invalid_cases:
-            app_error = None
-            dataset_error = None
-            service_dataset_error = None
-
-            # Capture App validation error
-            try:
-                app_validate(invalid_desc)
-            except ValueError as e:
-                app_error = str(e)
-
-            # Capture Dataset validation error
-            try:
-                dataset_validate(invalid_desc)
-            except ValueError as e:
-                dataset_error = str(e)
-
-            # Capture Service Dataset validation error
-            try:
-                service_dataset_validate(invalid_desc)
-            except ValueError as e:
-                service_dataset_error = str(e)
-
-            # All should produce errors
-            assert app_error is not None, f"App validation should fail for {len(invalid_desc)} characters"
-            assert dataset_error is not None, f"Dataset validation should fail for {len(invalid_desc)} characters"
-            error_msg = f"Service Dataset validation should fail for {len(invalid_desc)} characters"
-            assert service_dataset_error is not None, error_msg
-
-            # Errors should be identical
-            error_msg = f"Error messages should be identical for {len(invalid_desc)} characters"
-            assert app_error == dataset_error == service_dataset_error, error_msg
-            assert app_error == "Description cannot exceed 400 characters."
     def test_boundary_values(self):
-        """Test boundary values around the 400 character limit"""
+        """Test boundary values around the 400 character limit."""
         boundary_tests = [
             (0, True),  # Empty
             (1, True),  # Minimum
@@ -184,69 +63,45 @@ class TestDescriptionValidationUnit:
             if should_pass:
                 # Should not raise exception
-                assert app_validate(test_desc) == test_desc
-                assert dataset_validate(test_desc) == test_desc
-                assert service_dataset_validate(test_desc) == test_desc
+                assert validate_description_length(test_desc) == test_desc
             else:
                 # Should raise ValueError
                 with pytest.raises(ValueError):
-                    app_validate(test_desc)
-                with pytest.raises(ValueError):
-                    dataset_validate(test_desc)
-                with pytest.raises(ValueError):
-                    service_dataset_validate(test_desc)
+                    validate_description_length(test_desc)
     def test_special_characters(self):
         """Test validation with special characters, Unicode, etc."""
         # Unicode characters
         unicode_desc = "测试描述" * 100  # Chinese characters
         if len(unicode_desc) <= 400:
-            assert app_validate(unicode_desc) == unicode_desc
-            assert dataset_validate(unicode_desc) == unicode_desc
-            assert service_dataset_validate(unicode_desc) == unicode_desc
+            assert validate_description_length(unicode_desc) == unicode_desc

         # Special characters
         special_desc = "Special chars: !@#$%^&*()_+-=[]{}|;':\",./<>?" * 10
         if len(special_desc) <= 400:
-            assert app_validate(special_desc) == special_desc
-            assert dataset_validate(special_desc) == special_desc
-            assert service_dataset_validate(special_desc) == special_desc
+            assert validate_description_length(special_desc) == special_desc

         # Mixed content
         mixed_desc = "Mixed content: 测试 123 !@# " * 15
         if len(mixed_desc) <= 400:
-            assert app_validate(mixed_desc) == mixed_desc
-            assert dataset_validate(mixed_desc) == mixed_desc
-            assert service_dataset_validate(mixed_desc) == mixed_desc
+            assert validate_description_length(mixed_desc) == mixed_desc
         elif len(mixed_desc) > 400:
             with pytest.raises(ValueError):
-                app_validate(mixed_desc)
-            with pytest.raises(ValueError):
-                dataset_validate(mixed_desc)
-            with pytest.raises(ValueError):
-                service_dataset_validate(mixed_desc)
+                validate_description_length(mixed_desc)
     def test_whitespace_handling(self):
-        """Test validation with various whitespace scenarios"""
+        """Test validation with various whitespace scenarios."""
         # Leading/trailing whitespace
         whitespace_desc = " Description with whitespace "
         if len(whitespace_desc) <= 400:
-            assert app_validate(whitespace_desc) == whitespace_desc
-            assert dataset_validate(whitespace_desc) == whitespace_desc
-            assert service_dataset_validate(whitespace_desc) == whitespace_desc
+            assert validate_description_length(whitespace_desc) == whitespace_desc

         # Newlines and tabs
         multiline_desc = "Line 1\nLine 2\tTabbed content"
         if len(multiline_desc) <= 400:
-            assert app_validate(multiline_desc) == multiline_desc
-            assert dataset_validate(multiline_desc) == multiline_desc
-            assert service_dataset_validate(multiline_desc) == multiline_desc
+            assert validate_description_length(multiline_desc) == multiline_desc

         # Only whitespace over limit
         only_spaces = " " * 401
         with pytest.raises(ValueError):
-            app_validate(only_spaces)
-        with pytest.raises(ValueError):
-            dataset_validate(only_spaces)
-        with pytest.raises(ValueError):
-            service_dataset_validate(only_spaces)
+            validate_description_length(only_spaces)

View File

@@ -1,7 +1,6 @@
 import {
   createContext,
   useContext,
-  useEffect,
   useRef,
 } from 'react'
 import {
import {
@@ -19,11 +18,13 @@ type Shape = {
 export const createFileStore = (
   value: FileEntity[] = [],
+  onChange?: (files: FileEntity[]) => void,
 ) => {
   return create<Shape>(set => ({
     files: value ? [...value] : [],
     setFiles: (files) => {
       set({ files })
+      onChange?.(files)
     },
   }))
 }
@@ -54,35 +55,9 @@
   onChange,
 }: FileProviderProps) => {
   const storeRef = useRef<FileStore | undefined>(undefined)
-  const onChangeRef = useRef<FileProviderProps['onChange']>(onChange)
-  const isSyncingRef = useRef(false)

   if (!storeRef.current)
-    storeRef.current = createFileStore(value)
-
-  // keep latest onChange
-  useEffect(() => {
-    onChangeRef.current = onChange
-  }, [onChange])
-
-  // subscribe to store changes and call latest onChange
-  useEffect(() => {
-    const store = storeRef.current!
-    const unsubscribe = store.subscribe((state: Shape) => {
-      if (isSyncingRef.current) return
-      onChangeRef.current?.(state.files)
-    })
-    return unsubscribe
-  }, [])
-
-  // sync external value into internal store when value changes
-  useEffect(() => {
-    const store = storeRef.current!
-    const nextFiles = value ? [...value] : []
-    isSyncingRef.current = true
-    store.setState({ files: nextFiles })
-    isSyncingRef.current = false
-  }, [value])
+    storeRef.current = createFileStore(value, onChange)

   return (
     <FileContext.Provider value={storeRef.current}>

View File

@@ -193,8 +193,8 @@ function CodeGroupPanels({ children, targetCode, ...props }: ICodeGroupPanelsPro
   if ((targetCode?.length ?? 0) > 1) {
     return (
       <TabPanels>
-        {targetCode!.map(code => (
-          <TabPanel>
+        {targetCode!.map((code, index) => (
+          <TabPanel key={code.title || code.tag || index}>
             <CodePanel {...props} targetCode={code} />
           </TabPanel>
         ))}
@@ -206,8 +206,8 @@ function CodeGroupPanels({ children, targetCode, ...props }: ICodeGroupPanelsPro
 }

 function usePreventLayoutShift() {
-  const positionRef = useRef<any>()
-  const rafRef = useRef<any>()
+  const positionRef = useRef<any>(null)
+  const rafRef = useRef<any>(null)

   useEffect(() => {
     return () => {

View File

@@ -50,13 +50,13 @@
     "@headlessui/react": "2.2.1",
     "@heroicons/react": "^2.0.16",
     "@hookform/resolvers": "^3.9.0",
-    "@lexical/code": "^0.30.0",
-    "@lexical/link": "^0.30.0",
-    "@lexical/list": "^0.30.0",
-    "@lexical/react": "^0.30.0",
-    "@lexical/selection": "^0.30.0",
-    "@lexical/text": "^0.35.0",
-    "@lexical/utils": "^0.30.0",
+    "@lexical/code": "^0.36.2",
+    "@lexical/link": "^0.36.2",
+    "@lexical/list": "^0.36.2",
+    "@lexical/react": "^0.36.2",
+    "@lexical/selection": "^0.36.2",
+    "@lexical/text": "^0.36.2",
+    "@lexical/utils": "^0.36.2",
     "@monaco-editor/react": "^4.6.0",
     "@octokit/core": "^6.1.2",
     "@octokit/request-error": "^6.1.5",
@@ -92,14 +92,14 @@
     "katex": "^0.16.21",
     "ky": "^1.7.2",
     "lamejs": "^1.2.1",
-    "lexical": "^0.30.0",
+    "lexical": "^0.36.2",
     "line-clamp": "^1.0.0",
     "lodash-es": "^4.17.21",
     "mermaid": "11.10.0",
     "mime": "^4.0.4",
     "mitt": "^3.0.1",
     "negotiator": "^1.0.0",
-    "next": "15.5.0",
+    "next": "15.5.4",
     "next-pwa": "^5.6.0",
     "next-themes": "^0.4.3",
     "pinyin-pro": "^3.25.0",
@@ -142,14 +142,15 @@
   },
   "devDependencies": {
     "@antfu/eslint-config": "^5.0.0",
+    "@babel/core": "^7.28.3",
     "@chromatic-com/storybook": "^3.1.0",
     "@eslint-react/eslint-plugin": "^1.15.0",
     "@happy-dom/jest-environment": "^17.4.4",
     "@mdx-js/loader": "^3.1.0",
     "@mdx-js/react": "^3.1.0",
-    "@next/bundle-analyzer": "15.5.3",
-    "@next/eslint-plugin-next": "15.5.0",
-    "@next/mdx": "15.5.0",
+    "@next/bundle-analyzer": "15.5.4",
+    "@next/eslint-plugin-next": "15.5.4",
+    "@next/mdx": "15.5.4",
     "@rgrove/parse-xml": "^4.1.0",
     "@storybook/addon-essentials": "8.5.0",
     "@storybook/addon-interactions": "8.5.0",
@@ -162,7 +163,6 @@
     "@testing-library/dom": "^10.4.0",
     "@testing-library/jest-dom": "^6.8.0",
     "@testing-library/react": "^16.0.1",
-    "@babel/core": "^7.28.3",
     "@types/dagre": "^0.7.52",
     "@types/jest": "^29.5.13",
     "@types/js-cookie": "^3.0.6",
@@ -179,6 +179,7 @@
     "@types/sortablejs": "^1.15.1",
     "@types/uuid": "^10.0.0",
     "autoprefixer": "^10.4.20",
+    "babel-loader": "^9.2.1",
     "bing-translate-api": "^4.0.2",
     "code-inspector-plugin": "1.2.9",
     "cross-env": "^7.0.3",
@@ -201,8 +202,7 @@
     "storybook": "8.5.0",
     "tailwindcss": "^3.4.14",
     "typescript": "^5.8.3",
-    "uglify-js": "^3.19.3",
-    "babel-loader": "^9.2.1"
+    "uglify-js": "^3.19.3"
   },
   "resolutions": {
     "@types/react": "19.1.11",

File diff suppressed because it is too large