From b610cf9a11aab67ad93adcc248b3df6d4fda91ce Mon Sep 17 00:00:00 2001 From: Ponder Date: Tue, 4 Nov 2025 10:27:58 +0800 Subject: [PATCH] feat: add segments max number limit for SegmentApi.post (#27745) --- api/.env.example | 3 +++ api/configs/feature/__init__.py | 5 +++++ api/controllers/service_api/dataset/segment.py | 5 +++++ docker/.env.example | 3 +++ docker/docker-compose.yaml | 1 + 5 files changed, 17 insertions(+) diff --git a/api/.env.example b/api/.env.example index c59d3ea16f..22dd7600ed 100644 --- a/api/.env.example +++ b/api/.env.example @@ -608,3 +608,6 @@ SWAGGER_UI_PATH=/swagger-ui.html # Whether to encrypt dataset IDs when exporting DSL files (default: true) # Set to false to export dataset IDs as plain text for easier cross-environment import DSL_EXPORT_ENCRYPT_DATASET_ID=true + +# Maximum number of segments for dataset segments API (0 for unlimited) +DATASET_MAX_SEGMENTS_PER_REQUEST=0 diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index b2a2f8d0fd..59fc0b9661 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -920,6 +920,11 @@ class DataSetConfig(BaseSettings): default=True, ) + DATASET_MAX_SEGMENTS_PER_REQUEST: NonNegativeInt = Field( + description="Maximum number of segments for dataset segments API (0 for unlimited)", + default=0, + ) + class WorkspaceConfig(BaseSettings): """ diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index 81abd19fed..9ca500b044 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -2,6 +2,7 @@ from flask import request from flask_restx import marshal, reqparse from werkzeug.exceptions import NotFound +from configs import dify_config from controllers.service_api import service_api_ns from controllers.service_api.app.error import ProviderNotInitializeError from controllers.service_api.wraps import ( @@ -107,6 +108,10 @@ class SegmentApi(DatasetApiResource): # validate args args = segment_create_parser.parse_args() if args["segments"] is not None: + segments_limit = dify_config.DATASET_MAX_SEGMENTS_PER_REQUEST + if segments_limit > 0 and len(args["segments"]) > segments_limit: + raise ValueError(f"Exceeded maximum segments limit of {segments_limit}.") + for args_item in args["segments"]: SegmentService.segment_create_args_validate(args_item, document) segments = SegmentService.multi_create_segment(args["segments"], document, dataset) diff --git a/docker/.env.example b/docker/.env.example index 7d7c2a2cda..386e328d99 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -1351,6 +1351,9 @@ SWAGGER_UI_PATH=/swagger-ui.html # Set to false to export dataset IDs as plain text for easier cross-environment import DSL_EXPORT_ENCRYPT_DATASET_ID=true +# Maximum number of segments for dataset segments API (0 for unlimited) +DATASET_MAX_SEGMENTS_PER_REQUEST=0 + # Celery schedule tasks configuration ENABLE_CLEAN_EMBEDDING_CACHE_TASK=false ENABLE_CLEAN_UNUSED_DATASETS_TASK=false diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 404fbc7e2c..cc69c13ce2 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -603,6 +603,7 @@ x-shared-env: &shared-api-worker-env SWAGGER_UI_ENABLED: ${SWAGGER_UI_ENABLED:-true} SWAGGER_UI_PATH: ${SWAGGER_UI_PATH:-/swagger-ui.html} DSL_EXPORT_ENCRYPT_DATASET_ID: ${DSL_EXPORT_ENCRYPT_DATASET_ID:-true} + DATASET_MAX_SEGMENTS_PER_REQUEST: ${DATASET_MAX_SEGMENTS_PER_REQUEST:-0} ENABLE_CLEAN_EMBEDDING_CACHE_TASK: ${ENABLE_CLEAN_EMBEDDING_CACHE_TASK:-false} ENABLE_CLEAN_UNUSED_DATASETS_TASK: ${ENABLE_CLEAN_UNUSED_DATASETS_TASK:-false} ENABLE_CREATE_TIDB_SERVERLESS_TASK: ${ENABLE_CREATE_TIDB_SERVERLESS_TASK:-false}