diff --git a/api/.env.example b/api/.env.example index 48d8707d1ad..8d5ad7e5b6a 100644 --- a/api/.env.example +++ b/api/.env.example @@ -551,6 +551,11 @@ LOG_FORMAT=%(asctime)s,%(msecs)d %(levelname)-2s [%(filename)s:%(lineno)d] %(req # Indexing configuration INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000 +# Maximum number of worker threads used for high-quality dataset indexing (1-10). +# Lower this value to reduce memory usage and avoid OOM freezes during re-indexing. +INDEXING_MAX_WORKERS=2 +# Enable indexing memory snapshots in logs for OOM diagnosis (True/False). +INDEXING_MEMORY_SNAPSHOT_ENABLED=False # Workflow runtime configuration WORKFLOW_MAX_EXECUTION_STEPS=500 diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index f664274ba75..ebcee9f853f 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -1200,6 +1200,16 @@ class IndexingConfig(BaseSettings): default=50, ) + INDEXING_MAX_WORKERS: PositiveInt = Field( + description="Maximum number of worker threads used for high-quality dataset indexing (1-10)", + default=2, + ) + + INDEXING_MEMORY_SNAPSHOT_ENABLED: bool = Field( + description="Enable memory snapshot logs during high-quality indexing (for OOM diagnosis)", + default=False, + ) + class MultiModalTransferConfig(BaseSettings): MULTIMODAL_SEND_FORMAT: Literal["base64", "url"] = Field( diff --git a/api/core/helper/code_executor/code_executor.py b/api/core/helper/code_executor/code_executor.py index 951e065b2cb..96a8cf2fc1b 100644 --- a/api/core/helper/code_executor/code_executor.py +++ b/api/core/helper/code_executor/code_executor.py @@ -1,4 +1,5 @@ import logging +import time from collections.abc import Mapping from threading import Lock from typing import Any @@ -18,6 +19,9 @@ from graphon.nodes.code.entities import CodeLanguage logger = logging.getLogger(__name__) code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT)) CODE_EXECUTION_SSL_VERIFY = 
dify_config.CODE_EXECUTION_SSL_VERIFY +_TRANSIENT_STATUS_CODES = frozenset({502, 503}) +_MAX_TRANSIENT_RETRIES = 3 +_BASE_RETRY_DELAY_SECONDS = 0.2 _CODE_EXECUTOR_CLIENT_LIMITS = httpx.Limits( max_connections=dify_config.CODE_EXECUTION_POOL_MAX_CONNECTIONS, max_keepalive_connections=dify_config.CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS, @@ -94,28 +98,50 @@ class CodeExecutor: client = get_pooled_http_client(_CODE_EXECUTOR_CLIENT_KEY, _build_code_executor_client) - try: - response = client.post( - str(url), - json=data, - headers=headers, - timeout=timeout, - ) - if response.status_code == 503: - raise CodeExecutionError("Code execution service is unavailable") - elif response.status_code != 200: - raise Exception( - f"Failed to execute code, got status code {response.status_code}," - f" please check if the sandbox service is running" + response: httpx.Response | None = None + last_error: Exception | None = None + for attempt in range(_MAX_TRANSIENT_RETRIES + 1): + try: + response = client.post( + str(url), + json=data, + headers=headers, + timeout=timeout, ) - except CodeExecutionError as e: - raise e - except Exception as e: - raise CodeExecutionError( - "Failed to execute code, which is likely a network issue," - " please check if the sandbox service is running." 
- f" ( Error: {str(e)} )" - ) + + if response.status_code in _TRANSIENT_STATUS_CODES: + if attempt < _MAX_TRANSIENT_RETRIES: + time.sleep(_BASE_RETRY_DELAY_SECONDS * (2**attempt)) + continue + if response.status_code == 503: + raise CodeExecutionError("Code execution service is unavailable") + raise Exception( + f"Failed to execute code, got status code {response.status_code}," + f" please check if the sandbox service is running" + ) + + if response.status_code != 200: + raise Exception( + f"Failed to execute code, got status code {response.status_code}," + f" please check if the sandbox service is running" + ) + break + except CodeExecutionError: + raise + except Exception as e: + last_error = e + is_transport_error = isinstance(e, httpx.TransportError) + if is_transport_error and attempt < _MAX_TRANSIENT_RETRIES: + time.sleep(_BASE_RETRY_DELAY_SECONDS * (2**attempt)) + continue + raise CodeExecutionError( + "Failed to execute code, which is likely a network issue," + " please check if the sandbox service is running." 
+ f" ( Error: {str(last_error)} )" + ) + + if response is None: + raise CodeExecutionError("Failed to execute code, no response received from sandbox service") try: response_data = response.json() diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 0ed91e77913..2486c18ca86 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -1,4 +1,5 @@ import concurrent.futures +import gc import json import logging import re @@ -8,6 +9,7 @@ import uuid from collections.abc import Mapping from typing import Any +import psutil from flask import Flask, current_app from sqlalchemy import delete, func, select, update from sqlalchemy.orm.exc import ObjectDeletedError @@ -65,6 +67,23 @@ class IndexingRunner: document.stopped_at = naive_utc_now() db.session.commit() + @staticmethod + def _log_memory_snapshot(stage: str, *, dataset_document_id: str, extra: Mapping[str, Any] | None = None) -> None: + if not dify_config.INDEXING_MEMORY_SNAPSHOT_ENABLED: + return + try: + rss_bytes = psutil.Process().memory_info().rss + payload: dict[str, Any] = { + "stage": stage, + "dataset_document_id": dataset_document_id, + "rss_mb": round(rss_bytes / 1024 / 1024, 2), + } + if extra: + payload.update(extra) + logger.info("indexing-memory-snapshot %s", payload) + except Exception: + logger.exception("Failed to capture indexing memory snapshot") + def run(self, dataset_documents: list[DatasetDocument]): """Run the indexing process.""" for dataset_document in dataset_documents: @@ -605,8 +624,19 @@ class IndexingRunner: ) create_keyword_thread.start() - max_workers = 10 + # High-quality indexing is memory intensive (embedding generation + vector writes). + # Running too many chunks in parallel can trigger OOM and freeze the service until reboot. 
+ max_workers = max(1, int(dify_config.INDEXING_MAX_WORKERS)) + max_workers = min(max_workers, len(documents)) if documents else 1 if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY: + self._log_memory_snapshot( + "high_quality_load_start", + dataset_document_id=dataset_document.id, + extra={ + "max_workers": max_workers, + "documents": len(documents), + }, + ) with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [] @@ -633,8 +663,13 @@ class IndexingRunner: ) ) - for future in futures: + for completed_idx, future in enumerate(futures, start=1): tokens += future.result() + self._log_memory_snapshot( + "high_quality_chunk_completed", + dataset_document_id=dataset_document.id, + extra={"completed_chunks": completed_idx, "total_chunks": len(futures)}, + ) if ( dataset_document.doc_form != IndexStructureType.PARENT_CHILD_INDEX and dataset.indexing_technique == IndexTechniqueType.ECONOMY @@ -728,6 +763,14 @@ class IndexingRunner: db.session.commit() + # Help reclaim memory between chunk tasks. + # This is especially important for self-hosted setups that may run repeated re-indexing. 
+ gc.collect() + self._log_memory_snapshot( + "chunk_gc_collected", + dataset_document_id=dataset_document.id, + extra={"chunk_documents": len(chunk_documents)}, + ) return tokens @staticmethod diff --git a/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py b/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py index 3f67b9c47ec..41d66195c3b 100644 --- a/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py +++ b/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py @@ -674,6 +674,36 @@ class TestIndexingRunnerLoad: # Verify executor was used for parallel processing assert mock_executor_instance.submit.called + def test_load_with_high_quality_respects_indexing_max_workers( + self, mock_dependencies, sample_dataset, sample_dataset_document, sample_documents + ): + """Ensure the high-quality indexing fan-out is capped by INDEXING_MAX_WORKERS.""" + runner = IndexingRunner() + mock_embedding_instance = MagicMock() + mock_embedding_instance.get_text_embedding_num_tokens.return_value = 100 + model_manager = mock_dependencies["model_manager"].return_value + model_manager.get_model_instance.return_value = mock_embedding_instance + + mock_processor = MagicMock() + + # Mock ThreadPoolExecutor + mock_future = MagicMock() + mock_future.result.return_value = 300 + mock_executor_instance = MagicMock() + mock_executor_instance.__enter__.return_value = mock_executor_instance + mock_executor_instance.__exit__.return_value = None + mock_executor_instance.submit.return_value = mock_future + mock_dependencies["executor"].return_value = mock_executor_instance + + with ( + patch.object(runner, "_update_document_index_status"), + patch("core.indexing_runner.dify_config") as mock_config, + ): + mock_config.INDEXING_MAX_WORKERS = 2 + runner._load(mock_processor, sample_dataset, sample_dataset_document, sample_documents) + + mock_dependencies["executor"].assert_called_once_with(max_workers=2) + def test_load_with_economy_indexing( self, 
mock_dependencies, sample_dataset, sample_dataset_document, sample_documents ): diff --git a/docker/.env.example b/docker/.env.example index 9646eeeb735..0580bc30e08 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -181,7 +181,477 @@ WEAVIATE_ENABLE_TOKENIZER_GSE=false WEAVIATE_ENABLE_TOKENIZER_KAGOME_JA=false WEAVIATE_ENABLE_TOKENIZER_KAGOME_KR=false -# Sandbox and SSRF proxy +# For OceanBase metadata database configuration, available when `DB_TYPE` is `oceanbase`. +# For OceanBase vector database configuration, available when `VECTOR_STORE` is `oceanbase` +# If you want to use OceanBase as both vector database and metadata database, you need to set both `DB_TYPE` and `VECTOR_STORE` to `oceanbase`, and make the database configuration the same as the vector database configuration. +# seekdb is the lite version of OceanBase and shares the connection configuration with OceanBase. +OCEANBASE_VECTOR_HOST=oceanbase +OCEANBASE_VECTOR_PORT=2881 +OCEANBASE_VECTOR_USER=root@test +OCEANBASE_VECTOR_PASSWORD=difyai123456 +OCEANBASE_VECTOR_DATABASE=test +OCEANBASE_CLUSTER_NAME=difyai +OCEANBASE_MEMORY_LIMIT=6G +OCEANBASE_ENABLE_HYBRID_SEARCH=false +# For OceanBase vector database, built-in fulltext parsers are `ngram`, `beng`, `space`, `ngram2`, `ik` +# For OceanBase vector database, external fulltext parsers (require plugin installation) are `japanese_ftparser`, `thai_ftparser` +OCEANBASE_FULLTEXT_PARSER=ik +SEEKDB_MEMORY_LIMIT=2G + +# The Qdrant endpoint URL. Only available when VECTOR_STORE is `qdrant`. +QDRANT_URL=http://qdrant:6333 +QDRANT_API_KEY=difyai123456 +QDRANT_CLIENT_TIMEOUT=20 +QDRANT_GRPC_ENABLED=false +QDRANT_GRPC_PORT=6334 +QDRANT_REPLICATION_FACTOR=1 + +# Milvus configuration. Only available when VECTOR_STORE is `milvus`. +# The milvus uri. 
+MILVUS_URI=http://host.docker.internal:19530 +MILVUS_DATABASE= +MILVUS_TOKEN= +MILVUS_USER= +MILVUS_PASSWORD= +MILVUS_ENABLE_HYBRID_SEARCH=False +MILVUS_ANALYZER_PARAMS= + +# MyScale configuration, only available when VECTOR_STORE is `myscale` +# For multi-language support, please set MYSCALE_FTS_PARAMS with referring to: +# https://myscale.com/docs/en/text-search/#understanding-fts-index-parameters +MYSCALE_HOST=myscale +MYSCALE_PORT=8123 +MYSCALE_USER=default +MYSCALE_PASSWORD= +MYSCALE_DATABASE=dify +MYSCALE_FTS_PARAMS= + +# Couchbase configurations, only available when VECTOR_STORE is `couchbase` +# The connection string must include hostname defined in the docker-compose file (couchbase-server in this case) +COUCHBASE_CONNECTION_STRING=couchbase://couchbase-server +COUCHBASE_USER=Administrator +COUCHBASE_PASSWORD=password +COUCHBASE_BUCKET_NAME=Embeddings +COUCHBASE_SCOPE_NAME=_default + +# Hologres configurations, only available when VECTOR_STORE is `hologres` +# access_key_id is used as the PG username, access_key_secret is used as the PG password +HOLOGRES_HOST= +HOLOGRES_PORT=80 +HOLOGRES_DATABASE= +HOLOGRES_ACCESS_KEY_ID= +HOLOGRES_ACCESS_KEY_SECRET= +HOLOGRES_SCHEMA=public +HOLOGRES_TOKENIZER=jieba +HOLOGRES_DISTANCE_METHOD=Cosine +HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq +HOLOGRES_MAX_DEGREE=64 +HOLOGRES_EF_CONSTRUCTION=400 + +# pgvector configurations, only available when VECTOR_STORE is `pgvector` +PGVECTOR_HOST=pgvector +PGVECTOR_PORT=5432 +PGVECTOR_USER=postgres +PGVECTOR_PASSWORD=difyai123456 +PGVECTOR_DATABASE=dify +PGVECTOR_MIN_CONNECTION=1 +PGVECTOR_MAX_CONNECTION=5 +PGVECTOR_PG_BIGM=false +PGVECTOR_PG_BIGM_VERSION=1.2-20240606 + +# vastbase configurations, only available when VECTOR_STORE is `vastbase` +VASTBASE_HOST=vastbase +VASTBASE_PORT=5432 +VASTBASE_USER=dify +VASTBASE_PASSWORD=Difyai123456 +VASTBASE_DATABASE=dify +VASTBASE_MIN_CONNECTION=1 +VASTBASE_MAX_CONNECTION=5 + +# pgvecto-rs configurations, only available when VECTOR_STORE is 
`pgvecto-rs` +PGVECTO_RS_HOST=pgvecto-rs +PGVECTO_RS_PORT=5432 +PGVECTO_RS_USER=postgres +PGVECTO_RS_PASSWORD=difyai123456 +PGVECTO_RS_DATABASE=dify + +# analyticdb configurations, only available when VECTOR_STORE is `analyticdb` +ANALYTICDB_KEY_ID=your-ak +ANALYTICDB_KEY_SECRET=your-sk +ANALYTICDB_REGION_ID=cn-hangzhou +ANALYTICDB_INSTANCE_ID=gp-ab123456 +ANALYTICDB_ACCOUNT=testaccount +ANALYTICDB_PASSWORD=testpassword +ANALYTICDB_NAMESPACE=dify +ANALYTICDB_NAMESPACE_PASSWORD=difypassword +ANALYTICDB_HOST=gp-test.aliyuncs.com +ANALYTICDB_PORT=5432 +ANALYTICDB_MIN_CONNECTION=1 +ANALYTICDB_MAX_CONNECTION=5 + +# TiDB vector configurations, only available when VECTOR_STORE is `tidb_vector` +TIDB_VECTOR_HOST=tidb +TIDB_VECTOR_PORT=4000 +TIDB_VECTOR_USER= +TIDB_VECTOR_PASSWORD= +TIDB_VECTOR_DATABASE=dify + +# Matrixone vector configurations. +MATRIXONE_HOST=matrixone +MATRIXONE_PORT=6001 +MATRIXONE_USER=dump +MATRIXONE_PASSWORD=111 +MATRIXONE_DATABASE=dify + +# Tidb on qdrant configuration, only available when VECTOR_STORE is `tidb_on_qdrant` +TIDB_ON_QDRANT_URL=http://127.0.0.1 +TIDB_ON_QDRANT_API_KEY=dify +TIDB_ON_QDRANT_CLIENT_TIMEOUT=20 +TIDB_ON_QDRANT_GRPC_ENABLED=false +TIDB_ON_QDRANT_GRPC_PORT=6334 +TIDB_PUBLIC_KEY=dify +TIDB_PRIVATE_KEY=dify +TIDB_API_URL=http://127.0.0.1 +TIDB_IAM_API_URL=http://127.0.0.1 +TIDB_REGION=regions/aws-us-east-1 +TIDB_PROJECT_ID=dify +TIDB_SPEND_LIMIT=100 + +# Chroma configuration, only available when VECTOR_STORE is `chroma` +CHROMA_HOST=127.0.0.1 +CHROMA_PORT=8000 +CHROMA_TENANT=default_tenant +CHROMA_DATABASE=default_database +CHROMA_AUTH_PROVIDER=chromadb.auth.token_authn.TokenAuthClientProvider +CHROMA_AUTH_CREDENTIALS= + +# Oracle configuration, only available when VECTOR_STORE is `oracle` +ORACLE_USER=dify +ORACLE_PASSWORD=dify +ORACLE_DSN=oracle:1521/FREEPDB1 +ORACLE_CONFIG_DIR=/app/api/storage/wallet +ORACLE_WALLET_LOCATION=/app/api/storage/wallet +ORACLE_WALLET_PASSWORD=dify +ORACLE_IS_AUTONOMOUS=false + +# AlibabaCloud 
MySQL configuration, only available when VECTOR_STORE is `alibabacloud_mysql` +ALIBABACLOUD_MYSQL_HOST=127.0.0.1 +ALIBABACLOUD_MYSQL_PORT=3306 +ALIBABACLOUD_MYSQL_USER=root +ALIBABACLOUD_MYSQL_PASSWORD=difyai123456 +ALIBABACLOUD_MYSQL_DATABASE=dify +ALIBABACLOUD_MYSQL_MAX_CONNECTION=5 +ALIBABACLOUD_MYSQL_HNSW_M=6 + +# relyt configurations, only available when VECTOR_STORE is `relyt` +RELYT_HOST=db +RELYT_PORT=5432 +RELYT_USER=postgres +RELYT_PASSWORD=difyai123456 +RELYT_DATABASE=postgres + +# open search configuration, only available when VECTOR_STORE is `opensearch` +OPENSEARCH_HOST=opensearch +OPENSEARCH_PORT=9200 +OPENSEARCH_SECURE=true +OPENSEARCH_VERIFY_CERTS=true +OPENSEARCH_AUTH_METHOD=basic +OPENSEARCH_USER=admin +OPENSEARCH_PASSWORD=admin +# If using AWS managed IAM, e.g. Managed Cluster or OpenSearch Serverless +OPENSEARCH_AWS_REGION=ap-southeast-1 +OPENSEARCH_AWS_SERVICE=aoss + +# tencent vector configurations, only available when VECTOR_STORE is `tencent` +TENCENT_VECTOR_DB_URL=http://127.0.0.1 +TENCENT_VECTOR_DB_API_KEY=dify +TENCENT_VECTOR_DB_TIMEOUT=30 +TENCENT_VECTOR_DB_USERNAME=dify +TENCENT_VECTOR_DB_DATABASE=dify +TENCENT_VECTOR_DB_SHARD=1 +TENCENT_VECTOR_DB_REPLICAS=2 +TENCENT_VECTOR_DB_ENABLE_HYBRID_SEARCH=false + +# ElasticSearch configuration, only available when VECTOR_STORE is `elasticsearch` +ELASTICSEARCH_HOST=0.0.0.0 +ELASTICSEARCH_PORT=9200 +ELASTICSEARCH_USERNAME=elastic +ELASTICSEARCH_PASSWORD=elastic +KIBANA_PORT=5601 + +# Using ElasticSearch Cloud Serverless, or not. 
+ELASTICSEARCH_USE_CLOUD=false +ELASTICSEARCH_CLOUD_URL=YOUR-ELASTICSEARCH_CLOUD_URL +ELASTICSEARCH_API_KEY=YOUR-ELASTICSEARCH_API_KEY + +ELASTICSEARCH_VERIFY_CERTS=False +ELASTICSEARCH_CA_CERTS= +ELASTICSEARCH_REQUEST_TIMEOUT=100000 +ELASTICSEARCH_RETRY_ON_TIMEOUT=True +ELASTICSEARCH_MAX_RETRIES=10 + +# baidu vector configurations, only available when VECTOR_STORE is `baidu` +BAIDU_VECTOR_DB_ENDPOINT=http://127.0.0.1:5287 +BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS=30000 +BAIDU_VECTOR_DB_ACCOUNT=root +BAIDU_VECTOR_DB_API_KEY=dify +BAIDU_VECTOR_DB_DATABASE=dify +BAIDU_VECTOR_DB_SHARD=1 +BAIDU_VECTOR_DB_REPLICAS=3 +BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER=DEFAULT_ANALYZER +BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE=COARSE_MODE +BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT=500 +BAIDU_VECTOR_DB_AUTO_BUILD_ROW_COUNT_INCREMENT_RATIO=0.05 +BAIDU_VECTOR_DB_REBUILD_INDEX_TIMEOUT_IN_SECONDS=300 + +# VikingDB configurations, only available when VECTOR_STORE is `vikingdb` +VIKINGDB_ACCESS_KEY=your-ak +VIKINGDB_SECRET_KEY=your-sk +VIKINGDB_REGION=cn-shanghai +VIKINGDB_HOST=api-vikingdb.xxx.volces.com +VIKINGDB_SCHEMA=http +VIKINGDB_CONNECTION_TIMEOUT=30 +VIKINGDB_SOCKET_TIMEOUT=30 + +# Lindorm configuration, only available when VECTOR_STORE is `lindorm` +LINDORM_URL=http://localhost:30070 +LINDORM_USERNAME=admin +LINDORM_PASSWORD=admin +LINDORM_USING_UGC=True +LINDORM_QUERY_TIMEOUT=1 + +# opengauss configurations, only available when VECTOR_STORE is `opengauss` +OPENGAUSS_HOST=opengauss +OPENGAUSS_PORT=6600 +OPENGAUSS_USER=postgres +OPENGAUSS_PASSWORD=Dify@123 +OPENGAUSS_DATABASE=dify +OPENGAUSS_MIN_CONNECTION=1 +OPENGAUSS_MAX_CONNECTION=5 +OPENGAUSS_ENABLE_PQ=false + +# huawei cloud search service vector configurations, only available when VECTOR_STORE is `huawei_cloud` +HUAWEI_CLOUD_HOSTS=https://127.0.0.1:9200 +HUAWEI_CLOUD_USER=admin +HUAWEI_CLOUD_PASSWORD=admin + +# Upstash Vector configuration, only available when VECTOR_STORE is `upstash` 
+UPSTASH_VECTOR_URL=https://xxx-vector.upstash.io +UPSTASH_VECTOR_TOKEN=dify + +# TableStore Vector configuration +# (only used when VECTOR_STORE is tablestore) +TABLESTORE_ENDPOINT=https://instance-name.cn-hangzhou.ots.aliyuncs.com +TABLESTORE_INSTANCE_NAME=instance-name +TABLESTORE_ACCESS_KEY_ID=xxx +TABLESTORE_ACCESS_KEY_SECRET=xxx +TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false + +# Clickzetta configuration, only available when VECTOR_STORE is `clickzetta` +CLICKZETTA_USERNAME= +CLICKZETTA_PASSWORD= +CLICKZETTA_INSTANCE= +CLICKZETTA_SERVICE=api.clickzetta.com +CLICKZETTA_WORKSPACE=quick_start +CLICKZETTA_VCLUSTER=default_ap +CLICKZETTA_SCHEMA=dify +CLICKZETTA_BATCH_SIZE=100 +CLICKZETTA_ENABLE_INVERTED_INDEX=true +CLICKZETTA_ANALYZER_TYPE=chinese +CLICKZETTA_ANALYZER_MODE=smart +CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance + +# InterSystems IRIS configuration, only available when VECTOR_STORE is `iris` +IRIS_HOST=iris +IRIS_SUPER_SERVER_PORT=1972 +IRIS_WEB_SERVER_PORT=52773 +IRIS_USER=_SYSTEM +IRIS_PASSWORD=Dify@1234 +IRIS_DATABASE=USER +IRIS_SCHEMA=dify +IRIS_CONNECTION_URL= +IRIS_MIN_CONNECTION=1 +IRIS_MAX_CONNECTION=3 +IRIS_TEXT_INDEX=true +IRIS_TEXT_INDEX_LANGUAGE=en +IRIS_TIMEZONE=UTC + +# ------------------------------ +# Knowledge Configuration +# ------------------------------ + +# Upload file size limit, default 15M. +UPLOAD_FILE_SIZE_LIMIT=15 + +# The maximum number of files that can be uploaded at a time, default 5. +UPLOAD_FILE_BATCH_LIMIT=5 + +# Comma-separated list of file extensions blocked from upload for security reasons. +# Extensions should be lowercase without dots (e.g., exe,bat,sh,dll). +# Empty by default to allow all file types. +# Recommended: exe,bat,cmd,com,scr,vbs,ps1,msi,dll +UPLOAD_FILE_EXTENSION_BLACKLIST= + +# Maximum number of files allowed in a single chunk attachment, default 10. 
+SINGLE_CHUNK_ATTACHMENT_LIMIT=10 + +# Maximum number of files allowed in an image batch upload operation +IMAGE_FILE_BATCH_LIMIT=10 + +# Maximum allowed image file size for attachments in megabytes, default 2. +ATTACHMENT_IMAGE_FILE_SIZE_LIMIT=2 + +# Timeout for downloading image attachments in seconds, default 60. +ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT=60 + + +# ETL type, support: `dify`, `Unstructured` +# `dify` Dify's proprietary file extraction scheme +# `Unstructured` Unstructured.io file extraction scheme +ETL_TYPE=dify + +# Unstructured API path and API key, needs to be configured when ETL_TYPE is Unstructured +# Or using Unstructured for document extractor node for pptx. +# For example: http://unstructured:8000/general/v0/general +UNSTRUCTURED_API_URL= +UNSTRUCTURED_API_KEY= +SCARF_NO_ANALYTICS=true + +# ------------------------------ +# Model Configuration +# ------------------------------ + +# The maximum number of tokens allowed for prompt generation. +# This setting controls the upper limit of tokens that can be used by the LLM +# when generating a prompt in the prompt generation tool. +# Default: 512 tokens. +PROMPT_GENERATION_MAX_TOKENS=512 + +# The maximum number of tokens allowed for code generation. +# This setting controls the upper limit of tokens that can be used by the LLM +# when generating code in the code generation tool. +# Default: 1024 tokens. +CODE_GENERATION_MAX_TOKENS=1024 + +# Enable or disable plugin based token counting. If disabled, token counting will return 0. +# This can improve performance by skipping token counting operations. +# Default: false (disabled). +PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false + +# ------------------------------ +# Multi-modal Configuration +# ------------------------------ + +# The format of the image/video/audio/document sent when the multi-modal model is input, +# the default is base64, optional url. +# The delay of the call in url mode will be lower than that in base64 mode. 
+# It is generally recommended to use the more compatible base64 mode. +# If configured as url, you need to configure FILES_URL as an externally accessible address so that the multi-modal model can access the image/video/audio/document. +MULTIMODAL_SEND_FORMAT=base64 +# Upload image file size limit, default 10M. +UPLOAD_IMAGE_FILE_SIZE_LIMIT=10 +# Upload video file size limit, default 100M. +UPLOAD_VIDEO_FILE_SIZE_LIMIT=100 +# Upload audio file size limit, default 50M. +UPLOAD_AUDIO_FILE_SIZE_LIMIT=50 + +# ------------------------------ +# Sentry Configuration +# Used for application monitoring and error log tracking. +# ------------------------------ +SENTRY_DSN= + +# API Service Sentry DSN address, default is empty, when empty, +# all monitoring information is not reported to Sentry. +# If not set, Sentry error reporting will be disabled. +API_SENTRY_DSN= +# API Service The reporting ratio of Sentry events, if it is 0.01, it is 1%. +API_SENTRY_TRACES_SAMPLE_RATE=1.0 +# API Service The reporting ratio of Sentry profiles, if it is 0.01, it is 1%. +API_SENTRY_PROFILES_SAMPLE_RATE=1.0 + +# Web Service Sentry DSN address, default is empty, when empty, +# all monitoring information is not reported to Sentry. +# If not set, Sentry error reporting will be disabled. +WEB_SENTRY_DSN= + +# Plugin_daemon Service Sentry DSN address, default is empty, when empty, +# all monitoring information is not reported to Sentry. +# If not set, Sentry error reporting will be disabled. +PLUGIN_SENTRY_ENABLED=false +PLUGIN_SENTRY_DSN= + +# ------------------------------ +# Notion Integration Configuration +# Variables can be obtained by applying for Notion integration: https://www.notion.so/my-integrations +# ------------------------------ + +# Configure as "public" or "internal". +# Since Notion's OAuth redirect URL only supports HTTPS, +# if deploying locally, please use Notion's internal integration. 
+NOTION_INTEGRATION_TYPE=public +# Notion OAuth client secret (used for public integration type) +NOTION_CLIENT_SECRET= +# Notion OAuth client id (used for public integration type) +NOTION_CLIENT_ID= +# Notion internal integration secret. +# If the value of NOTION_INTEGRATION_TYPE is "internal", +# you need to configure this variable. +NOTION_INTERNAL_SECRET= + +# ------------------------------ +# Mail related configuration +# ------------------------------ + +# Mail type, support: resend, smtp, sendgrid +MAIL_TYPE=resend + +# Default send from email address, if not specified +# If using SendGrid, use the 'from' field for authentication if necessary. +MAIL_DEFAULT_SEND_FROM= + +# API-Key for the Resend email provider, used when MAIL_TYPE is `resend`. +RESEND_API_URL=https://api.resend.com +RESEND_API_KEY=your-resend-api-key + + +# SMTP server configuration, used when MAIL_TYPE is `smtp` +SMTP_SERVER= +SMTP_PORT=465 +SMTP_USERNAME= +SMTP_PASSWORD= +SMTP_USE_TLS=true +SMTP_OPPORTUNISTIC_TLS=false +# Optional: override the local hostname used for SMTP HELO/EHLO +SMTP_LOCAL_HOSTNAME= + +# SendGrid configuration +SENDGRID_API_KEY= + +# ------------------------------ +# Others Configuration +# ------------------------------ + +# Maximum length of segmentation tokens for indexing +INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000 + +# Maximum number of worker threads used for high-quality dataset indexing (1-10). +# Lower this value to reduce memory usage and avoid OOM/freezes during re-indexing. +INDEXING_MAX_WORKERS=2 +# Enable indexing memory snapshots in logs for OOM diagnosis (True/False). +INDEXING_MEMORY_SNAPSHOT_ENABLED=False + +# Member invitation link valid time (hours), +# Default: 72. +INVITE_EXPIRY_HOURS=72 + +# Reset password token valid time (minutes), +RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5 +EMAIL_REGISTER_TOKEN_EXPIRY_MINUTES=5 +CHANGE_EMAIL_TOKEN_EXPIRY_MINUTES=5 +OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES=5 + +# The sandbox service endpoint. 
CODE_EXECUTION_ENDPOINT=http://sandbox:8194 CODE_EXECUTION_API_KEY=dify-sandbox CODE_EXECUTION_SSL_VERIFY=True