mirror of https://github.com/langgenius/dify.git
pinecone
This commit is contained in:
parent 41dfdf1ac0
commit 90fc5a1f12
@ -0,0 +1,581 @@
# Your App secret key will be used for securely signing the session cookie
# Make sure you are changing this key for your deployment with a strong key.
# You can generate a strong key using `openssl rand -base64 42`.
# Alternatively you can set it with `SECRET_KEY` environment variable.
SECRET_KEY=9k4es1n7e13xdGP+Rs+DmRR2gTvSdZL9KIc4Bgah+uGLEcUgmf5+FUUc

# Ensure UTF-8 encoding
LANG=en_US.UTF-8
LC_ALL=en_US.UTF-8
PYTHONIOENCODING=utf-8

# Console API base URL
CONSOLE_API_URL=http://localhost:5001
CONSOLE_WEB_URL=http://localhost:3000

# Service API base URL
SERVICE_API_URL=http://localhost:5001

# Web APP base URL
APP_WEB_URL=http://localhost:3000

# Files URL
FILES_URL=http://localhost:5001

# INTERNAL_FILES_URL is used for plugin daemon communication within Docker network.
# Set this to the internal Docker service URL for proper plugin file access.
# Example: INTERNAL_FILES_URL=http://api:5001
INTERNAL_FILES_URL=http://127.0.0.1:5001

# The time in seconds after which the signature is rejected
FILES_ACCESS_TIMEOUT=300

# Access token expiration time in minutes
ACCESS_TOKEN_EXPIRE_MINUTES=60

# Refresh token expiration time in days
REFRESH_TOKEN_EXPIRE_DAYS=30

# Redis configuration
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_USERNAME=
REDIS_PASSWORD=difyai123456
REDIS_USE_SSL=false
# SSL configuration for Redis (when REDIS_USE_SSL=true)
REDIS_SSL_CERT_REQS=CERT_NONE
# Options: CERT_NONE, CERT_OPTIONAL, CERT_REQUIRED
REDIS_SSL_CA_CERTS=
# Path to CA certificate file for SSL verification
REDIS_SSL_CERTFILE=
# Path to client certificate file for SSL authentication
REDIS_SSL_KEYFILE=
# Path to client private key file for SSL authentication
REDIS_DB=0

# Redis Sentinel configuration.
REDIS_USE_SENTINEL=false
REDIS_SENTINELS=
REDIS_SENTINEL_SERVICE_NAME=
REDIS_SENTINEL_USERNAME=
REDIS_SENTINEL_PASSWORD=
REDIS_SENTINEL_SOCKET_TIMEOUT=0.1

# Redis Cluster configuration.
REDIS_USE_CLUSTERS=false
REDIS_CLUSTERS=
REDIS_CLUSTERS_PASSWORD=

# Celery configuration
CELERY_BROKER_URL=redis://:difyai123456@localhost:${REDIS_PORT}/1
CELERY_BACKEND=redis

# PostgreSQL database configuration
DB_USERNAME=postgres
DB_PASSWORD=difyai123456
DB_HOST=localhost
DB_PORT=5432
DB_DATABASE=dify

# Storage configuration
# Used to store uploaded files, private keys...
# Storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
STORAGE_TYPE=opendal

# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
OPENDAL_SCHEME=fs
OPENDAL_FS_ROOT=storage

# S3 Storage configuration
S3_USE_AWS_MANAGED_IAM=false
S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region

# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=your-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net

# Aliyun OSS Storage configuration
ALIYUN_OSS_BUCKET_NAME=your-bucket-name
ALIYUN_OSS_ACCESS_KEY=your-access-key
ALIYUN_OSS_SECRET_KEY=your-secret-key
ALIYUN_OSS_ENDPOINT=your-endpoint
ALIYUN_OSS_AUTH_VERSION=v1
ALIYUN_OSS_REGION=your-region
# Don't start with '/'. OSS doesn't support leading slash in object names.
ALIYUN_OSS_PATH=your-path

# Google Storage configuration
GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string

# Tencent COS Storage configuration
TENCENT_COS_BUCKET_NAME=your-bucket-name
TENCENT_COS_SECRET_KEY=your-secret-key
TENCENT_COS_SECRET_ID=your-secret-id
TENCENT_COS_REGION=your-region
TENCENT_COS_SCHEME=your-scheme

# Huawei OBS Storage Configuration
HUAWEI_OBS_BUCKET_NAME=your-bucket-name
HUAWEI_OBS_SECRET_KEY=your-secret-key
HUAWEI_OBS_ACCESS_KEY=your-access-key
HUAWEI_OBS_SERVER=your-server-url

# Baidu OBS Storage Configuration
BAIDU_OBS_BUCKET_NAME=your-bucket-name
BAIDU_OBS_SECRET_KEY=your-secret-key
BAIDU_OBS_ACCESS_KEY=your-access-key
BAIDU_OBS_ENDPOINT=your-server-url

# OCI Storage configuration
OCI_ENDPOINT=your-endpoint
OCI_BUCKET_NAME=your-bucket-name
OCI_ACCESS_KEY=your-access-key
OCI_SECRET_KEY=your-secret-key
OCI_REGION=your-region

# Volcengine TOS Storage configuration
VOLCENGINE_TOS_ENDPOINT=your-endpoint
VOLCENGINE_TOS_BUCKET_NAME=your-bucket-name
VOLCENGINE_TOS_ACCESS_KEY=your-access-key
VOLCENGINE_TOS_SECRET_KEY=your-secret-key
VOLCENGINE_TOS_REGION=your-region

# Supabase Storage Configuration
SUPABASE_BUCKET_NAME=your-bucket-name
SUPABASE_API_KEY=your-access-key
SUPABASE_URL=your-server-url

# CORS configuration
WEB_API_CORS_ALLOW_ORIGINS=http://localhost:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,*

# Vector database configuration
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `pinecone`.
VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index

# Weaviate configuration
WEAVIATE_ENDPOINT=http://localhost:8080
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
WEAVIATE_GRPC_ENABLED=false
WEAVIATE_BATCH_SIZE=100

# Qdrant configuration, use `http://localhost:6333` for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=difyai123456
QDRANT_CLIENT_TIMEOUT=20
QDRANT_GRPC_ENABLED=false
QDRANT_GRPC_PORT=6334
QDRANT_REPLICATION_FACTOR=1

# Couchbase configuration
COUCHBASE_CONNECTION_STRING=127.0.0.1
COUCHBASE_USER=Administrator
COUCHBASE_PASSWORD=password
COUCHBASE_BUCKET_NAME=Embeddings
COUCHBASE_SCOPE_NAME=_default

# Milvus configuration
MILVUS_URI=http://127.0.0.1:19530
MILVUS_TOKEN=
MILVUS_USER=root
MILVUS_PASSWORD=Milvus
MILVUS_ANALYZER_PARAMS=

# MyScale configuration
MYSCALE_HOST=127.0.0.1
MYSCALE_PORT=8123
MYSCALE_USER=default
MYSCALE_PASSWORD=
MYSCALE_DATABASE=default
MYSCALE_FTS_PARAMS=

# Relyt configuration
RELYT_HOST=127.0.0.1
RELYT_PORT=5432
RELYT_USER=postgres
RELYT_PASSWORD=postgres
RELYT_DATABASE=postgres

# Tencent configuration
TENCENT_VECTOR_DB_URL=http://127.0.0.1
TENCENT_VECTOR_DB_API_KEY=dify
TENCENT_VECTOR_DB_TIMEOUT=30
TENCENT_VECTOR_DB_USERNAME=dify
TENCENT_VECTOR_DB_DATABASE=dify
TENCENT_VECTOR_DB_SHARD=1
TENCENT_VECTOR_DB_REPLICAS=2
TENCENT_VECTOR_DB_ENABLE_HYBRID_SEARCH=false

# ElasticSearch configuration
ELASTICSEARCH_HOST=127.0.0.1
ELASTICSEARCH_PORT=9200
ELASTICSEARCH_USERNAME=elastic
ELASTICSEARCH_PASSWORD=elastic

# PGVECTO_RS configuration
PGVECTO_RS_HOST=localhost
PGVECTO_RS_PORT=5431
PGVECTO_RS_USER=postgres
PGVECTO_RS_PASSWORD=difyai123456
PGVECTO_RS_DATABASE=postgres

# PGVector configuration
PGVECTOR_HOST=127.0.0.1
PGVECTOR_PORT=5433
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=postgres
PGVECTOR_DATABASE=postgres
PGVECTOR_MIN_CONNECTION=1
PGVECTOR_MAX_CONNECTION=5

# TableStore Vector configuration
TABLESTORE_ENDPOINT=https://instance-name.cn-hangzhou.ots.aliyuncs.com
TABLESTORE_INSTANCE_NAME=instance-name
TABLESTORE_ACCESS_KEY_ID=xxx
TABLESTORE_ACCESS_KEY_SECRET=xxx
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false

# TiDB Vector configuration
TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com
TIDB_VECTOR_PORT=4000
TIDB_VECTOR_USER=xxx.root
TIDB_VECTOR_PASSWORD=xxxxxx
TIDB_VECTOR_DATABASE=dify

# TiDB on Qdrant configuration
TIDB_ON_QDRANT_URL=http://127.0.0.1
TIDB_ON_QDRANT_API_KEY=dify
TIDB_ON_QDRANT_CLIENT_TIMEOUT=20
TIDB_ON_QDRANT_GRPC_ENABLED=false
TIDB_ON_QDRANT_GRPC_PORT=6334
TIDB_PUBLIC_KEY=dify
TIDB_PRIVATE_KEY=dify
TIDB_API_URL=http://127.0.0.1
TIDB_IAM_API_URL=http://127.0.0.1
TIDB_REGION=regions/aws-us-east-1
TIDB_PROJECT_ID=dify
TIDB_SPEND_LIMIT=100

# Chroma configuration
CHROMA_HOST=127.0.0.1
CHROMA_PORT=8000
CHROMA_TENANT=default_tenant
CHROMA_DATABASE=default_database
CHROMA_AUTH_PROVIDER=chromadb.auth.token_authn.TokenAuthenticationServerProvider
CHROMA_AUTH_CREDENTIALS=difyai123456

# AnalyticDB configuration
ANALYTICDB_KEY_ID=your-ak
ANALYTICDB_KEY_SECRET=your-sk
ANALYTICDB_REGION_ID=cn-hangzhou
ANALYTICDB_INSTANCE_ID=gp-ab123456
ANALYTICDB_ACCOUNT=testaccount
ANALYTICDB_PASSWORD=testpassword
ANALYTICDB_NAMESPACE=dify
ANALYTICDB_NAMESPACE_PASSWORD=difypassword
ANALYTICDB_HOST=gp-test.aliyuncs.com
ANALYTICDB_PORT=5432
ANALYTICDB_MIN_CONNECTION=1
ANALYTICDB_MAX_CONNECTION=5

# OpenSearch configuration
OPENSEARCH_HOST=127.0.0.1
OPENSEARCH_PORT=9200
OPENSEARCH_USER=admin
OPENSEARCH_PASSWORD=admin
OPENSEARCH_SECURE=true
OPENSEARCH_VERIFY_CERTS=true

# Baidu configuration
BAIDU_VECTOR_DB_ENDPOINT=http://127.0.0.1:5287
BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS=30000
BAIDU_VECTOR_DB_ACCOUNT=root
BAIDU_VECTOR_DB_API_KEY=dify
BAIDU_VECTOR_DB_DATABASE=dify
BAIDU_VECTOR_DB_SHARD=1
BAIDU_VECTOR_DB_REPLICAS=3

# Upstash configuration
UPSTASH_VECTOR_URL=your-server-url
UPSTASH_VECTOR_TOKEN=your-access-token

# VikingDB configuration
VIKINGDB_ACCESS_KEY=your-ak
VIKINGDB_SECRET_KEY=your-sk
VIKINGDB_REGION=cn-shanghai
VIKINGDB_HOST=api-vikingdb.xxx.volces.com
VIKINGDB_SCHEMA=http
VIKINGDB_CONNECTION_TIMEOUT=30
VIKINGDB_SOCKET_TIMEOUT=30

# Matrixone configuration
MATRIXONE_HOST=127.0.0.1
MATRIXONE_PORT=6001
MATRIXONE_USER=dump
MATRIXONE_PASSWORD=111
MATRIXONE_DATABASE=dify

# Lindorm configuration
LINDORM_URL=http://ld-*******************-proxy-search-pub.lindorm.aliyuncs.com:30070
LINDORM_USERNAME=admin
LINDORM_PASSWORD=admin
USING_UGC_INDEX=False
LINDORM_QUERY_TIMEOUT=1

# OceanBase Vector configuration
OCEANBASE_VECTOR_HOST=127.0.0.1
OCEANBASE_VECTOR_PORT=2881
OCEANBASE_VECTOR_USER=root@test
OCEANBASE_VECTOR_PASSWORD=difyai123456
OCEANBASE_VECTOR_DATABASE=test
OCEANBASE_MEMORY_LIMIT=6G
OCEANBASE_ENABLE_HYBRID_SEARCH=false

# openGauss configuration
OPENGAUSS_HOST=127.0.0.1
OPENGAUSS_PORT=6600
OPENGAUSS_USER=postgres
OPENGAUSS_PASSWORD=Dify@123
OPENGAUSS_DATABASE=dify
OPENGAUSS_MIN_CONNECTION=1
OPENGAUSS_MAX_CONNECTION=5

# Upload configuration
UPLOAD_FILE_SIZE_LIMIT=15
UPLOAD_FILE_BATCH_LIMIT=5
UPLOAD_IMAGE_FILE_SIZE_LIMIT=10
UPLOAD_VIDEO_FILE_SIZE_LIMIT=100
UPLOAD_AUDIO_FILE_SIZE_LIMIT=50

# Model configuration
MULTIMODAL_SEND_FORMAT=base64
PROMPT_GENERATION_MAX_TOKENS=512
CODE_GENERATION_MAX_TOKENS=1024
PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false


# Pinecone configuration, only available when VECTOR_STORE is `pinecone`
PINECONE_API_KEY=your-pinecone-api-key
PINECONE_ENVIRONMENT=your-pinecone-environment
PINECONE_INDEX_NAME=dify-index
PINECONE_CLIENT_TIMEOUT=30
PINECONE_BATCH_SIZE=100
PINECONE_METRIC=cosine
PINECONE_PODS=1
PINECONE_POD_TYPE=s1

# Mail configuration, supported types: resend, smtp, sendgrid
MAIL_TYPE=
# If using SendGrid, use the 'from' field for authentication if necessary.
MAIL_DEFAULT_SEND_FROM=no-reply <no-reply@dify.ai>
# Resend configuration
RESEND_API_KEY=
RESEND_API_URL=https://api.resend.com
# SMTP configuration
SMTP_SERVER=smtp.gmail.com
SMTP_PORT=465
SMTP_USERNAME=123
SMTP_PASSWORD=abc
SMTP_USE_TLS=true
SMTP_OPPORTUNISTIC_TLS=false
# SendGrid configuration
SENDGRID_API_KEY=
# Sentry configuration
SENTRY_DSN=

# DEBUG
DEBUG=false
ENABLE_REQUEST_LOGGING=False
SQLALCHEMY_ECHO=false

# Notion import configuration, supports public and internal integrations
NOTION_INTEGRATION_TYPE=public
NOTION_CLIENT_SECRET=your-client-secret
NOTION_CLIENT_ID=your-client-id
NOTION_INTERNAL_SECRET=your-internal-secret

ETL_TYPE=dify
UNSTRUCTURED_API_URL=
UNSTRUCTURED_API_KEY=
SCARF_NO_ANALYTICS=true

# SSRF proxy configuration
SSRF_PROXY_HTTP_URL=
SSRF_PROXY_HTTPS_URL=
SSRF_DEFAULT_MAX_RETRIES=3
SSRF_DEFAULT_TIME_OUT=5
SSRF_DEFAULT_CONNECT_TIME_OUT=5
SSRF_DEFAULT_READ_TIME_OUT=5
SSRF_DEFAULT_WRITE_TIME_OUT=5

BATCH_UPLOAD_LIMIT=10
KEYWORD_DATA_SOURCE_TYPE=database

# Workflow file upload limit
WORKFLOW_FILE_UPLOAD_LIMIT=10

# CODE EXECUTION CONFIGURATION
CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194
CODE_EXECUTION_API_KEY=dify-sandbox
CODE_MAX_NUMBER=9223372036854775807
CODE_MIN_NUMBER=-9223372036854775808
CODE_MAX_STRING_LENGTH=80000
TEMPLATE_TRANSFORM_MAX_LENGTH=80000
CODE_MAX_STRING_ARRAY_LENGTH=30
CODE_MAX_OBJECT_ARRAY_LENGTH=30
CODE_MAX_NUMBER_ARRAY_LENGTH=1000

# API Tool configuration
API_TOOL_DEFAULT_CONNECT_TIMEOUT=10
API_TOOL_DEFAULT_READ_TIMEOUT=60

# HTTP Node configuration
HTTP_REQUEST_MAX_CONNECT_TIMEOUT=300
HTTP_REQUEST_MAX_READ_TIMEOUT=600
HTTP_REQUEST_MAX_WRITE_TIMEOUT=600
HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576
HTTP_REQUEST_NODE_SSL_VERIFY=True

# Respect X-* headers to redirect clients
RESPECT_XFORWARD_HEADERS_ENABLED=false

# Log file path
LOG_FILE=
# Log file max size, the unit is MB
LOG_FILE_MAX_SIZE=20
# Log file max backup count
LOG_FILE_BACKUP_COUNT=5
# Log dateformat
LOG_DATEFORMAT=%Y-%m-%d %H:%M:%S
# Log Timezone
LOG_TZ=UTC
# Log format
LOG_FORMAT=%(asctime)s,%(msecs)d %(levelname)-2s [%(filename)s:%(lineno)d] %(req_id)s %(message)s

# Indexing configuration
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000

# Workflow runtime configuration
WORKFLOW_MAX_EXECUTION_STEPS=500
WORKFLOW_MAX_EXECUTION_TIME=1200
WORKFLOW_CALL_MAX_DEPTH=5
WORKFLOW_PARALLEL_DEPTH_LIMIT=3
MAX_VARIABLE_SIZE=204800

# Workflow storage configuration
# Options: rdbms, hybrid
# rdbms: Use only the relational database (default)
# hybrid: Save new data to object storage, read from both object storage and RDBMS
WORKFLOW_NODE_EXECUTION_STORAGE=rdbms

# Repository configuration
# Core workflow execution repository implementation
CORE_WORKFLOW_EXECUTION_REPOSITORY=core.repositories.sqlalchemy_workflow_execution_repository.SQLAlchemyWorkflowExecutionRepository

# Core workflow node execution repository implementation
CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY=core.repositories.sqlalchemy_workflow_node_execution_repository.SQLAlchemyWorkflowNodeExecutionRepository

# API workflow node execution repository implementation
API_WORKFLOW_NODE_EXECUTION_REPOSITORY=repositories.sqlalchemy_api_workflow_node_execution_repository.DifyAPISQLAlchemyWorkflowNodeExecutionRepository

# API workflow run repository implementation
API_WORKFLOW_RUN_REPOSITORY=repositories.sqlalchemy_api_workflow_run_repository.DifyAPISQLAlchemyWorkflowRunRepository

# Workflow log cleanup configuration
# Enable automatic cleanup of workflow run logs to manage database size
WORKFLOW_LOG_CLEANUP_ENABLED=true
# Number of days to retain workflow run logs (default: 30 days)
WORKFLOW_LOG_RETENTION_DAYS=30
# Batch size for workflow log cleanup operations (default: 100)
WORKFLOW_LOG_CLEANUP_BATCH_SIZE=100

# App configuration
APP_MAX_EXECUTION_TIME=1200
APP_MAX_ACTIVE_REQUESTS=0

# Celery beat configuration
CELERY_BEAT_SCHEDULER_TIME=1

# Celery schedule tasks configuration
ENABLE_CLEAN_EMBEDDING_CACHE_TASK=false
ENABLE_CLEAN_UNUSED_DATASETS_TASK=false
ENABLE_CREATE_TIDB_SERVERLESS_TASK=false
ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK=false
ENABLE_CLEAN_MESSAGES=false
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK=false
ENABLE_DATASETS_QUEUE_MONITOR=false
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK=true

# Position configuration
POSITION_TOOL_PINS=
POSITION_TOOL_INCLUDES=
POSITION_TOOL_EXCLUDES=

POSITION_PROVIDER_PINS=
POSITION_PROVIDER_INCLUDES=
POSITION_PROVIDER_EXCLUDES=

# Plugin configuration
PLUGIN_DAEMON_KEY=lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi
PLUGIN_DAEMON_URL=http://127.0.0.1:5002
PLUGIN_REMOTE_INSTALL_PORT=5003
PLUGIN_REMOTE_INSTALL_HOST=localhost
PLUGIN_MAX_PACKAGE_SIZE=15728640
INNER_API_KEY_FOR_PLUGIN=QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1

# Marketplace configuration
MARKETPLACE_ENABLED=true
MARKETPLACE_API_URL=https://marketplace.dify.ai

# Endpoint configuration
ENDPOINT_URL_TEMPLATE=http://localhost:5002/e/{hook_id}

# Reset password token expiry minutes
RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5
CHANGE_EMAIL_TOKEN_EXPIRY_MINUTES=5
OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES=5

CREATE_TIDB_SERVICE_JOB_ENABLED=false

# Maximum number of submitted thread count in a ThreadPool for parallel node execution
MAX_SUBMIT_COUNT=100
# Lockout duration in seconds
LOGIN_LOCKOUT_DURATION=86400

# Enable OpenTelemetry
ENABLE_OTEL=false
OTLP_TRACE_ENDPOINT=
OTLP_METRIC_ENDPOINT=
OTLP_BASE_ENDPOINT=http://localhost:4318
OTLP_API_KEY=
OTEL_EXPORTER_OTLP_PROTOCOL=
OTEL_EXPORTER_TYPE=otlp
OTEL_SAMPLING_RATE=0.1
OTEL_BATCH_EXPORT_SCHEDULE_DELAY=5000
OTEL_MAX_QUEUE_SIZE=2048
OTEL_MAX_EXPORT_BATCH_SIZE=512
OTEL_METRIC_EXPORT_INTERVAL=60000
OTEL_BATCH_EXPORT_TIMEOUT=10000
OTEL_METRIC_EXPORT_TIMEOUT=30000

# Prevent Clickjacking
ALLOW_EMBED=false

# Dataset queue monitor configuration
QUEUE_MONITOR_THRESHOLD=200
# You can configure multiple ones, separated by commas. eg: test1@dify.ai,test2@dify.ai
QUEUE_MONITOR_ALERT_EMAILS=
# Monitor interval in minutes, default is 30 minutes
QUEUE_MONITOR_INTERVAL=30

# Swagger UI configuration
SWAGGER_UI_ENABLED=true
SWAGGER_UI_PATH=/swagger-ui.html
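The SECRET_KEY comment near the top of this file recommends `openssl rand -base64 42`. A Python-only equivalent, a minimal sketch using just the standard library (not part of this commit), would be:

import base64
import secrets

# 42 random bytes, base64-encoded -- the same amount of entropy as `openssl rand -base64 42`
print(base64.b64encode(secrets.token_bytes(42)).decode())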
@ -156,7 +156,7 @@ WEB_API_CORS_ALLOW_ORIGINS=http://localhost:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,*

# Vector database configuration
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `pinecone`.
VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index

@ -361,6 +361,17 @@ PROMPT_GENERATION_MAX_TOKENS=512
CODE_GENERATION_MAX_TOKENS=1024
PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false


# Pinecone configuration, only available when VECTOR_STORE is `pinecone`
PINECONE_API_KEY=your-pinecone-api-key
PINECONE_ENVIRONMENT=your-pinecone-environment
PINECONE_INDEX_NAME=dify-index
PINECONE_CLIENT_TIMEOUT=30
PINECONE_BATCH_SIZE=100
PINECONE_METRIC=cosine
PINECONE_PODS=1
PINECONE_POD_TYPE=s1

# Mail configuration, supported types: resend, smtp, sendgrid
MAIL_TYPE=
# If using SendGrid, use the 'from' field for authentication if necessary.
@ -74,9 +74,13 @@ class DifyConfig(
    # **Before using, please contact business@dify.ai by email to inquire about licensing matters.**
    EnterpriseFeatureConfig,
):
    # Get the project root directory (parent of api directory)
    _project_root = Path(__file__).parent.parent.parent
    _env_file = _project_root / "api" / ".env"

    model_config = SettingsConfigDict(
        # read from dotenv format config file
        env_file=".env",
        env_file=str(_env_file),
        env_file_encoding="utf-8",
        # ignore extra attributes
        extra="ignore",
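For reference, a small sketch of how the new _env_file path resolves. The concrete module location used below is an assumption based on the parent.parent.parent chain and the "parent of api directory" comment; it is not stated in this diff:

from pathlib import Path

# Hypothetical module location: <repo>/api/configs/app_config.py
module_file = Path("/repo/api/configs/app_config.py")
project_root = module_file.parent.parent.parent  # -> /repo
env_file = project_root / "api" / ".env"         # -> /repo/api/.env
print(env_file)  # /repo/api/.env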
@ -35,6 +35,7 @@ from .vdb.opensearch_config import OpenSearchConfig
from .vdb.oracle_config import OracleConfig
from .vdb.pgvector_config import PGVectorConfig
from .vdb.pgvectors_config import PGVectoRSConfig
from .vdb.pinecone_config import PineconeConfig
from .vdb.qdrant_config import QdrantConfig
from .vdb.relyt_config import RelytConfig
from .vdb.tablestore_config import TableStoreConfig
@ -331,6 +332,7 @@ class MiddlewareConfig(
    PGVectorConfig,
    VastbaseVectorConfig,
    PGVectoRSConfig,
    PineconeConfig,
    QdrantConfig,
    RelytConfig,
    TencentVectorDBConfig,
@ -0,0 +1,41 @@
from typing import Optional

from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class PineconeConfig(BaseSettings):
    """
    Configuration settings for Pinecone vector database
    """

    PINECONE_API_KEY: Optional[str] = Field(
        description="API key for authenticating with Pinecone service",
        default=None,
    )

    PINECONE_ENVIRONMENT: Optional[str] = Field(
        description="Pinecone environment (e.g., 'us-west1-gcp', 'us-east-1-aws')",
        default=None,
    )

    PINECONE_INDEX_NAME: Optional[str] = Field(
        description="Default Pinecone index name",
        default=None,
    )

    PINECONE_CLIENT_TIMEOUT: PositiveInt = Field(
        description="Timeout in seconds for Pinecone client operations (default is 30 seconds)",
        default=30,
    )

    PINECONE_BATCH_SIZE: PositiveInt = Field(
        description="Batch size for Pinecone operations (default is 100)",
        default=100,
    )

    PINECONE_METRIC: str = Field(
        description="Distance metric for Pinecone index (cosine, euclidean, dotproduct)",
        default="cosine",
    )
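A minimal sketch (not part of this commit) of how a pydantic-settings class like the one above picks up the PINECONE_* variables from the environment; the trimmed class below is a copy for demonstration only, and the variable value is a placeholder:

import os
from typing import Optional

from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class PineconeConfigDemo(BaseSettings):
    # Trimmed copy of the settings above; field names double as environment variable names.
    PINECONE_API_KEY: Optional[str] = Field(default=None)
    PINECONE_BATCH_SIZE: PositiveInt = Field(default=100)
    PINECONE_METRIC: str = Field(default="cosine")


os.environ["PINECONE_API_KEY"] = "your-pinecone-api-key"  # normally exported via the .env file above

config = PineconeConfigDemo()
print(config.PINECONE_API_KEY)     # 'your-pinecone-api-key'
print(config.PINECONE_BATCH_SIZE)  # 100 (default, variable not set)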
@ -660,6 +660,7 @@ class DatasetRetrievalSettingApi(Resource):
                | VectorType.BAIDU
                | VectorType.VIKINGDB
                | VectorType.UPSTASH
                | VectorType.PINECONE
            ):
                return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
            case (
@ -0,0 +1,285 @@
import json
import time
import uuid
from typing import Any, Optional, Union

from pinecone import Pinecone, ServerlessSpec
from pydantic import BaseModel

from configs import dify_config
from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.dataset import Dataset, DatasetCollectionBinding


class PineconeConfig(BaseModel):
    """Pinecone configuration class"""

    api_key: str
    environment: str
    index_name: Optional[str] = None
    timeout: float = 30
    batch_size: int = 100
    metric: str = "cosine"


class PineconeVector(BaseVector):
    """Pinecone vector database concrete implementation class"""

    def __init__(self, collection_name: str, group_id: str, config: PineconeConfig):
        super().__init__(collection_name)
        self._client_config = config
        self._group_id = group_id

        # Initialize Pinecone client
        self._pc = Pinecone(api_key=config.api_key)

        # Use collection_name as index name
        self._index_name = collection_name
        self._index = None

    def get_type(self) -> str:
        """Return vector database type identifier"""
        return "pinecone"

    def to_index_struct(self) -> dict:
        """Generate index structure dictionary"""
        return {
            "type": self.get_type(),
            "vector_store": {"class_prefix": self._collection_name},
        }

    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        """Create vector index"""
        if texts:
            # Get vector dimension
            vector_size = len(embeddings[0])

            # Create Pinecone index
            self.create_index(vector_size)

            # Add vector data
            self.add_texts(texts, embeddings, **kwargs)

    def create_index(self, dimension: int):
        """Create Pinecone index"""
        lock_name = f"vector_indexing_lock_{self._index_name}"

        with redis_client.lock(lock_name, timeout=30):
            # Check Redis cache
            index_exist_cache_key = f"vector_indexing_{self._index_name}"
            if redis_client.get(index_exist_cache_key):
                self._index = self._pc.Index(self._index_name)
                return

            # Check if index already exists
            existing_indexes = self._pc.list_indexes().names()

            if self._index_name not in existing_indexes:
                # Create new index using ServerlessSpec
                self._pc.create_index(
                    name=self._index_name,
                    dimension=dimension,
                    metric=self._client_config.metric,
                    spec=ServerlessSpec(
                        cloud='aws',
                        region=self._client_config.environment
                    )
                )

                # Wait for index creation to complete
                while not self._pc.describe_index(self._index_name).status['ready']:
                    time.sleep(1)

            # Get index instance
            self._index = self._pc.Index(self._index_name)

            # Set cache
            redis_client.set(index_exist_cache_key, 1, ex=3600)

    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        """Batch add document vectors"""
        if not self._index:
            raise ValueError("Index not initialized. Call create() first.")

        uuids = self._get_uuids(documents)
        batch_size = self._client_config.batch_size
        added_ids = []

        # Batch processing
        for i in range(0, len(documents), batch_size):
            batch_documents = documents[i:i + batch_size]
            batch_embeddings = embeddings[i:i + batch_size]
            batch_uuids = uuids[i:i + batch_size]

            # Build Pinecone vector data
            vectors_to_upsert = []
            for doc, embedding, doc_id in zip(batch_documents, batch_embeddings, batch_uuids):
                metadata = {
                    Field.CONTENT_KEY.value: doc.page_content,
                    Field.METADATA_KEY.value: doc.metadata or {},
                    Field.GROUP_KEY.value: self._group_id,
                }

                vectors_to_upsert.append({
                    "id": doc_id,
                    "values": embedding,
                    "metadata": metadata
                })

            # Batch insert to Pinecone
            self._index.upsert(vectors=vectors_to_upsert)
            added_ids.extend(batch_uuids)

        return added_ids

    def search_by_vector(self, query_vector: list[float], **kwargs) -> list[Document]:
        """Vector similarity search"""
        if not self._index:
            raise ValueError("Index not initialized.")

        top_k = kwargs.get("top_k", 4)
        score_threshold = float(kwargs.get("score_threshold", 0.0))

        # Build filter conditions
        filter_dict = {Field.GROUP_KEY.value: {"$eq": self._group_id}}

        # Document scope filtering
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
            filter_dict[f"{Field.METADATA_KEY.value}.document_id"] = {"$in": document_ids_filter}

        # Execute search
        response = self._index.query(
            vector=query_vector,
            top_k=top_k,
            include_metadata=True,
            filter=filter_dict
        )

        # Convert results
        docs = []
        for match in response.matches:
            if match.score >= score_threshold:
                metadata = match.metadata.get(Field.METADATA_KEY.value, {})
                metadata["score"] = match.score

                doc = Document(
                    page_content=match.metadata.get(Field.CONTENT_KEY.value, ""),
                    metadata=metadata,
                )
                docs.append(doc)

        # Sort by similarity score in descending order
        docs.sort(key=lambda x: x.metadata.get("score", 0), reverse=True)
        return docs

    def search_by_full_text(self, query: str, **kwargs) -> list[Document]:
        """Full-text search - Pinecone does not natively support it, returns empty list"""
        return []

    def delete_by_metadata_field(self, key: str, value: str):
        """Delete by metadata field"""
        if not self._index:
            return

        try:
            # Build filter conditions
            filter_dict = {
                Field.GROUP_KEY.value: {"$eq": self._group_id},
                f"{Field.METADATA_KEY.value}.{key}": {"$eq": value}
            }

            # Pinecone delete operation
            self._index.delete(filter=filter_dict)
        except Exception:
            # Ignore delete errors
            pass

    def delete_by_ids(self, ids: list[str]) -> None:
        """Batch delete by ID list"""
        if not self._index:
            return

        try:
            # Pinecone delete by ID
            self._index.delete(ids=ids)
        except Exception:
            # Ignore delete errors
            pass

    def delete(self) -> None:
        """Delete all vector data for the entire dataset"""
        if not self._index:
            return

        try:
            # Delete all vectors by group_id
            filter_dict = {Field.GROUP_KEY.value: {"$eq": self._group_id}}
            self._index.delete(filter=filter_dict)
        except Exception:
            # Ignore delete errors
            pass

    def text_exists(self, id: str) -> bool:
        """Check if document exists"""
        if not self._index:
            return False

        try:
            # Check if vector exists through query
            response = self._index.fetch(ids=[id])
            return id in response.vectors
        except Exception:
            return False


class PineconeVectorFactory(AbstractVectorFactory):
    """Pinecone vector database factory class"""

    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> PineconeVector:
        """Create PineconeVector instance"""

        # Determine index name
        if dataset.collection_binding_id:
            dataset_collection_binding = (
                db.session.query(DatasetCollectionBinding)
                .where(DatasetCollectionBinding.id == dataset.collection_binding_id)
                .one_or_none()
            )
            if dataset_collection_binding:
                collection_name = dataset_collection_binding.collection_name
            else:
                raise ValueError("Dataset Collection Bindings does not exist!")
        else:
            if dataset.index_struct_dict:
                class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
                collection_name = class_prefix
            else:
                dataset_id = dataset.id
                collection_name = Dataset.gen_collection_name_by_id(dataset_id)

        # Set index structure
        if not dataset.index_struct_dict:
            dataset.index_struct = json.dumps(
                self.gen_index_struct_dict("pinecone", collection_name)
            )

        # Create PineconeVector instance
        return PineconeVector(
            collection_name=collection_name,
            group_id=dataset.id,
            config=PineconeConfig(
                api_key=dify_config.PINECONE_API_KEY or "",
                environment=dify_config.PINECONE_ENVIRONMENT or "",
                index_name=dify_config.PINECONE_INDEX_NAME,
                timeout=dify_config.PINECONE_CLIENT_TIMEOUT,
                batch_size=dify_config.PINECONE_BATCH_SIZE,
                metric=dify_config.PINECONE_METRIC,
            ),
        )
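A minimal usage sketch for the class above (not part of this commit). It assumes a valid Pinecone API key, a reachable Redis instance (create_index uses redis_client for locking and caching), and embeddings whose dimension matches the index; all values below are placeholders:

from core.rag.datasource.vdb.pinecone.pinecone_vector import PineconeConfig, PineconeVector
from core.rag.models.document import Document

vector = PineconeVector(
    collection_name="vector_index_demo",  # also used as the Pinecone index name (hypothetical)
    group_id="dataset-id",                # stored in metadata and used as a search filter
    config=PineconeConfig(api_key="your-pinecone-api-key", environment="us-east-1"),
)

docs = [Document(page_content="hello dify", metadata={"document_id": "doc-1"})]
embeddings = [[0.1, 0.2, 0.3, 0.4]]  # toy 4-dimensional embeddings

vector.create(docs, embeddings)  # creates the serverless index, then upserts the vectors
hits = vector.search_by_vector([0.1, 0.2, 0.3, 0.4], top_k=1, score_threshold=0.0)
print([doc.page_content for doc in hits])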
@ -86,6 +86,10 @@ class Vector:
                from core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs import PGVectoRSFactory

                return PGVectoRSFactory
            case VectorType.PINECONE:
                from core.rag.datasource.vdb.pinecone.pinecone_vector import PineconeVectorFactory

                return PineconeVectorFactory
            case VectorType.QDRANT:
                from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantVectorFactory
@ -31,3 +31,4 @@ class VectorType(StrEnum):
    HUAWEI_CLOUD = "huawei_cloud"
    MATRIXONE = "matrixone"
    CLICKZETTA = "clickzetta"
    PINECONE = "pinecone"
@ -10,6 +10,23 @@ from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document


def _format_cell_value(value) -> str:
    if pd.isna(value):
        return ""

    if isinstance(value, (int, float)):
        if isinstance(value, float):
            if value.is_integer():
                return str(int(value))
            else:
                formatted = f"{value:f}"
                return formatted.rstrip('0').rstrip('.')
        else:
            return str(value)

    return str(value)


class ExcelExtractor(BaseExtractor):
    """Load Excel files.
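A short standalone demo (not part of this commit) of what the helper above changes compared with plain str(); the function body is copied verbatim so the snippet runs on its own:

import pandas as pd


def _format_cell_value(value) -> str:
    if pd.isna(value):
        return ""

    if isinstance(value, (int, float)):
        if isinstance(value, float):
            if value.is_integer():
                return str(int(value))
            else:
                formatted = f"{value:f}"
                return formatted.rstrip('0').rstrip('.')
        else:
            return str(value)

    return str(value)


print(str(1e16))                         # '1e+16'  (scientific notation from plain str())
print(_format_cell_value(1e16))          # '10000000000000000'
print(_format_cell_value(2.5))           # '2.5'
print(_format_cell_value(float("nan")))  # ''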
@ -49,10 +66,12 @@ class ExcelExtractor(BaseExtractor):
                            row=cast(int, index) + 2, column=col_index + 1
                        )  # +2 to account for header and 1-based index
                        if cell.hyperlink:
                            value = f"[{v}]({cell.hyperlink.target})"
                            formatted_v = _format_cell_value(v)
                            value = f"[{formatted_v}]({cell.hyperlink.target})"
                            page_content.append(f'"{k}":"{value}"')
                        else:
                            page_content.append(f'"{k}":"{v}"')
                            formatted_v = _format_cell_value(v)
                            page_content.append(f'"{k}":"{formatted_v}"')
                documents.append(
                    Document(page_content=";".join(page_content), metadata={"source": self._file_path})
                )
@ -67,7 +86,8 @@ class ExcelExtractor(BaseExtractor):
                page_content = []
                for k, v in row.items():
                    if pd.notna(v):
                        page_content.append(f'"{k}":"{v}"')
                        formatted_v = _format_cell_value(v)
                        page_content.append(f'"{k}":"{formatted_v}"')
                documents.append(
                    Document(page_content=";".join(page_content), metadata={"source": self._file_path})
                )
@ -485,6 +485,24 @@ def _extract_text_from_csv(file_content: bytes) -> str:
        raise TextExtractionError(f"Failed to extract text from CSV: {str(e)}") from e


def _format_cell_value_for_markdown(value) -> str:
    """Format a cell value as a string, avoiding scientific notation."""
    if pd.isna(value):
        return ""

    if isinstance(value, (int, float)):
        if isinstance(value, float):
            if value.is_integer():
                return str(int(value))
            else:
                formatted = f"{value:f}"
                return formatted.rstrip('0').rstrip('.')
        else:
            return str(value)

    return str(value)


def _extract_text_from_excel(file_content: bytes) -> str:
    """Extract text from an Excel file using pandas."""
@ -499,7 +517,8 @@ def _extract_text_from_excel(file_content: bytes) -> str:
            # Construct the data rows
            data_rows = []
            for _, row in df.iterrows():
                data_row = "| " + " | ".join(map(str, row)) + " |"
                formatted_row = [_format_cell_value_for_markdown(cell) for cell in row]
                data_row = "| " + " | ".join(formatted_row) + " |"
                data_rows.append(data_row)

            # Combine all rows into a single string
@ -88,6 +88,7 @@ dependencies = [
    "httpx-sse>=0.4.0",
    "sendgrid~=6.12.3",
    "flask-restx>=1.3.0",
    "pinecone>=7.3.0",
]
# Before adding a new dependency, consider placing it in
# alphabetical order (a-z) and in a suitable group.
@ -0,0 +1,30 @@
from core.rag.datasource.vdb.pinecone.pinecone_vector import PineconeConfig, PineconeVector
from core.rag.models.document import Document
from tests.integration_tests.vdb.test_vector_store import (
    AbstractVectorTest,
    setup_mock_redis,
)


class PineconeVectorTest(AbstractVectorTest):
    def __init__(self):
        super().__init__()
        self.attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"]
        self.vector = PineconeVector(
            collection_name=self.collection_name,
            group_id=self.dataset_id,
            config=PineconeConfig(
                api_key="test_api_key",
                environment="test_environment",
                index_name="test_index",
            ),
        )

    def search_by_vector(self):
        super().search_by_vector()


def test_pinecone_vector(setup_mock_redis):
    PineconeVectorTest().run_all_tests()
4290 api/uv.lock
File diff suppressed because it is too large