From efa1b452da104b70f5f190a00170eafe3b574b77 Mon Sep 17 00:00:00 2001 From: Nan LI Date: Tue, 9 Dec 2025 05:00:19 -0800 Subject: [PATCH] feat: Add startup parameters for language-specific Weaviate tokenizer (#29347) Co-authored-by: Jing --- docker/.env.example | 3 +++ docker/docker-compose-template.yaml | 3 +++ docker/docker-compose.yaml | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/docker/.env.example b/docker/.env.example index 80e87425c1..85e8b1dc7f 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -1129,6 +1129,9 @@ WEAVIATE_AUTHENTICATION_APIKEY_USERS=hello@dify.ai WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED=true WEAVIATE_AUTHORIZATION_ADMINLIST_USERS=hello@dify.ai WEAVIATE_DISABLE_TELEMETRY=false +WEAVIATE_ENABLE_TOKENIZER_GSE=false +WEAVIATE_ENABLE_TOKENIZER_KAGOME_JA=false +WEAVIATE_ENABLE_TOKENIZER_KAGOME_KR=false # ------------------------------ # Environment Variables for Chroma diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index f1061ef5f9..3c01274ce8 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -451,6 +451,9 @@ services: AUTHORIZATION_ADMINLIST_ENABLED: ${WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED:-true} AUTHORIZATION_ADMINLIST_USERS: ${WEAVIATE_AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} DISABLE_TELEMETRY: ${WEAVIATE_DISABLE_TELEMETRY:-false} + ENABLE_TOKENIZER_GSE: ${WEAVIATE_ENABLE_TOKENIZER_GSE:-false} + ENABLE_TOKENIZER_KAGOME_JA: ${WEAVIATE_ENABLE_TOKENIZER_KAGOME_JA:-false} + ENABLE_TOKENIZER_KAGOME_KR: ${WEAVIATE_ENABLE_TOKENIZER_KAGOME_KR:-false} # OceanBase vector database oceanbase: diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 3e416c36c9..809aa1f841 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -479,6 +479,9 @@ x-shared-env: &shared-api-worker-env WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED: ${WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED:-true} WEAVIATE_AUTHORIZATION_ADMINLIST_USERS: ${WEAVIATE_AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} WEAVIATE_DISABLE_TELEMETRY: ${WEAVIATE_DISABLE_TELEMETRY:-false} + WEAVIATE_ENABLE_TOKENIZER_GSE: ${WEAVIATE_ENABLE_TOKENIZER_GSE:-false} + WEAVIATE_ENABLE_TOKENIZER_KAGOME_JA: ${WEAVIATE_ENABLE_TOKENIZER_KAGOME_JA:-false} + WEAVIATE_ENABLE_TOKENIZER_KAGOME_KR: ${WEAVIATE_ENABLE_TOKENIZER_KAGOME_KR:-false} CHROMA_SERVER_AUTHN_CREDENTIALS: ${CHROMA_SERVER_AUTHN_CREDENTIALS:-difyai123456} CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider} CHROMA_IS_PERSISTENT: ${CHROMA_IS_PERSISTENT:-TRUE} @@ -1085,6 +1088,9 @@ services: AUTHORIZATION_ADMINLIST_ENABLED: ${WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED:-true} AUTHORIZATION_ADMINLIST_USERS: ${WEAVIATE_AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} DISABLE_TELEMETRY: ${WEAVIATE_DISABLE_TELEMETRY:-false} + ENABLE_TOKENIZER_GSE: ${WEAVIATE_ENABLE_TOKENIZER_GSE:-false} + ENABLE_TOKENIZER_KAGOME_JA: ${WEAVIATE_ENABLE_TOKENIZER_KAGOME_JA:-false} + ENABLE_TOKENIZER_KAGOME_KR: ${WEAVIATE_ENABLE_TOKENIZER_KAGOME_KR:-false} # OceanBase vector database oceanbase: