diff --git a/api/configs/middleware/vdb/oceanbase_config.py b/api/configs/middleware/vdb/oceanbase_config.py index 99f4c49407..7c9376f86b 100644 --- a/api/configs/middleware/vdb/oceanbase_config.py +++ b/api/configs/middleware/vdb/oceanbase_config.py @@ -40,8 +40,12 @@ class OceanBaseVectorConfig(BaseSettings): OCEANBASE_FULLTEXT_PARSER: str | None = Field( description=( - "Fulltext parser to use for text indexing. Options: 'japanese_ftparser' (Japanese), " - "'thai_ftparser' (Thai), 'ik' (Chinese). Default is 'ik'" + "Fulltext parser to use for text indexing. " + "Built-in options: 'ngram' (N-gram tokenizer for English/numbers), " + "'beng' (Basic English tokenizer), 'space' (Space-based tokenizer), " + "'ngram2' (Improved N-gram tokenizer), 'ik' (Chinese tokenizer). " + "External plugins (require installation): 'japanese_ftparser' (Japanese tokenizer), " + "'thai_ftparser' (Thai tokenizer). Default is 'ik'" ), default="ik", ) diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index 49cf900126..b3db7332e8 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -123,7 +123,7 @@ class OceanBaseVector(BaseVector): # Get parser from config or use default ik parser parser_name = dify_config.OCEANBASE_FULLTEXT_PARSER or "ik" - allowed_parsers = ["ik", "japanese_ftparser", "thai_ftparser"] + allowed_parsers = ["ngram", "beng", "space", "ngram2", "ik", "japanese_ftparser", "thai_ftparser"] if parser_name not in allowed_parsers: raise ValueError( f"Invalid OceanBase full-text parser: {parser_name}. " diff --git a/docker/.env.example b/docker/.env.example index d4e8ab3beb..07b4088470 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -655,6 +655,8 @@ LINDORM_USING_UGC=True LINDORM_QUERY_TIMEOUT=1 # OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase` +# Built-in fulltext parsers are `ngram`, `beng`, `space`, `ngram2`, `ik` +# External fulltext parsers (require plugin installation) are `japanese_ftparser`, `thai_ftparser` OCEANBASE_VECTOR_HOST=oceanbase OCEANBASE_VECTOR_PORT=2881 OCEANBASE_VECTOR_USER=root@test