mirror of https://github.com/langgenius/dify.git
Feat/add weaviate tokenization configurable (#28159)
Co-authored-by: lijiezhao <lijiezhao@perfect99.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
b5650b579d
commit
f76a3f545c
|
|
@ -176,6 +176,7 @@ WEAVIATE_ENDPOINT=http://localhost:8080
|
||||||
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
||||||
WEAVIATE_GRPC_ENABLED=false
|
WEAVIATE_GRPC_ENABLED=false
|
||||||
WEAVIATE_BATCH_SIZE=100
|
WEAVIATE_BATCH_SIZE=100
|
||||||
|
WEAVIATE_TOKENIZATION=word
|
||||||
|
|
||||||
# OceanBase Vector configuration
|
# OceanBase Vector configuration
|
||||||
OCEANBASE_VECTOR_HOST=127.0.0.1
|
OCEANBASE_VECTOR_HOST=127.0.0.1
|
||||||
|
|
|
||||||
|
|
@ -31,3 +31,8 @@ class WeaviateConfig(BaseSettings):
|
||||||
description="Number of objects to be processed in a single batch operation (default is 100)",
|
description="Number of objects to be processed in a single batch operation (default is 100)",
|
||||||
default=100,
|
default=100,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
WEAVIATE_TOKENIZATION: str | None = Field(
|
||||||
|
description="Tokenization for Weaviate (default is word)",
|
||||||
|
default="word",
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -167,13 +167,18 @@ class WeaviateVector(BaseVector):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not self._client.collections.exists(self._collection_name):
|
if not self._client.collections.exists(self._collection_name):
|
||||||
|
tokenization = (
|
||||||
|
wc.Tokenization(dify_config.WEAVIATE_TOKENIZATION)
|
||||||
|
if dify_config.WEAVIATE_TOKENIZATION
|
||||||
|
else wc.Tokenization.WORD
|
||||||
|
)
|
||||||
self._client.collections.create(
|
self._client.collections.create(
|
||||||
name=self._collection_name,
|
name=self._collection_name,
|
||||||
properties=[
|
properties=[
|
||||||
wc.Property(
|
wc.Property(
|
||||||
name=Field.TEXT_KEY.value,
|
name=Field.TEXT_KEY.value,
|
||||||
data_type=wc.DataType.TEXT,
|
data_type=wc.DataType.TEXT,
|
||||||
tokenization=wc.Tokenization.WORD,
|
tokenization=tokenization,
|
||||||
),
|
),
|
||||||
wc.Property(name="document_id", data_type=wc.DataType.TEXT),
|
wc.Property(name="document_id", data_type=wc.DataType.TEXT),
|
||||||
wc.Property(name="doc_id", data_type=wc.DataType.TEXT),
|
wc.Property(name="doc_id", data_type=wc.DataType.TEXT),
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,7 @@ WEAVIATE_ENDPOINT=http://localhost:8080
|
||||||
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
||||||
WEAVIATE_GRPC_ENABLED=false
|
WEAVIATE_GRPC_ENABLED=false
|
||||||
WEAVIATE_BATCH_SIZE=100
|
WEAVIATE_BATCH_SIZE=100
|
||||||
|
WEAVIATE_TOKENIZATION=word
|
||||||
|
|
||||||
|
|
||||||
# Upload configuration
|
# Upload configuration
|
||||||
|
|
|
||||||
|
|
@ -525,6 +525,7 @@ VECTOR_INDEX_NAME_PREFIX=Vector_index
|
||||||
WEAVIATE_ENDPOINT=http://weaviate:8080
|
WEAVIATE_ENDPOINT=http://weaviate:8080
|
||||||
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
||||||
WEAVIATE_GRPC_ENDPOINT=grpc://weaviate:50051
|
WEAVIATE_GRPC_ENDPOINT=grpc://weaviate:50051
|
||||||
|
WEAVIATE_TOKENIZATION=word
|
||||||
|
|
||||||
# For OceanBase metadata database configuration, available when `DB_TYPE` is `mysql` and `COMPOSE_PROFILES` includes `oceanbase`.
|
# For OceanBase metadata database configuration, available when `DB_TYPE` is `mysql` and `COMPOSE_PROFILES` includes `oceanbase`.
|
||||||
# For OceanBase vector database configuration, available when `VECTOR_STORE` is `oceanbase`
|
# For OceanBase vector database configuration, available when `VECTOR_STORE` is `oceanbase`
|
||||||
|
|
|
||||||
|
|
@ -164,6 +164,7 @@ x-shared-env: &shared-api-worker-env
|
||||||
WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-http://weaviate:8080}
|
WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-http://weaviate:8080}
|
||||||
WEAVIATE_API_KEY: ${WEAVIATE_API_KEY:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih}
|
WEAVIATE_API_KEY: ${WEAVIATE_API_KEY:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih}
|
||||||
WEAVIATE_GRPC_ENDPOINT: ${WEAVIATE_GRPC_ENDPOINT:-grpc://weaviate:50051}
|
WEAVIATE_GRPC_ENDPOINT: ${WEAVIATE_GRPC_ENDPOINT:-grpc://weaviate:50051}
|
||||||
|
WEAVIATE_TOKENIZATION: ${WEAVIATE_TOKENIZATION:-word}
|
||||||
OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase}
|
OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase}
|
||||||
OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881}
|
OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881}
|
||||||
OCEANBASE_VECTOR_USER: ${OCEANBASE_VECTOR_USER:-root@test}
|
OCEANBASE_VECTOR_USER: ${OCEANBASE_VECTOR_USER:-root@test}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue