diff --git a/.github/workflows/expose_service_ports.sh b/.github/workflows/expose_service_ports.sh index fa0fd2ee8c..e7d5f60288 100755 --- a/.github/workflows/expose_service_ports.sh +++ b/.github/workflows/expose_service_ports.sh @@ -14,4 +14,4 @@ yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/tidb/docker-compose.ya yq eval '.services.oceanbase.ports += ["2881:2881"]' -i docker/docker-compose.yaml yq eval '.services.opengauss.ports += ["6600:6600"]' -i docker/docker-compose.yaml -echo "Ports exposed for sandbox, weaviate, tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase, opengauss" +echo "Ports exposed for sandbox, weaviate (HTTP 8080, gRPC 50051), tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase, opengauss" diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index 4793d2bb50..15207add18 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -250,7 +250,6 @@ class WeaviateVector(BaseVector): ) ) - batch_size = max(1, int(dify_config.WEAVIATE_BATCH_SIZE or 100)) with col.batch.dynamic() as batch: for obj in objs: batch.add_object(properties=obj.properties, uuid=obj.uuid, vector=obj.vector) diff --git a/api/pyproject.toml b/api/pyproject.toml index f2de966a57..74e6782d83 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -86,6 +86,7 @@ dependencies = [ "sendgrid~=6.12.3", "flask-restx~=1.3.0", "packaging~=23.2", + "weaviate-client==4.17.0", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. 
@@ -215,7 +216,7 @@ vdb = [ "tidb-vector==0.0.9", "upstash-vector==0.6.0", "volcengine-compat~=1.0.0", - "weaviate-client>=4.0.0,<5.0.0", + "weaviate-client==4.17.0", "xinference-client~=1.2.2", "mo-vector~=0.1.13", "mysql-connector-python>=9.3.0", diff --git a/api/uv.lock b/api/uv.lock index e7facf8248..8f28fa36a8 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.11, <3.13" resolution-markers = [ "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", @@ -1372,6 +1372,7 @@ dependencies = [ { name = "transformers" }, { name = "unstructured", extra = ["docx", "epub", "md", "ppt", "pptx"] }, { name = "weave" }, + { name = "weaviate-client" }, { name = "webvtt-py" }, { name = "yarl" }, ] @@ -1562,6 +1563,7 @@ requires-dist = [ { name = "transformers", specifier = "~=4.56.1" }, { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" }, { name = "weave", specifier = "~=0.51.0" }, + { name = "weaviate-client", specifier = "==4.17.0" }, { name = "webvtt-py", specifier = "~=0.5.1" }, { name = "yarl", specifier = "~=1.18.3" }, ] @@ -1669,7 +1671,7 @@ vdb = [ { name = "tidb-vector", specifier = "==0.0.9" }, { name = "upstash-vector", specifier = "==0.6.0" }, { name = "volcengine-compat", specifier = "~=1.0.0" }, - { name = "weaviate-client", specifier = ">=4.0.0,<5.0.0" }, + { name = "weaviate-client", specifier = "==4.17.0" }, { name = "xinference-client", specifier = "~=1.2.2" }, ] diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 5483e2d554..5a67c080cc 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -24,6 +24,13 @@ services: volumes: # Mount the storage directory to the container, for storing user files. 
- ./volumes/app/storage:/app/api/storage + # TODO: Remove this entrypoint override when weaviate-client 4.17.0 is included in the next Dify release + entrypoint: + - /bin/bash + - -c + - | + uv pip install --system weaviate-client==4.17.0 + exec /bin/bash /app/api/docker/entrypoint.sh networks: - ssrf_proxy_network - default @@ -51,6 +58,13 @@ services: volumes: # Mount the storage directory to the container, for storing user files. - ./volumes/app/storage:/app/api/storage + # TODO: Remove this entrypoint override when weaviate-client 4.17.0 is included in the next Dify release + entrypoint: + - /bin/bash + - -c + - | + uv pip install --system weaviate-client==4.17.0 + exec /bin/bash /app/api/docker/entrypoint.sh networks: - ssrf_proxy_network - default @@ -331,7 +345,6 @@ services: weaviate: image: semitechnologies/weaviate:1.27.0 profiles: - - "" - weaviate restart: always volumes: diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml deleted file mode 100644 index 8f2ab1cb43..0000000000 --- a/docker/docker-compose.override.yml +++ /dev/null @@ -1,9 +0,0 @@ -services: - api: - volumes: - - ../api/core/rag/datasource/vdb/weaviate/weaviate_vector.py:/app/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py:ro - command: > - sh -c " - pip install --no-cache-dir 'weaviate>=4.0.0' && - /bin/bash /entrypoint.sh - " diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 46b4a750ea..421b733e2b 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -631,6 +631,13 @@ services: volumes: # Mount the storage directory to the container, for storing user files. 
- ./volumes/app/storage:/app/api/storage + # TODO: Remove this entrypoint override when weaviate-client 4.17.0 is included in the next Dify release + entrypoint: + - /bin/bash + - -c + - | + uv pip install --system weaviate-client==4.17.0 + exec /bin/bash /app/api/docker/entrypoint.sh networks: - ssrf_proxy_network - default @@ -658,6 +665,13 @@ services: volumes: # Mount the storage directory to the container, for storing user files. - ./volumes/app/storage:/app/api/storage + # TODO: Remove this entrypoint override when weaviate-client 4.17.0 is included in the next Dify release + entrypoint: + - /bin/bash + - -c + - | + uv pip install --system weaviate-client==4.17.0 + exec /bin/bash /app/api/docker/entrypoint.sh networks: - ssrf_proxy_network - default @@ -938,7 +952,6 @@ services: weaviate: image: semitechnologies/weaviate:1.27.0 profiles: - - "" - weaviate restart: always volumes: diff --git a/docs/weaviate/WEAVIATE_MIGRATION_GUIDE/README.md b/docs/weaviate/WEAVIATE_MIGRATION_GUIDE/README.md new file mode 100644 index 0000000000..b2599e8c2e --- /dev/null +++ b/docs/weaviate/WEAVIATE_MIGRATION_GUIDE/README.md @@ -0,0 +1,187 @@ +# Weaviate Migration Guide: v1.19 → v1.27 + +## Overview + +Dify has upgraded from Weaviate v1.19 to v1.27 with the Python client updated from v3.24 to v4.17. + +## What Changed + +### Breaking Changes + +1. **Weaviate Server**: `1.19.0` → `1.27.0` +1. **Python Client**: `weaviate-client~=3.24.0` → `weaviate-client==4.17.0` +1. **gRPC Required**: Weaviate v1.27 requires gRPC port `50051` (in addition to HTTP port `8080`) +1. 
**Docker Compose**: Added temporary entrypoint overrides for client installation + +### Key Improvements + +- Faster vector operations via gRPC +- Improved batch processing +- Better error handling + +## Migration Steps + +### For Docker Users + +#### Step 1: Backup Your Data + +```bash +cd docker +docker compose down +sudo cp -r ./volumes/weaviate ./volumes/weaviate_backup_$(date +%Y%m%d) +``` + +#### Step 2: Update Dify + +```bash +git pull origin main +docker compose pull +``` + +#### Step 3: Start Services + +```bash +docker compose up -d +sleep 30 +curl http://localhost:8080/v1/meta +``` + +#### Step 4: Verify Migration + +```bash +# Check both ports are accessible +curl http://localhost:8080/v1/meta +netstat -tulpn | grep 50051 + +# Test in Dify UI: +# 1. Go to Knowledge Base +# 2. Test search functionality +# 3. Upload a test document +``` + +### For Source Installation + +#### Step 1: Update Dependencies + +```bash +cd api +uv sync --dev +uv run python -c "import weaviate; print(weaviate.__version__)" +# Should show: 4.17.0 +``` + +#### Step 2: Update Weaviate Server + +```bash +cd docker +docker compose -f docker-compose.middleware.yaml --profile weaviate up -d weaviate +curl http://localhost:8080/v1/meta +netstat -tulpn | grep 50051 +``` + +## Troubleshooting + +### Error: "No module named 'weaviate.classes'" + +**Solution**: + +```bash +cd api +uv sync --reinstall-package weaviate-client +uv run python -c "import weaviate; print(weaviate.__version__)" +# Should show: 4.17.0 +``` + +### Error: "gRPC health check failed" + +**Solution**: + +```bash +# Check Weaviate ports +docker ps | grep weaviate +# Should show: 0.0.0.0:8080->8080/tcp, 0.0.0.0:50051->50051/tcp + +# If missing gRPC port, add to docker-compose: +# ports: +# - "8080:8080" +# - "50051:50051" +``` + +### Error: "Weaviate version 1.19.0 is not supported" + +**Solution**: + +```bash +# Update Weaviate image in docker-compose +# Change: semitechnologies/weaviate:1.19.0 +# To: 
semitechnologies/weaviate:1.27.0 +docker compose down +docker compose up -d +``` + +### Data Migration Failed + +**Solution**: + +```bash +cd docker +docker compose down +sudo rm -rf ./volumes/weaviate +sudo cp -r ./volumes/weaviate_backup_YYYYMMDD ./volumes/weaviate +docker compose up -d +``` + +## Rollback Instructions + +```bash +# 1. Stop services +docker compose down + +# 2. Restore data backup +sudo rm -rf ./volumes/weaviate +sudo cp -r ./volumes/weaviate_backup_YYYYMMDD ./volumes/weaviate + +# 3. Checkout previous version (replace PREVIOUS_VERSION_TAG with the tag or commit you upgraded from) +git checkout PREVIOUS_VERSION_TAG + +# 4. Restart services +docker compose up -d +``` + +## Compatibility + +| Component | Old Version | New Version | Compatible | +|-----------|-------------|-------------|------------| +| Weaviate Server | 1.19.0 | 1.27.0 | ✅ Yes | +| weaviate-client | ~3.24.0 | ==4.17.0 | ✅ Yes | +| Existing Data | v1.19 format | v1.27 format | ✅ Yes | + +## Testing Checklist + +Before deploying to production: + +- [ ] Back up all Weaviate data +- [ ] Test in staging environment +- [ ] Verify existing collections are accessible +- [ ] Test vector search functionality +- [ ] Test document upload and retrieval +- [ ] Monitor gRPC connection stability +- [ ] Check performance metrics + +## Support + +If you encounter issues: + +1. Check GitHub Issues: https://github.com/langgenius/dify/issues +1. Create a bug report with: + - Error messages + - Docker logs: `docker compose logs weaviate` + - Dify version + - Migration steps attempted + +## Important Notes + +- **Data Safety**: Existing vector data remains fully compatible +- **No Re-indexing**: No need to rebuild vector indexes +- **Temporary Workaround**: The entrypoint overrides are temporary until the next Dify release +- **Performance**: You may see improved performance due to gRPC usage