From 594906c1ff85ec70e0ab452f3d8e1714d09d666f Mon Sep 17 00:00:00 2001
From: Frederick2313072 <2031894837@qq.com>
Date: Wed, 24 Sep 2025 17:01:23 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20MD5=20and=208=E2=80=91hex=20Suffix=20Col?=
 =?UTF-8?q?lision=20Risk?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/core/rag/datasource/vdb/pinecone/pinecone_vector.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/api/core/rag/datasource/vdb/pinecone/pinecone_vector.py b/api/core/rag/datasource/vdb/pinecone/pinecone_vector.py
index ef6edf2bb8..5bccb42236 100644
--- a/api/core/rag/datasource/vdb/pinecone/pinecone_vector.py
+++ b/api/core/rag/datasource/vdb/pinecone/pinecone_vector.py
@@ -51,15 +51,17 @@ class PineconeVector(BaseVector):
         base_name = collection_name.lower()
         base_name = re.sub(r'[^a-z0-9-]+', '-', base_name) # replace invalid chars with '-'
         base_name = re.sub(r'-+', '-', base_name).strip('-')
+        # Use longer secure suffix to reduce collision risk
+        suffix_len = 24 # 24 hex digits (96-bit entropy)
         if len(base_name) > 45:
-            hash_suffix = hashlib.md5(base_name.encode()).hexdigest()[:8]
-            truncated_name = base_name[:45-9].rstrip('-')
+            hash_suffix = hashlib.sha256(base_name.encode()).hexdigest()[:suffix_len]
+            truncated_name = base_name[:45-(suffix_len+1)].rstrip('-')
             self._index_name = f"{truncated_name}-{hash_suffix}"
         else:
             self._index_name = base_name
         # Guard empty name
         if not self._index_name:
-            self._index_name = f"index-{hashlib.md5(collection_name.encode()).hexdigest()[:8]}"
+            self._index_name = f"index-{hashlib.sha256(collection_name.encode()).hexdigest()[:suffix_len]}"
         self._index = None
 
     def get_type(self) -> str: