From 551df6ee9cfa96a58433e51a6103d8125faed73e Mon Sep 17 00:00:00 2001 From: eason <85663565+mango766@users.noreply.github.com> Date: Mon, 16 Mar 2026 09:41:57 +0800 Subject: [PATCH] fix: use parameterized queries to prevent SQL injection in vector stores (#33421) Co-authored-by: easonysliu Co-authored-by: Claude (claude-opus-4-6) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py | 8 ++++---- api/core/rag/datasource/vdb/relyt/relyt_vector.py | 15 ++++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py index b986c79e3a..90d9173409 100644 --- a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py +++ b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py @@ -135,8 +135,8 @@ class PGVectoRS(BaseVector): def get_ids_by_metadata_field(self, key: str, value: str): result = None with Session(self._client) as session: - select_statement = sql_text(f"SELECT id FROM {self._collection_name} WHERE meta->>'{key}' = '{value}'; ") - result = session.execute(select_statement).fetchall() + select_statement = sql_text(f"SELECT id FROM {self._collection_name} WHERE meta->>:key = :value") + result = session.execute(select_statement, {"key": key, "value": value}).fetchall() if result: return [item[0] for item in result] else: @@ -172,9 +172,9 @@ class PGVectoRS(BaseVector): def text_exists(self, id: str) -> bool: with Session(self._client) as session: select_statement = sql_text( - f"SELECT id FROM {self._collection_name} WHERE meta->>'doc_id' = '{id}' limit 1; " + f"SELECT id FROM {self._collection_name} WHERE meta->>'doc_id' = :doc_id limit 1" ) - result = session.execute(select_statement).fetchall() + result = session.execute(select_statement, {"doc_id": id}).fetchall() return len(result) > 0 def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: diff --git a/api/core/rag/datasource/vdb/relyt/relyt_vector.py b/api/core/rag/datasource/vdb/relyt/relyt_vector.py index 70857b3e3c..e486375ec2 100644 --- a/api/core/rag/datasource/vdb/relyt/relyt_vector.py +++ b/api/core/rag/datasource/vdb/relyt/relyt_vector.py @@ -154,10 +154,8 @@ class RelytVector(BaseVector): def get_ids_by_metadata_field(self, key: str, value: str): result = None with Session(self.client) as session: - select_statement = sql_text( - f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'{key}' = '{value}'; """ - ) - result = session.execute(select_statement).fetchall() + select_statement = sql_text(f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>:key = :value""") + result = session.execute(select_statement, {"key": key, "value": value}).fetchall() if result: return [item[0] for item in result] else: @@ -201,11 +199,10 @@ class RelytVector(BaseVector): def delete_by_ids(self, ids: list[str]): with Session(self.client) as session: - ids_str = ",".join(f"'{doc_id}'" for doc_id in ids) select_statement = sql_text( - f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' in ({ids_str}); """ + f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' = ANY(:doc_ids)""" ) - result = session.execute(select_statement).fetchall() + result = session.execute(select_statement, {"doc_ids": ids}).fetchall() if result: ids = [item[0] for item in result] self.delete_by_uuids(ids) @@ -218,9 +215,9 @@ class RelytVector(BaseVector): def text_exists(self, id: str) -> bool: with Session(self.client) as session: select_statement = sql_text( - f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' = '{id}' limit 1; """ + f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' = :doc_id limit 1""" ) - result = session.execute(select_statement).fetchall() + result = session.execute(select_statement, {"doc_id": id}).fetchall() return len(result) > 0 def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: