commit 526f36e8c7
wangxiaolei, 2025-12-29 15:03:22 +08:00, committed by GitHub
3 changed files with 102 additions and 37 deletions

File: core/rag/datasource/retrieval_service.py

@@ -1,4 +1,5 @@
+import concurrent.futures
 import logging
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any
@@ -36,6 +37,8 @@ default_retrieval_model = {
     "score_threshold_enabled": False,
 }
 
+logger = logging.getLogger(__name__)
+
 
 class RetrievalService:
     # Cache precompiled regular expressions to avoid repeated compilation
@@ -106,7 +109,12 @@ class RetrievalService:
                     )
                 )
-            concurrent.futures.wait(futures, timeout=3600, return_when=concurrent.futures.ALL_COMPLETED)
+            if futures:
+                for future in concurrent.futures.as_completed(futures, timeout=3600):
+                    if exceptions:
+                        for f in futures:
+                            f.cancel()
+                        break
 
         if exceptions:
             raise ValueError(";\n".join(exceptions))
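
A note on the semantics this swap changes: concurrent.futures.wait reports an expired timeout by returning a non-empty not_done set, whereas as_completed raises TimeoutError from its iterator, so the 3600-second budget now surfaces as an exception instead of a silently partial result. A minimal sketch of the difference (the deliberately slow dummy task is hypothetical, not code from this commit):

```python
import concurrent.futures
import time
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=1) as executor:
    futures = [executor.submit(time.sleep, 2)]

    # wait(): an expired timeout is reported via the not_done set, no exception.
    done, not_done = concurrent.futures.wait(futures, timeout=0.1)
    print(len(done), len(not_done))  # 0 1

    # as_completed(): an expired timeout raises TimeoutError from the iterator.
    try:
        for future in concurrent.futures.as_completed(futures, timeout=0.1):
            pass
    except concurrent.futures.TimeoutError:
        print("as_completed raised TimeoutError")
```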
@@ -210,6 +218,7 @@ class RetrievalService:
             )
             all_documents.extend(documents)
         except Exception as e:
+            logger.error(e, exc_info=True)
             exceptions.append(str(e))
 
     @classmethod
@@ -303,6 +312,7 @@ class RetrievalService:
             else:
                 all_documents.extend(documents)
         except Exception as e:
+            logger.error(e, exc_info=True)
             exceptions.append(str(e))
 
     @classmethod
@@ -351,6 +361,7 @@ class RetrievalService:
             else:
                 all_documents.extend(documents)
         except Exception as e:
+            logger.error(e, exc_info=True)
             exceptions.append(str(e))
 
     @staticmethod
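
The three logging hunks above apply the same fix: the exception is logged with its traceback before being flattened to a string, so the stack trace is preserved even though only str(e) reaches the aggregated ValueError. A small sketch of the pattern (run_step is a hypothetical stand-in for the retrieval helpers):

```python
import logging

logger = logging.getLogger(__name__)

def run_step(step, exceptions: list[str]) -> None:
    """Hypothetical helper mirroring the pattern in the hunks above."""
    try:
        step()
    except Exception as e:
        # exc_info=True attaches the full traceback to the log record;
        # inside an except block this is equivalent to logger.exception(e).
        logger.error(e, exc_info=True)
        exceptions.append(str(e))

exceptions: list[str] = []
run_step(lambda: 1 / 0, exceptions)
if exceptions:
    raise ValueError(";\n".join(exceptions))
```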
@@ -663,7 +674,14 @@ class RetrievalService:
                         document_ids_filter=document_ids_filter,
                     )
                 )
-            concurrent.futures.wait(futures, timeout=300, return_when=concurrent.futures.ALL_COMPLETED)
+            # Use as_completed for early error propagation - cancel remaining futures on first error
+            if futures:
+                for future in concurrent.futures.as_completed(futures, timeout=300):
+                    if future.exception():
+                        # Cancel remaining futures to avoid unnecessary waiting
+                        for f in futures:
+                            f.cancel()
+                        break
 
         if exceptions:
             raise ValueError(";\n".join(exceptions))
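
Both executor loops now follow the same fail-fast shape: iterate as_completed, detect the first failure (via the shared exceptions list in the first hunk, via future.exception() here), cancel what has not started, and break. A self-contained sketch of the pattern (the task function is hypothetical); note that cancel() only prevents queued futures from starting, it cannot interrupt tasks already running:

```python
import concurrent.futures
import time
from concurrent.futures import ThreadPoolExecutor

def task(i: int) -> int:
    """Hypothetical task: task 1 fails almost immediately."""
    if i == 1:
        raise RuntimeError("search backend unavailable")
    time.sleep(5)
    return i

exceptions: list[str] = []
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(task, i) for i in range(4)]
    if futures:
        # as_completed yields each future as soon as it finishes, so the
        # failure surfaces now instead of after the slowest task.
        for future in concurrent.futures.as_completed(futures, timeout=30):
            if future.exception():
                exceptions.append(str(future.exception()))
                # cancel() stops the still-queued futures (tasks 2 and 3 here);
                # the running task 0 continues to completion.
                for f in futures:
                    f.cancel()
                break

if exceptions:
    raise ValueError(";\n".join(exceptions))
```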

File: core/rag/retrieval/dataset_retrieval.py

@@ -516,6 +516,9 @@ class DatasetRetrieval:
             ].embedding_model_provider
             weights["vector_setting"]["embedding_model_name"] = available_datasets[0].embedding_model
 
         with measure_time() as timer:
+            cancel_event = threading.Event()
+            thread_exceptions: list[Exception] = []
+
             if query:
                 query_thread = threading.Thread(
                     target=self._multiple_retrieve_thread,
@@ -534,6 +537,8 @@ class DatasetRetrieval:
                         "score_threshold": score_threshold,
                         "query": query,
                         "attachment_id": None,
+                        "cancel_event": cancel_event,
+                        "thread_exceptions": thread_exceptions,
                     },
                 )
                 all_threads.append(query_thread)
@@ -557,12 +562,25 @@ class DatasetRetrieval:
                         "score_threshold": score_threshold,
                         "query": None,
                         "attachment_id": attachment_id,
+                        "cancel_event": cancel_event,
+                        "thread_exceptions": thread_exceptions,
                     },
                 )
                 all_threads.append(attachment_thread)
                 attachment_thread.start()
-            for thread in all_threads:
-                thread.join()
+            # Poll threads with short timeout to detect errors quickly (fail-fast)
+            while any(t.is_alive() for t in all_threads):
+                for thread in all_threads:
+                    thread.join(timeout=0.1)
+                    if thread_exceptions:
+                        cancel_event.set()
+                        break
+                if thread_exceptions:
+                    break
+
+            if thread_exceptions:
+                raise thread_exceptions[0]
 
         self._on_query(query, attachment_ids, dataset_ids, app_id, user_from, user_id)
 
         if all_documents:
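
This is the coordinator side of the new fail-fast behavior: instead of blocking in thread.join() until every retrieval thread finishes, the loop joins with a 0.1-second timeout, so an error recorded in thread_exceptions is noticed almost immediately, cancel_event is set, and the first exception is re-raised. A minimal standalone sketch (worker bodies are hypothetical; real workers would also poll cancel_event, as the later hunks show):

```python
import threading
import time

thread_exceptions: list[Exception] = []
cancel_event = threading.Event()

def worker(name: str, delay: float) -> None:
    """Hypothetical worker; the fastest one records a failure."""
    time.sleep(delay)
    if delay < 0.2:
        thread_exceptions.append(RuntimeError(f"{name} failed"))

threads = [
    threading.Thread(target=worker, args=(f"t{i}", 0.1 * (i + 1))) for i in range(3)
]
for t in threads:
    t.start()

# Poll with a short join timeout instead of blocking on each thread in turn;
# a recorded exception is noticed within roughly 0.1 s.
while any(t.is_alive() for t in threads):
    for t in threads:
        t.join(timeout=0.1)
        if thread_exceptions:
            cancel_event.set()  # ask surviving workers to stop early
            break
    if thread_exceptions:
        break

if thread_exceptions:
    raise thread_exceptions[0]
```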
@@ -1404,12 +1422,18 @@ class DatasetRetrieval:
         score_threshold: float,
         query: str | None,
         attachment_id: str | None,
+        cancel_event: threading.Event | None = None,
+        thread_exceptions: list[Exception] | None = None,
     ):
+        try:
             with flask_app.app_context():
                 threads = []
                 all_documents_item: list[Document] = []
                 index_type = None
                 for dataset in available_datasets:
+                    # Check for cancellation signal
+                    if cancel_event and cancel_event.is_set():
+                        break
                     index_type = dataset.indexing_technique
                     document_ids_filter = None
                     if dataset.provider != "external":
@@ -1436,8 +1460,15 @@ class DatasetRetrieval:
                     )
                     threads.append(retrieval_thread)
                     retrieval_thread.start()
-                for thread in threads:
-                    thread.join()
+                # Poll threads with short timeout to respond quickly to cancellation
+                while any(t.is_alive() for t in threads):
+                    for thread in threads:
+                        thread.join(timeout=0.1)
+                        if cancel_event and cancel_event.is_set():
+                            break
+                    if cancel_event and cancel_event.is_set():
+                        break
 
                 if reranking_enable:
                     # do rerank for searched documents
@@ -1470,3 +1501,8 @@ class DatasetRetrieval:
                     all_documents_item = all_documents_item[:top_k] if top_k else all_documents_item
                 if all_documents_item:
                     all_documents.extend(all_documents_item)
+        except Exception as e:
+            if cancel_event:
+                cancel_event.set()
+            if thread_exceptions is not None:
+                thread_exceptions.append(e)
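
The worker side completes the picture: the body is wrapped in try/except so a failure both sets the shared cancel_event and is reported back through thread_exceptions, while healthy workers check the event at loop boundaries and bail out early. A condensed sketch of both halves wired together (retrieve_worker is a hypothetical stand-in for _multiple_retrieve_thread):

```python
import threading

def retrieve_worker(
    datasets: list[str],
    results: list[str],
    cancel_event: threading.Event | None = None,
    thread_exceptions: list[Exception] | None = None,
) -> None:
    """Hypothetical stand-in for _multiple_retrieve_thread."""
    try:
        for dataset in datasets:
            # Cooperative cancellation: bail out between datasets.
            if cancel_event and cancel_event.is_set():
                break
            if dataset == "broken":
                raise ValueError(f"retrieval failed for {dataset}")
            results.append(dataset)
    except Exception as e:
        # Signal the peers, then report the error to the coordinator.
        if cancel_event:
            cancel_event.set()
        if thread_exceptions is not None:
            thread_exceptions.append(e)

cancel_event = threading.Event()
errors: list[Exception] = []
results: list[str] = []
t = threading.Thread(
    target=retrieve_worker,
    kwargs={
        "datasets": ["a", "broken", "b"],
        "results": results,
        "cancel_event": cancel_event,
        "thread_exceptions": errors,
    },
)
t.start()
t.join()
assert errors and cancel_event.is_set()  # "b" was never retrieved
```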

File: unit tests for RetrievalService

@@ -421,6 +421,17 @@ class TestRetrievalService:
         # In real code, this waits for all futures to complete
         # In tests, futures complete immediately, so wait is a no-op
         with patch("core.rag.datasource.retrieval_service.concurrent.futures.wait"):
-            yield mock_executor
+            # Mock concurrent.futures.as_completed for early error propagation
+            # In real code, this yields futures as they complete
+            # In tests, we yield all futures immediately since they're already done
+            def mock_as_completed(futures_list, timeout=None):
+                """Mock as_completed that yields futures immediately."""
+                yield from futures_list
+
+            with patch(
+                "core.rag.datasource.retrieval_service.concurrent.futures.as_completed",
+                side_effect=mock_as_completed,
+            ):
+                yield mock_executor
 
     # ==================== Vector Search Tests ====================
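
Since the fixture's executor runs submissions synchronously, every future is already resolved by the time the production code reaches the as_completed loop; replacing as_completed with a generator that simply re-yields the list keeps that loop executing unchanged under test. A sketch of how the same idea can be exercised directly (the test and mock futures are hypothetical, not from this commit):

```python
import concurrent.futures
from unittest.mock import MagicMock, patch

def mock_as_completed(futures_list, timeout=None):
    """Yield the futures untouched; in tests they are already resolved."""
    yield from futures_list

def test_fail_fast_loop_cancels_on_error():
    # Two fake futures: the first reports an exception, so the loop under
    # test should cancel everything and stop iterating.
    failed = MagicMock()
    failed.exception.return_value = RuntimeError("boom")
    pending = MagicMock()
    pending.exception.return_value = None
    futures = [failed, pending]

    with patch("concurrent.futures.as_completed", side_effect=mock_as_completed):
        for future in concurrent.futures.as_completed(futures, timeout=300):
            if future.exception():
                for f in futures:
                    f.cancel()
                break

    pending.cancel.assert_called_once()
```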