refactor: migrate session.query to select API in document task files (#34646)

This commit is contained in:
Renzo 2026-04-07 00:53:21 -05:00 committed by GitHub
parent 459c36f21b
commit 3e995e6a6d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 13 additions and 13 deletions

View File

@@ -26,7 +26,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str):
total_index_node_ids = []
with session_factory.create_session() as session:
dataset = session.query(Dataset).where(Dataset.id == dataset_id).first()
dataset = session.scalar(select(Dataset).where(Dataset.id == dataset_id).limit(1))
if not dataset:
raise Exception("Document has no dataset")
@@ -41,7 +41,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str):
total_index_node_ids.extend([segment.index_node_id for segment in segments])
with session_factory.create_session() as session:
dataset = session.query(Dataset).where(Dataset.id == dataset_id).first()
dataset = session.scalar(select(Dataset).where(Dataset.id == dataset_id).limit(1))
if dataset:
index_processor.clean(
dataset, total_index_node_ids, with_keywords=True, delete_child_chunks=True, delete_summaries=True

View File

@@ -28,7 +28,9 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
start_at = time.perf_counter()
with session_factory.create_session() as session, session.begin():
document = session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
document = session.scalar(
select(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).limit(1)
)
if not document:
logger.info(click.style(f"Document not found: {document_id}", fg="red"))
@@ -37,7 +39,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
document.indexing_status = IndexingStatus.PARSING
document.processing_started_at = naive_utc_now()
dataset = session.query(Dataset).where(Dataset.id == dataset_id).first()
dataset = session.scalar(select(Dataset).where(Dataset.id == dataset_id).limit(1))
if not dataset:
return

View File

@@ -32,15 +32,15 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
start_at = time.perf_counter()
with session_factory.create_session() as session:
try:
dataset = session.query(Dataset).where(Dataset.id == dataset_id).first()
dataset = session.scalar(select(Dataset).where(Dataset.id == dataset_id).limit(1))
if not dataset:
logger.info(click.style(f"Dataset not found: {dataset_id}", fg="red"))
return
user = session.query(Account).where(Account.id == user_id).first()
user = session.scalar(select(Account).where(Account.id == user_id).limit(1))
if not user:
logger.info(click.style(f"User not found: {user_id}", fg="red"))
return
tenant = session.query(Tenant).where(Tenant.id == dataset.tenant_id).first()
tenant = session.scalar(select(Tenant).where(Tenant.id == dataset.tenant_id).limit(1))
if not tenant:
raise ValueError("Tenant not found")
user.current_tenant = tenant
@@ -58,10 +58,8 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
"your subscription."
)
except Exception as e:
document = (
session.query(Document)
.where(Document.id == document_id, Document.dataset_id == dataset_id)
.first()
document = session.scalar(
select(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).limit(1)
)
if document:
document.indexing_status = IndexingStatus.ERROR
@@ -73,8 +71,8 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
return
logger.info(click.style(f"Start retry document: {document_id}", fg="green"))
document = (
session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
document = session.scalar(
select(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).limit(1)
)
if not document:
logger.info(click.style(f"Document not found: {document_id}", fg="yellow"))