From ffddabde43d541d380b46922596c6dbf0e575140 Mon Sep 17 00:00:00 2001 From: Aurelius Huang Date: Wed, 30 Jul 2025 21:35:20 +0800 Subject: [PATCH] feat(notion): Notion Database extracts Rows content `in row order` and appends `Row Page URL` (#22646) Co-authored-by: Aurelius Huang --- api/core/rag/extractor/notion_extractor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/core/rag/extractor/notion_extractor.py b/api/core/rag/extractor/notion_extractor.py index 875626eb34..17f4d1af2d 100644 --- a/api/core/rag/extractor/notion_extractor.py +++ b/api/core/rag/extractor/notion_extractor.py @@ -1,5 +1,6 @@ import json import logging +import operator from typing import Any, Optional, cast import requests @@ -130,13 +131,15 @@ class NotionExtractor(BaseExtractor): data[property_name] = value row_dict = {k: v for k, v in data.items() if v} row_content = "" - for key, value in row_dict.items(): + for key, value in sorted(row_dict.items(), key=operator.itemgetter(0)): if isinstance(value, dict): value_dict = {k: v for k, v in value.items() if v} value_content = "".join(f"{k}:{v} " for k, v in value_dict.items()) row_content = row_content + f"{key}:{value_content}\n" else: row_content = row_content + f"{key}:{value}\n" + if "url" in result: + row_content = row_content + f"Row Page URL:{result.get('url', '')}\n" database_content.append(row_content) has_more = response_data.get("has_more", False)