From d2ee486900b71652393242c080515e75a2461f01 Mon Sep 17 00:00:00 2001
From: corevibe555 <45244658+corevibe555@users.noreply.github.com>
Date: Wed, 8 Apr 2026 01:43:37 +0300
Subject: [PATCH] refactor(api): extract shared RAG domain entities into core/rag/entity (#34685)

---
Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

 * The subject line says "core/rag/entity", but every file in this patch is
   created under api/core/rag/entities/ (plural).
 * api/core/rag/entities/__init__.py re-exports eight names through __all__.
   RetrievalSourceMetadata and DocumentContext are imported from
   core.rag.entities.citation_metadata and core.rag.entities.context_entities,
   which this patch does not create; they have to exist in the target tree
   already or the package import fails.
 * processing_entities.py adds the ParentMode StrEnum plus the pydantic models
   PreProcessingRule, Segmentation and Rule. StrEnum needs Python 3.11+.
   Rule.parent_mode is typed as Literal["full-doc", "paragraph"], which repeats
   the two ParentMode values as plain strings; the two must be kept in sync by
   hand.
 * Segmentation.max_tokens has no default and is therefore required;
   separator defaults to "\n" and chunk_overlap to 0.
 * retrieval_settings.py adds the pydantic models VectorSetting
   (vector_weight, embedding_provider_name, embedding_model_name) and
   KeywordSetting (keyword_weight); all fields are required.

 api/core/rag/entities/__init__.py            | 15 +++++++++++
 api/core/rag/entities/processing_entities.py | 27 ++++++++++++++++++++
 api/core/rag/entities/retrieval_settings.py  | 19 ++++++++++++++
 3 files changed, 61 insertions(+)
 create mode 100644 api/core/rag/entities/__init__.py
 create mode 100644 api/core/rag/entities/processing_entities.py
 create mode 100644 api/core/rag/entities/retrieval_settings.py

diff --git a/api/core/rag/entities/__init__.py b/api/core/rag/entities/__init__.py
new file mode 100644
index 0000000000..f8157571b3
--- /dev/null
+++ b/api/core/rag/entities/__init__.py
@@ -0,0 +1,15 @@
+from core.rag.entities.citation_metadata import RetrievalSourceMetadata
+from core.rag.entities.context_entities import DocumentContext
+from core.rag.entities.processing_entities import ParentMode, PreProcessingRule, Rule, Segmentation
+from core.rag.entities.retrieval_settings import KeywordSetting, VectorSetting
+
+__all__ = [
+    "DocumentContext",
+    "KeywordSetting",
+    "ParentMode",
+    "PreProcessingRule",
+    "RetrievalSourceMetadata",
+    "Rule",
+    "Segmentation",
+    "VectorSetting",
+]
diff --git a/api/core/rag/entities/processing_entities.py b/api/core/rag/entities/processing_entities.py
new file mode 100644
index 0000000000..1b54444a19
--- /dev/null
+++ b/api/core/rag/entities/processing_entities.py
@@ -0,0 +1,27 @@
+from enum import StrEnum
+from typing import Literal
+
+from pydantic import BaseModel
+
+
+class ParentMode(StrEnum):
+    FULL_DOC = "full-doc"
+    PARAGRAPH = "paragraph"
+
+
+class PreProcessingRule(BaseModel):
+    id: str
+    enabled: bool
+
+
+class Segmentation(BaseModel):
+    separator: str = "\n"
+    max_tokens: int
+    chunk_overlap: int = 0
+
+
+class Rule(BaseModel):
+    pre_processing_rules: list[PreProcessingRule] | None = None
+    segmentation: Segmentation | None = None
+    parent_mode: Literal["full-doc", "paragraph"] | None = None
+    subchunk_segmentation: Segmentation | None = None
diff --git a/api/core/rag/entities/retrieval_settings.py b/api/core/rag/entities/retrieval_settings.py
new file mode 100644
index 0000000000..f52e0f0142
--- /dev/null
+++ b/api/core/rag/entities/retrieval_settings.py
@@ -0,0 +1,19 @@
+from pydantic import BaseModel
+
+
+class VectorSetting(BaseModel):
+    """
+    Vector Setting.
+    """
+
+    vector_weight: float
+    embedding_provider_name: str
+    embedding_model_name: str
+
+
+class KeywordSetting(BaseModel):
+    """
+    Keyword Setting.
+    """
+
+    keyword_weight: float