refactor(api): extract shared RAG domain entities into core/rag/entities (#34685)

This commit is contained in:
corevibe555 2026-04-08 01:43:37 +03:00 committed by GitHub
parent c44ddd9831
commit d2ee486900
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 61 additions and 0 deletions

View File

@ -0,0 +1,15 @@
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.rag.entities.context_entities import DocumentContext
from core.rag.entities.processing_entities import ParentMode, PreProcessingRule, Rule, Segmentation
from core.rag.entities.retrieval_settings import KeywordSetting, VectorSetting
# Public surface of this package: exactly the eight names imported above,
# kept in alphabetical order.
__all__ = [
    "DocumentContext",
    "KeywordSetting",
    "ParentMode",
    "PreProcessingRule",
    "RetrievalSourceMetadata",
    "Rule",
    "Segmentation",
    "VectorSetting",
]

View File

@ -0,0 +1,27 @@
from enum import StrEnum
from typing import Literal
from pydantic import BaseModel
class ParentMode(StrEnum):
FULL_DOC = "full-doc"
PARAGRAPH = "paragraph"
class PreProcessingRule(BaseModel):
    """One pre-processing rule: an identifier paired with an on/off flag.

    Both fields are required; neither has a default.
    """

    # Identifier of the rule. The set of valid identifiers is not defined
    # in this model.
    id: str
    # Whether this rule is switched on.
    enabled: bool
class Segmentation(BaseModel):
    """Segmentation settings: a separator, a token limit and a chunk overlap.

    Only ``max_tokens`` must be supplied; the other two fields have defaults.
    """

    # Separator string; defaults to a single newline character.
    separator: str = "\n"
    # Required, no default.
    # NOTE(review): presumably the maximum size of one segment, measured in
    # tokens; confirm against the splitter that reads it.
    max_tokens: int
    # Defaults to 0.
    # NOTE(review): presumably the overlap between neighbouring chunks;
    # the unit is not visible here, so verify it.
    chunk_overlap: int = 0
class Rule(BaseModel):
    """Bundle of optional processing settings.

    Every field is optional and defaults to ``None``, so ``Rule()`` with no
    arguments is a valid instance.
    """

    # Optional list of pre-processing rules.
    pre_processing_rules: list[PreProcessingRule] | None = None
    # Optional segmentation settings.
    segmentation: Segmentation | None = None
    # Optional; restricted to the same two string values that ``ParentMode``
    # declares. Typed as a plain ``Literal`` rather than the enum.
    parent_mode: Literal["full-doc", "paragraph"] | None = None
    # Optional second set of segmentation settings.
    # NOTE(review): presumably applied to sub-chunks of a parent chunk;
    # confirm with the consumer.
    subchunk_segmentation: Segmentation | None = None

View File

@ -0,0 +1,19 @@
from pydantic import BaseModel
class VectorSetting(BaseModel):
    """Vector settings: a weight plus an embedding provider/model name pair.

    All three fields are required; none has a default.
    """

    # Weight for the vector side.
    # NOTE(review): the valid range is not enforced here; presumably it is
    # balanced against ``KeywordSetting.keyword_weight``. Confirm.
    vector_weight: float
    # Name of the embedding provider.
    embedding_provider_name: str
    # Name of the embedding model.
    embedding_model_name: str
class KeywordSetting(BaseModel):
    """Keyword settings: a single required weight."""

    # Weight for the keyword side.
    # NOTE(review): the valid range is not enforced here; presumably it is
    # balanced against ``VectorSetting.vector_weight``. Confirm.
    keyword_weight: float