diff --git a/api/core/rag/entities/__init__.py b/api/core/rag/entities/__init__.py
new file mode 100644
index 0000000000..f8157571b3
--- /dev/null
+++ b/api/core/rag/entities/__init__.py
@@ -0,0 +1,15 @@
+from core.rag.entities.citation_metadata import RetrievalSourceMetadata
+from core.rag.entities.context_entities import DocumentContext
+from core.rag.entities.processing_entities import ParentMode, PreProcessingRule, Rule, Segmentation
+from core.rag.entities.retrieval_settings import KeywordSetting, VectorSetting
+
+__all__ = [  # exactly the eight names imported above, re-exported at package level
+    "DocumentContext",
+    "KeywordSetting",
+    "ParentMode",
+    "PreProcessingRule",
+    "RetrievalSourceMetadata",
+    "Rule",
+    "Segmentation",
+    "VectorSetting",
+]
diff --git a/api/core/rag/entities/processing_entities.py b/api/core/rag/entities/processing_entities.py
new file mode 100644
index 0000000000..1b54444a19
--- /dev/null
+++ b/api/core/rag/entities/processing_entities.py
@@ -0,0 +1,27 @@
+from enum import StrEnum
+from typing import Literal
+
+from pydantic import BaseModel
+
+
+class ParentMode(StrEnum):  # members are plain strings: "full-doc" and "paragraph"
+    FULL_DOC = "full-doc"
+    PARAGRAPH = "paragraph"
+
+
+class PreProcessingRule(BaseModel):  # both fields are required (no defaults)
+    id: str
+    enabled: bool
+
+
+class Segmentation(BaseModel):  # only max_tokens is required; the other fields have defaults
+    separator: str = "\n"  # defaults to a single newline character
+    max_tokens: int
+    chunk_overlap: int = 0
+
+
+class Rule(BaseModel):  # every field is optional and defaults to None
+    pre_processing_rules: list[PreProcessingRule] | None = None
+    segmentation: Segmentation | None = None
+    parent_mode: Literal["full-doc", "paragraph"] | None = None  # NOTE(review): same values as ParentMode; keep in sync
+    subchunk_segmentation: Segmentation | None = None  # same model as `segmentation`
diff --git a/api/core/rag/entities/retrieval_settings.py b/api/core/rag/entities/retrieval_settings.py
new file mode 100644
index 0000000000..f52e0f0142
--- /dev/null
+++ b/api/core/rag/entities/retrieval_settings.py
@@ -0,0 +1,19 @@
+from pydantic import BaseModel
+
+
+class VectorSetting(BaseModel):
+    """
+    Vector settings: a float weight plus the embedding provider and model names.
+    """
+
+    vector_weight: float
+    embedding_provider_name: str
+    embedding_model_name: str
+
+
+class KeywordSetting(BaseModel):
+    """
+    Keyword settings: a single float weight.
+    """
+
+    keyword_weight: float