refactor(api): remove duplicated RAG entities from services layer (#34689)

This commit is contained in:
corevibe555 2026-04-08 02:36:59 +03:00 committed by GitHub
parent 80a7843f45
commit 624db69f12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 13 additions and 61 deletions

View File

@ -26,6 +26,7 @@ from controllers.console.wraps import (
setup_required,
)
from core.ops.ops_trace_manager import OpsTraceManager
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.trigger.constants import TRIGGER_NODE_TYPES
from extensions.ext_database import db
@ -42,10 +43,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
NotionIcon,
NotionInfo,
NotionPage,
PreProcessingRule,
RerankingModel,
Rule,
Segmentation,
WebsiteInfo,
WeightKeywordSetting,
WeightModel,

View File

@ -31,6 +31,7 @@ from controllers.service_api.wraps import (
cloud_edition_billing_resource_check,
)
from core.errors.error import ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
from fields.document_fields import document_fields, document_status_fields
@ -40,11 +41,8 @@ from models.enums import SegmentStatus
from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import (
KnowledgeConfig,
PreProcessingRule,
ProcessRule,
RetrievalModel,
Rule,
Segmentation,
)
from services.file_service import FileService
from services.summary_index_service import SummaryIndexService

View File

@ -32,6 +32,7 @@ from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.entities import Rule
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType
@ -49,7 +50,6 @@ from models.account import Account
from models.dataset import Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
from models.dataset import Document as DatasetDocument
from services.account_service import AccountService
from services.entities.knowledge_entities.knowledge_entities import Rule
from services.summary_index_service import SummaryIndexService
_file_access_controller = DatabaseFileAccessController()

View File

@ -17,6 +17,7 @@ from core.rag.data_post_processor.data_post_processor import RerankingModelDict
from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.entities import ParentMode, Rule
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType
@ -30,7 +31,6 @@ from models import Account
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment
from models.dataset import Document as DatasetDocument
from services.account_service import AccountService
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
from services.summary_index_service import SummaryIndexService
logger = logging.getLogger(__name__)

View File

@ -19,6 +19,7 @@ from core.rag.data_post_processor.data_post_processor import RerankingModelDict
from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.entities import Rule
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
@ -30,7 +31,6 @@ from libs import helper
from models.account import Account
from models.dataset import Dataset, DocumentSegment
from models.dataset import Document as DatasetDocument
from services.entities.knowledge_entities.knowledge_entities import Rule
from services.summary_index_service import SummaryIndexService
logger = logging.getLogger(__name__)

View File

@ -19,6 +19,7 @@ from sqlalchemy import DateTime, String, func, select
from sqlalchemy.orm import Mapped, Session, mapped_column
from configs import dify_config
from core.rag.entities import ParentMode, Rule
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.index_processor.constant.query_type import QueryType
@ -26,7 +27,6 @@ from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.tools.signature import sign_upload_file
from extensions.ext_storage import storage
from libs.uuid_utils import uuidv7
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
from .account import Account
from .base import Base, TypeBase

View File

@ -1,17 +1,12 @@
from enum import StrEnum
from typing import Literal
from pydantic import BaseModel, field_validator
from core.rag.entities import Rule
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.retrieval_methods import RetrievalMethod
class ParentMode(StrEnum):
FULL_DOC = "full-doc"
PARAGRAPH = "paragraph"
class NotionIcon(BaseModel):
type: str
url: str | None = None
@ -53,24 +48,6 @@ class DataSource(BaseModel):
info_list: InfoList
class PreProcessingRule(BaseModel):
id: str
enabled: bool
class Segmentation(BaseModel):
separator: str = "\n"
max_tokens: int
chunk_overlap: int = 0
class Rule(BaseModel):
pre_processing_rules: list[PreProcessingRule] | None = None
segmentation: Segmentation | None = None
parent_mode: Literal["full-doc", "paragraph"] | None = None
subchunk_segmentation: Segmentation | None = None
class ProcessRule(BaseModel):
mode: Literal["automatic", "custom", "hierarchical"]
rules: Rule | None = None

View File

@ -2,6 +2,7 @@ from typing import Literal
from pydantic import BaseModel, field_validator
from core.rag.entities import KeywordSetting, VectorSetting
from core.rag.retrieval.retrieval_methods import RetrievalMethod
@ -36,24 +37,6 @@ class RerankingModelConfig(BaseModel):
reranking_model_name: str | None = ""
class VectorSetting(BaseModel):
"""
Vector Setting.
"""
vector_weight: float
embedding_provider_name: str
embedding_model_name: str
class KeywordSetting(BaseModel):
"""
Keyword Setting.
"""
keyword_weight: float
class WeightedScoreConfig(BaseModel):
"""
Weighted score Config.

View File

@ -60,7 +60,7 @@ class HitTestingService:
if metadata_filtering_conditions and query:
dataset_retrieval = DatasetRetrieval()
from core.app.app_config.entities import MetadataFilteringCondition
from core.rag.entities import MetadataFilteringCondition
metadata_filtering_conditions = MetadataFilteringCondition.model_validate(metadata_filtering_conditions)

View File

@ -6,6 +6,7 @@ from sqlalchemy import delete, select
from core.model_manager import ModelInstance, ModelManager
from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.entities import ParentMode
from core.rag.index_processor.constant.doc_type import DocType
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
@ -15,7 +16,6 @@ from extensions.ext_database import db
from models import UploadFile
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
from models.dataset import Document as DatasetDocument
from services.entities.knowledge_entities.knowledge_entities import ParentMode
logger = logging.getLogger(__name__)

View File

@ -4,10 +4,10 @@ from unittest.mock import MagicMock, Mock, patch
import pytest
from core.entities.knowledge_entities import PreviewDetail
from core.rag.entities import ParentMode
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor
from core.rag.models.document import AttachmentDocument, ChildDocument, Document
from services.entities.knowledge_entities.knowledge_entities import ParentMode
class TestParentChildIndexProcessor:

View File

@ -14,6 +14,7 @@ from graphon.model_runtime.entities.model_entities import ModelFeature, ModelTyp
from werkzeug.exceptions import Forbidden, NotFound
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.retrieval_methods import RetrievalMethod
@ -44,12 +45,9 @@ from services.entities.knowledge_entities.knowledge_entities import (
NotionIcon,
NotionInfo,
NotionPage,
PreProcessingRule,
ProcessRule,
RerankingModel,
RetrievalModel,
Rule,
Segmentation,
SegmentUpdateArgs,
WebsiteInfo,
)

View File

@ -112,6 +112,7 @@ import pytest
from graphon.model_runtime.entities.model_entities import ModelType
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from models.dataset import Dataset, DatasetProcessRule, Document
from services.dataset_service import DatasetService, DocumentService
@ -122,10 +123,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
KnowledgeConfig,
NotionInfo,
NotionPage,
PreProcessingRule,
ProcessRule,
Rule,
Segmentation,
WebsiteInfo,
)