refactor(api): remove duplicated RAG entities from services layer (#34689)

This commit is contained in:
corevibe555 2026-04-08 02:36:59 +03:00 committed by GitHub
parent 80a7843f45
commit 624db69f12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 13 additions and 61 deletions

View File

@@ -26,6 +26,7 @@ from controllers.console.wraps import (
setup_required, setup_required,
) )
from core.ops.ops_trace_manager import OpsTraceManager from core.ops.ops_trace_manager import OpsTraceManager
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.trigger.constants import TRIGGER_NODE_TYPES from core.trigger.constants import TRIGGER_NODE_TYPES
from extensions.ext_database import db from extensions.ext_database import db
@@ -42,10 +43,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
NotionIcon, NotionIcon,
NotionInfo, NotionInfo,
NotionPage, NotionPage,
PreProcessingRule,
RerankingModel, RerankingModel,
Rule,
Segmentation,
WebsiteInfo, WebsiteInfo,
WeightKeywordSetting, WeightKeywordSetting,
WeightModel, WeightModel,

View File

@@ -31,6 +31,7 @@ from controllers.service_api.wraps import (
cloud_edition_billing_resource_check, cloud_edition_billing_resource_check,
) )
from core.errors.error import ProviderTokenNotInitError from core.errors.error import ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db from extensions.ext_database import db
from fields.document_fields import document_fields, document_status_fields from fields.document_fields import document_fields, document_status_fields
@@ -40,11 +41,8 @@ from models.enums import SegmentStatus
from services.dataset_service import DatasetService, DocumentService from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import ( from services.entities.knowledge_entities.knowledge_entities import (
KnowledgeConfig, KnowledgeConfig,
PreProcessingRule,
ProcessRule, ProcessRule,
RetrievalModel, RetrievalModel,
Rule,
Segmentation,
) )
from services.file_service import FileService from services.file_service import FileService
from services.summary_index_service import SummaryIndexService from services.summary_index_service import SummaryIndexService

View File

@@ -32,6 +32,7 @@ from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.retrieval_service import RetrievalService from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.docstore.dataset_docstore import DatasetDocumentStore from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.entities import Rule
from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType from core.rag.index_processor.constant.doc_type import DocType
@@ -49,7 +50,6 @@ from models.account import Account
from models.dataset import Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding from models.dataset import Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
from models.dataset import Document as DatasetDocument from models.dataset import Document as DatasetDocument
from services.account_service import AccountService from services.account_service import AccountService
from services.entities.knowledge_entities.knowledge_entities import Rule
from services.summary_index_service import SummaryIndexService from services.summary_index_service import SummaryIndexService
_file_access_controller = DatabaseFileAccessController() _file_access_controller = DatabaseFileAccessController()

View File

@@ -17,6 +17,7 @@ from core.rag.data_post_processor.data_post_processor import RerankingModelDict
from core.rag.datasource.retrieval_service import RetrievalService from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.docstore.dataset_docstore import DatasetDocumentStore from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.entities import ParentMode, Rule
from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType from core.rag.index_processor.constant.doc_type import DocType
@@ -30,7 +31,6 @@ from models import Account
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment
from models.dataset import Document as DatasetDocument from models.dataset import Document as DatasetDocument
from services.account_service import AccountService from services.account_service import AccountService
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
from services.summary_index_service import SummaryIndexService from services.summary_index_service import SummaryIndexService
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -19,6 +19,7 @@ from core.rag.data_post_processor.data_post_processor import RerankingModelDict
from core.rag.datasource.retrieval_service import RetrievalService from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.docstore.dataset_docstore import DatasetDocumentStore from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.entities import Rule
from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
@@ -30,7 +31,6 @@ from libs import helper
from models.account import Account from models.account import Account
from models.dataset import Dataset, DocumentSegment from models.dataset import Dataset, DocumentSegment
from models.dataset import Document as DatasetDocument from models.dataset import Document as DatasetDocument
from services.entities.knowledge_entities.knowledge_entities import Rule
from services.summary_index_service import SummaryIndexService from services.summary_index_service import SummaryIndexService
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -19,6 +19,7 @@ from sqlalchemy import DateTime, String, func, select
from sqlalchemy.orm import Mapped, Session, mapped_column from sqlalchemy.orm import Mapped, Session, mapped_column
from configs import dify_config from configs import dify_config
from core.rag.entities import ParentMode, Rule
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.index_processor.constant.query_type import QueryType from core.rag.index_processor.constant.query_type import QueryType
@@ -26,7 +27,6 @@ from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.tools.signature import sign_upload_file from core.tools.signature import sign_upload_file
from extensions.ext_storage import storage from extensions.ext_storage import storage
from libs.uuid_utils import uuidv7 from libs.uuid_utils import uuidv7
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
from .account import Account from .account import Account
from .base import Base, TypeBase from .base import Base, TypeBase

View File

@@ -1,17 +1,12 @@
from enum import StrEnum
from typing import Literal from typing import Literal
from pydantic import BaseModel, field_validator from pydantic import BaseModel, field_validator
from core.rag.entities import Rule
from core.rag.index_processor.constant.index_type import IndexStructureType from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
class ParentMode(StrEnum):
FULL_DOC = "full-doc"
PARAGRAPH = "paragraph"
class NotionIcon(BaseModel): class NotionIcon(BaseModel):
type: str type: str
url: str | None = None url: str | None = None
@@ -53,24 +48,6 @@ class DataSource(BaseModel):
info_list: InfoList info_list: InfoList
class PreProcessingRule(BaseModel):
id: str
enabled: bool
class Segmentation(BaseModel):
separator: str = "\n"
max_tokens: int
chunk_overlap: int = 0
class Rule(BaseModel):
pre_processing_rules: list[PreProcessingRule] | None = None
segmentation: Segmentation | None = None
parent_mode: Literal["full-doc", "paragraph"] | None = None
subchunk_segmentation: Segmentation | None = None
class ProcessRule(BaseModel): class ProcessRule(BaseModel):
mode: Literal["automatic", "custom", "hierarchical"] mode: Literal["automatic", "custom", "hierarchical"]
rules: Rule | None = None rules: Rule | None = None

View File

@@ -2,6 +2,7 @@ from typing import Literal
from pydantic import BaseModel, field_validator from pydantic import BaseModel, field_validator
from core.rag.entities import KeywordSetting, VectorSetting
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
@@ -36,24 +37,6 @@ class RerankingModelConfig(BaseModel):
reranking_model_name: str | None = "" reranking_model_name: str | None = ""
class VectorSetting(BaseModel):
"""
Vector Setting.
"""
vector_weight: float
embedding_provider_name: str
embedding_model_name: str
class KeywordSetting(BaseModel):
"""
Keyword Setting.
"""
keyword_weight: float
class WeightedScoreConfig(BaseModel): class WeightedScoreConfig(BaseModel):
""" """
Weighted score Config. Weighted score Config.

View File

@@ -60,7 +60,7 @@ class HitTestingService:
if metadata_filtering_conditions and query: if metadata_filtering_conditions and query:
dataset_retrieval = DatasetRetrieval() dataset_retrieval = DatasetRetrieval()
from core.app.app_config.entities import MetadataFilteringCondition from core.rag.entities import MetadataFilteringCondition
metadata_filtering_conditions = MetadataFilteringCondition.model_validate(metadata_filtering_conditions) metadata_filtering_conditions = MetadataFilteringCondition.model_validate(metadata_filtering_conditions)

View File

@@ -6,6 +6,7 @@ from sqlalchemy import delete, select
from core.model_manager import ModelInstance, ModelManager from core.model_manager import ModelInstance, ModelManager
from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.entities import ParentMode
from core.rag.index_processor.constant.doc_type import DocType from core.rag.index_processor.constant.doc_type import DocType
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.index_processor.index_processor_base import BaseIndexProcessor
@@ -15,7 +16,6 @@ from extensions.ext_database import db
from models import UploadFile from models import UploadFile
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
from models.dataset import Document as DatasetDocument from models.dataset import Document as DatasetDocument
from services.entities.knowledge_entities.knowledge_entities import ParentMode
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -4,10 +4,10 @@ from unittest.mock import MagicMock, Mock, patch
import pytest import pytest
from core.entities.knowledge_entities import PreviewDetail from core.entities.knowledge_entities import PreviewDetail
from core.rag.entities import ParentMode
from core.rag.index_processor.constant.index_type import IndexTechniqueType from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor
from core.rag.models.document import AttachmentDocument, ChildDocument, Document from core.rag.models.document import AttachmentDocument, ChildDocument, Document
from services.entities.knowledge_entities.knowledge_entities import ParentMode
class TestParentChildIndexProcessor: class TestParentChildIndexProcessor:

View File

@@ -14,6 +14,7 @@ from graphon.model_runtime.entities.model_entities import ModelFeature, ModelTyp
from werkzeug.exceptions import Forbidden, NotFound from werkzeug.exceptions import Forbidden, NotFound
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.index_processor.constant.built_in_field import BuiltInField from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.index_processor.constant.index_type import IndexStructureType from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
@@ -44,12 +45,9 @@ from services.entities.knowledge_entities.knowledge_entities import (
NotionIcon, NotionIcon,
NotionInfo, NotionInfo,
NotionPage, NotionPage,
PreProcessingRule,
ProcessRule, ProcessRule,
RerankingModel, RerankingModel,
RetrievalModel, RetrievalModel,
Rule,
Segmentation,
SegmentUpdateArgs, SegmentUpdateArgs,
WebsiteInfo, WebsiteInfo,
) )

View File

@@ -112,6 +112,7 @@ import pytest
from graphon.model_runtime.entities.model_entities import ModelType from graphon.model_runtime.entities.model_entities import ModelType
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from models.dataset import Dataset, DatasetProcessRule, Document from models.dataset import Dataset, DatasetProcessRule, Document
from services.dataset_service import DatasetService, DocumentService from services.dataset_service import DatasetService, DocumentService
@@ -122,10 +123,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
KnowledgeConfig, KnowledgeConfig,
NotionInfo, NotionInfo,
NotionPage, NotionPage,
PreProcessingRule,
ProcessRule, ProcessRule,
Rule,
Segmentation,
WebsiteInfo, WebsiteInfo,
) )