mirror of
https://github.com/langgenius/dify.git
synced 2026-05-03 15:57:06 +08:00
refactor(api): remove duplicated RAG entities from services layer (#34689)
This commit is contained in:
parent
80a7843f45
commit
624db69f12
@ -26,6 +26,7 @@ from controllers.console.wraps import (
|
|||||||
setup_required,
|
setup_required,
|
||||||
)
|
)
|
||||||
from core.ops.ops_trace_manager import OpsTraceManager
|
from core.ops.ops_trace_manager import OpsTraceManager
|
||||||
|
from core.rag.entities import PreProcessingRule, Rule, Segmentation
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
from core.trigger.constants import TRIGGER_NODE_TYPES
|
from core.trigger.constants import TRIGGER_NODE_TYPES
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
@ -42,10 +43,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
|
|||||||
NotionIcon,
|
NotionIcon,
|
||||||
NotionInfo,
|
NotionInfo,
|
||||||
NotionPage,
|
NotionPage,
|
||||||
PreProcessingRule,
|
|
||||||
RerankingModel,
|
RerankingModel,
|
||||||
Rule,
|
|
||||||
Segmentation,
|
|
||||||
WebsiteInfo,
|
WebsiteInfo,
|
||||||
WeightKeywordSetting,
|
WeightKeywordSetting,
|
||||||
WeightModel,
|
WeightModel,
|
||||||
|
|||||||
@ -31,6 +31,7 @@ from controllers.service_api.wraps import (
|
|||||||
cloud_edition_billing_resource_check,
|
cloud_edition_billing_resource_check,
|
||||||
)
|
)
|
||||||
from core.errors.error import ProviderTokenNotInitError
|
from core.errors.error import ProviderTokenNotInitError
|
||||||
|
from core.rag.entities import PreProcessingRule, Rule, Segmentation
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from fields.document_fields import document_fields, document_status_fields
|
from fields.document_fields import document_fields, document_status_fields
|
||||||
@ -40,11 +41,8 @@ from models.enums import SegmentStatus
|
|||||||
from services.dataset_service import DatasetService, DocumentService
|
from services.dataset_service import DatasetService, DocumentService
|
||||||
from services.entities.knowledge_entities.knowledge_entities import (
|
from services.entities.knowledge_entities.knowledge_entities import (
|
||||||
KnowledgeConfig,
|
KnowledgeConfig,
|
||||||
PreProcessingRule,
|
|
||||||
ProcessRule,
|
ProcessRule,
|
||||||
RetrievalModel,
|
RetrievalModel,
|
||||||
Rule,
|
|
||||||
Segmentation,
|
|
||||||
)
|
)
|
||||||
from services.file_service import FileService
|
from services.file_service import FileService
|
||||||
from services.summary_index_service import SummaryIndexService
|
from services.summary_index_service import SummaryIndexService
|
||||||
|
|||||||
@ -32,6 +32,7 @@ from core.rag.datasource.keyword.keyword_factory import Keyword
|
|||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.datasource.vdb.vector_factory import Vector
|
from core.rag.datasource.vdb.vector_factory import Vector
|
||||||
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
||||||
|
from core.rag.entities import Rule
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||||
from core.rag.index_processor.constant.doc_type import DocType
|
from core.rag.index_processor.constant.doc_type import DocType
|
||||||
@ -49,7 +50,6 @@ from models.account import Account
|
|||||||
from models.dataset import Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
|
from models.dataset import Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
|
||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
from services.account_service import AccountService
|
from services.account_service import AccountService
|
||||||
from services.entities.knowledge_entities.knowledge_entities import Rule
|
|
||||||
from services.summary_index_service import SummaryIndexService
|
from services.summary_index_service import SummaryIndexService
|
||||||
|
|
||||||
_file_access_controller = DatabaseFileAccessController()
|
_file_access_controller = DatabaseFileAccessController()
|
||||||
|
|||||||
@ -17,6 +17,7 @@ from core.rag.data_post_processor.data_post_processor import RerankingModelDict
|
|||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.datasource.vdb.vector_factory import Vector
|
from core.rag.datasource.vdb.vector_factory import Vector
|
||||||
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
||||||
|
from core.rag.entities import ParentMode, Rule
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||||
from core.rag.index_processor.constant.doc_type import DocType
|
from core.rag.index_processor.constant.doc_type import DocType
|
||||||
@ -30,7 +31,6 @@ from models import Account
|
|||||||
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment
|
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment
|
||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
from services.account_service import AccountService
|
from services.account_service import AccountService
|
||||||
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
|
|
||||||
from services.summary_index_service import SummaryIndexService
|
from services.summary_index_service import SummaryIndexService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@ -19,6 +19,7 @@ from core.rag.data_post_processor.data_post_processor import RerankingModelDict
|
|||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.datasource.vdb.vector_factory import Vector
|
from core.rag.datasource.vdb.vector_factory import Vector
|
||||||
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
||||||
|
from core.rag.entities import Rule
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||||
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
||||||
@ -30,7 +31,6 @@ from libs import helper
|
|||||||
from models.account import Account
|
from models.account import Account
|
||||||
from models.dataset import Dataset, DocumentSegment
|
from models.dataset import Dataset, DocumentSegment
|
||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
from services.entities.knowledge_entities.knowledge_entities import Rule
|
|
||||||
from services.summary_index_service import SummaryIndexService
|
from services.summary_index_service import SummaryIndexService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@ -19,6 +19,7 @@ from sqlalchemy import DateTime, String, func, select
|
|||||||
from sqlalchemy.orm import Mapped, Session, mapped_column
|
from sqlalchemy.orm import Mapped, Session, mapped_column
|
||||||
|
|
||||||
from configs import dify_config
|
from configs import dify_config
|
||||||
|
from core.rag.entities import ParentMode, Rule
|
||||||
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
|
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
|
||||||
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
||||||
from core.rag.index_processor.constant.query_type import QueryType
|
from core.rag.index_processor.constant.query_type import QueryType
|
||||||
@ -26,7 +27,6 @@ from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
|||||||
from core.tools.signature import sign_upload_file
|
from core.tools.signature import sign_upload_file
|
||||||
from extensions.ext_storage import storage
|
from extensions.ext_storage import storage
|
||||||
from libs.uuid_utils import uuidv7
|
from libs.uuid_utils import uuidv7
|
||||||
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
|
|
||||||
|
|
||||||
from .account import Account
|
from .account import Account
|
||||||
from .base import Base, TypeBase
|
from .base import Base, TypeBase
|
||||||
|
|||||||
@ -1,17 +1,12 @@
|
|||||||
from enum import StrEnum
|
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from pydantic import BaseModel, field_validator
|
from pydantic import BaseModel, field_validator
|
||||||
|
|
||||||
|
from core.rag.entities import Rule
|
||||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
|
|
||||||
|
|
||||||
class ParentMode(StrEnum):
|
|
||||||
FULL_DOC = "full-doc"
|
|
||||||
PARAGRAPH = "paragraph"
|
|
||||||
|
|
||||||
|
|
||||||
class NotionIcon(BaseModel):
|
class NotionIcon(BaseModel):
|
||||||
type: str
|
type: str
|
||||||
url: str | None = None
|
url: str | None = None
|
||||||
@ -53,24 +48,6 @@ class DataSource(BaseModel):
|
|||||||
info_list: InfoList
|
info_list: InfoList
|
||||||
|
|
||||||
|
|
||||||
class PreProcessingRule(BaseModel):
|
|
||||||
id: str
|
|
||||||
enabled: bool
|
|
||||||
|
|
||||||
|
|
||||||
class Segmentation(BaseModel):
|
|
||||||
separator: str = "\n"
|
|
||||||
max_tokens: int
|
|
||||||
chunk_overlap: int = 0
|
|
||||||
|
|
||||||
|
|
||||||
class Rule(BaseModel):
|
|
||||||
pre_processing_rules: list[PreProcessingRule] | None = None
|
|
||||||
segmentation: Segmentation | None = None
|
|
||||||
parent_mode: Literal["full-doc", "paragraph"] | None = None
|
|
||||||
subchunk_segmentation: Segmentation | None = None
|
|
||||||
|
|
||||||
|
|
||||||
class ProcessRule(BaseModel):
|
class ProcessRule(BaseModel):
|
||||||
mode: Literal["automatic", "custom", "hierarchical"]
|
mode: Literal["automatic", "custom", "hierarchical"]
|
||||||
rules: Rule | None = None
|
rules: Rule | None = None
|
||||||
|
|||||||
@ -2,6 +2,7 @@ from typing import Literal
|
|||||||
|
|
||||||
from pydantic import BaseModel, field_validator
|
from pydantic import BaseModel, field_validator
|
||||||
|
|
||||||
|
from core.rag.entities import KeywordSetting, VectorSetting
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
|
|
||||||
|
|
||||||
@ -36,24 +37,6 @@ class RerankingModelConfig(BaseModel):
|
|||||||
reranking_model_name: str | None = ""
|
reranking_model_name: str | None = ""
|
||||||
|
|
||||||
|
|
||||||
class VectorSetting(BaseModel):
|
|
||||||
"""
|
|
||||||
Vector Setting.
|
|
||||||
"""
|
|
||||||
|
|
||||||
vector_weight: float
|
|
||||||
embedding_provider_name: str
|
|
||||||
embedding_model_name: str
|
|
||||||
|
|
||||||
|
|
||||||
class KeywordSetting(BaseModel):
|
|
||||||
"""
|
|
||||||
Keyword Setting.
|
|
||||||
"""
|
|
||||||
|
|
||||||
keyword_weight: float
|
|
||||||
|
|
||||||
|
|
||||||
class WeightedScoreConfig(BaseModel):
|
class WeightedScoreConfig(BaseModel):
|
||||||
"""
|
"""
|
||||||
Weighted score Config.
|
Weighted score Config.
|
||||||
|
|||||||
@ -60,7 +60,7 @@ class HitTestingService:
|
|||||||
if metadata_filtering_conditions and query:
|
if metadata_filtering_conditions and query:
|
||||||
dataset_retrieval = DatasetRetrieval()
|
dataset_retrieval = DatasetRetrieval()
|
||||||
|
|
||||||
from core.app.app_config.entities import MetadataFilteringCondition
|
from core.rag.entities import MetadataFilteringCondition
|
||||||
|
|
||||||
metadata_filtering_conditions = MetadataFilteringCondition.model_validate(metadata_filtering_conditions)
|
metadata_filtering_conditions = MetadataFilteringCondition.model_validate(metadata_filtering_conditions)
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,7 @@ from sqlalchemy import delete, select
|
|||||||
from core.model_manager import ModelInstance, ModelManager
|
from core.model_manager import ModelInstance, ModelManager
|
||||||
from core.rag.datasource.keyword.keyword_factory import Keyword
|
from core.rag.datasource.keyword.keyword_factory import Keyword
|
||||||
from core.rag.datasource.vdb.vector_factory import Vector
|
from core.rag.datasource.vdb.vector_factory import Vector
|
||||||
|
from core.rag.entities import ParentMode
|
||||||
from core.rag.index_processor.constant.doc_type import DocType
|
from core.rag.index_processor.constant.doc_type import DocType
|
||||||
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
||||||
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
|
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
|
||||||
@ -15,7 +16,6 @@ from extensions.ext_database import db
|
|||||||
from models import UploadFile
|
from models import UploadFile
|
||||||
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
|
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
|
||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
from services.entities.knowledge_entities.knowledge_entities import ParentMode
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@ -4,10 +4,10 @@ from unittest.mock import MagicMock, Mock, patch
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from core.entities.knowledge_entities import PreviewDetail
|
from core.entities.knowledge_entities import PreviewDetail
|
||||||
|
from core.rag.entities import ParentMode
|
||||||
from core.rag.index_processor.constant.index_type import IndexTechniqueType
|
from core.rag.index_processor.constant.index_type import IndexTechniqueType
|
||||||
from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor
|
from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor
|
||||||
from core.rag.models.document import AttachmentDocument, ChildDocument, Document
|
from core.rag.models.document import AttachmentDocument, ChildDocument, Document
|
||||||
from services.entities.knowledge_entities.knowledge_entities import ParentMode
|
|
||||||
|
|
||||||
|
|
||||||
class TestParentChildIndexProcessor:
|
class TestParentChildIndexProcessor:
|
||||||
|
|||||||
@ -14,6 +14,7 @@ from graphon.model_runtime.entities.model_entities import ModelFeature, ModelTyp
|
|||||||
from werkzeug.exceptions import Forbidden, NotFound
|
from werkzeug.exceptions import Forbidden, NotFound
|
||||||
|
|
||||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||||
|
from core.rag.entities import PreProcessingRule, Rule, Segmentation
|
||||||
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
||||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
@ -44,12 +45,9 @@ from services.entities.knowledge_entities.knowledge_entities import (
|
|||||||
NotionIcon,
|
NotionIcon,
|
||||||
NotionInfo,
|
NotionInfo,
|
||||||
NotionPage,
|
NotionPage,
|
||||||
PreProcessingRule,
|
|
||||||
ProcessRule,
|
ProcessRule,
|
||||||
RerankingModel,
|
RerankingModel,
|
||||||
RetrievalModel,
|
RetrievalModel,
|
||||||
Rule,
|
|
||||||
Segmentation,
|
|
||||||
SegmentUpdateArgs,
|
SegmentUpdateArgs,
|
||||||
WebsiteInfo,
|
WebsiteInfo,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -112,6 +112,7 @@ import pytest
|
|||||||
from graphon.model_runtime.entities.model_entities import ModelType
|
from graphon.model_runtime.entities.model_entities import ModelType
|
||||||
|
|
||||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||||
|
from core.rag.entities import PreProcessingRule, Rule, Segmentation
|
||||||
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
||||||
from models.dataset import Dataset, DatasetProcessRule, Document
|
from models.dataset import Dataset, DatasetProcessRule, Document
|
||||||
from services.dataset_service import DatasetService, DocumentService
|
from services.dataset_service import DatasetService, DocumentService
|
||||||
@ -122,10 +123,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
|
|||||||
KnowledgeConfig,
|
KnowledgeConfig,
|
||||||
NotionInfo,
|
NotionInfo,
|
||||||
NotionPage,
|
NotionPage,
|
||||||
PreProcessingRule,
|
|
||||||
ProcessRule,
|
ProcessRule,
|
||||||
Rule,
|
|
||||||
Segmentation,
|
|
||||||
WebsiteInfo,
|
WebsiteInfo,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user