mirror of https://github.com/langgenius/dify.git
fix: drop dead code phase2 unused class (#22042)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
parent 3587bd4040
commit d2933c2bfe
@@ -25,12 +25,6 @@ class UnsupportedFileTypeError(BaseHTTPException):
     code = 415


-class HighQualityDatasetOnlyError(BaseHTTPException):
-    error_code = "high_quality_dataset_only"
-    description = "Current operation only supports 'high-quality' datasets."
-    code = 400
-
-
 class DatasetNotInitializedError(BaseHTTPException):
     error_code = "dataset_not_initialized"
     description = "The dataset is still being initialized or indexing. Please wait a moment."
@@ -13,12 +13,6 @@ class CurrentPasswordIncorrectError(BaseHTTPException):
     code = 400


-class ProviderRequestFailedError(BaseHTTPException):
-    error_code = "provider_request_failed"
-    description = None
-    code = 400
-
-
 class InvalidInvitationCodeError(BaseHTTPException):
     error_code = "invalid_invitation_code"
     description = "Invalid invitation code."
@@ -25,12 +25,6 @@ class UnsupportedFileTypeError(BaseHTTPException):
     code = 415


-class HighQualityDatasetOnlyError(BaseHTTPException):
-    error_code = "high_quality_dataset_only"
-    description = "Current operation only supports 'high-quality' datasets."
-    code = 400
-
-
 class DatasetNotInitializedError(BaseHTTPException):
     error_code = "dataset_not_initialized"
     description = "The dataset is still being initialized or indexing. Please wait a moment."
@@ -10,8 +10,3 @@ class RecordNotFoundError(TaskPipilineError):
 class WorkflowRunNotFoundError(RecordNotFoundError):
     def __init__(self, workflow_run_id: str):
         super().__init__("WorkflowRun", workflow_run_id)
-
-
-class WorkflowNodeExecutionNotFoundError(RecordNotFoundError):
-    def __init__(self, workflow_node_execution_id: str):
-        super().__init__("WorkflowNodeExecution", workflow_node_execution_id)
@@ -7,13 +7,6 @@ if TYPE_CHECKING:
 _tool_file_manager_factory: Callable[[], "ToolFileManager"] | None = None


-class ToolFileParser:
-    @staticmethod
-    def get_tool_file_manager() -> "ToolFileManager":
-        assert _tool_file_manager_factory is not None
-        return _tool_file_manager_factory()
-
-
 def set_tool_file_manager_factory(factory: Callable[[], "ToolFileManager"]) -> None:
     global _tool_file_manager_factory
     _tool_file_manager_factory = factory
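Note: the deleted ToolFileParser was only a static wrapper around the module-level factory that survives in this file. A minimal standalone sketch of that late-binding factory pattern (the ToolFileManager stub and its sign_file method are illustrative, not Dify's real API):

from collections.abc import Callable


class ToolFileManager:
    # Stand-in for the real manager; only here so the sketch runs on its own.
    def sign_file(self, file_id: str) -> str:
        return f"signed:{file_id}"


_tool_file_manager_factory: Callable[[], ToolFileManager] | None = None


def set_tool_file_manager_factory(factory: Callable[[], ToolFileManager]) -> None:
    global _tool_file_manager_factory
    _tool_file_manager_factory = factory


# Wired once at startup; callers then invoke the factory directly instead of
# going through the removed ToolFileParser.get_tool_file_manager().
set_tool_file_manager_factory(ToolFileManager)
assert _tool_file_manager_factory is not None
print(_tool_file_manager_factory().sign_file("abc"))  # signed:abc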
@@ -1,52 +0,0 @@
-import base64
-import hashlib
-import hmac
-import os
-import time
-
-from pydantic import BaseModel, Field
-
-from configs import dify_config
-
-
-class SignedUrlParams(BaseModel):
-    sign_key: str = Field(..., description="The sign key")
-    timestamp: str = Field(..., description="Timestamp")
-    nonce: str = Field(..., description="Nonce")
-    sign: str = Field(..., description="Signature")
-
-
-class UrlSigner:
-    @classmethod
-    def get_signed_url(cls, url: str, sign_key: str, prefix: str) -> str:
-        signed_url_params = cls.get_signed_url_params(sign_key, prefix)
-        return (
-            f"{url}?timestamp={signed_url_params.timestamp}"
-            f"&nonce={signed_url_params.nonce}&sign={signed_url_params.sign}"
-        )
-
-    @classmethod
-    def get_signed_url_params(cls, sign_key: str, prefix: str) -> SignedUrlParams:
-        timestamp = str(int(time.time()))
-        nonce = os.urandom(16).hex()
-        sign = cls._sign(sign_key, timestamp, nonce, prefix)
-
-        return SignedUrlParams(sign_key=sign_key, timestamp=timestamp, nonce=nonce, sign=sign)
-
-    @classmethod
-    def verify(cls, sign_key: str, timestamp: str, nonce: str, sign: str, prefix: str) -> bool:
-        recalculated_sign = cls._sign(sign_key, timestamp, nonce, prefix)
-
-        return sign == recalculated_sign
-
-    @classmethod
-    def _sign(cls, sign_key: str, timestamp: str, nonce: str, prefix: str) -> str:
-        if not dify_config.SECRET_KEY:
-            raise Exception("SECRET_KEY is not set")
-
-        data_to_sign = f"{prefix}|{sign_key}|{timestamp}|{nonce}"
-        secret_key = dify_config.SECRET_KEY.encode()
-        sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
-        encoded_sign = base64.urlsafe_b64encode(sign).decode()
-
-        return encoded_sign
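Note: the signing scheme the deleted UrlSigner implemented is a plain HMAC-SHA256 over "prefix|sign_key|timestamp|nonce", base64url-encoded. A standalone sketch of the same round trip (the hard-coded secret is a placeholder for dify_config.SECRET_KEY):

import base64
import hashlib
import hmac
import os
import time

SECRET_KEY = b"example-secret"  # assumption: stands in for dify_config.SECRET_KEY


def sign(sign_key: str, timestamp: str, nonce: str, prefix: str) -> str:
    # Same construction as the removed UrlSigner._sign
    data_to_sign = f"{prefix}|{sign_key}|{timestamp}|{nonce}"
    digest = hmac.new(SECRET_KEY, data_to_sign.encode(), hashlib.sha256).digest()
    return base64.urlsafe_b64encode(digest).decode()


timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
signature = sign("file-key", timestamp, nonce, "image-preview")
# verify() simply recomputed the signature and compared it with the one received:
assert hmac.compare_digest(signature, sign("file-key", timestamp, nonce, "image-preview"))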
@@ -135,17 +135,6 @@ class PluginEntity(PluginInstallation):
         return self


-class GithubPackage(BaseModel):
-    repo: str
-    version: str
-    package: str
-
-
-class GithubVersion(BaseModel):
-    repo: str
-    version: str
-
-
 class GenericProviderID:
     organization: str
     plugin_name: str
@@ -1,12 +0,0 @@
-"""Abstract interface for document clean implementations."""
-
-from core.rag.cleaner.cleaner_base import BaseCleaner
-
-
-class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
-    def clean(self, content) -> str:
-        """clean document content."""
-        from unstructured.cleaners.core import clean_extra_whitespace
-
-        # Returns "ITEM 1A: RISK FACTORS"
-        return clean_extra_whitespace(content)
@@ -1,15 +0,0 @@
-"""Abstract interface for document clean implementations."""
-
-from core.rag.cleaner.cleaner_base import BaseCleaner
-
-
-class UnstructuredGroupBrokenParagraphsCleaner(BaseCleaner):
-    def clean(self, content) -> str:
-        """clean document content."""
-        import re
-
-        from unstructured.cleaners.core import group_broken_paragraphs
-
-        para_split_re = re.compile(r"(\s*\n\s*){3}")
-
-        return group_broken_paragraphs(content, paragraph_split=para_split_re)
@@ -1,12 +0,0 @@
-"""Abstract interface for document clean implementations."""
-
-from core.rag.cleaner.cleaner_base import BaseCleaner
-
-
-class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
-    def clean(self, content) -> str:
-        """clean document content."""
-        from unstructured.cleaners.core import clean_non_ascii_chars
-
-        # Returns "This text contains non-ascii characters!"
-        return clean_non_ascii_chars(content)
@@ -1,12 +0,0 @@
-"""Abstract interface for document clean implementations."""
-
-from core.rag.cleaner.cleaner_base import BaseCleaner
-
-
-class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
-    def clean(self, content) -> str:
-        """Replaces unicode quote characters, such as the \x91 character in a string."""
-
-        from unstructured.cleaners.core import replace_unicode_quotes
-
-        return replace_unicode_quotes(content)
@@ -1,11 +0,0 @@
-"""Abstract interface for document clean implementations."""
-
-from core.rag.cleaner.cleaner_base import BaseCleaner
-
-
-class UnstructuredTranslateTextCleaner(BaseCleaner):
-    def clean(self, content) -> str:
-        """clean document content."""
-        from unstructured.cleaners.translate import translate_text
-
-        return translate_text(content)
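Note: the five deleted cleaner modules above were one-method wrappers over functions from the unstructured package, which remain directly callable. A sketch, assuming unstructured is installed (expected outputs follow the comments in the removed code):

from unstructured.cleaners.core import (
    clean_extra_whitespace,
    clean_non_ascii_chars,
    group_broken_paragraphs,
    replace_unicode_quotes,
)

print(clean_extra_whitespace("ITEM 1A:     RISK FACTORS"))  # "ITEM 1A: RISK FACTORS"
print(clean_non_ascii_chars("This text contains non-ascii characters!\x88"))
print(replace_unicode_quotes("\x91A quoted string\x92"))
print(group_broken_paragraphs("A paragraph\nsplit across\nlines."))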
@@ -1,17 +0,0 @@
-from typing import Optional
-
-from pydantic import BaseModel
-
-
-class ClusterEntity(BaseModel):
-    """
-    Model Config Entity.
-    """
-
-    name: str
-    cluster_id: str
-    displayName: str
-    region: str
-    spendingLimit: Optional[int] = 1000
-    version: str
-    createdBy: str
@@ -9,8 +9,7 @@ from __future__ import annotations

 import contextlib
 import mimetypes
-from abc import ABC, abstractmethod
-from collections.abc import Generator, Iterable, Mapping
+from collections.abc import Generator, Mapping
 from io import BufferedReader, BytesIO
 from pathlib import Path, PurePath
 from typing import Any, Optional, Union
@@ -143,21 +142,3 @@ class Blob(BaseModel):
         if self.source:
             str_repr += f" {self.source}"
         return str_repr
-
-
-class BlobLoader(ABC):
-    """Abstract interface for blob loaders implementation.
-
-    Implementer should be able to load raw content from a datasource system according
-    to some criteria and return the raw content lazily as a stream of blobs.
-    """
-
-    @abstractmethod
-    def yield_blobs(
-        self,
-    ) -> Iterable[Blob]:
-        """A lazy loader for raw data represented by Blob object.
-
-        Returns:
-            A generator over blobs
-        """
|
@ -1,47 +0,0 @@
|
||||||
import logging
|
|
||||||
|
|
||||||
from core.rag.extractor.extractor_base import BaseExtractor
|
|
||||||
from core.rag.models.document import Document
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class UnstructuredPDFExtractor(BaseExtractor):
|
|
||||||
"""Load pdf files.
|
|
||||||
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: Path to the file to load.
|
|
||||||
|
|
||||||
api_url: Unstructured API URL
|
|
||||||
|
|
||||||
api_key: Unstructured API Key
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, file_path: str, api_url: str, api_key: str):
|
|
||||||
"""Initialize with file path."""
|
|
||||||
self._file_path = file_path
|
|
||||||
self._api_url = api_url
|
|
||||||
self._api_key = api_key
|
|
||||||
|
|
||||||
def extract(self) -> list[Document]:
|
|
||||||
if self._api_url:
|
|
||||||
from unstructured.partition.api import partition_via_api
|
|
||||||
|
|
||||||
elements = partition_via_api(
|
|
||||||
filename=self._file_path, api_url=self._api_url, api_key=self._api_key, strategy="auto"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
from unstructured.partition.pdf import partition_pdf
|
|
||||||
|
|
||||||
elements = partition_pdf(filename=self._file_path, strategy="auto")
|
|
||||||
|
|
||||||
from unstructured.chunking.title import chunk_by_title
|
|
||||||
|
|
||||||
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
|
|
||||||
documents = []
|
|
||||||
for chunk in chunks:
|
|
||||||
text = chunk.text.strip()
|
|
||||||
documents.append(Document(page_content=text))
|
|
||||||
|
|
||||||
return documents
|
|
||||||
|
|
@@ -1,34 +0,0 @@
-import logging
-
-from core.rag.extractor.extractor_base import BaseExtractor
-from core.rag.models.document import Document
-
-logger = logging.getLogger(__name__)
-
-
-class UnstructuredTextExtractor(BaseExtractor):
-    """Load msg files.
-
-
-    Args:
-        file_path: Path to the file to load.
-    """
-
-    def __init__(self, file_path: str, api_url: str):
-        """Initialize with file path."""
-        self._file_path = file_path
-        self._api_url = api_url
-
-    def extract(self) -> list[Document]:
-        from unstructured.partition.text import partition_text
-
-        elements = partition_text(filename=self._file_path)
-        from unstructured.chunking.title import chunk_by_title
-
-        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
-        documents = []
-        for chunk in chunks:
-            text = chunk.text.strip()
-            documents.append(Document(page_content=text))
-
-        return documents
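Note: both deleted extractors wrapped the same unstructured partition-then-chunk pipeline, which can be invoked directly. A sketch assuming the unstructured PDF extras are installed and a local sample.pdf exists (both names are illustrative):

from unstructured.chunking.title import chunk_by_title
from unstructured.partition.pdf import partition_pdf

# Same calls the removed UnstructuredPDFExtractor.extract() made in its local branch.
elements = partition_pdf(filename="sample.pdf", strategy="auto")
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
texts = [chunk.text.strip() for chunk in chunks]  # what was returned as Document.page_content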
@@ -10,7 +10,6 @@ from typing import (
     Any,
     Literal,
     Optional,
-    TypedDict,
     TypeVar,
     Union,
 )
@@ -168,167 +167,6 @@ class TextSplitter(BaseDocumentTransformer, ABC):
         raise NotImplementedError


-class CharacterTextSplitter(TextSplitter):
-    """Splitting text that looks at characters."""
-
-    def __init__(self, separator: str = "\n\n", **kwargs: Any) -> None:
-        """Create a new TextSplitter."""
-        super().__init__(**kwargs)
-        self._separator = separator
-
-    def split_text(self, text: str) -> list[str]:
-        """Split incoming text and return chunks."""
-        # First we naively split the large input into a bunch of smaller ones.
-        splits = _split_text_with_regex(text, self._separator, self._keep_separator)
-        _separator = "" if self._keep_separator else self._separator
-        _good_splits_lengths = []  # cache the lengths of the splits
-        if splits:
-            _good_splits_lengths.extend(self._length_function(splits))
-        return self._merge_splits(splits, _separator, _good_splits_lengths)
-
-
-class LineType(TypedDict):
-    """Line type as typed dict."""
-
-    metadata: dict[str, str]
-    content: str
-
-
-class HeaderType(TypedDict):
-    """Header type as typed dict."""
-
-    level: int
-    name: str
-    data: str
-
-
-class MarkdownHeaderTextSplitter:
-    """Splitting markdown files based on specified headers."""
-
-    def __init__(self, headers_to_split_on: list[tuple[str, str]], return_each_line: bool = False):
-        """Create a new MarkdownHeaderTextSplitter.
-
-        Args:
-            headers_to_split_on: Headers we want to track
-            return_each_line: Return each line w/ associated headers
-        """
-        # Output line-by-line or aggregated into chunks w/ common headers
-        self.return_each_line = return_each_line
-        # Given the headers we want to split on,
-        # (e.g., "#, ##, etc") order by length
-        self.headers_to_split_on = sorted(headers_to_split_on, key=lambda split: len(split[0]), reverse=True)
-
-    def aggregate_lines_to_chunks(self, lines: list[LineType]) -> list[Document]:
-        """Combine lines with common metadata into chunks
-        Args:
-            lines: Line of text / associated header metadata
-        """
-        aggregated_chunks: list[LineType] = []
-
-        for line in lines:
-            if aggregated_chunks and aggregated_chunks[-1]["metadata"] == line["metadata"]:
-                # If the last line in the aggregated list
-                # has the same metadata as the current line,
-                # append the current content to the last lines's content
-                aggregated_chunks[-1]["content"] += " \n" + line["content"]
-            else:
-                # Otherwise, append the current line to the aggregated list
-                aggregated_chunks.append(line)
-
-        return [Document(page_content=chunk["content"], metadata=chunk["metadata"]) for chunk in aggregated_chunks]
-
-    def split_text(self, text: str) -> list[Document]:
-        """Split markdown file
-        Args:
-            text: Markdown file"""
-
-        # Split the input text by newline character ("\n").
-        lines = text.split("\n")
-        # Final output
-        lines_with_metadata: list[LineType] = []
-        # Content and metadata of the chunk currently being processed
-        current_content: list[str] = []
-        current_metadata: dict[str, str] = {}
-        # Keep track of the nested header structure
-        # header_stack: List[Dict[str, Union[int, str]]] = []
-        header_stack: list[HeaderType] = []
-        initial_metadata: dict[str, str] = {}
-
-        for line in lines:
-            stripped_line = line.strip()
-            # Check each line against each of the header types (e.g., #, ##)
-            for sep, name in self.headers_to_split_on:
-                # Check if line starts with a header that we intend to split on
-                if stripped_line.startswith(sep) and (
-                    # Header with no text OR header is followed by space
-                    # Both are valid conditions that sep is being used a header
-                    len(stripped_line) == len(sep) or stripped_line[len(sep)] == " "
-                ):
-                    # Ensure we are tracking the header as metadata
-                    if name is not None:
-                        # Get the current header level
-                        current_header_level = sep.count("#")
-
-                        # Pop out headers of lower or same level from the stack
-                        while header_stack and header_stack[-1]["level"] >= current_header_level:
-                            # We have encountered a new header
-                            # at the same or higher level
-                            popped_header = header_stack.pop()
-                            # Clear the metadata for the
-                            # popped header in initial_metadata
-                            if popped_header["name"] in initial_metadata:
-                                initial_metadata.pop(popped_header["name"])
-
-                        # Push the current header to the stack
-                        header: HeaderType = {
-                            "level": current_header_level,
-                            "name": name,
-                            "data": stripped_line[len(sep) :].strip(),
-                        }
-                        header_stack.append(header)
-                        # Update initial_metadata with the current header
-                        initial_metadata[name] = header["data"]
-
-                    # Add the previous line to the lines_with_metadata
-                    # only if current_content is not empty
-                    if current_content:
-                        lines_with_metadata.append(
-                            {
-                                "content": "\n".join(current_content),
-                                "metadata": current_metadata.copy(),
-                            }
-                        )
-                        current_content.clear()
-
-                    break
-            else:
-                if stripped_line:
-                    current_content.append(stripped_line)
-                elif current_content:
-                    lines_with_metadata.append(
-                        {
-                            "content": "\n".join(current_content),
-                            "metadata": current_metadata.copy(),
-                        }
-                    )
-                    current_content.clear()
-
-            current_metadata = initial_metadata.copy()
-
-        if current_content:
-            lines_with_metadata.append({"content": "\n".join(current_content), "metadata": current_metadata})
-
-        # lines_with_metadata has each line with associated header metadata
-        # aggregate these into chunks based on common metadata
-        if not self.return_each_line:
-            return self.aggregate_lines_to_chunks(lines_with_metadata)
-        else:
-            return [
-                Document(page_content=chunk["content"], metadata=chunk["metadata"]) for chunk in lines_with_metadata
-            ]
-
-
-# should be in newer Python versions (3.10+)
 # @dataclass(frozen=True, kw_only=True, slots=True)
 @dataclass(frozen=True)
 class Tokenizer:
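Note: the deleted MarkdownHeaderTextSplitter attached header metadata to each chunk of a markdown document. A compact standalone sketch of that idea, simplified from the code above (it drops the header-stack popping and the Document wrapper):

def split_by_headers(text: str, headers: list[tuple[str, str]]) -> list[dict]:
    # Match longer separators first so "##" is not mistaken for "#".
    headers = sorted(headers, key=lambda h: len(h[0]), reverse=True)
    chunks: list[dict] = []
    metadata: dict[str, str] = {}
    content: list[str] = []
    for line in text.split("\n"):
        stripped = line.strip()
        for sep, name in headers:
            if stripped.startswith(sep) and (len(stripped) == len(sep) or stripped[len(sep)] == " "):
                if content:
                    chunks.append({"content": "\n".join(content), "metadata": metadata.copy()})
                    content.clear()
                metadata = {**metadata, name: stripped[len(sep):].strip()}
                break
        else:
            if stripped:
                content.append(stripped)
    if content:
        chunks.append({"content": "\n".join(content), "metadata": metadata})
    return chunks


print(split_by_headers("# Title\nIntro\n## Part A\nBody", [("#", "h1"), ("##", "h2")]))
# [{'content': 'Intro', 'metadata': {'h1': 'Title'}},
#  {'content': 'Body', 'metadata': {'h1': 'Title', 'h2': 'Part A'}}]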
@@ -1,79 +0,0 @@
-from typing import Optional
-
-from pydantic import BaseModel
-
-from core.app.entities.app_invoke_entities import InvokeFrom
-from core.workflow.nodes.base import BaseIterationState, BaseLoopState, BaseNode
-from models.enums import UserFrom
-from models.workflow import Workflow, WorkflowType
-
-from .node_entities import NodeRunResult
-from .variable_pool import VariablePool
-
-
-class WorkflowNodeAndResult:
-    node: BaseNode
-    result: Optional[NodeRunResult] = None
-
-    def __init__(self, node: BaseNode, result: Optional[NodeRunResult] = None):
-        self.node = node
-        self.result = result
-
-
-class WorkflowRunState:
-    tenant_id: str
-    app_id: str
-    workflow_id: str
-    workflow_type: WorkflowType
-    user_id: str
-    user_from: UserFrom
-    invoke_from: InvokeFrom
-
-    workflow_call_depth: int
-
-    start_at: float
-    variable_pool: VariablePool
-
-    total_tokens: int = 0
-
-    workflow_nodes_and_results: list[WorkflowNodeAndResult]
-
-    class NodeRun(BaseModel):
-        node_id: str
-        iteration_node_id: str
-        loop_node_id: str
-
-    workflow_node_runs: list[NodeRun]
-    workflow_node_steps: int
-
-    current_iteration_state: Optional[BaseIterationState]
-    current_loop_state: Optional[BaseLoopState]
-
-    def __init__(
-        self,
-        workflow: Workflow,
-        start_at: float,
-        variable_pool: VariablePool,
-        user_id: str,
-        user_from: UserFrom,
-        invoke_from: InvokeFrom,
-        workflow_call_depth: int,
-    ):
-        self.workflow_id = workflow.id
-        self.tenant_id = workflow.tenant_id
-        self.app_id = workflow.app_id
-        self.workflow_type = WorkflowType.value_of(workflow.type)
-        self.user_id = user_id
-        self.user_from = user_from
-        self.invoke_from = invoke_from
-        self.workflow_call_depth = workflow_call_depth
-
-        self.start_at = start_at
-        self.variable_pool = variable_pool
-
-        self.total_tokens = 0
-
-        self.workflow_node_steps = 1
-        self.workflow_node_runs = []
-        self.current_iteration_state = None
-        self.current_loop_state = None
@@ -1,4 +1,3 @@
-import json
 from collections.abc import Mapping
 from typing import Any

@@ -8,18 +7,6 @@ from core.file.models import File
 from core.variables import Segment


-class WorkflowRuntimeTypeEncoder(json.JSONEncoder):
-    def default(self, o: Any):
-        if isinstance(o, Segment):
-            return o.value
-        elif isinstance(o, File):
-            return o.to_dict()
-        elif isinstance(o, BaseModel):
-            return o.model_dump(mode="json")
-        else:
-            return super().default(o)
-
-
 class WorkflowRuntimeTypeConverter:
     def to_json_encodable(self, value: Mapping[str, Any] | None) -> Mapping[str, Any] | None:
         result = self._to_json_encodable_recursive(value)
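Note: the deleted WorkflowRuntimeTypeEncoder followed the standard json.JSONEncoder extension pattern; the WorkflowRuntimeTypeConverter kept above covers the same types. A standalone sketch of that encoder pattern, with an illustrative pydantic model in place of Dify's Segment/File:

import json

from pydantic import BaseModel


class Point(BaseModel):  # stand-in for the Segment/File/BaseModel cases handled above
    x: int
    y: int


class RuntimeTypeEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, BaseModel):
            return o.model_dump(mode="json")
        return super().default(o)


print(json.dumps({"point": Point(x=1, y=2)}, cls=RuntimeTypeEncoder))  # {"point": {"x": 1, "y": 2}}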
@@ -148,25 +148,6 @@ class StrLen:
         return value


-class FloatRange:
-    """Restrict input to an float in a range (inclusive)"""
-
-    def __init__(self, low, high, argument="argument"):
-        self.low = low
-        self.high = high
-        self.argument = argument
-
-    def __call__(self, value):
-        value = _get_float(value)
-        if value < self.low or value > self.high:
-            error = "Invalid {arg}: {val}. {arg} must be within the range {lo} - {hi}".format(
-                arg=self.argument, val=value, lo=self.low, hi=self.high
-            )
-            raise ValueError(error)
-
-        return value
-
-
 class DatetimeString:
     def __init__(self, format, argument="argument"):
         self.format = format
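Note: the removed FloatRange (like the DatetimeString that stays) is a Flask-RESTful reqparse-style validator: a callable passed as type= that raises ValueError on bad input. A hedged sketch of that wiring, with the range check re-created inline and an illustrative argument name:

from flask_restful import reqparse


class FloatRange:
    """Restrict input to a float in a range (inclusive) -- mirror of the removed validator."""

    def __init__(self, low, high, argument="argument"):
        self.low, self.high, self.argument = low, high, argument

    def __call__(self, value):
        value = float(value)
        if value < self.low or value > self.high:
            raise ValueError(f"Invalid {self.argument}: {value}. Must be within the range {self.low} - {self.high}")
        return value


parser = reqparse.RequestParser()
parser.add_argument("score", type=FloatRange(0.0, 1.0, argument="score"), location="json")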
@@ -1,11 +0,0 @@
-import json
-
-from pydantic import BaseModel
-
-
-class PydanticModelEncoder(json.JSONEncoder):
-    def default(self, o):
-        if isinstance(o, BaseModel):
-            return o.model_dump()
-        else:
-            super().default(o)
@@ -610,14 +610,6 @@ class InstalledApp(Base):
         return tenant


-class ConversationSource(StrEnum):
-    """This enumeration is designed for use with `Conversation.from_source`."""
-
-    # NOTE(QuantumGhost): The enumeration members may not cover all possible cases.
-    API = "api"
-    CONSOLE = "console"
-
-
 class Conversation(Base):
     __tablename__ = "conversations"
     __table_args__ = (
|
@ -4,13 +4,6 @@ from typing import Literal, Optional
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
class SegmentUpdateEntity(BaseModel):
|
|
||||||
content: str
|
|
||||||
answer: Optional[str] = None
|
|
||||||
keywords: Optional[list[str]] = None
|
|
||||||
enabled: Optional[bool] = None
|
|
||||||
|
|
||||||
|
|
||||||
class ParentMode(StrEnum):
|
class ParentMode(StrEnum):
|
||||||
FULL_DOC = "full-doc"
|
FULL_DOC = "full-doc"
|
||||||
PARAGRAPH = "paragraph"
|
PARAGRAPH = "paragraph"
|
||||||
|
|
@@ -153,10 +146,6 @@ class MetadataUpdateArgs(BaseModel):
     value: Optional[str | int | float] = None


-class MetadataValueUpdateArgs(BaseModel):
-    fields: list[MetadataUpdateArgs]
-
-
 class MetadataDetail(BaseModel):
     id: str
     name: str