diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md index 0478d2e1fa..69ae7071bb 100644 --- a/CONTRIBUTING_CN.md +++ b/CONTRIBUTING_CN.md @@ -6,7 +6,7 @@ 本指南和 Dify 一样在不断完善中。如果有任何滞后于项目实际情况的地方,恳请谅解,我们也欢迎任何改进建议。 -关于许可证,请花一分钟阅读我们简短的[许可和贡献者协议](./LICENSE)。社区同时也遵循[行为准则](https://github.com/langgenius/.github/blob/main/CODE_OF_CONDUCT.md)。 +关于许可证,请花一分钟阅读我们简短的[许可和贡献者协议](./LICENSE)。同时也请遵循社区[行为准则](https://github.com/langgenius/.github/blob/main/CODE_OF_CONDUCT.md)。 ## 开始之前 diff --git a/README.md b/README.md index 65e8001dd2..efb37d6083 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ README in বাংলা

-Dify is an open-source LLM app development platform. Its intuitive interface combines agentic AI workflow, RAG pipeline, agent capabilities, model management, observability features and more, letting you quickly go from prototype to production. +Dify is an open-source LLM app development platform. Its intuitive interface combines agentic AI workflow, RAG pipeline, agent capabilities, model management, observability features, and more, allowing you to quickly move from prototype to production. ## Quick start @@ -188,7 +188,7 @@ All of Dify's offerings come with corresponding APIs, so you could effortlessly - **Dify for enterprise / organizations
** We provide additional enterprise-centric features. [Log your questions for us through this chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) or [send us an email](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) to discuss enterprise needs.
- > For startups and small businesses using AWS, check out [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one-click. It's an affordable AMI offering with the option to create apps with custom logo and branding. + > For startups and small businesses using AWS, check out [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one click. It's an affordable AMI offering with the option to create apps with custom logo and branding. ## Staying ahead @@ -233,7 +233,7 @@ Deploy Dify to AWS with [CDK](https://aws.amazon.com/cdk/) For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). At the same time, please consider supporting Dify by sharing it on social media and at events and conferences. -> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). +> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). ## Community & contact diff --git a/api/.env.example b/api/.env.example index b5820fcdc2..2cc6410cdd 100644 --- a/api/.env.example +++ b/api/.env.example @@ -297,6 +297,7 @@ LINDORM_URL=http://ld-*******************-proxy-search-pub.lindorm.aliyuncs.com: LINDORM_USERNAME=admin LINDORM_PASSWORD=admin USING_UGC_INDEX=False +LINDORM_QUERY_TIMEOUT=1 # OceanBase Vector configuration OCEANBASE_VECTOR_HOST=127.0.0.1 diff --git a/api/app_factory.py b/api/app_factory.py index 9648d770ab..586f2ded9e 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -52,7 +52,6 @@ def initialize_extensions(app: DifyApp): ext_mail, ext_migrate, ext_otel, - ext_otel_patch, ext_proxy_fix, ext_redis, ext_repositories, @@ -85,7 +84,6 @@ def initialize_extensions(app: DifyApp): ext_proxy_fix, ext_blueprints, ext_commands, - ext_otel_patch, # Apply patch before initializing OpenTelemetry ext_otel, ] for ext in extensions: diff --git a/api/commands.py b/api/commands.py index e70d6e0b49..99a3211baf 100644 --- a/api/commands.py +++ b/api/commands.py @@ -17,6 +17,7 @@ from core.rag.models.document import Document from events.app_event import app_was_created from extensions.ext_database import db from extensions.ext_redis import redis_client +from extensions.ext_storage import storage from libs.helper import email as email_validate from libs.password import hash_password, password_pattern, valid_password from libs.rsa import generate_key_pair @@ -271,6 +272,7 @@ def migrate_knowledge_vector_database(): upper_collection_vector_types = { VectorType.MILVUS, VectorType.PGVECTOR, + VectorType.VASTBASE, VectorType.RELYT, VectorType.WEAVIATE, VectorType.ORACLE, @@ -814,3 +816,331 @@ def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[ ClearFreePlanTenantExpiredLogs.process(days, batch, tenant_ids) click.echo(click.style("Clear free plan 
tenant expired logs completed.", fg="green")) + + +@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") +@click.command("clear-orphaned-file-records", help="Clear orphaned file records.") +def clear_orphaned_file_records(force: bool): + """ + Clear orphaned file records in the database. + """ + + # define tables and columns to process + files_tables = [ + {"table": "upload_files", "id_column": "id", "key_column": "key"}, + {"table": "tool_files", "id_column": "id", "key_column": "file_key"}, + ] + ids_tables = [ + {"type": "uuid", "table": "message_files", "column": "upload_file_id"}, + {"type": "text", "table": "documents", "column": "data_source_info"}, + {"type": "text", "table": "document_segments", "column": "content"}, + {"type": "text", "table": "messages", "column": "answer"}, + {"type": "text", "table": "workflow_node_executions", "column": "inputs"}, + {"type": "text", "table": "workflow_node_executions", "column": "process_data"}, + {"type": "text", "table": "workflow_node_executions", "column": "outputs"}, + {"type": "text", "table": "conversations", "column": "introduction"}, + {"type": "text", "table": "conversations", "column": "system_instruction"}, + {"type": "json", "table": "messages", "column": "inputs"}, + {"type": "json", "table": "messages", "column": "message"}, + ] + + # notify user and ask for confirmation + click.echo( + click.style( + "This command will first find and delete orphaned file records from the message_files table,", fg="yellow" + ) + ) + click.echo( + click.style( + "and then it will find and delete orphaned file records in the following tables:", + fg="yellow", + ) + ) + for files_table in files_tables: + click.echo(click.style(f"- {files_table['table']}", fg="yellow")) + click.echo( + click.style("The following tables and columns will be scanned to find orphaned file records:", fg="yellow") + ) + for ids_table in ids_tables: + click.echo(click.style(f"- {ids_table['table']} ({ids_table['column']})", fg="yellow")) + click.echo("") + + click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red")) + click.echo( + click.style( + ( + "Since not all patterns have been fully tested, " + "please note that this command may delete unintended file records." + ), + fg="yellow", + ) + ) + click.echo( + click.style("This cannot be undone. Please make sure to back up your database before proceeding.", fg="yellow") + ) + click.echo( + click.style( + ( + "It is also recommended to run this during the maintenance window, " + "as this may cause high load on your instance." 
+ ), + fg="yellow", + ) + ) + if not force: + click.confirm("Do you want to proceed?", abort=True) + + # start the cleanup process + click.echo(click.style("Starting orphaned file records cleanup.", fg="white")) + + # clean up the orphaned records in the message_files table where message_id doesn't exist in messages table + try: + click.echo( + click.style("- Listing message_files records where message_id doesn't exist in messages table", fg="white") + ) + query = ( + "SELECT mf.id, mf.message_id " + "FROM message_files mf LEFT JOIN messages m ON mf.message_id = m.id " + "WHERE m.id IS NULL" + ) + orphaned_message_files = [] + with db.engine.begin() as conn: + rs = conn.execute(db.text(query)) + for i in rs: + orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])}) + + if orphaned_message_files: + click.echo(click.style(f"Found {len(orphaned_message_files)} orphaned message_files records:", fg="white")) + for record in orphaned_message_files: + click.echo(click.style(f" - id: {record['id']}, message_id: {record['message_id']}", fg="black")) + + if not force: + click.confirm( + ( + f"Do you want to proceed " + f"to delete all {len(orphaned_message_files)} orphaned message_files records?" + ), + abort=True, + ) + + click.echo(click.style("- Deleting orphaned message_files records", fg="white")) + query = "DELETE FROM message_files WHERE id IN :ids" + with db.engine.begin() as conn: + conn.execute(db.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])}) + click.echo( + click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green") + ) + else: + click.echo(click.style("No orphaned message_files records found. There is nothing to delete.", fg="green")) + except Exception as e: + click.echo(click.style(f"Error deleting orphaned message_files records: {str(e)}", fg="red")) + + # clean up the orphaned records in the rest of the *_files tables + try: + # fetch file id and keys from each table + all_files_in_tables = [] + for files_table in files_tables: + click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white")) + query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}" + with db.engine.begin() as conn: + rs = conn.execute(db.text(query)) + for i in rs: + all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]}) + click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) + + # fetch referred table and columns + guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" + all_ids_in_tables = [] + for ids_table in ids_tables: + query = "" + if ids_table["type"] == "uuid": + click.echo( + click.style( + f"- Listing file ids in column {ids_table['column']} in table {ids_table['table']}", fg="white" + ) + ) + query = ( + f"SELECT {ids_table['column']} FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" + ) + with db.engine.begin() as conn: + rs = conn.execute(db.text(query)) + for i in rs: + all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])}) + elif ids_table["type"] == "text": + click.echo( + click.style( + f"- Listing file-id-like strings in column {ids_table['column']} in table {ids_table['table']}", + fg="white", + ) + ) + query = ( + f"SELECT regexp_matches({ids_table['column']}, '{guid_regexp}', 'g') AS extracted_id " + f"FROM {ids_table['table']}" + ) + with db.engine.begin() as conn: + rs = 
conn.execute(db.text(query)) + for i in rs: + for j in i[0]: + all_ids_in_tables.append({"table": ids_table["table"], "id": j}) + elif ids_table["type"] == "json": + click.echo( + click.style( + ( + f"- Listing file-id-like JSON string in column {ids_table['column']} " + f"in table {ids_table['table']}" + ), + fg="white", + ) + ) + query = ( + f"SELECT regexp_matches({ids_table['column']}::text, '{guid_regexp}', 'g') AS extracted_id " + f"FROM {ids_table['table']}" + ) + with db.engine.begin() as conn: + rs = conn.execute(db.text(query)) + for i in rs: + for j in i[0]: + all_ids_in_tables.append({"table": ids_table["table"], "id": j}) + click.echo(click.style(f"Found {len(all_ids_in_tables)} file ids in tables.", fg="white")) + + except Exception as e: + click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) + return + + # find orphaned files + all_files = [file["id"] for file in all_files_in_tables] + all_ids = [file["id"] for file in all_ids_in_tables] + orphaned_files = list(set(all_files) - set(all_ids)) + if not orphaned_files: + click.echo(click.style("No orphaned file records found. There is nothing to delete.", fg="green")) + return + click.echo(click.style(f"Found {len(orphaned_files)} orphaned file records.", fg="white")) + for file in orphaned_files: + click.echo(click.style(f"- orphaned file id: {file}", fg="black")) + if not force: + click.confirm(f"Do you want to proceed to delete all {len(orphaned_files)} orphaned file records?", abort=True) + + # delete orphaned records for each file + try: + for files_table in files_tables: + click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white")) + query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids" + with db.engine.begin() as conn: + conn.execute(db.text(query), {"ids": tuple(orphaned_files)}) + except Exception as e: + click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red")) + return + click.echo(click.style(f"Removed {len(orphaned_files)} orphaned file records.", fg="green")) + + +@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") +@click.command("remove-orphaned-files-on-storage", help="Remove orphaned files on the storage.") +def remove_orphaned_files_on_storage(force: bool): + """ + Remove orphaned files on the storage. + """ + + # define tables and columns to process + files_tables = [ + {"table": "upload_files", "key_column": "key"}, + {"table": "tool_files", "key_column": "file_key"}, + ] + storage_paths = ["image_files", "tools", "upload_files"] + + # notify user and ask for confirmation + click.echo(click.style("This command will find and remove orphaned files on the storage,", fg="yellow")) + click.echo( + click.style("by comparing the files on the storage with the records in the following tables:", fg="yellow") + ) + for files_table in files_tables: + click.echo(click.style(f"- {files_table['table']}", fg="yellow")) + click.echo(click.style("The following paths on the storage will be scanned to find orphaned files:", fg="yellow")) + for storage_path in storage_paths: + click.echo(click.style(f"- {storage_path}", fg="yellow")) + click.echo("") + + click.echo(click.style("!!! 
USE WITH CAUTION !!!", fg="red")) + click.echo( + click.style( + "Currently, this command will work only for opendal based storage (STORAGE_TYPE=opendal).", fg="yellow" + ) + ) + click.echo( + click.style( + "Since not all patterns have been fully tested, please note that this command may delete unintended files.", + fg="yellow", + ) + ) + click.echo( + click.style("This cannot be undone. Please make sure to back up your storage before proceeding.", fg="yellow") + ) + click.echo( + click.style( + ( + "It is also recommended to run this during the maintenance window, " + "as this may cause high load on your instance." + ), + fg="yellow", + ) + ) + if not force: + click.confirm("Do you want to proceed?", abort=True) + + # start the cleanup process + click.echo(click.style("Starting orphaned files cleanup.", fg="white")) + + # fetch file id and keys from each table + all_files_in_tables = [] + try: + for files_table in files_tables: + click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white")) + query = f"SELECT {files_table['key_column']} FROM {files_table['table']}" + with db.engine.begin() as conn: + rs = conn.execute(db.text(query)) + for i in rs: + all_files_in_tables.append(str(i[0])) + click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) + except Exception as e: + click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) + + all_files_on_storage = [] + for storage_path in storage_paths: + try: + click.echo(click.style(f"- Scanning files on storage path {storage_path}", fg="white")) + files = storage.scan(path=storage_path, files=True, directories=False) + all_files_on_storage.extend(files) + except FileNotFoundError as e: + click.echo(click.style(f" -> Skipping path {storage_path} as it does not exist.", fg="yellow")) + continue + except Exception as e: + click.echo(click.style(f" -> Error scanning files on storage path {storage_path}: {str(e)}", fg="red")) + continue + click.echo(click.style(f"Found {len(all_files_on_storage)} files on storage.", fg="white")) + + # find orphaned files + orphaned_files = list(set(all_files_on_storage) - set(all_files_in_tables)) + if not orphaned_files: + click.echo(click.style("No orphaned files found. 
There is nothing to remove.", fg="green")) + return + click.echo(click.style(f"Found {len(orphaned_files)} orphaned files.", fg="white")) + for file in orphaned_files: + click.echo(click.style(f"- orphaned file: {file}", fg="black")) + if not force: + click.confirm(f"Do you want to proceed to remove all {len(orphaned_files)} orphaned files?", abort=True) + + # delete orphaned files + removed_files = 0 + error_files = 0 + for file in orphaned_files: + try: + storage.delete(file) + removed_files += 1 + click.echo(click.style(f"- Removing orphaned file: {file}", fg="white")) + except Exception as e: + error_files += 1 + click.echo(click.style(f"- Error deleting orphaned file {file}: {str(e)}", fg="red")) + continue + if error_files == 0: + click.echo(click.style(f"Removed {removed_files} orphaned files without errors.", fg="green")) + else: + click.echo(click.style(f"Removed {removed_files} orphaned files, with {error_files} errors.", fg="yellow")) diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index c2ad24094a..d285515998 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -39,6 +39,7 @@ from .vdb.tencent_vector_config import TencentVectorDBConfig from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig from .vdb.tidb_vector_config import TiDBVectorConfig from .vdb.upstash_config import UpstashConfig +from .vdb.vastbase_vector_config import VastbaseVectorConfig from .vdb.vikingdb_config import VikingDBConfig from .vdb.weaviate_config import WeaviateConfig @@ -270,6 +271,7 @@ class MiddlewareConfig( OpenSearchConfig, OracleConfig, PGVectorConfig, + VastbaseVectorConfig, PGVectoRSConfig, QdrantConfig, RelytConfig, diff --git a/api/configs/middleware/vdb/lindorm_config.py b/api/configs/middleware/vdb/lindorm_config.py index 95e1d1cfca..e80e3f4a35 100644 --- a/api/configs/middleware/vdb/lindorm_config.py +++ b/api/configs/middleware/vdb/lindorm_config.py @@ -32,3 +32,4 @@ class LindormConfig(BaseSettings): description="Using UGC index will store the same type of Index in a single index but can retrieve separately.", default=False, ) + LINDORM_QUERY_TIMEOUT: Optional[float] = Field(description="The lindorm search request timeout (s)", default=2.0) diff --git a/api/configs/middleware/vdb/opensearch_config.py b/api/configs/middleware/vdb/opensearch_config.py index 81dde4c04d..96f478e9a6 100644 --- a/api/configs/middleware/vdb/opensearch_config.py +++ b/api/configs/middleware/vdb/opensearch_config.py @@ -1,4 +1,5 @@ -from typing import Optional +import enum +from typing import Literal, Optional from pydantic import Field, PositiveInt from pydantic_settings import BaseSettings @@ -9,6 +10,14 @@ class OpenSearchConfig(BaseSettings): Configuration settings for OpenSearch """ + class AuthMethod(enum.StrEnum): + """ + Authentication method for OpenSearch + """ + + BASIC = "basic" + AWS_MANAGED_IAM = "aws_managed_iam" + OPENSEARCH_HOST: Optional[str] = Field( description="Hostname or IP address of the OpenSearch server (e.g., 'localhost' or 'opensearch.example.com')", default=None, @@ -19,6 +28,16 @@ class OpenSearchConfig(BaseSettings): default=9200, ) + OPENSEARCH_SECURE: bool = Field( + description="Whether to use SSL/TLS encrypted connection for OpenSearch (True for HTTPS, False for HTTP)", + default=False, + ) + + OPENSEARCH_AUTH_METHOD: AuthMethod = Field( + description="Authentication method for OpenSearch connection (default is 'basic')", + default=AuthMethod.BASIC, + ) + OPENSEARCH_USER: Optional[str] = Field( 
description="Username for authenticating with OpenSearch", default=None, @@ -29,7 +48,11 @@ class OpenSearchConfig(BaseSettings): default=None, ) - OPENSEARCH_SECURE: bool = Field( - description="Whether to use SSL/TLS encrypted connection for OpenSearch (True for HTTPS, False for HTTP)", - default=False, + OPENSEARCH_AWS_REGION: Optional[str] = Field( + description="AWS region for OpenSearch (e.g. 'us-west-2')", + default=None, + ) + + OPENSEARCH_AWS_SERVICE: Optional[Literal["es", "aoss"]] = Field( + description="AWS service for OpenSearch (e.g. 'aoss' for OpenSearch Serverless)", default=None ) diff --git a/api/configs/middleware/vdb/vastbase_vector_config.py b/api/configs/middleware/vdb/vastbase_vector_config.py new file mode 100644 index 0000000000..816d6df90a --- /dev/null +++ b/api/configs/middleware/vdb/vastbase_vector_config.py @@ -0,0 +1,45 @@ +from typing import Optional + +from pydantic import Field, PositiveInt +from pydantic_settings import BaseSettings + + +class VastbaseVectorConfig(BaseSettings): + """ + Configuration settings for Vector (Vastbase with vector extension) + """ + + VASTBASE_HOST: Optional[str] = Field( + description="Hostname or IP address of the Vastbase server with Vector extension (e.g., 'localhost')", + default=None, + ) + + VASTBASE_PORT: PositiveInt = Field( + description="Port number on which the Vastbase server is listening (default is 5432)", + default=5432, + ) + + VASTBASE_USER: Optional[str] = Field( + description="Username for authenticating with the Vastbase database", + default=None, + ) + + VASTBASE_PASSWORD: Optional[str] = Field( + description="Password for authenticating with the Vastbase database", + default=None, + ) + + VASTBASE_DATABASE: Optional[str] = Field( + description="Name of the Vastbase database to connect to", + default=None, + ) + + VASTBASE_MIN_CONNECTION: PositiveInt = Field( + description="Min connection of the Vastbase database", + default=1, + ) + + VASTBASE_MAX_CONNECTION: PositiveInt = Field( + description="Max connection of the Vastbase database", + default=5, + ) diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py index c7aedc5b8a..c7960e1356 100644 --- a/api/configs/packaging/__init__.py +++ b/api/configs/packaging/__init__.py @@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings): CURRENT_VERSION: str = Field( description="Dify version", - default="1.2.0", + default="1.3.1", ) COMMIT_SHA: str = Field( diff --git a/api/constants/__init__.py b/api/constants/__init__.py index 9162357466..a84de0a451 100644 --- a/api/constants/__init__.py +++ b/api/constants/__init__.py @@ -16,11 +16,25 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS]) if dify_config.ETL_TYPE == "Unstructured": - DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"] + DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "vtt", "properties"] DOCUMENT_EXTENSIONS.extend(("doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub")) if dify_config.UNSTRUCTURED_API_URL: DOCUMENT_EXTENSIONS.append("ppt") DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS]) else: - DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"] + DOCUMENT_EXTENSIONS = [ + "txt", + "markdown", + "md", + "mdx", + "pdf", + "html", + "htm", + "xlsx", + "xls", + "docx", + "csv", + "vtt", + "properties", + ] DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS]) diff --git 
a/api/controllers/console/app/annotation.py b/api/controllers/console/app/annotation.py index fcd8ed1882..48353a63af 100644 --- a/api/controllers/console/app/annotation.py +++ b/api/controllers/console/app/annotation.py @@ -186,7 +186,7 @@ class AnnotationUpdateDeleteApi(Resource): app_id = str(app_id) annotation_id = str(annotation_id) AppAnnotationService.delete_app_annotation(app_id, annotation_id) - return {"result": "success"}, 200 + return {"result": "success"}, 204 class AnnotationBatchImportApi(Resource): diff --git a/api/controllers/console/app/audio.py b/api/controllers/console/app/audio.py index 12d9157dda..7519ae96c0 100644 --- a/api/controllers/console/app/audio.py +++ b/api/controllers/console/app/audio.py @@ -80,8 +80,6 @@ class ChatMessageTextApi(Resource): @account_initialization_required @get_app_model def post(self, app_model: App): - from werkzeug.exceptions import InternalServerError - try: parser = reqparse.RequestParser() parser.add_argument("message_id", type=str, location="json") diff --git a/api/controllers/console/app/ops_trace.py b/api/controllers/console/app/ops_trace.py index dd25af8ebf..7176440e16 100644 --- a/api/controllers/console/app/ops_trace.py +++ b/api/controllers/console/app/ops_trace.py @@ -84,7 +84,7 @@ class TraceAppConfigApi(Resource): result = OpsService.delete_tracing_app_config(app_id=app_id, tracing_provider=args["tracing_provider"]) if not result: raise TracingConfigNotExist() - return {"result": "success"} + return {"result": "success"}, 204 except Exception as e: raise BadRequest(str(e)) diff --git a/api/controllers/console/auth/data_source_bearer_auth.py b/api/controllers/console/auth/data_source_bearer_auth.py index ea00c2b8c2..5f0762e4a5 100644 --- a/api/controllers/console/auth/data_source_bearer_auth.py +++ b/api/controllers/console/auth/data_source_bearer_auth.py @@ -65,7 +65,7 @@ class ApiKeyAuthDataSourceBindingDelete(Resource): ApiKeyAuthService.delete_provider_auth(current_user.current_tenant_id, binding_id) - return {"result": "success"}, 200 + return {"result": "success"}, 204 api.add_resource(ApiKeyAuthDataSource, "/api-key-auth/data-source") diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 752d124735..43615af709 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -657,6 +657,7 @@ class DatasetRetrievalSettingApi(Resource): | VectorType.ELASTICSEARCH | VectorType.ELASTICSEARCH_JA | VectorType.PGVECTOR + | VectorType.VASTBASE | VectorType.TIDB_ON_QDRANT | VectorType.LINDORM | VectorType.COUCHBASE @@ -706,6 +707,7 @@ class DatasetRetrievalSettingMockApi(Resource): | VectorType.ELASTICSEARCH_JA | VectorType.COUCHBASE | VectorType.PGVECTOR + | VectorType.VASTBASE | VectorType.LINDORM | VectorType.OPENGAUSS | VectorType.OCEANBASE diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 0b40312368..3588abeff5 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -40,7 +40,7 @@ from core.indexing_runner import IndexingRunner from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.errors.invoke import InvokeAuthorizationError -from core.plugin.manager.exc import PluginDaemonClientSideError +from core.plugin.impl.exc import PluginDaemonClientSideError from 
core.rag.extractor.entity.extract_setting import ExtractSetting from extensions.ext_database import db from extensions.ext_redis import redis_client diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 696aaa94db..5c54ecbe81 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -131,7 +131,7 @@ class DatasetDocumentSegmentListApi(Resource): except services.errors.account.NoPermissionError as e: raise Forbidden(str(e)) SegmentService.delete_segments(segment_ids, document, dataset) - return {"result": "success"}, 200 + return {"result": "success"}, 204 class DatasetDocumentSegmentApi(Resource): @@ -333,7 +333,7 @@ class DatasetDocumentSegmentUpdateApi(Resource): except services.errors.account.NoPermissionError as e: raise Forbidden(str(e)) SegmentService.delete_segment(segment, document, dataset) - return {"result": "success"}, 200 + return {"result": "success"}, 204 class DatasetDocumentSegmentBatchImportApi(Resource): @@ -590,7 +590,7 @@ class ChildChunkUpdateApi(Resource): SegmentService.delete_child_chunk(child_chunk, dataset) except ChildChunkDeleteIndexServiceError as e: raise ChildChunkDeleteIndexError(str(e)) - return {"result": "success"}, 200 + return {"result": "success"}, 204 @setup_required @login_required diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index 2c031172bf..aee8323f23 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -135,7 +135,7 @@ class ExternalApiTemplateApi(Resource): raise Forbidden() ExternalDatasetService.delete_external_knowledge_api(current_user.current_tenant_id, external_knowledge_api_id) - return {"result": "success"}, 200 + return {"result": "success"}, 204 class ExternalApiUseCheckApi(Resource): diff --git a/api/controllers/console/datasets/metadata.py b/api/controllers/console/datasets/metadata.py index fc9711169f..e4cac40ca1 100644 --- a/api/controllers/console/datasets/metadata.py +++ b/api/controllers/console/datasets/metadata.py @@ -82,7 +82,7 @@ class DatasetMetadataApi(Resource): DatasetService.check_dataset_permission(dataset, current_user) MetadataService.delete_metadata(dataset_id_str, metadata_id_str) - return 200 + return {"result": "success"}, 204 class DatasetMetadataBuiltInFieldApi(Resource): diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py index 86550b2bdf..132da11878 100644 --- a/api/controllers/console/explore/installed_app.py +++ b/api/controllers/console/explore/installed_app.py @@ -113,7 +113,7 @@ class InstalledAppApi(InstalledAppResource): db.session.delete(installed_app) db.session.commit() - return {"result": "success", "message": "App uninstalled successfully"} + return {"result": "success", "message": "App uninstalled successfully"}, 204 def patch(self, installed_app): parser = reqparse.RequestParser() diff --git a/api/controllers/console/explore/saved_message.py b/api/controllers/console/explore/saved_message.py index 9f0c496645..3a1655d0ee 100644 --- a/api/controllers/console/explore/saved_message.py +++ b/api/controllers/console/explore/saved_message.py @@ -72,7 +72,7 @@ class SavedMessageApi(InstalledAppResource): SavedMessageService.delete(app_model, current_user, message_id) - return {"result": "success"} + return {"result": "success"}, 204 api.add_resource( diff --git 
a/api/controllers/console/extension.py b/api/controllers/console/extension.py index ed6cedb220..833da0d03c 100644 --- a/api/controllers/console/extension.py +++ b/api/controllers/console/extension.py @@ -99,7 +99,7 @@ class APIBasedExtensionDetailAPI(Resource): APIBasedExtensionService.delete(extension_data_from_db) - return {"result": "success"} + return {"result": "success"}, 204 api.add_resource(CodeBasedExtensionAPI, "/code-based-extension") diff --git a/api/controllers/console/tag/tags.py b/api/controllers/console/tag/tags.py index da83f64019..0d0d7ae95f 100644 --- a/api/controllers/console/tag/tags.py +++ b/api/controllers/console/tag/tags.py @@ -86,7 +86,7 @@ class TagUpdateDeleteApi(Resource): TagService.delete_tag(tag_id) - return 200 + return 204 class TagBindingCreateApi(Resource): diff --git a/api/controllers/console/workspace/endpoint.py b/api/controllers/console/workspace/endpoint.py index 46dee20f8b..aa1a78935d 100644 --- a/api/controllers/console/workspace/endpoint.py +++ b/api/controllers/console/workspace/endpoint.py @@ -5,7 +5,7 @@ from werkzeug.exceptions import Forbidden from controllers.console import api from controllers.console.wraps import account_initialization_required, setup_required from core.model_runtime.utils.encoders import jsonable_encoder -from core.plugin.manager.exc import PluginPermissionDeniedError +from core.plugin.impl.exc import PluginPermissionDeniedError from libs.login import login_required from services.plugin.endpoint_service import EndpointService diff --git a/api/controllers/console/workspace/plugin.py b/api/controllers/console/workspace/plugin.py index e9c1884c60..6f9ae18750 100644 --- a/api/controllers/console/workspace/plugin.py +++ b/api/controllers/console/workspace/plugin.py @@ -10,7 +10,7 @@ from controllers.console import api from controllers.console.workspace import plugin_permission_required from controllers.console.wraps import account_initialization_required, setup_required from core.model_runtime.utils.encoders import jsonable_encoder -from core.plugin.manager.exc import PluginDaemonClientSideError +from core.plugin.impl.exc import PluginDaemonClientSideError from libs.login import login_required from models.account import TenantPluginPermission from services.plugin.plugin_permission_service import PluginPermissionService diff --git a/api/controllers/console/wraps.py b/api/controllers/console/wraps.py index e5e8038ad7..6911181d82 100644 --- a/api/controllers/console/wraps.py +++ b/api/controllers/console/wraps.py @@ -10,6 +10,7 @@ from configs import dify_config from controllers.console.workspace.error import AccountNotInitializedError from extensions.ext_database import db from extensions.ext_redis import redis_client +from models.account import AccountStatus from models.dataset import RateLimitLog from models.model import DifySetup from services.feature_service import FeatureService, LicenseStatus @@ -24,7 +25,7 @@ def account_initialization_required(view): # check account initialization account = current_user - if account.status == "uninitialized": + if account.status == AccountStatus.UNINITIALIZED: raise AccountNotInitializedError() return view(*args, **kwargs) diff --git a/api/controllers/files/image_preview.py b/api/controllers/files/image_preview.py index 5adfe16a79..5bb28b3897 100644 --- a/api/controllers/files/image_preview.py +++ b/api/controllers/files/image_preview.py @@ -75,7 +75,7 @@ class FilePreviewApi(Resource): if args["as_attachment"]: encoded_filename = quote(upload_file.name) 
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}" - response.headers["Content-Type"] = "application/octet-stream" + response.headers["Content-Type"] = "application/octet-stream" return response diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py index cffa3665b1..c50f551faf 100644 --- a/api/controllers/service_api/app/annotation.py +++ b/api/controllers/service_api/app/annotation.py @@ -79,7 +79,7 @@ class AnnotationListApi(Resource): class AnnotationUpdateDeleteApi(Resource): @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) @marshal_with(annotation_fields) - def post(self, app_model: App, end_user: EndUser, annotation_id): + def put(self, app_model: App, end_user: EndUser, annotation_id): if not current_user.is_editor: raise Forbidden() @@ -98,7 +98,7 @@ class AnnotationUpdateDeleteApi(Resource): annotation_id = str(annotation_id) AppAnnotationService.delete_app_annotation(app_model.id, annotation_id) - return {"result": "success"}, 200 + return {"result": "success"}, 204 api.add_resource(AnnotationReplyActionApi, "/apps/annotation-reply/") diff --git a/api/controllers/service_api/app/conversation.py b/api/controllers/service_api/app/conversation.py index 334f2c5620..dfc357e1ab 100644 --- a/api/controllers/service_api/app/conversation.py +++ b/api/controllers/service_api/app/conversation.py @@ -14,6 +14,9 @@ from fields.conversation_fields import ( conversation_infinite_scroll_pagination_fields, simple_conversation_fields, ) +from fields.conversation_variable_fields import ( + conversation_variable_infinite_scroll_pagination_fields, +) from libs.helper import uuid_value from models.model import App, AppMode, EndUser from services.conversation_service import ConversationService @@ -69,7 +72,7 @@ class ConversationDetailApi(Resource): ConversationService.delete(app_model, conversation_id, end_user) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - return {"result": "success"}, 200 + return {"result": "success"}, 204 class ConversationRenameApi(Resource): @@ -93,6 +96,31 @@ class ConversationRenameApi(Resource): raise NotFound("Conversation Not Exists.") +class ConversationVariablesApi(Resource): + @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) + @marshal_with(conversation_variable_infinite_scroll_pagination_fields) + def get(self, app_model: App, end_user: EndUser, c_id): + # conversational variable only for chat app + app_mode = AppMode.value_of(app_model.mode) + if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: + raise NotChatAppError() + + conversation_id = str(c_id) + + parser = reqparse.RequestParser() + parser.add_argument("last_id", type=uuid_value, location="args") + parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") + args = parser.parse_args() + + try: + return ConversationService.get_conversational_variable( + app_model, conversation_id, end_user, args["limit"], args["last_id"] + ) + except services.errors.conversation.ConversationNotExistsError: + raise NotFound("Conversation Not Exists.") + + api.add_resource(ConversationRenameApi, "/conversations//name", endpoint="conversation_name") api.add_resource(ConversationApi, "/conversations") api.add_resource(ConversationDetailApi, "/conversations/", endpoint="conversation_detail") +api.add_resource(ConversationVariablesApi, 
"/conversations//variables", endpoint="conversation_variables") diff --git a/api/controllers/service_api/app/workflow.py b/api/controllers/service_api/app/workflow.py index 8b10a028f3..ca3e35aab8 100644 --- a/api/controllers/service_api/app/workflow.py +++ b/api/controllers/service_api/app/workflow.py @@ -59,7 +59,7 @@ class WorkflowRunDetailApi(Resource): Get a workflow task running detail """ app_mode = AppMode.value_of(app_model.mode) - if app_mode != AppMode.WORKFLOW: + if app_mode not in [AppMode.WORKFLOW, AppMode.ADVANCED_CHAT]: raise NotWorkflowAppError() workflow_run = db.session.query(WorkflowRun).filter(WorkflowRun.id == workflow_run_id).first() diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index eec6afc9ef..9e943e2b2d 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -323,7 +323,7 @@ class DocumentDeleteApi(DatasetApiResource): except services.errors.document.DocumentIndexingError: raise DocumentIndexingError("Cannot delete document during indexing.") - return {"result": "success"}, 200 + return {"result": "success"}, 204 class DocumentListApi(DatasetApiResource): diff --git a/api/controllers/service_api/dataset/metadata.py b/api/controllers/service_api/dataset/metadata.py index 298c8a8df8..35578eae54 100644 --- a/api/controllers/service_api/dataset/metadata.py +++ b/api/controllers/service_api/dataset/metadata.py @@ -63,7 +63,7 @@ class DatasetMetadataServiceApi(DatasetApiResource): DatasetService.check_dataset_permission(dataset, current_user) MetadataService.delete_metadata(dataset_id_str, metadata_id_str) - return 200 + return 204 class DatasetMetadataBuiltInFieldServiceApi(DatasetApiResource): diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index 2a79e15cc5..95753cfd67 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -159,7 +159,7 @@ class DatasetSegmentApi(DatasetApiResource): if not segment: raise NotFound("Segment not found.") SegmentService.delete_segment(segment, document, dataset) - return {"result": "success"}, 200 + return {"result": "success"}, 204 @cloud_edition_billing_resource_check("vector_space", "dataset") def post(self, tenant_id, dataset_id, document_id, segment_id): @@ -344,7 +344,7 @@ class DatasetChildChunkApi(DatasetApiResource): except ChildChunkDeleteIndexServiceError as e: raise ChildChunkDeleteIndexError(str(e)) - return {"result": "success"}, 200 + return {"result": "success"}, 204 @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") diff --git a/api/controllers/web/saved_message.py b/api/controllers/web/saved_message.py index 6a9b818907..ab2d4abcd3 100644 --- a/api/controllers/web/saved_message.py +++ b/api/controllers/web/saved_message.py @@ -67,7 +67,7 @@ class SavedMessageApi(WebApiResource): SavedMessageService.delete(app_model, end_user, message_id) - return {"result": "success"} + return {"result": "success"}, 204 api.add_resource(SavedMessageListApi, "/saved-messages") diff --git a/api/core/agent/strategy/plugin.py b/api/core/agent/strategy/plugin.py index a4b25f46e6..79b074cf95 100644 --- a/api/core/agent/strategy/plugin.py +++ b/api/core/agent/strategy/plugin.py @@ -4,7 +4,7 @@ from typing import Any, Optional from core.agent.entities import AgentInvokeMessage from core.agent.plugin_entities import 
AgentStrategyEntity, AgentStrategyParameter from core.agent.strategy.base import BaseAgentStrategy -from core.plugin.manager.agent import PluginAgentManager +from core.plugin.impl.agent import PluginAgentClient from core.plugin.utils.converter import convert_parameters_to_plugin_format @@ -42,7 +42,7 @@ class PluginAgentStrategy(BaseAgentStrategy): """ Invoke the agent strategy. """ - manager = PluginAgentManager() + manager = PluginAgentClient() initialized_params = self.initialize_parameters(params) params = convert_parameters_to_plugin_format(initialized_params) diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index ef582d28e0..fd0d7fafbd 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -7,6 +7,7 @@ from typing import Any, Literal, Optional, Union, overload from flask import Flask, current_app from pydantic import ValidationError +from sqlalchemy.orm import sessionmaker import contexts from configs import dify_config @@ -24,6 +25,8 @@ from core.app.entities.task_entities import ChatbotAppBlockingResponse, ChatbotA from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.ops.ops_trace_manager import TraceQueueManager from core.prompt.utils.get_thread_messages_length import get_thread_messages_length +from core.workflow.repository import RepositoryFactory +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from extensions.ext_database import db from factories import file_factory from models.account import Account @@ -158,11 +161,22 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create workflow node execution repository + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( + params={ + "tenant_id": application_generate_entity.app_config.tenant_id, + "app_id": application_generate_entity.app_config.app_id, + "session_factory": session_factory, + } + ) + return self._generate( workflow=workflow, user=user, invoke_from=invoke_from, application_generate_entity=application_generate_entity, + workflow_node_execution_repository=workflow_node_execution_repository, conversation=conversation, stream=streaming, ) @@ -215,11 +229,22 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create workflow node execution repository + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( + params={ + "tenant_id": application_generate_entity.app_config.tenant_id, + "app_id": application_generate_entity.app_config.app_id, + "session_factory": session_factory, + } + ) + return self._generate( workflow=workflow, user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_node_execution_repository=workflow_node_execution_repository, conversation=None, stream=streaming, ) @@ -270,11 +295,22 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create workflow node execution repository + 
session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( + params={ + "tenant_id": application_generate_entity.app_config.tenant_id, + "app_id": application_generate_entity.app_config.app_id, + "session_factory": session_factory, + } + ) + return self._generate( workflow=workflow, user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_node_execution_repository=workflow_node_execution_repository, conversation=None, stream=streaming, ) @@ -286,6 +322,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user: Union[Account, EndUser], invoke_from: InvokeFrom, application_generate_entity: AdvancedChatAppGenerateEntity, + workflow_node_execution_repository: WorkflowNodeExecutionRepository, conversation: Optional[Conversation] = None, stream: bool = True, ) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], Any, None]: @@ -296,6 +333,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): :param user: account or end user :param invoke_from: invoke from source :param application_generate_entity: application generate entity + :param workflow_node_execution_repository: repository for workflow node execution :param conversation: conversation :param stream: is stream """ @@ -348,6 +386,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation=conversation, message=message, user=user, + workflow_node_execution_repository=workflow_node_execution_repository, stream=stream, ) @@ -419,6 +458,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation: Conversation, message: Message, user: Union[Account, EndUser], + workflow_node_execution_repository: WorkflowNodeExecutionRepository, stream: bool = False, ) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]: """ @@ -430,6 +470,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): :param message: message :param user: account or end user :param stream: is stream + :param workflow_node_execution_repository: optional repository for workflow node execution :return: """ # init generate task pipeline @@ -442,6 +483,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user=user, stream=stream, dialogue_count=self._dialogue_count, + workflow_node_execution_repository=workflow_node_execution_repository, ) try: diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index 3bf6c330db..1f4db54a9c 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -65,6 +65,7 @@ from core.ops.ops_trace_manager import TraceQueueManager from core.workflow.enums import SystemVariableKey from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState from core.workflow.nodes import NodeType +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from events.message_event import message_was_created from extensions.ext_database import db from models import Conversation, EndUser, Message, MessageFile @@ -93,6 +94,7 @@ class AdvancedChatAppGenerateTaskPipeline: user: Union[Account, EndUser], stream: bool, dialogue_count: int, + workflow_node_execution_repository: WorkflowNodeExecutionRepository, ) -> None: self._base_task_pipeline = BasedGenerateTaskPipeline( 
application_generate_entity=application_generate_entity, @@ -123,6 +125,7 @@ class AdvancedChatAppGenerateTaskPipeline: SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, }, + workflow_node_execution_repository=workflow_node_execution_repository, ) self._task_state = WorkflowTaskState() @@ -684,7 +687,9 @@ class AdvancedChatAppGenerateTaskPipeline: ) elif isinstance(event, QueueMessageReplaceEvent): # published by moderation - yield self._message_cycle_manager._message_replace_to_stream_response(answer=event.text) + yield self._message_cycle_manager._message_replace_to_stream_response( + answer=event.text, reason=event.reason + ) elif isinstance(event, QueueAdvancedChatMessageEndEvent): if not graph_runtime_state: raise ValueError("graph runtime state not initialized.") @@ -695,7 +700,8 @@ class AdvancedChatAppGenerateTaskPipeline: if output_moderation_answer: self._task_state.answer = output_moderation_answer yield self._message_cycle_manager._message_replace_to_stream_response( - answer=output_moderation_answer + answer=output_moderation_answer, + reason=QueueMessageReplaceEvent.MessageReplaceReason.OUTPUT_MODERATION, ) # Save message diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index b1f527c0f2..995082b79d 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -153,6 +153,8 @@ class MessageBasedAppGenerator(BaseAppGenerator): query = application_generate_entity.query or "New conversation" else: query = next(iter(application_generate_entity.inputs.values()), "New conversation") + if isinstance(query, int): + query = str(query) query = query or "New conversation" conversation_name = (query[:20] + "…") if len(query) > 20 else query diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 08986b16f0..9c3d78a338 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -7,6 +7,7 @@ from typing import Any, Literal, Optional, Union, overload from flask import Flask, current_app from pydantic import ValidationError +from sqlalchemy.orm import sessionmaker import contexts from configs import dify_config @@ -22,6 +23,8 @@ from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerat from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.ops.ops_trace_manager import TraceQueueManager +from core.workflow.repository import RepositoryFactory +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from extensions.ext_database import db from factories import file_factory from models import Account, App, EndUser, Workflow @@ -133,12 +136,23 @@ class WorkflowAppGenerator(BaseAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create workflow node execution repository + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( + params={ + "tenant_id": application_generate_entity.app_config.tenant_id, + "app_id": application_generate_entity.app_config.app_id, + "session_factory": session_factory, + } + ) + return self._generate( 
app_model=app_model, workflow=workflow, user=user, application_generate_entity=application_generate_entity, invoke_from=invoke_from, + workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, workflow_thread_pool_id=workflow_thread_pool_id, ) @@ -151,6 +165,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user: Union[Account, EndUser], application_generate_entity: WorkflowAppGenerateEntity, invoke_from: InvokeFrom, + workflow_node_execution_repository: WorkflowNodeExecutionRepository, streaming: bool = True, workflow_thread_pool_id: Optional[str] = None, ) -> Union[Mapping[str, Any], Generator[str | Mapping[str, Any], None, None]]: @@ -162,6 +177,7 @@ class WorkflowAppGenerator(BaseAppGenerator): :param user: account or end user :param application_generate_entity: application generate entity :param invoke_from: invoke from source + :param workflow_node_execution_repository: repository for workflow node execution :param streaming: is stream :param workflow_thread_pool_id: workflow thread pool id """ @@ -193,6 +209,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow=workflow, queue_manager=queue_manager, user=user, + workflow_node_execution_repository=workflow_node_execution_repository, stream=streaming, ) @@ -245,12 +262,23 @@ class WorkflowAppGenerator(BaseAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create workflow node execution repository + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( + params={ + "tenant_id": application_generate_entity.app_config.tenant_id, + "app_id": application_generate_entity.app_config.app_id, + "session_factory": session_factory, + } + ) + return self._generate( app_model=app_model, workflow=workflow, user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, ) @@ -299,12 +327,23 @@ class WorkflowAppGenerator(BaseAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create workflow node execution repository + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( + params={ + "tenant_id": application_generate_entity.app_config.tenant_id, + "app_id": application_generate_entity.app_config.app_id, + "session_factory": session_factory, + } + ) + return self._generate( app_model=app_model, workflow=workflow, user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, ) @@ -361,6 +400,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow: Workflow, queue_manager: AppQueueManager, user: Union[Account, EndUser], + workflow_node_execution_repository: WorkflowNodeExecutionRepository, stream: bool = False, ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]: """ @@ -370,6 +410,7 @@ class WorkflowAppGenerator(BaseAppGenerator): :param queue_manager: queue manager :param user: account or end user :param stream: is stream + :param workflow_node_execution_repository: optional repository for workflow node execution :return: """ # init generate task 
pipeline @@ -379,6 +420,7 @@ class WorkflowAppGenerator(BaseAppGenerator): queue_manager=queue_manager, user=user, stream=stream, + workflow_node_execution_repository=workflow_node_execution_repository, ) try: diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index 1f998edb6a..67cad9c998 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -55,6 +55,7 @@ from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTas from core.app.task_pipeline.workflow_cycle_manage import WorkflowCycleManage from core.ops.ops_trace_manager import TraceQueueManager from core.workflow.enums import SystemVariableKey +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from extensions.ext_database import db from models.account import Account from models.enums import CreatedByRole @@ -82,6 +83,7 @@ class WorkflowAppGenerateTaskPipeline: queue_manager: AppQueueManager, user: Union[Account, EndUser], stream: bool, + workflow_node_execution_repository: WorkflowNodeExecutionRepository, ) -> None: self._base_task_pipeline = BasedGenerateTaskPipeline( application_generate_entity=application_generate_entity, @@ -109,6 +111,7 @@ class WorkflowAppGenerateTaskPipeline: SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, }, + workflow_node_execution_repository=workflow_node_execution_repository, ) self._application_generate_entity = application_generate_entity diff --git a/api/core/app/entities/queue_entities.py b/api/core/app/entities/queue_entities.py index 3702326406..7228020e9b 100644 --- a/api/core/app/entities/queue_entities.py +++ b/api/core/app/entities/queue_entities.py @@ -264,8 +264,16 @@ class QueueMessageReplaceEvent(AppQueueEvent): QueueMessageReplaceEvent entity """ + class MessageReplaceReason(StrEnum): + """ + Reason for message replace event + """ + + OUTPUT_MODERATION = "output_moderation" + event: QueueEvent = QueueEvent.MESSAGE_REPLACE text: str + reason: str class QueueRetrieverResourcesEvent(AppQueueEvent): diff --git a/api/core/app/entities/task_entities.py b/api/core/app/entities/task_entities.py index f23ee1b9fd..817699bd20 100644 --- a/api/core/app/entities/task_entities.py +++ b/api/core/app/entities/task_entities.py @@ -148,6 +148,7 @@ class MessageReplaceStreamResponse(StreamResponse): event: StreamEvent = StreamEvent.MESSAGE_REPLACE answer: str + reason: str class AgentThoughtStreamResponse(StreamResponse): diff --git a/api/core/app/task_pipeline/based_generate_task_pipeline.py b/api/core/app/task_pipeline/based_generate_task_pipeline.py index a2e06d4e1f..5331c0cc94 100644 --- a/api/core/app/task_pipeline/based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/based_generate_task_pipeline.py @@ -126,12 +126,12 @@ class BasedGenerateTaskPipeline: if self._output_moderation_handler: self._output_moderation_handler.stop_thread() - completion = self._output_moderation_handler.moderation_completion( + completion, flagged = self._output_moderation_handler.moderation_completion( completion=completion, public_event=False ) self._output_moderation_handler = None - - return completion + if flagged: + return completion return None diff --git a/api/core/app/task_pipeline/message_cycle_manage.py b/api/core/app/task_pipeline/message_cycle_manage.py index 6223b33b67..fde506639f 100644 --- 
a/api/core/app/task_pipeline/message_cycle_manage.py +++ b/api/core/app/task_pipeline/message_cycle_manage.py @@ -182,10 +182,12 @@ class MessageCycleManage: from_variable_selector=from_variable_selector, ) - def _message_replace_to_stream_response(self, answer: str) -> MessageReplaceStreamResponse: + def _message_replace_to_stream_response(self, answer: str, reason: str = "") -> MessageReplaceStreamResponse: """ Message replace to stream response. :param answer: answer :return: """ - return MessageReplaceStreamResponse(task_id=self._application_generate_entity.task_id, answer=answer) + return MessageReplaceStreamResponse( + task_id=self._application_generate_entity.task_id, answer=answer, reason=reason + ) diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py index 5ce9f737d1..09e2ee74e6 100644 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ b/api/core/app/task_pipeline/workflow_cycle_manage.py @@ -6,7 +6,7 @@ from typing import Any, Optional, Union, cast from uuid import uuid4 from sqlalchemy import func, select -from sqlalchemy.orm import Session, sessionmaker +from sqlalchemy.orm import Session from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity from core.app.entities.queue_entities import ( @@ -49,14 +49,13 @@ from core.file import FILE_MODEL_IDENTITY, File from core.model_runtime.utils.encoders import jsonable_encoder from core.ops.entities.trace_entity import TraceTaskName from core.ops.ops_trace_manager import TraceQueueManager, TraceTask -from core.repository import RepositoryFactory from core.tools.tool_manager import ToolManager from core.workflow.entities.node_entities import NodeRunMetadataKey from core.workflow.enums import SystemVariableKey from core.workflow.nodes import NodeType from core.workflow.nodes.tool.entities import ToolNodeData +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from core.workflow.workflow_entry import WorkflowEntry -from extensions.ext_database import db from models.account import Account from models.enums import CreatedByRole, WorkflowRunTriggeredFrom from models.model import EndUser @@ -76,26 +75,13 @@ class WorkflowCycleManage: *, application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity], workflow_system_variables: dict[SystemVariableKey, Any], + workflow_node_execution_repository: WorkflowNodeExecutionRepository, ) -> None: self._workflow_run: WorkflowRun | None = None self._workflow_node_executions: dict[str, WorkflowNodeExecution] = {} self._application_generate_entity = application_generate_entity self._workflow_system_variables = workflow_system_variables - - # Initialize the session factory and repository - # We use the global db engine instead of the session passed to methods - # Disable expire_on_commit to avoid the need for merging objects - self._session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - self._workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": self._application_generate_entity.app_config.tenant_id, - "app_id": self._application_generate_entity.app_config.app_id, - "session_factory": self._session_factory, - } - ) - - # We'll still keep the cache for backward compatibility and performance - # but use the repository for database operations + self._workflow_node_execution_repository = 
workflow_node_execution_repository def _handle_workflow_run_start( self, @@ -395,6 +381,8 @@ class WorkflowCycleManage: workflow_node_execution.elapsed_time = elapsed_time workflow_node_execution.execution_metadata = execution_metadata + self._workflow_node_execution_repository.update(workflow_node_execution) + return workflow_node_execution def _handle_workflow_node_execution_retried( diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index b3affc91a6..86887c9b4a 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -798,7 +798,25 @@ class ProviderConfiguration(BaseModel): provider_models = [m for m in provider_models if m.status == ModelStatus.ACTIVE] # resort provider_models - return sorted(provider_models, key=lambda x: x.model_type.value) + # Optimize sorting logic: first sort by provider.position order, then by model_type.value + # Get the position list for model types (retrieve only once for better performance) + model_type_positions = {} + if hasattr(self.provider, "position") and self.provider.position: + model_type_positions = self.provider.position + + def get_sort_key(model: ModelWithProviderEntity): + # Get the position list for the current model type + positions = model_type_positions.get(model.model_type.value, []) + + # If the model name is in the position list, use its index for sorting + # Otherwise use a large value (list length) to place undefined models at the end + position_index = positions.index(model.model) if model.model in positions else len(positions) + + # Return composite sort key: (model_type value, model position index) + return (model.model_type.value, position_index) + + # Sort using the composite sort key + return sorted(provider_models, key=get_sort_key) def _get_system_provider_models( self, diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index d5d2ca60fa..e5dbc30689 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -3,6 +3,8 @@ import logging import re from typing import Optional, cast +import json_repair + from core.llm_generator.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser from core.llm_generator.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser from core.llm_generator.prompts import ( @@ -366,7 +368,20 @@ class LLMGenerator: ), ) - generated_json_schema = cast(str, response.message.content) + raw_content = response.message.content + + if not isinstance(raw_content, str): + raise ValueError(f"LLM response content must be a string, got: {type(raw_content)}") + + try: + parsed_content = json.loads(raw_content) + except json.JSONDecodeError: + parsed_content = json_repair.loads(raw_content) + + if not isinstance(parsed_content, dict | list): + raise ValueError(f"Failed to parse structured output from llm: {raw_content}") + + generated_json_schema = json.dumps(parsed_content, indent=2, ensure_ascii=False) return {"output": generated_json_schema, "error": ""} except InvokeError as e: diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py index 82d22d7f89..fad7cea01c 100644 --- a/api/core/llm_generator/prompts.py +++ b/api/core/llm_generator/prompts.py @@ -1,7 +1,7 @@ # Written by YORKI MINAKO🤡, Edited by Xiaoyi CONVERSATION_TITLE_PROMPT = """You need to decompose the user's input into "subject" and "intention" in order to accurately figure out what 
the user's input language actually is. -Notice: the language type user use could be diverse, which can be English, Chinese, Italian, Español, Arabic, Japanese, French, and etc. -MAKE SURE your output is the SAME language as the user's input! +Notice: the language type user uses could be diverse, which can be English, Chinese, Italian, Español, Arabic, Japanese, French, and etc. +ENSURE your output is in the SAME language as the user's input! Your output is restricted only to: (Input language) Intention + Subject(short as possible) Your output MUST be a valid JSON. @@ -19,7 +19,7 @@ User Input: hi, yesterday i had some burgers. example 2: User Input: hello { - "Language Type": "The user's input is written in pure English", + "Language Type": "The user's input is pure English", "Your Reasoning": "The language of my output must be pure English.", "Your Output": "Greeting myself☺️" } @@ -46,7 +46,7 @@ example 5: User Input: why小红的年龄is老than小明? { "Language Type": "The user's input is English-Chinese mixed", - "Your Reasoning": "The English parts are subjective particles, the main intention is written in Chinese, besides, Chinese occupies a greater \"actual meaning\" than English, so the language of my output must be using Chinese.", + "Your Reasoning": "The English parts are filler words, the main intention is written in Chinese, besides, Chinese occupies a greater \"actual meaning\" than English, so the language of my output must be using Chinese.", "Your Output": "询问小红和小明的年龄" } @@ -114,6 +114,13 @@ JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE = ( "4. The returned object should contain at least one key-value pair.\n\n" "5. The returned object should always be in the format: {result: ...}\n\n" "Example:\n" + "/**\n" + " * Multiplies two numbers together.\n" + " *\n" + " * @param {number} arg1 - The first number to multiply.\n" + " * @param {number} arg2 - The second number to multiply.\n" + " * @returns {{ result: number }} The result of the multiplication.\n" + " */\n" "function main(arg1, arg2) {\n" " return {\n" " result: arg1 * arg2\n" @@ -130,7 +137,7 @@ JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE = ( SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = ( "Please help me predict the three most likely questions that human would ask, " - "and keeping each question under 20 characters.\n" + "and keep each question under 20 characters.\n" "MAKE SURE your output is the SAME language as the Assistant's latest response. " "The output must be an array in JSON format following the specified schema:\n" '["question1","question2","question3"]\n' @@ -157,9 +164,9 @@ Here is a task description for which I would like you to create a high-quality p Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include: - Do not include or section and variables in the prompt, assume user will add them at their own will. -- Clear instructions for the AI that will be using this prompt, demarcated with tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag. +- Clear instructions for the AI that will be using this prompt, demarcated with tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag. 
- Relevant examples if needed to clarify the task further, demarcated with tags. Do not include variables in the prompt. Give three pairs of input and output examples. -- Include other relevant sections demarcated with appropriate XML tags like , . +- Include other relevant sections demarcated with appropriate XML tags like , . - Use the same language as task description. - Output in ``` xml ``` and start with Please generate the full prompt template with at least 300 words and output only the prompt template. @@ -172,7 +179,7 @@ Here is a task description for which I would like you to create a high-quality p Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include: - Descriptive variable names surrounded by {{ }} (two curly brackets) to indicate where the actual values will be substituted in. Choose variable names that clearly indicate the type of value expected. Variable names have to be composed of number, english alphabets and underline and nothing else. -- Clear instructions for the AI that will be using this prompt, demarcated with tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag. +- Clear instructions for the AI that will be using this prompt, demarcated with tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag. - Relevant examples if needed to clarify the task further, demarcated with tags. Do not use curly brackets any other than in section. - Any other relevant sections demarcated with appropriate XML tags like , , etc. - Use the same language as task description. 
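The llm_generator.py hunk above replaces a bare cast of the LLM response with a parse-then-repair flow before re-serializing the generated schema. Below is a minimal standalone sketch of that pattern, assuming the json_repair package is installed; the helper name is illustrative and not part of the diff.

```python
import json

import json_repair  # tolerant fallback parser for malformed LLM JSON output


def parse_structured_output(raw_content: str) -> str:
    """Illustrative helper: strict json.loads first, json_repair as a fallback."""
    if not isinstance(raw_content, str):
        raise ValueError(f"LLM response content must be a string, got: {type(raw_content)}")
    try:
        parsed = json.loads(raw_content)
    except json.JSONDecodeError:
        # json_repair copes with common LLM slips such as trailing commas or single quotes
        parsed = json_repair.loads(raw_content)
    if not isinstance(parsed, (dict, list)):
        raise ValueError(f"Failed to parse structured output from llm: {raw_content}")
    return json.dumps(parsed, indent=2, ensure_ascii=False)


# For example, '{"name": "song",}' fails strict parsing but is repaired into valid JSON.
```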
@@ -291,32 +298,30 @@ Your task is to convert simple user descriptions into properly formatted JSON Sc { "type": "object", "properties": { - "properties": { - "songs": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "id": { - "type": "string" - }, - "duration": { - "type": "string" - }, - "aritst": { - "type": "string" - } + "songs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" }, - "required": [ - "name", - "id", - "duration", - "aritst" - ] - } + "id": { + "type": "string" + }, + "duration": { + "type": "string" + }, + "aritst": { + "type": "string" + } + }, + "required": [ + "name", + "id", + "duration", + "aritst" + ] } } }, diff --git a/api/core/model_runtime/entities/provider_entities.py b/api/core/model_runtime/entities/provider_entities.py index 85321bed94..d0f9ee13e5 100644 --- a/api/core/model_runtime/entities/provider_entities.py +++ b/api/core/model_runtime/entities/provider_entities.py @@ -134,6 +134,9 @@ class ProviderEntity(BaseModel): # pydantic configs model_config = ConfigDict(protected_namespaces=()) + # position from plugin _position.yaml + position: Optional[dict[str, list[str]]] = {} + @field_validator("models", mode="before") @classmethod def validate_models(cls, v): diff --git a/api/core/model_runtime/model_providers/__base/ai_model.py b/api/core/model_runtime/model_providers/__base/ai_model.py index bd05590018..3c5a2dce4f 100644 --- a/api/core/model_runtime/model_providers/__base/ai_model.py +++ b/api/core/model_runtime/model_providers/__base/ai_model.py @@ -26,7 +26,7 @@ from core.model_runtime.errors.invoke import ( ) from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer from core.plugin.entities.plugin_daemon import PluginDaemonInnerError, PluginModelProviderEntity -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient class AIModel(BaseModel): @@ -141,7 +141,7 @@ class AIModel(BaseModel): :param credentials: model credentials :return: model schema """ - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() cache_key = f"{self.tenant_id}:{self.plugin_id}:{self.provider_name}:{self.model_type.value}:{model}" # sort credentials sorted_credentials = sorted(credentials.items()) if credentials else [] diff --git a/api/core/model_runtime/model_providers/__base/large_language_model.py b/api/core/model_runtime/model_providers/__base/large_language_model.py index 1b799131e7..6312587861 100644 --- a/api/core/model_runtime/model_providers/__base/large_language_model.py +++ b/api/core/model_runtime/model_providers/__base/large_language_model.py @@ -2,7 +2,7 @@ import logging import time import uuid from collections.abc import Generator, Sequence -from typing import Optional, Union +from typing import Optional, Union, cast from pydantic import ConfigDict @@ -20,7 +20,8 @@ from core.model_runtime.entities.model_entities import ( PriceType, ) from core.model_runtime.model_providers.__base.ai_model import AIModel -from core.plugin.manager.model import PluginModelManager +from core.model_runtime.utils.helper import convert_llm_result_chunk_to_str +from core.plugin.impl.model import PluginModelClient logger = logging.getLogger(__name__) @@ -140,7 +141,7 @@ class LargeLanguageModel(AIModel): result: Union[LLMResult, Generator[LLMResultChunk, None, None]] try: - plugin_model_manager = PluginModelManager() + plugin_model_manager = 
PluginModelClient() result = plugin_model_manager.invoke_llm( tenant_id=self.tenant_id, user_id=user or "unknown", @@ -280,7 +281,9 @@ class LargeLanguageModel(AIModel): callbacks=callbacks, ) - assistant_message.content += chunk.delta.message.content + text = convert_llm_result_chunk_to_str(chunk.delta.message.content) + current_content = cast(str, assistant_message.content) + assistant_message.content = current_content + text real_model = chunk.model if chunk.delta.usage: usage = chunk.delta.usage @@ -326,7 +329,7 @@ class LargeLanguageModel(AIModel): :return: """ if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED: - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() return plugin_model_manager.get_llm_num_tokens( tenant_id=self.tenant_id, user_id="unknown", diff --git a/api/core/model_runtime/model_providers/__base/moderation_model.py b/api/core/model_runtime/model_providers/__base/moderation_model.py index f98d7572c7..19dc1d599a 100644 --- a/api/core/model_runtime/model_providers/__base/moderation_model.py +++ b/api/core/model_runtime/model_providers/__base/moderation_model.py @@ -5,7 +5,7 @@ from pydantic import ConfigDict from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.model_providers.__base.ai_model import AIModel -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient class ModerationModel(AIModel): @@ -31,7 +31,7 @@ class ModerationModel(AIModel): self.started_at = time.perf_counter() try: - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() return plugin_model_manager.invoke_moderation( tenant_id=self.tenant_id, user_id=user or "unknown", diff --git a/api/core/model_runtime/model_providers/__base/rerank_model.py b/api/core/model_runtime/model_providers/__base/rerank_model.py index e905cb18d4..569e756a3b 100644 --- a/api/core/model_runtime/model_providers/__base/rerank_model.py +++ b/api/core/model_runtime/model_providers/__base/rerank_model.py @@ -3,7 +3,7 @@ from typing import Optional from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.entities.rerank_entities import RerankResult from core.model_runtime.model_providers.__base.ai_model import AIModel -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient class RerankModel(AIModel): @@ -36,7 +36,7 @@ class RerankModel(AIModel): :return: rerank result """ try: - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() return plugin_model_manager.invoke_rerank( tenant_id=self.tenant_id, user_id=user or "unknown", diff --git a/api/core/model_runtime/model_providers/__base/speech2text_model.py b/api/core/model_runtime/model_providers/__base/speech2text_model.py index 97ff322f09..c69f65b681 100644 --- a/api/core/model_runtime/model_providers/__base/speech2text_model.py +++ b/api/core/model_runtime/model_providers/__base/speech2text_model.py @@ -4,7 +4,7 @@ from pydantic import ConfigDict from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.model_providers.__base.ai_model import AIModel -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient class Speech2TextModel(AIModel): @@ -28,7 +28,7 @@ class Speech2TextModel(AIModel): :return: text for given audio file """ try: - plugin_model_manager = PluginModelManager() + plugin_model_manager = 
PluginModelClient() return plugin_model_manager.invoke_speech_to_text( tenant_id=self.tenant_id, user_id=user or "unknown", diff --git a/api/core/model_runtime/model_providers/__base/text_embedding_model.py b/api/core/model_runtime/model_providers/__base/text_embedding_model.py index c4c1f92177..f7bba0eba1 100644 --- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py +++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py @@ -6,7 +6,7 @@ from core.entities.embedding_type import EmbeddingInputType from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.model_providers.__base.ai_model import AIModel -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient class TextEmbeddingModel(AIModel): @@ -38,7 +38,7 @@ class TextEmbeddingModel(AIModel): :return: embeddings result """ try: - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() return plugin_model_manager.invoke_text_embedding( tenant_id=self.tenant_id, user_id=user or "unknown", @@ -61,7 +61,7 @@ class TextEmbeddingModel(AIModel): :param texts: texts to embed :return: """ - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() return plugin_model_manager.get_text_embedding_num_tokens( tenant_id=self.tenant_id, user_id="unknown", diff --git a/api/core/model_runtime/model_providers/__base/tts_model.py b/api/core/model_runtime/model_providers/__base/tts_model.py index 1f248d11ac..d51831900c 100644 --- a/api/core/model_runtime/model_providers/__base/tts_model.py +++ b/api/core/model_runtime/model_providers/__base/tts_model.py @@ -6,7 +6,7 @@ from pydantic import ConfigDict from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.model_providers.__base.ai_model import AIModel -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient logger = logging.getLogger(__name__) @@ -42,7 +42,7 @@ class TTSModel(AIModel): :return: translated audio file """ try: - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() return plugin_model_manager.invoke_tts( tenant_id=self.tenant_id, user_id=user or "unknown", @@ -65,7 +65,7 @@ class TTSModel(AIModel): :param credentials: The credentials required to access the TTS model. :return: A list of voices supported by the TTS model. 
""" - plugin_model_manager = PluginModelManager() + plugin_model_manager = PluginModelClient() return plugin_model_manager.get_tts_model_voices( tenant_id=self.tenant_id, user_id="unknown", diff --git a/api/core/model_runtime/model_providers/model_provider_factory.py b/api/core/model_runtime/model_providers/model_provider_factory.py index d2fd4916a4..ad46f64ec3 100644 --- a/api/core/model_runtime/model_providers/model_provider_factory.py +++ b/api/core/model_runtime/model_providers/model_provider_factory.py @@ -22,8 +22,8 @@ from core.model_runtime.schema_validators.model_credential_schema_validator impo from core.model_runtime.schema_validators.provider_credential_schema_validator import ProviderCredentialSchemaValidator from core.plugin.entities.plugin import ModelProviderID from core.plugin.entities.plugin_daemon import PluginModelProviderEntity -from core.plugin.manager.asset import PluginAssetManager -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.asset import PluginAssetManager +from core.plugin.impl.model import PluginModelClient logger = logging.getLogger(__name__) @@ -40,7 +40,7 @@ class ModelProviderFactory: self.provider_position_map = {} self.tenant_id = tenant_id - self.plugin_model_manager = PluginModelManager() + self.plugin_model_manager = PluginModelClient() if not self.provider_position_map: # get the path of current classes diff --git a/api/core/model_runtime/utils/helper.py b/api/core/model_runtime/utils/helper.py index 5e8a723ec7..53789a8e91 100644 --- a/api/core/model_runtime/utils/helper.py +++ b/api/core/model_runtime/utils/helper.py @@ -1,6 +1,8 @@ import pydantic from pydantic import BaseModel +from core.model_runtime.entities.message_entities import PromptMessageContentUnionTypes + def dump_model(model: BaseModel) -> dict: if hasattr(pydantic, "model_dump"): @@ -8,3 +10,18 @@ def dump_model(model: BaseModel) -> dict: return pydantic.model_dump(model) # type: ignore else: return model.model_dump() + + +def convert_llm_result_chunk_to_str(content: None | str | list[PromptMessageContentUnionTypes]) -> str: + if content is None: + message_text = "" + elif isinstance(content, str): + message_text = content + elif isinstance(content, list): + # Assuming the list contains PromptMessageContent objects with a "data" attribute + message_text = "".join( + item.data if hasattr(item, "data") and isinstance(item.data, str) else str(item) for item in content + ) + else: + message_text = str(content) + return message_text diff --git a/api/core/moderation/output_moderation.py b/api/core/moderation/output_moderation.py index e595be126c..2ec315417f 100644 --- a/api/core/moderation/output_moderation.py +++ b/api/core/moderation/output_moderation.py @@ -46,14 +46,14 @@ class OutputModeration(BaseModel): if not self.thread: self.thread = self.start_thread() - def moderation_completion(self, completion: str, public_event: bool = False) -> str: + def moderation_completion(self, completion: str, public_event: bool = False) -> tuple[str, bool]: self.buffer = completion self.is_final_chunk = True result = self.moderation(tenant_id=self.tenant_id, app_id=self.app_id, moderation_buffer=completion) if not result or not result.flagged: - return completion + return completion, False if result.action == ModerationAction.DIRECT_OUTPUT: final_output = result.preset_response @@ -61,9 +61,14 @@ class OutputModeration(BaseModel): final_output = result.text if public_event: - self.queue_manager.publish(QueueMessageReplaceEvent(text=final_output), 
PublishFrom.TASK_PIPELINE) + self.queue_manager.publish( + QueueMessageReplaceEvent( + text=final_output, reason=QueueMessageReplaceEvent.MessageReplaceReason.OUTPUT_MODERATION + ), + PublishFrom.TASK_PIPELINE, + ) - return final_output + return final_output, True def start_thread(self) -> threading.Thread: buffer_size = dify_config.MODERATION_BUFFER_SIZE @@ -112,7 +117,12 @@ class OutputModeration(BaseModel): # trigger replace event if self.thread_running: - self.queue_manager.publish(QueueMessageReplaceEvent(text=final_output), PublishFrom.TASK_PIPELINE) + self.queue_manager.publish( + QueueMessageReplaceEvent( + text=final_output, reason=QueueMessageReplaceEvent.MessageReplaceReason.OUTPUT_MODERATION + ), + PublishFrom.TASK_PIPELINE, + ) if result.action == ModerationAction.DIRECT_OUTPUT: break diff --git a/api/core/ops/entities/config_entity.py b/api/core/ops/entities/config_entity.py index b484242b61..874b2800b2 100644 --- a/api/core/ops/entities/config_entity.py +++ b/api/core/ops/entities/config_entity.py @@ -7,6 +7,7 @@ class TracingProviderEnum(Enum): LANGFUSE = "langfuse" LANGSMITH = "langsmith" OPIK = "opik" + WEAVE = "weave" class BaseTracingConfig(BaseModel): @@ -88,5 +89,26 @@ class OpikConfig(BaseTracingConfig): return v +class WeaveConfig(BaseTracingConfig): + """ + Model class for Weave tracing config. + """ + + api_key: str + entity: str | None = None + project: str + endpoint: str = "https://trace.wandb.ai" + + @field_validator("endpoint") + @classmethod + def set_value(cls, v, info: ValidationInfo): + if v is None or v == "": + v = "https://trace.wandb.ai" + if not v.startswith("https://"): + raise ValueError("endpoint must start with https://") + + return v + + OPS_FILE_PATH = "ops_trace/" OPS_TRACE_FAILED_KEY = "FAILED_OPS_TRACE" diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index fa78b7b8e9..b229d244f7 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -29,7 +29,7 @@ from core.ops.langfuse_trace.entities.langfuse_trace_entity import ( UnitEnum, ) from core.ops.utils import filter_none_values -from core.repository.repository_factory import RepositoryFactory +from core.workflow.repository.repository_factory import RepositoryFactory from extensions.ext_database import db from models.model import EndUser diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index 85a0eafdc1..78a51ff36e 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -28,7 +28,7 @@ from core.ops.langsmith_trace.entities.langsmith_trace_entity import ( LangSmithRunUpdateModel, ) from core.ops.utils import filter_none_values, generate_dotted_order -from core.repository.repository_factory import RepositoryFactory +from core.workflow.repository.repository_factory import RepositoryFactory from extensions.ext_database import db from models.model import EndUser, MessageFile diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index 923b9a24ed..a14b5afb8e 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -22,7 +22,7 @@ from core.ops.entities.trace_entity import ( TraceTaskName, WorkflowTraceInfo, ) -from core.repository.repository_factory import RepositoryFactory +from core.workflow.repository.repository_factory import RepositoryFactory from extensions.ext_database import db from 
models.model import EndUser, MessageFile diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 6fc02393fe..2c68055f87 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -20,6 +20,7 @@ from core.ops.entities.config_entity import ( LangSmithConfig, OpikConfig, TracingProviderEnum, + WeaveConfig, ) from core.ops.entities.trace_entity import ( DatasetRetrievalTraceInfo, @@ -34,7 +35,9 @@ from core.ops.entities.trace_entity import ( ) from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace +from core.ops.opik_trace.opik_trace import OpikDataTrace from core.ops.utils import get_message_data +from core.ops.weave_trace.weave_trace import WeaveDataTrace from extensions.ext_database import db from extensions.ext_storage import storage from models.model import App, AppModelConfig, Conversation, Message, MessageFile, TraceAppConfig @@ -43,8 +46,6 @@ from tasks.ops_trace_task import process_trace_tasks def build_opik_trace_instance(config: OpikConfig): - from core.ops.opik_trace.opik_trace import OpikDataTrace - return OpikDataTrace(config) @@ -67,6 +68,12 @@ provider_config_map: dict[str, dict[str, Any]] = { "other_keys": ["project", "url", "workspace"], "trace_instance": lambda config: build_opik_trace_instance(config), }, + TracingProviderEnum.WEAVE.value: { + "config_class": WeaveConfig, + "secret_keys": ["api_key"], + "other_keys": ["project", "entity", "endpoint"], + "trace_instance": WeaveDataTrace, + }, } diff --git a/api/core/ops/weave_trace/__init__.py b/api/core/ops/weave_trace/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/ops/weave_trace/entities/__init__.py b/api/core/ops/weave_trace/entities/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/ops/weave_trace/entities/weave_trace_entity.py b/api/core/ops/weave_trace/entities/weave_trace_entity.py new file mode 100644 index 0000000000..e423f5ccbb --- /dev/null +++ b/api/core/ops/weave_trace/entities/weave_trace_entity.py @@ -0,0 +1,97 @@ +from typing import Any, Optional, Union + +from pydantic import BaseModel, Field, field_validator +from pydantic_core.core_schema import ValidationInfo + +from core.ops.utils import replace_text_with_content + + +class WeaveTokenUsage(BaseModel): + input_tokens: Optional[int] = None + output_tokens: Optional[int] = None + total_tokens: Optional[int] = None + + +class WeaveMultiModel(BaseModel): + file_list: Optional[list[str]] = Field(None, description="List of files") + + +class WeaveTraceModel(WeaveTokenUsage, WeaveMultiModel): + id: str = Field(..., description="ID of the trace") + op: str = Field(..., description="Name of the operation") + inputs: Optional[Union[str, dict[str, Any], list, None]] = Field(None, description="Inputs of the trace") + outputs: Optional[Union[str, dict[str, Any], list, None]] = Field(None, description="Outputs of the trace") + attributes: Optional[Union[str, dict[str, Any], list, None]] = Field( + None, description="Metadata and attributes associated with trace" + ) + exception: Optional[str] = Field(None, description="Exception message of the trace") + + @field_validator("inputs", "outputs") + @classmethod + def ensure_dict(cls, v, info: ValidationInfo): + field_name = info.field_name + values = info.data + if v == {} or v is None: + return v + usage_metadata = { + "input_tokens": values.get("input_tokens", 0), + "output_tokens": 
values.get("output_tokens", 0), + "total_tokens": values.get("total_tokens", 0), + } + file_list = values.get("file_list", []) + if isinstance(v, str): + if field_name == "inputs": + return { + "messages": { + "role": "user", + "content": v, + "usage_metadata": usage_metadata, + "file_list": file_list, + }, + } + elif field_name == "outputs": + return { + "choices": { + "role": "ai", + "content": v, + "usage_metadata": usage_metadata, + "file_list": file_list, + }, + } + elif isinstance(v, list): + data = {} + if len(v) > 0 and isinstance(v[0], dict): + # rename text to content + v = replace_text_with_content(data=v) + if field_name == "inputs": + data = { + "messages": [ + dict(msg, **{"usage_metadata": usage_metadata, "file_list": file_list}) for msg in v + ] + if isinstance(v, list) + else v, + } + elif field_name == "outputs": + data = { + "choices": { + "role": "ai", + "content": v, + "usage_metadata": usage_metadata, + "file_list": file_list, + }, + } + return data + else: + return { + "choices": { + "role": "ai" if field_name == "outputs" else "user", + "content": str(v), + "usage_metadata": usage_metadata, + "file_list": file_list, + }, + } + if isinstance(v, dict): + v["usage_metadata"] = usage_metadata + v["file_list"] = file_list + return v + return v diff --git a/api/core/ops/weave_trace/weave_trace.py b/api/core/ops/weave_trace/weave_trace.py new file mode 100644 index 0000000000..49594cb0f1 --- /dev/null +++ b/api/core/ops/weave_trace/weave_trace.py @@ -0,0 +1,420 @@ +import json +import logging +import os +import uuid +from datetime import datetime, timedelta +from typing import Any, Optional, cast + +import wandb +import weave + +from core.ops.base_trace_instance import BaseTraceInstance +from core.ops.entities.config_entity import WeaveConfig +from core.ops.entities.trace_entity import ( + BaseTraceInfo, + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + TraceTaskName, + WorkflowTraceInfo, +) +from core.ops.weave_trace.entities.weave_trace_entity import WeaveTraceModel +from extensions.ext_database import db +from models.model import EndUser, MessageFile +from models.workflow import WorkflowNodeExecution + +logger = logging.getLogger(__name__) + + +class WeaveDataTrace(BaseTraceInstance): + def __init__( + self, + weave_config: WeaveConfig, + ): + super().__init__(weave_config) + self.weave_api_key = weave_config.api_key + self.project_name = weave_config.project + self.entity = weave_config.entity + + # Login with API key first + login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True) + if not login_status: + logger.error("Failed to login to Weights & Biases with the provided API key") + raise ValueError("Weave login failed") + + # Then initialize weave client + self.weave_client = weave.init( + project_name=(f"{self.entity}/{self.project_name}" if self.entity else self.project_name) + ) + self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") + self.calls: dict[str, Any] = {} + + def get_project_url( + self, + ): + try: + project_url = f"https://wandb.ai/{self.weave_client._project_id()}" + return project_url + except Exception as e: + logger.debug(f"Weave get run url failed: {str(e)}") + raise ValueError(f"Weave get run url failed: {str(e)}") + + def trace(self, trace_info: BaseTraceInfo): + logger.debug(f"Trace info: {trace_info}") + if isinstance(trace_info, WorkflowTraceInfo): + self.workflow_trace(trace_info) + if isinstance(trace_info, 
MessageTraceInfo): + self.message_trace(trace_info) + if isinstance(trace_info, ModerationTraceInfo): + self.moderation_trace(trace_info) + if isinstance(trace_info, SuggestedQuestionTraceInfo): + self.suggested_question_trace(trace_info) + if isinstance(trace_info, DatasetRetrievalTraceInfo): + self.dataset_retrieval_trace(trace_info) + if isinstance(trace_info, ToolTraceInfo): + self.tool_trace(trace_info) + if isinstance(trace_info, GenerateNameTraceInfo): + self.generate_name_trace(trace_info) + + def workflow_trace(self, trace_info: WorkflowTraceInfo): + trace_id = trace_info.message_id or trace_info.workflow_run_id + if trace_info.start_time is None: + trace_info.start_time = datetime.now() + + if trace_info.message_id: + message_attributes = trace_info.metadata + message_attributes["workflow_app_log_id"] = trace_info.workflow_app_log_id + + message_attributes["message_id"] = trace_info.message_id + message_attributes["workflow_run_id"] = trace_info.workflow_run_id + message_attributes["trace_id"] = trace_id + message_attributes["start_time"] = trace_info.start_time + message_attributes["end_time"] = trace_info.end_time + message_attributes["tags"] = ["message", "workflow"] + + message_run = WeaveTraceModel( + id=trace_info.message_id, + op=str(TraceTaskName.MESSAGE_TRACE.value), + inputs=dict(trace_info.workflow_run_inputs), + outputs=dict(trace_info.workflow_run_outputs), + total_tokens=trace_info.total_tokens, + attributes=message_attributes, + exception=trace_info.error, + file_list=[], + ) + self.start_call(message_run, parent_run_id=trace_info.workflow_run_id) + self.finish_call(message_run) + + workflow_attributes = trace_info.metadata + workflow_attributes["workflow_run_id"] = trace_info.workflow_run_id + workflow_attributes["trace_id"] = trace_id + workflow_attributes["start_time"] = trace_info.start_time + workflow_attributes["end_time"] = trace_info.end_time + workflow_attributes["tags"] = ["workflow"] + + workflow_run = WeaveTraceModel( + file_list=trace_info.file_list, + total_tokens=trace_info.total_tokens, + id=trace_info.workflow_run_id, + op=str(TraceTaskName.WORKFLOW_TRACE.value), + inputs=dict(trace_info.workflow_run_inputs), + outputs=dict(trace_info.workflow_run_outputs), + attributes=workflow_attributes, + exception=trace_info.error, + ) + + self.start_call(workflow_run, parent_run_id=trace_info.message_id) + + # through workflow_run_id get all_nodes_execution + workflow_nodes_execution_id_records = ( + db.session.query(WorkflowNodeExecution.id) + .filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id) + .all() + ) + + for node_execution_id_record in workflow_nodes_execution_id_records: + node_execution = ( + db.session.query( + WorkflowNodeExecution.id, + WorkflowNodeExecution.tenant_id, + WorkflowNodeExecution.app_id, + WorkflowNodeExecution.title, + WorkflowNodeExecution.node_type, + WorkflowNodeExecution.status, + WorkflowNodeExecution.inputs, + WorkflowNodeExecution.outputs, + WorkflowNodeExecution.created_at, + WorkflowNodeExecution.elapsed_time, + WorkflowNodeExecution.process_data, + WorkflowNodeExecution.execution_metadata, + ) + .filter(WorkflowNodeExecution.id == node_execution_id_record.id) + .first() + ) + + if not node_execution: + continue + + node_execution_id = node_execution.id + tenant_id = node_execution.tenant_id + app_id = node_execution.app_id + node_name = node_execution.title + node_type = node_execution.node_type + status = node_execution.status + if node_type == "llm": + inputs = ( + 
json.loads(node_execution.process_data).get("prompts", {}) if node_execution.process_data else {} + ) + else: + inputs = json.loads(node_execution.inputs) if node_execution.inputs else {} + outputs = json.loads(node_execution.outputs) if node_execution.outputs else {} + created_at = node_execution.created_at or datetime.now() + elapsed_time = node_execution.elapsed_time + finished_at = created_at + timedelta(seconds=elapsed_time) + + execution_metadata = ( + json.loads(node_execution.execution_metadata) if node_execution.execution_metadata else {} + ) + node_total_tokens = execution_metadata.get("total_tokens", 0) + attributes = execution_metadata.copy() + attributes.update( + { + "workflow_run_id": trace_info.workflow_run_id, + "node_execution_id": node_execution_id, + "tenant_id": tenant_id, + "app_id": app_id, + "app_name": node_name, + "node_type": node_type, + "status": status, + } + ) + + process_data = json.loads(node_execution.process_data) if node_execution.process_data else {} + if process_data and process_data.get("model_mode") == "chat": + attributes.update( + { + "ls_provider": process_data.get("model_provider", ""), + "ls_model_name": process_data.get("model_name", ""), + } + ) + attributes["tags"] = ["node_execution"] + attributes["start_time"] = created_at + attributes["end_time"] = finished_at + attributes["elapsed_time"] = elapsed_time + attributes["workflow_run_id"] = trace_info.workflow_run_id + attributes["trace_id"] = trace_id + node_run = WeaveTraceModel( + total_tokens=node_total_tokens, + op=node_type, + inputs=inputs, + outputs=outputs, + file_list=trace_info.file_list, + attributes=attributes, + id=node_execution_id, + exception=None, + ) + + self.start_call(node_run, parent_run_id=trace_info.workflow_run_id) + self.finish_call(node_run) + + self.finish_call(workflow_run) + + def message_trace(self, trace_info: MessageTraceInfo): + # get message file data + file_list = cast(list[str], trace_info.file_list) or [] + message_file_data: Optional[MessageFile] = trace_info.message_file_data + file_url = f"{self.file_base_url}/{message_file_data.url}" if message_file_data else "" + file_list.append(file_url) + attributes = trace_info.metadata + message_data = trace_info.message_data + if message_data is None: + return + message_id = message_data.id + + user_id = message_data.from_account_id + attributes["user_id"] = user_id + + if message_data.from_end_user_id: + end_user_data: Optional[EndUser] = ( + db.session.query(EndUser).filter(EndUser.id == message_data.from_end_user_id).first() + ) + if end_user_data is not None: + end_user_id = end_user_data.session_id + attributes["end_user_id"] = end_user_id + + attributes["message_id"] = message_id + attributes["start_time"] = trace_info.start_time + attributes["end_time"] = trace_info.end_time + attributes["tags"] = ["message", str(trace_info.conversation_mode)] + message_run = WeaveTraceModel( + id=message_id, + op=str(TraceTaskName.MESSAGE_TRACE.value), + input_tokens=trace_info.message_tokens, + output_tokens=trace_info.answer_tokens, + total_tokens=trace_info.total_tokens, + inputs=trace_info.inputs, + outputs=trace_info.outputs, + exception=trace_info.error, + file_list=file_list, + attributes=attributes, + ) + self.start_call(message_run) + + # create llm run parented to message run + llm_run = WeaveTraceModel( + id=str(uuid.uuid4()), + input_tokens=trace_info.message_tokens, + output_tokens=trace_info.answer_tokens, + total_tokens=trace_info.total_tokens, + op="llm", + inputs=trace_info.inputs, + 
outputs=trace_info.outputs, + attributes=attributes, + file_list=[], + exception=None, + ) + self.start_call( + llm_run, + parent_run_id=message_id, + ) + self.finish_call(llm_run) + self.finish_call(message_run) + + def moderation_trace(self, trace_info: ModerationTraceInfo): + if trace_info.message_data is None: + return + + attributes = trace_info.metadata + attributes["tags"] = ["moderation"] + attributes["message_id"] = trace_info.message_id + attributes["start_time"] = trace_info.start_time or trace_info.message_data.created_at + attributes["end_time"] = trace_info.end_time or trace_info.message_data.updated_at + + moderation_run = WeaveTraceModel( + id=str(uuid.uuid4()), + op=str(TraceTaskName.MODERATION_TRACE.value), + inputs=trace_info.inputs, + outputs={ + "action": trace_info.action, + "flagged": trace_info.flagged, + "preset_response": trace_info.preset_response, + "inputs": trace_info.inputs, + }, + attributes=attributes, + exception=getattr(trace_info, "error", None), + file_list=[], + ) + self.start_call(moderation_run, parent_run_id=trace_info.message_id) + self.finish_call(moderation_run) + + def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo): + message_data = trace_info.message_data + if message_data is None: + return + attributes = trace_info.metadata + attributes["message_id"] = trace_info.message_id + attributes["tags"] = ["suggested_question"] + attributes["start_time"] = (trace_info.start_time or message_data.created_at,) + attributes["end_time"] = (trace_info.end_time or message_data.updated_at,) + + suggested_question_run = WeaveTraceModel( + id=str(uuid.uuid4()), + op=str(TraceTaskName.SUGGESTED_QUESTION_TRACE.value), + inputs=trace_info.inputs, + outputs=trace_info.suggested_question, + attributes=attributes, + exception=trace_info.error, + file_list=[], + ) + + self.start_call(suggested_question_run, parent_run_id=trace_info.message_id) + self.finish_call(suggested_question_run) + + def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo): + if trace_info.message_data is None: + return + attributes = trace_info.metadata + attributes["message_id"] = trace_info.message_id + attributes["tags"] = ["dataset_retrieval"] + attributes["start_time"] = (trace_info.start_time or trace_info.message_data.created_at,) + attributes["end_time"] = (trace_info.end_time or trace_info.message_data.updated_at,) + + dataset_retrieval_run = WeaveTraceModel( + id=str(uuid.uuid4()), + op=str(TraceTaskName.DATASET_RETRIEVAL_TRACE.value), + inputs=trace_info.inputs, + outputs={"documents": trace_info.documents}, + attributes=attributes, + exception=getattr(trace_info, "error", None), + file_list=[], + ) + + self.start_call(dataset_retrieval_run, parent_run_id=trace_info.message_id) + self.finish_call(dataset_retrieval_run) + + def tool_trace(self, trace_info: ToolTraceInfo): + attributes = trace_info.metadata + attributes["tags"] = ["tool", trace_info.tool_name] + attributes["start_time"] = trace_info.start_time + attributes["end_time"] = trace_info.end_time + + tool_run = WeaveTraceModel( + id=str(uuid.uuid4()), + op=trace_info.tool_name, + inputs=trace_info.tool_inputs, + outputs=trace_info.tool_outputs, + file_list=[cast(str, trace_info.file_url)] if trace_info.file_url else [], + attributes=attributes, + exception=trace_info.error, + ) + message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None) + message_id = message_id or None + self.start_call(tool_run, parent_run_id=message_id) + self.finish_call(tool_run) + + def 
generate_name_trace(self, trace_info: GenerateNameTraceInfo): + attributes = trace_info.metadata + attributes["tags"] = ["generate_name"] + attributes["start_time"] = trace_info.start_time + attributes["end_time"] = trace_info.end_time + + name_run = WeaveTraceModel( + id=str(uuid.uuid4()), + op=str(TraceTaskName.GENERATE_NAME_TRACE.value), + inputs=trace_info.inputs, + outputs=trace_info.outputs, + attributes=attributes, + exception=getattr(trace_info, "error", None), + file_list=[], + ) + + self.start_call(name_run) + self.finish_call(name_run) + + def api_check(self): + try: + login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True) + if not login_status: + raise ValueError("Weave login failed") + else: + print("Weave login successful") + return True + except Exception as e: + logger.debug(f"Weave API check failed: {str(e)}") + raise ValueError(f"Weave API check failed: {str(e)}") + + def start_call(self, run_data: WeaveTraceModel, parent_run_id: Optional[str] = None): + call = self.weave_client.create_call(op=run_data.op, inputs=run_data.inputs, attributes=run_data.attributes) + self.calls[run_data.id] = call + if parent_run_id: + self.calls[run_data.id].parent_id = parent_run_id + + def finish_call(self, run_data: WeaveTraceModel): + call = self.calls.get(run_data.id) + if call: + self.weave_client.finish_call(call=call, output=run_data.outputs, exception=run_data.exception) + else: + raise ValueError(f"Call with id {run_data.id} not found") diff --git a/api/core/plugin/backwards_invocation/app.py b/api/core/plugin/backwards_invocation/app.py index 484f52e33c..4e43561a15 100644 --- a/api/core/plugin/backwards_invocation/app.py +++ b/api/core/plugin/backwards_invocation/app.py @@ -72,7 +72,7 @@ class PluginAppBackwardsInvocation(BaseBackwardsInvocation): raise ValueError("missing query") return cls.invoke_chat_app(app, user, conversation_id, query, stream, inputs, files) - elif app.mode == AppMode.WORKFLOW.value: + elif app.mode == AppMode.WORKFLOW: return cls.invoke_workflow_app(app, user, stream, inputs, files) elif app.mode == AppMode.COMPLETION: return cls.invoke_completion_app(app, user, stream, inputs, files) diff --git a/api/core/plugin/entities/plugin_daemon.py b/api/core/plugin/entities/plugin_daemon.py index 1588cbc3c7..2bea07bea0 100644 --- a/api/core/plugin/entities/plugin_daemon.py +++ b/api/core/plugin/entities/plugin_daemon.py @@ -1,6 +1,7 @@ +from collections.abc import Mapping from datetime import datetime from enum import StrEnum -from typing import Generic, Optional, TypeVar +from typing import Any, Generic, Optional, TypeVar from pydantic import BaseModel, ConfigDict, Field @@ -158,3 +159,11 @@ class PluginInstallTaskStartResponse(BaseModel): class PluginUploadResponse(BaseModel): unique_identifier: str = Field(description="The unique identifier of the plugin.") manifest: PluginDeclaration + + +class PluginOAuthAuthorizationUrlResponse(BaseModel): + authorization_url: str = Field(description="The URL of the authorization.") + + +class PluginOAuthCredentialsResponse(BaseModel): + credentials: Mapping[str, Any] = Field(description="The credentials of the OAuth.") diff --git a/api/core/plugin/manager/agent.py b/api/core/plugin/impl/agent.py similarity index 97% rename from api/core/plugin/manager/agent.py rename to api/core/plugin/impl/agent.py index 50172f12f2..66b77c7489 100644 --- a/api/core/plugin/manager/agent.py +++ b/api/core/plugin/impl/agent.py @@ -6,10 +6,10 @@ from core.plugin.entities.plugin import GenericProviderID from 
core.plugin.entities.plugin_daemon import ( PluginAgentProviderEntity, ) -from core.plugin.manager.base import BasePluginManager +from core.plugin.impl.base import BasePluginClient -class PluginAgentManager(BasePluginManager): +class PluginAgentClient(BasePluginClient): def fetch_agent_strategy_providers(self, tenant_id: str) -> list[PluginAgentProviderEntity]: """ Fetch agent providers for the given tenant. diff --git a/api/core/plugin/manager/asset.py b/api/core/plugin/impl/asset.py similarity index 76% rename from api/core/plugin/manager/asset.py rename to api/core/plugin/impl/asset.py index 17755d3561..b9bfe2d2cf 100644 --- a/api/core/plugin/manager/asset.py +++ b/api/core/plugin/impl/asset.py @@ -1,7 +1,7 @@ -from core.plugin.manager.base import BasePluginManager +from core.plugin.impl.base import BasePluginClient -class PluginAssetManager(BasePluginManager): +class PluginAssetManager(BasePluginClient): def fetch_asset(self, tenant_id: str, id: str) -> bytes: """ Fetch an asset by id. diff --git a/api/core/plugin/manager/base.py b/api/core/plugin/impl/base.py similarity index 99% rename from api/core/plugin/manager/base.py rename to api/core/plugin/impl/base.py index d8d7b3e860..4f1d808a3e 100644 --- a/api/core/plugin/manager/base.py +++ b/api/core/plugin/impl/base.py @@ -18,7 +18,7 @@ from core.model_runtime.errors.invoke import ( ) from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.plugin.entities.plugin_daemon import PluginDaemonBasicResponse, PluginDaemonError, PluginDaemonInnerError -from core.plugin.manager.exc import ( +from core.plugin.impl.exc import ( PluginDaemonBadRequestError, PluginDaemonInternalServerError, PluginDaemonNotFoundError, @@ -37,7 +37,7 @@ T = TypeVar("T", bound=(BaseModel | dict | list | bool | str)) logger = logging.getLogger(__name__) -class BasePluginManager: +class BasePluginClient: def _request( self, method: str, diff --git a/api/core/plugin/manager/debugging.py b/api/core/plugin/impl/debugging.py similarity index 78% rename from api/core/plugin/manager/debugging.py rename to api/core/plugin/impl/debugging.py index fb6bad7fa3..523377895c 100644 --- a/api/core/plugin/manager/debugging.py +++ b/api/core/plugin/impl/debugging.py @@ -1,9 +1,9 @@ from pydantic import BaseModel -from core.plugin.manager.base import BasePluginManager +from core.plugin.impl.base import BasePluginClient -class PluginDebuggingManager(BasePluginManager): +class PluginDebuggingClient(BasePluginClient): def get_debugging_key(self, tenant_id: str) -> str: """ Get the debugging key for the given tenant. 
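The renames above move core.plugin.manager.* to core.plugin.impl.* and rename the *Manager classes to *Client, for example PluginModelManager to PluginModelClient and BasePluginManager to BasePluginClient. For downstream callers only the import path and class name change; a sketch of the before and after, with method arguments elided since their signatures are untouched:

```python
# Before this refactor (module path and class name as removed above):
# from core.plugin.manager.model import PluginModelManager
# manager = PluginModelManager()

# After (as used throughout the updated call sites):
from core.plugin.impl.model import PluginModelClient

client = PluginModelClient()
# Method names and arguments are unchanged, e.g. invoke_llm(tenant_id=..., user_id=..., ...)
# or get_llm_num_tokens(...), so no call-site logic needs to move.
```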
diff --git a/api/core/plugin/manager/endpoint.py b/api/core/plugin/impl/endpoint.py similarity index 97% rename from api/core/plugin/manager/endpoint.py rename to api/core/plugin/impl/endpoint.py index 415b981ffb..5b88742be5 100644 --- a/api/core/plugin/manager/endpoint.py +++ b/api/core/plugin/impl/endpoint.py @@ -1,8 +1,8 @@ from core.plugin.entities.endpoint import EndpointEntityWithInstance -from core.plugin.manager.base import BasePluginManager +from core.plugin.impl.base import BasePluginClient -class PluginEndpointManager(BasePluginManager): +class PluginEndpointClient(BasePluginClient): def create_endpoint( self, tenant_id: str, user_id: str, plugin_unique_identifier: str, name: str, settings: dict ) -> bool: diff --git a/api/core/plugin/manager/exc.py b/api/core/plugin/impl/exc.py similarity index 100% rename from api/core/plugin/manager/exc.py rename to api/core/plugin/impl/exc.py diff --git a/api/core/plugin/manager/model.py b/api/core/plugin/impl/model.py similarity index 99% rename from api/core/plugin/manager/model.py rename to api/core/plugin/impl/model.py index 5ebc0c2320..f7607eef8d 100644 --- a/api/core/plugin/manager/model.py +++ b/api/core/plugin/impl/model.py @@ -18,10 +18,10 @@ from core.plugin.entities.plugin_daemon import ( PluginTextEmbeddingNumTokensResponse, PluginVoicesResponse, ) -from core.plugin.manager.base import BasePluginManager +from core.plugin.impl.base import BasePluginClient -class PluginModelManager(BasePluginManager): +class PluginModelClient(BasePluginClient): def fetch_model_providers(self, tenant_id: str) -> Sequence[PluginModelProviderEntity]: """ Fetch model providers for the given tenant. diff --git a/api/core/plugin/impl/oauth.py b/api/core/plugin/impl/oauth.py new file mode 100644 index 0000000000..91774984c8 --- /dev/null +++ b/api/core/plugin/impl/oauth.py @@ -0,0 +1,98 @@ +from collections.abc import Mapping +from typing import Any + +from werkzeug import Request + +from core.plugin.entities.plugin_daemon import PluginOAuthAuthorizationUrlResponse, PluginOAuthCredentialsResponse +from core.plugin.impl.base import BasePluginClient + + +class OAuthHandler(BasePluginClient): + def get_authorization_url( + self, + tenant_id: str, + user_id: str, + plugin_id: str, + provider: str, + system_credentials: Mapping[str, Any], + ) -> PluginOAuthAuthorizationUrlResponse: + return self._request_with_plugin_daemon_response( + "POST", + f"plugin/{tenant_id}/dispatch/oauth/get_authorization_url", + PluginOAuthAuthorizationUrlResponse, + data={ + "user_id": user_id, + "data": { + "provider": provider, + "system_credentials": system_credentials, + }, + }, + headers={ + "X-Plugin-ID": plugin_id, + "Content-Type": "application/json", + }, + ) + + def get_credentials( + self, + tenant_id: str, + user_id: str, + plugin_id: str, + provider: str, + system_credentials: Mapping[str, Any], + request: Request, + ) -> PluginOAuthCredentialsResponse: + """ + Get credentials from the given request. 
+ """ + + # encode request to raw http request + raw_request_bytes = self._convert_request_to_raw_data(request) + + return self._request_with_plugin_daemon_response( + "POST", + f"plugin/{tenant_id}/dispatch/oauth/get_credentials", + PluginOAuthCredentialsResponse, + data={ + "user_id": user_id, + "data": { + "provider": provider, + "system_credentials": system_credentials, + "raw_request_bytes": raw_request_bytes, + }, + }, + headers={ + "X-Plugin-ID": plugin_id, + "Content-Type": "application/json", + }, + ) + + def _convert_request_to_raw_data(self, request: Request) -> bytes: + """ + Convert a Request object to raw HTTP data. + + Args: + request: The Request object to convert. + + Returns: + The raw HTTP data as bytes. + """ + # Start with the request line + method = request.method + path = request.path + protocol = request.headers.get("HTTP_VERSION", "HTTP/1.1") + raw_data = f"{method} {path} {protocol}\r\n".encode() + + # Add headers + for header_name, header_value in request.headers.items(): + raw_data += f"{header_name}: {header_value}\r\n".encode() + + # Add empty line to separate headers from body + raw_data += b"\r\n" + + # Add body if exists + body = request.get_data(as_text=False) + if body: + raw_data += body + + return raw_data diff --git a/api/core/plugin/manager/plugin.py b/api/core/plugin/impl/plugin.py similarity index 98% rename from api/core/plugin/manager/plugin.py rename to api/core/plugin/impl/plugin.py index 15dcd6cb34..3349463ce5 100644 --- a/api/core/plugin/manager/plugin.py +++ b/api/core/plugin/impl/plugin.py @@ -10,10 +10,10 @@ from core.plugin.entities.plugin import ( PluginInstallationSource, ) from core.plugin.entities.plugin_daemon import PluginInstallTask, PluginInstallTaskStartResponse, PluginUploadResponse -from core.plugin.manager.base import BasePluginManager +from core.plugin.impl.base import BasePluginClient -class PluginInstallationManager(BasePluginManager): +class PluginInstaller(BasePluginClient): def fetch_plugin_by_identifier( self, tenant_id: str, diff --git a/api/core/plugin/manager/tool.py b/api/core/plugin/impl/tool.py similarity index 98% rename from api/core/plugin/manager/tool.py rename to api/core/plugin/impl/tool.py index 7592f867e1..19b26c8fe3 100644 --- a/api/core/plugin/manager/tool.py +++ b/api/core/plugin/impl/tool.py @@ -5,11 +5,11 @@ from pydantic import BaseModel from core.plugin.entities.plugin import GenericProviderID, ToolProviderID from core.plugin.entities.plugin_daemon import PluginBasicBooleanResponse, PluginToolProviderEntity -from core.plugin.manager.base import BasePluginManager +from core.plugin.impl.base import BasePluginClient from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter -class PluginToolManager(BasePluginManager): +class PluginToolManager(BasePluginClient): def fetch_tool_providers(self, tenant_id: str) -> list[PluginToolProviderEntity]: """ Fetch tool providers for the given tenant. 
diff --git a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py index 643ac2df4e..e9ff1ce43d 100644 --- a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py +++ b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py @@ -32,6 +32,7 @@ class LindormVectorStoreConfig(BaseModel): username: Optional[str] = None password: Optional[str] = None using_ugc: Optional[bool] = False + request_timeout: Optional[float] = 1.0 # timeout units: s @model_validator(mode="before") @classmethod @@ -251,9 +252,9 @@ class LindormVectorStore(BaseVector): query = default_vector_search_query(query_vector=query_vector, k=top_k, filters=filters, **kwargs) try: - params = {} + params = {"timeout": self._client_config.request_timeout} if self._using_ugc: - params["routing"] = self._routing + params["routing"] = self._routing # type: ignore response = self._client.search(index=self._collection_name, body=query, params=params) except Exception: logger.exception(f"Error executing vector search, query: {query}") @@ -304,8 +305,8 @@ class LindormVectorStore(BaseVector): routing=routing, routing_field=self._routing_field, ) - - response = self._client.search(index=self._collection_name, body=full_text_query) + params = {"timeout": self._client_config.request_timeout} + response = self._client.search(index=self._collection_name, body=full_text_query, params=params) docs = [] for hit in response["hits"]["hits"]: docs.append( @@ -554,6 +555,7 @@ class LindormVectorStoreFactory(AbstractVectorFactory): username=dify_config.LINDORM_USERNAME, password=dify_config.LINDORM_PASSWORD, using_ugc=dify_config.USING_UGC_INDEX, + request_timeout=dify_config.LINDORM_QUERY_TIMEOUT, ) using_ugc = dify_config.USING_UGC_INDEX if using_ugc is None: diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index 100bcb198c..7b3f826082 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -27,8 +27,8 @@ class MilvusConfig(BaseModel): uri: str # Milvus server URI token: Optional[str] = None # Optional token for authentication - user: str # Username for authentication - password: str # Password for authentication + user: Optional[str] = None # Username for authentication + password: Optional[str] = None # Password for authentication batch_size: int = 100 # Batch size for operations database: str = "default" # Database name enable_hybrid_search: bool = False # Flag to enable hybrid search @@ -43,10 +43,11 @@ class MilvusConfig(BaseModel): """ if not values.get("uri"): raise ValueError("config MILVUS_URI is required") - if not values.get("user"): - raise ValueError("config MILVUS_USER is required") - if not values.get("password"): - raise ValueError("config MILVUS_PASSWORD is required") + if not values.get("token"): + if not values.get("user"): + raise ValueError("config MILVUS_USER is required") + if not values.get("password"): + raise ValueError("config MILVUS_PASSWORD is required") return values def to_milvus_params(self): @@ -356,11 +357,14 @@ class MilvusVector(BaseVector): ) redis_client.set(collection_exist_cache_key, 1, ex=3600) - def _init_client(self, config) -> MilvusClient: + def _init_client(self, config: MilvusConfig) -> MilvusClient: """ Initialize and return a Milvus client. 
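With the relaxed validation above, `MilvusConfig` now accepts either a token or a user/password pair. A brief sketch of the two client initialization paths this enables, mirroring the `_init_client` change; the endpoints and secrets are placeholders.

```python
from pymilvus import MilvusClient

# Token-based auth (e.g. a managed Milvus endpoint); values are placeholders.
client = MilvusClient(
    uri="https://example-milvus-endpoint:19530",
    token="db_admin:********",
    db_name="default",
)

# Username/password auth, still supported when no token is configured.
client = MilvusClient(
    uri="http://localhost:19530",
    user="root",
    password="Milvus",
    db_name="default",
)
```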
""" - client = MilvusClient(uri=config.uri, user=config.user, password=config.password, db_name=config.database) + if config.token: + client = MilvusClient(uri=config.uri, token=config.token, db_name=config.database) + else: + client = MilvusClient(uri=config.uri, user=config.user, password=config.password, db_name=config.database) return client diff --git a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py index 6636646cff..e23b8d197f 100644 --- a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py +++ b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py @@ -1,10 +1,9 @@ import json import logging -import ssl -from typing import Any, Optional +from typing import Any, Literal, Optional from uuid import uuid4 -from opensearchpy import OpenSearch, helpers +from opensearchpy import OpenSearch, Urllib3AWSV4SignerAuth, Urllib3HttpConnection, helpers from opensearchpy.helpers import BulkIndexError from pydantic import BaseModel, model_validator @@ -24,9 +23,12 @@ logger = logging.getLogger(__name__) class OpenSearchConfig(BaseModel): host: str port: int + secure: bool = False + auth_method: Literal["basic", "aws_managed_iam"] = "basic" user: Optional[str] = None password: Optional[str] = None - secure: bool = False + aws_region: Optional[str] = None + aws_service: Optional[str] = None @model_validator(mode="before") @classmethod @@ -35,24 +37,40 @@ class OpenSearchConfig(BaseModel): raise ValueError("config OPENSEARCH_HOST is required") if not values.get("port"): raise ValueError("config OPENSEARCH_PORT is required") + if values.get("auth_method") == "aws_managed_iam": + if not values.get("aws_region"): + raise ValueError("config OPENSEARCH_AWS_REGION is required for AWS_MANAGED_IAM auth method") + if not values.get("aws_service"): + raise ValueError("config OPENSEARCH_AWS_SERVICE is required for AWS_MANAGED_IAM auth method") return values - def create_ssl_context(self) -> ssl.SSLContext: - ssl_context = ssl.create_default_context() - ssl_context.check_hostname = False - ssl_context.verify_mode = ssl.CERT_NONE # Disable Certificate Validation - return ssl_context + def create_aws_managed_iam_auth(self) -> Urllib3AWSV4SignerAuth: + import boto3 # type: ignore + + return Urllib3AWSV4SignerAuth( + credentials=boto3.Session().get_credentials(), + region=self.aws_region, + service=self.aws_service, # type: ignore[arg-type] + ) def to_opensearch_params(self) -> dict[str, Any]: params = { "hosts": [{"host": self.host, "port": self.port}], "use_ssl": self.secure, "verify_certs": self.secure, + "connection_class": Urllib3HttpConnection, + "pool_maxsize": 20, } - if self.user and self.password: + + if self.auth_method == "basic": + logger.info("Using basic authentication for OpenSearch Vector DB") + params["http_auth"] = (self.user, self.password) - if self.secure: - params["ssl_context"] = self.create_ssl_context() + elif self.auth_method == "aws_managed_iam": + logger.info("Using AWS managed IAM role for OpenSearch Vector DB") + + params["http_auth"] = self.create_aws_managed_iam_auth() + return params @@ -76,16 +94,23 @@ class OpenSearchVector(BaseVector): action = { "_op_type": "index", "_index": self._collection_name.lower(), - "_id": uuid4().hex, "_source": { Field.CONTENT_KEY.value: documents[i].page_content, Field.VECTOR.value: embeddings[i], # Make sure you pass an array here Field.METADATA_KEY.value: documents[i].metadata, }, } + # See 
https://github.com/langchain-ai/langchainjs/issues/4346#issuecomment-1935123377 + if self._client_config.aws_service not in ["aoss"]: + action["_id"] = uuid4().hex actions.append(action) - helpers.bulk(self._client, actions) + helpers.bulk( + client=self._client, + actions=actions, + timeout=30, + max_retries=3, + ) def get_ids_by_metadata_field(self, key: str, value: str): query = {"query": {"term": {f"{Field.METADATA_KEY.value}.{key}": value}}} @@ -234,6 +259,7 @@ class OpenSearchVector(BaseVector): }, } + logger.info(f"Creating OpenSearch index {self._collection_name.lower()}") self._client.indices.create(index=self._collection_name.lower(), body=index_body) redis_client.set(collection_exist_cache_key, 1, ex=3600) @@ -252,9 +278,12 @@ class OpenSearchVectorFactory(AbstractVectorFactory): open_search_config = OpenSearchConfig( host=dify_config.OPENSEARCH_HOST or "localhost", port=dify_config.OPENSEARCH_PORT, + secure=dify_config.OPENSEARCH_SECURE, + auth_method=dify_config.OPENSEARCH_AUTH_METHOD.value, user=dify_config.OPENSEARCH_USER, password=dify_config.OPENSEARCH_PASSWORD, - secure=dify_config.OPENSEARCH_SECURE, + aws_region=dify_config.OPENSEARCH_AWS_REGION, + aws_service=dify_config.OPENSEARCH_AWS_SERVICE, ) return OpenSearchVector(collection_name=collection_name, config=open_search_config) diff --git a/api/core/rag/datasource/vdb/pyvastbase/__init__.py b/api/core/rag/datasource/vdb/pyvastbase/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py b/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py new file mode 100644 index 0000000000..a61d571e16 --- /dev/null +++ b/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py @@ -0,0 +1,243 @@ +import json +import uuid +from contextlib import contextmanager +from typing import Any + +import psycopg2.extras # type: ignore +import psycopg2.pool # type: ignore +from pydantic import BaseModel, model_validator + +from configs import dify_config +from core.rag.datasource.vdb.vector_base import BaseVector +from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory +from core.rag.datasource.vdb.vector_type import VectorType +from core.rag.embedding.embedding_base import Embeddings +from core.rag.models.document import Document +from extensions.ext_redis import redis_client +from models.dataset import Dataset + + +class VastbaseVectorConfig(BaseModel): + host: str + port: int + user: str + password: str + database: str + min_connection: int + max_connection: int + + @model_validator(mode="before") + @classmethod + def validate_config(cls, values: dict) -> dict: + if not values["host"]: + raise ValueError("config VASTBASE_HOST is required") + if not values["port"]: + raise ValueError("config VASTBASE_PORT is required") + if not values["user"]: + raise ValueError("config VASTBASE_USER is required") + if not values["password"]: + raise ValueError("config VASTBASE_PASSWORD is required") + if not values["database"]: + raise ValueError("config VASTBASE_DATABASE is required") + if not values["min_connection"]: + raise ValueError("config VASTBASE_MIN_CONNECTION is required") + if not values["max_connection"]: + raise ValueError("config VASTBASE_MAX_CONNECTION is required") + if values["min_connection"] > values["max_connection"]: + raise ValueError("config VASTBASE_MIN_CONNECTION should less than VASTBASE_MAX_CONNECTION") + return values + + +SQL_CREATE_TABLE = """ +CREATE TABLE IF NOT EXISTS {table_name} ( + id UUID PRIMARY KEY, + text TEXT NOT NULL, 
+ meta JSONB NOT NULL, + embedding floatvector({dimension}) NOT NULL +); +""" + +SQL_CREATE_INDEX = """ +CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name} +USING hnsw (embedding floatvector_cosine_ops) WITH (m = 16, ef_construction = 64); +""" + + +class VastbaseVector(BaseVector): + def __init__(self, collection_name: str, config: VastbaseVectorConfig): + super().__init__(collection_name) + self.pool = self._create_connection_pool(config) + self.table_name = f"embedding_{collection_name}" + + def get_type(self) -> str: + return VectorType.VASTBASE + + def _create_connection_pool(self, config: VastbaseVectorConfig): + return psycopg2.pool.SimpleConnectionPool( + config.min_connection, + config.max_connection, + host=config.host, + port=config.port, + user=config.user, + password=config.password, + database=config.database, + ) + + @contextmanager + def _get_cursor(self): + conn = self.pool.getconn() + cur = conn.cursor() + try: + yield cur + finally: + cur.close() + conn.commit() + self.pool.putconn(conn) + + def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs): + dimension = len(embeddings[0]) + self._create_collection(dimension) + return self.add_texts(texts, embeddings) + + def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): + values = [] + pks = [] + for i, doc in enumerate(documents): + if doc.metadata is not None: + doc_id = doc.metadata.get("doc_id", str(uuid.uuid4())) + pks.append(doc_id) + values.append( + ( + doc_id, + doc.page_content, + json.dumps(doc.metadata), + embeddings[i], + ) + ) + with self._get_cursor() as cur: + psycopg2.extras.execute_values( + cur, f"INSERT INTO {self.table_name} (id, text, meta, embedding) VALUES %s", values + ) + return pks + + def text_exists(self, id: str) -> bool: + with self._get_cursor() as cur: + cur.execute(f"SELECT id FROM {self.table_name} WHERE id = %s", (id,)) + return cur.fetchone() is not None + + def get_by_ids(self, ids: list[str]) -> list[Document]: + with self._get_cursor() as cur: + cur.execute(f"SELECT meta, text FROM {self.table_name} WHERE id IN %s", (tuple(ids),)) + docs = [] + for record in cur: + docs.append(Document(page_content=record[1], metadata=record[0])) + return docs + + def delete_by_ids(self, ids: list[str]) -> None: + # Avoiding crashes caused by performing delete operations on empty lists in certain scenarios + # Scenario 1: extract a document fails, resulting in a table not being created. + # Then clicking the retry button triggers a delete operation on an empty list. + if not ids: + return + with self._get_cursor() as cur: + cur.execute(f"DELETE FROM {self.table_name} WHERE id IN %s", (tuple(ids),)) + + def delete_by_metadata_field(self, key: str, value: str) -> None: + with self._get_cursor() as cur: + cur.execute(f"DELETE FROM {self.table_name} WHERE meta->>%s = %s", (key, value)) + + def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: + """ + Search the nearest neighbors to a vector. + + :param query_vector: The input vector to search for similar items. + :param top_k: The number of nearest neighbors to return, default is 5. + :return: List of Documents that are nearest to the query vector. 
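A short usage sketch of the new `VastbaseVector` store — build a config, create the collection from an initial batch, then query. Connection details and the toy 4-dimensional embedding are placeholders; real deployments would go through the factory registered later in this change rather than constructing the class directly.

```python
from core.rag.datasource.vdb.pyvastbase.vastbase_vector import VastbaseVector, VastbaseVectorConfig
from core.rag.models.document import Document

# Placeholder connection settings for illustration.
config = VastbaseVectorConfig(
    host="localhost",
    port=5432,
    user="dify",
    password="secret",
    database="dify",
    min_connection=1,
    max_connection=5,
)
store = VastbaseVector(collection_name="demo_dataset", config=config)

docs = [Document(page_content="hello vastbase", metadata={"doc_id": "doc-1"})]
embeddings = [[0.1, 0.2, 0.3, 0.4]]  # toy vectors; their dimension fixes the table schema

store.create(docs, embeddings)  # creates the table (and HNSW index), then inserts
hits = store.search_by_vector([0.1, 0.2, 0.3, 0.4], top_k=2)
for hit in hits:
    print(hit.page_content, hit.metadata)
```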
+ """ + top_k = kwargs.get("top_k", 4) + + if not isinstance(top_k, int) or top_k <= 0: + raise ValueError("top_k must be a positive integer") + with self._get_cursor() as cur: + cur.execute( + f"SELECT meta, text, embedding <=> %s AS distance FROM {self.table_name}" + f" ORDER BY distance LIMIT {top_k}", + (json.dumps(query_vector),), + ) + docs = [] + score_threshold = float(kwargs.get("score_threshold") or 0.0) + for record in cur: + metadata, text, distance = record + score = 1 - distance + metadata["score"] = score + if score > score_threshold: + docs.append(Document(page_content=text, metadata=metadata)) + return docs + + def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: + top_k = kwargs.get("top_k", 5) + + if not isinstance(top_k, int) or top_k <= 0: + raise ValueError("top_k must be a positive integer") + with self._get_cursor() as cur: + cur.execute( + f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score + FROM {self.table_name} + WHERE to_tsvector(text) @@ plainto_tsquery(%s) + ORDER BY score DESC + LIMIT {top_k}""", + # f"'{query}'" is required in order to account for whitespace in query + (f"'{query}'", f"'{query}'"), + ) + + docs = [] + + for record in cur: + metadata, text, score = record + metadata["score"] = score + docs.append(Document(page_content=text, metadata=metadata)) + + return docs + + def delete(self) -> None: + with self._get_cursor() as cur: + cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") + + def _create_collection(self, dimension: int): + cache_key = f"vector_indexing_{self._collection_name}" + lock_name = f"{cache_key}_lock" + with redis_client.lock(lock_name, timeout=20): + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" + if redis_client.get(collection_exist_cache_key): + return + + with self._get_cursor() as cur: + cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension)) + # Vastbase 支持的向量维度取值范围为 [1,16000] + if dimension <= 16000: + cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name)) + redis_client.set(collection_exist_cache_key, 1, ex=3600) + + +class VastbaseVectorFactory(AbstractVectorFactory): + def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> VastbaseVector: + if dataset.index_struct_dict: + class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"] + collection_name = class_prefix + else: + dataset_id = dataset.id + collection_name = Dataset.gen_collection_name_by_id(dataset_id) + dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.VASTBASE, collection_name)) + + return VastbaseVector( + collection_name=collection_name, + config=VastbaseVectorConfig( + host=dify_config.VASTBASE_HOST or "localhost", + port=dify_config.VASTBASE_PORT, + user=dify_config.VASTBASE_USER or "dify", + password=dify_config.VASTBASE_PASSWORD or "", + database=dify_config.VASTBASE_DATABASE or "dify", + min_connection=dify_config.VASTBASE_MIN_CONNECTION, + max_connection=dify_config.VASTBASE_MAX_CONNECTION, + ), + ) diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 05158cc7ca..66e002312a 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -74,6 +74,10 @@ class Vector: from core.rag.datasource.vdb.pgvector.pgvector import PGVectorFactory return PGVectorFactory + case VectorType.VASTBASE: + from core.rag.datasource.vdb.pyvastbase.vastbase_vector 
import VastbaseVectorFactory + + return VastbaseVectorFactory case VectorType.PGVECTO_RS: from core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs import PGVectoRSFactory diff --git a/api/core/rag/datasource/vdb/vector_type.py b/api/core/rag/datasource/vdb/vector_type.py index 0421be3458..7a81565e37 100644 --- a/api/core/rag/datasource/vdb/vector_type.py +++ b/api/core/rag/datasource/vdb/vector_type.py @@ -7,7 +7,9 @@ class VectorType(StrEnum): MILVUS = "milvus" MYSCALE = "myscale" PGVECTOR = "pgvector" + VASTBASE = "vastbase" PGVECTO_RS = "pgvecto-rs" + QDRANT = "qdrant" RELYT = "relyt" TIDB_VECTOR = "tidb_vector" diff --git a/api/core/rag/extractor/watercrawl/provider.py b/api/core/rag/extractor/watercrawl/provider.py index b8003b386b..21fbb2100f 100644 --- a/api/core/rag/extractor/watercrawl/provider.py +++ b/api/core/rag/extractor/watercrawl/provider.py @@ -20,7 +20,7 @@ class WaterCrawlProvider: } if options.get("crawl_sub_pages", True): spider_options["page_limit"] = options.get("limit", 1) - spider_options["max_depth"] = options.get("depth", 1) + spider_options["max_depth"] = options.get("max_depth", 1) spider_options["include_paths"] = options.get("includes", "").split(",") if options.get("includes") else [] spider_options["exclude_paths"] = options.get("excludes", "").split(",") if options.get("excludes") else [] diff --git a/api/core/rag/rerank/rerank_model.py b/api/core/rag/rerank/rerank_model.py index ac7a3f8bb8..693535413a 100644 --- a/api/core/rag/rerank/rerank_model.py +++ b/api/core/rag/rerank/rerank_model.py @@ -52,14 +52,16 @@ class RerankModelRunner(BaseRerankRunner): rerank_documents = [] for result in rerank_result.docs: - # format document - rerank_document = Document( - page_content=result.text, - metadata=documents[result.index].metadata, - provider=documents[result.index].provider, - ) - if rerank_document.metadata is not None: - rerank_document.metadata["score"] = result.score - rerank_documents.append(rerank_document) + if score_threshold is None or result.score >= score_threshold: + # format document + rerank_document = Document( + page_content=result.text, + metadata=documents[result.index].metadata, + provider=documents[result.index].provider, + ) + if rerank_document.metadata is not None: + rerank_document.metadata["score"] = result.score + rerank_documents.append(rerank_document) - return rerank_documents + rerank_documents.sort(key=lambda x: x.metadata.get("score", 0.0), reverse=True) + return rerank_documents[:top_n] if top_n else rerank_documents diff --git a/api/repositories/__init__.py b/api/core/repositories/__init__.py similarity index 72% rename from api/repositories/__init__.py rename to api/core/repositories/__init__.py index 4cc339688b..5c70d50cde 100644 --- a/api/repositories/__init__.py +++ b/api/core/repositories/__init__.py @@ -2,5 +2,5 @@ Repository implementations for data access. This package contains concrete implementations of the repository interfaces -defined in the core.repository package. +defined in the core.workflow.repository package. 
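The reranker change above now drops documents below `score_threshold` before formatting, sorts the survivors by score, and truncates to `top_n`. A tiny self-contained sketch of that filter-sort-truncate shape, using plain (index, score) pairs in place of the runner's `Document` objects:

```python
# Toy rerank results: (index-into-original-docs, score) pairs, standing in
# for rerank_result.docs in the runner above.
scored = [(0, 0.91), (1, 0.12), (2, 0.55), (3, 0.78)]

score_threshold = 0.5   # documents scoring below this are dropped
top_n = 2               # keep at most this many after sorting

kept = [(idx, score) for idx, score in scored if score_threshold is None or score >= score_threshold]
kept.sort(key=lambda pair: pair[1], reverse=True)
result = kept[:top_n] if top_n else kept

print(result)  # [(0, 0.91), (3, 0.78)]
```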
""" diff --git a/api/repositories/repository_registry.py b/api/core/repositories/repository_registry.py similarity index 95% rename from api/repositories/repository_registry.py rename to api/core/repositories/repository_registry.py index aa0a208d8e..b66f3ba8e6 100644 --- a/api/repositories/repository_registry.py +++ b/api/core/repositories/repository_registry.py @@ -11,9 +11,9 @@ from typing import Any from sqlalchemy.orm import sessionmaker from configs import dify_config -from core.repository.repository_factory import RepositoryFactory +from core.repositories.workflow_node_execution import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.repository.repository_factory import RepositoryFactory from extensions.ext_database import db -from repositories.workflow_node_execution import SQLAlchemyWorkflowNodeExecutionRepository logger = logging.getLogger(__name__) diff --git a/api/repositories/workflow_node_execution/__init__.py b/api/core/repositories/workflow_node_execution/__init__.py similarity index 51% rename from api/repositories/workflow_node_execution/__init__.py rename to api/core/repositories/workflow_node_execution/__init__.py index eed827bd05..76e8282b7d 100644 --- a/api/repositories/workflow_node_execution/__init__.py +++ b/api/core/repositories/workflow_node_execution/__init__.py @@ -2,7 +2,7 @@ WorkflowNodeExecution repository implementations. """ -from repositories.workflow_node_execution.sqlalchemy_repository import SQLAlchemyWorkflowNodeExecutionRepository +from core.repositories.workflow_node_execution.sqlalchemy_repository import SQLAlchemyWorkflowNodeExecutionRepository __all__ = [ "SQLAlchemyWorkflowNodeExecutionRepository", diff --git a/api/repositories/workflow_node_execution/sqlalchemy_repository.py b/api/core/repositories/workflow_node_execution/sqlalchemy_repository.py similarity index 98% rename from api/repositories/workflow_node_execution/sqlalchemy_repository.py rename to api/core/repositories/workflow_node_execution/sqlalchemy_repository.py index e0ad384be6..b1d37163a4 100644 --- a/api/repositories/workflow_node_execution/sqlalchemy_repository.py +++ b/api/core/repositories/workflow_node_execution/sqlalchemy_repository.py @@ -10,7 +10,7 @@ from sqlalchemy import UnaryExpression, asc, delete, desc, select from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker -from core.repository.workflow_node_execution_repository import OrderConfig +from core.workflow.repository.workflow_node_execution_repository import OrderConfig from models.workflow import WorkflowNodeExecution, WorkflowNodeExecutionStatus, WorkflowNodeExecutionTriggeredFrom logger = logging.getLogger(__name__) diff --git a/api/core/tools/builtin_tool/provider.py b/api/core/tools/builtin_tool/provider.py index 4f733f0ea1..cf75bd3d7e 100644 --- a/api/core/tools/builtin_tool/provider.py +++ b/api/core/tools/builtin_tool/provider.py @@ -35,8 +35,9 @@ class BuiltinToolProviderController(ToolProviderController): provider_yaml["credentials_for_provider"][credential_name]["name"] = credential_name credentials_schema = [] - for credential in provider_yaml.get("credentials_for_provider", {}).values(): - credentials_schema.append(credential) + for credential in provider_yaml.get("credentials_for_provider", {}): + credential_dict = provider_yaml.get("credentials_for_provider", {}).get(credential, {}) + credentials_schema.append(credential_dict) super().__init__( entity=ToolProviderEntity( diff --git a/api/core/tools/plugin_tool/provider.py b/api/core/tools/plugin_tool/provider.py index 
3616e426b9..494b8e209c 100644 --- a/api/core/tools/plugin_tool/provider.py +++ b/api/core/tools/plugin_tool/provider.py @@ -1,6 +1,6 @@ from typing import Any -from core.plugin.manager.tool import PluginToolManager +from core.plugin.impl.tool import PluginToolManager from core.tools.__base.tool_runtime import ToolRuntime from core.tools.builtin_tool.provider import BuiltinToolProviderController from core.tools.entities.tool_entities import ToolProviderEntityWithPlugin, ToolProviderType diff --git a/api/core/tools/plugin_tool/tool.py b/api/core/tools/plugin_tool/tool.py index f31a9a0d3e..d21e3d7d1c 100644 --- a/api/core/tools/plugin_tool/tool.py +++ b/api/core/tools/plugin_tool/tool.py @@ -1,7 +1,7 @@ from collections.abc import Generator from typing import Any, Optional -from core.plugin.manager.tool import PluginToolManager +from core.plugin.impl.tool import PluginToolManager from core.plugin.utils.converter import convert_parameters_to_plugin_format from core.tools.__base.tool import Tool from core.tools.__base.tool_runtime import ToolRuntime diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py index 997917f31c..3dce1ca293 100644 --- a/api/core/tools/tool_engine.py +++ b/api/core/tools/tool_engine.py @@ -246,7 +246,7 @@ class ToolEngine: + "you do not need to create it, just tell the user to check it now." ) elif response.type == ToolInvokeMessage.MessageType.JSON: - result = json.dumps( + result += json.dumps( cast(ToolInvokeMessage.JsonMessage, response.message).json_object, ensure_ascii=False ) else: diff --git a/api/core/tools/tool_manager.py b/api/core/tools/tool_manager.py index f2d0b74f7c..aa2661fe63 100644 --- a/api/core/tools/tool_manager.py +++ b/api/core/tools/tool_manager.py @@ -10,7 +10,7 @@ from yarl import URL import contexts from core.plugin.entities.plugin import ToolProviderID -from core.plugin.manager.tool import PluginToolManager +from core.plugin.impl.tool import PluginToolManager from core.tools.__base.tool_provider import ToolProviderController from core.tools.__base.tool_runtime import ToolRuntime from core.tools.plugin_tool.provider import PluginToolProviderController diff --git a/api/core/workflow/nodes/agent/agent_node.py b/api/core/workflow/nodes/agent/agent_node.py index da40cbcdea..771e0ca7a5 100644 --- a/api/core/workflow/nodes/agent/agent_node.py +++ b/api/core/workflow/nodes/agent/agent_node.py @@ -7,8 +7,8 @@ from core.agent.plugin_entities import AgentStrategyParameter from core.memory.token_buffer_memory import TokenBufferMemory from core.model_manager import ModelInstance, ModelManager from core.model_runtime.entities.model_entities import AIModelEntity, ModelType -from core.plugin.manager.exc import PluginDaemonClientSideError -from core.plugin.manager.plugin import PluginInstallationManager +from core.plugin.impl.exc import PluginDaemonClientSideError +from core.plugin.impl.plugin import PluginInstaller from core.provider_manager import ProviderManager from core.tools.entities.tool_entities import ToolParameter, ToolProviderType from core.tools.tool_manager import ToolManager @@ -297,7 +297,7 @@ class AgentNode(ToolNode): Get agent strategy icon :return: """ - manager = PluginInstallationManager() + manager = PluginInstaller() plugins = manager.list_plugins(self.tenant_id) try: current_plugin = next( diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index 960d0c3961..8fb1baec89 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ 
b/api/core/workflow/nodes/document_extractor/node.py @@ -11,6 +11,7 @@ import docx import pandas as pd import pypandoc # type: ignore import pypdfium2 # type: ignore +import webvtt # type: ignore import yaml # type: ignore from docx.document import Document from docx.oxml.table import CT_Tbl @@ -132,6 +133,10 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str: return _extract_text_from_json(file_content) case "application/x-yaml" | "text/yaml": return _extract_text_from_yaml(file_content) + case "text/vtt": + return _extract_text_from_vtt(file_content) + case "text/properties": + return _extract_text_from_properties(file_content) case _: raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}") @@ -139,7 +144,7 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str: def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str: """Extract text from a file based on its file extension.""" match file_extension: - case ".txt" | ".markdown" | ".md" | ".html" | ".htm" | ".xml" | ".vtt": + case ".txt" | ".markdown" | ".md" | ".html" | ".htm" | ".xml": return _extract_text_from_plain_text(file_content) case ".json": return _extract_text_from_json(file_content) @@ -165,6 +170,10 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) return _extract_text_from_eml(file_content) case ".msg": return _extract_text_from_msg(file_content) + case ".vtt": + return _extract_text_from_vtt(file_content) + case ".properties": + return _extract_text_from_properties(file_content) case _: raise UnsupportedFileTypeError(f"Unsupported Extension Type: {file_extension}") @@ -214,8 +223,8 @@ def _extract_text_from_doc(file_content: bytes) -> str: """ from unstructured.partition.api import partition_via_api - if not (dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY): - raise TextExtractionError("UNSTRUCTURED_API_URL and UNSTRUCTURED_API_KEY must be set") + if not dify_config.UNSTRUCTURED_API_URL: + raise TextExtractionError("UNSTRUCTURED_API_URL must be set") try: with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file: @@ -226,7 +235,7 @@ def _extract_text_from_doc(file_content: bytes) -> str: file=file, metadata_filename=temp_file.name, api_url=dify_config.UNSTRUCTURED_API_URL, - api_key=dify_config.UNSTRUCTURED_API_KEY, + api_key=dify_config.UNSTRUCTURED_API_KEY, # type: ignore ) os.unlink(temp_file.name) return "\n".join([getattr(element, "text", "") for element in elements]) @@ -462,3 +471,68 @@ def _extract_text_from_msg(file_content: bytes) -> str: return "\n".join([str(element) for element in elements]) except Exception as e: raise TextExtractionError(f"Failed to extract text from MSG: {str(e)}") from e + + +def _extract_text_from_vtt(vtt_bytes: bytes) -> str: + text = _extract_text_from_plain_text(vtt_bytes) + + # remove bom + text = text.lstrip("\ufeff") + + raw_results = [] + for caption in webvtt.from_string(text): + raw_results.append((caption.voice, caption.text)) + + # Merge consecutive utterances by the same speaker + merged_results = [] + if raw_results: + current_speaker, current_text = raw_results[0] + + for i in range(1, len(raw_results)): + spk, txt = raw_results[i] + if spk == None: + merged_results.append((None, current_text)) + continue + + if spk == current_speaker: + # If it is the same speaker, merge the utterances (joined by space) + current_text += " " + txt + else: + # If the speaker changes, register the utterance so far and move 
on + merged_results.append((current_speaker, current_text)) + current_speaker, current_text = spk, txt + + # Add the last element + merged_results.append((current_speaker, current_text)) + else: + merged_results = raw_results + + # Return the result in the specified format: Speaker "text" style + formatted = [f'{spk or ""} "{txt}"' for spk, txt in merged_results] + return "\n".join(formatted) + + +def _extract_text_from_properties(file_content: bytes) -> str: + try: + text = _extract_text_from_plain_text(file_content) + lines = text.splitlines() + result = [] + for line in lines: + line = line.strip() + # Preserve comments and empty lines + if not line or line.startswith("#") or line.startswith("!"): + result.append(line) + continue + + if "=" in line: + key, value = line.split("=", 1) + elif ":" in line: + key, value = line.split(":", 1) + else: + key, value = line, "" + + result.append(f"{key.strip()}: {value.strip()}") + + return "\n".join(result) + except Exception as e: + raise TextExtractionError(f"Failed to extract text from properties file: {str(e)}") from e diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index 1089e7168e..35b146e5d9 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -38,6 +38,7 @@ from core.model_runtime.entities.model_entities import ( ) from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from core.model_runtime.utils.encoders import jsonable_encoder +from core.model_runtime.utils.helper import convert_llm_result_chunk_to_str from core.plugin.entities.plugin import ModelProviderID from core.prompt.entities.advanced_prompt_entities import CompletionModelPromptTemplate, MemoryConfig from core.prompt.utils.prompt_message_util import PromptMessageUtil @@ -269,18 +270,7 @@ class LLMNode(BaseNode[LLMNodeData]): def _handle_invoke_result(self, invoke_result: LLMResult | Generator) -> Generator[NodeEvent, None, None]: if isinstance(invoke_result, LLMResult): - content = invoke_result.message.content - if content is None: - message_text = "" - elif isinstance(content, str): - message_text = content - elif isinstance(content, list): - # Assuming the list contains PromptMessageContent objects with a "data" attribute - message_text = "".join( - item.data if hasattr(item, "data") and isinstance(item.data, str) else str(item) for item in content - ) - else: - message_text = str(content) + message_text = convert_llm_result_chunk_to_str(invoke_result.message.content) yield ModelInvokeCompletedEvent( text=message_text, @@ -295,7 +285,7 @@ class LLMNode(BaseNode[LLMNodeData]): usage = None finish_reason = None for result in invoke_result: - text = result.delta.message.content + text = convert_llm_result_chunk_to_str(result.delta.message.content) full_text += text yield RunStreamChunkEvent(chunk_content=text, from_variable_selector=[self.node_id, "text"]) diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py index 6f0cc3f6d2..c72ae5b69b 100644 --- a/api/core/workflow/nodes/tool/tool_node.py +++ b/api/core/workflow/nodes/tool/tool_node.py @@ -6,8 +6,8 @@ from sqlalchemy.orm import Session from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler from core.file import File, FileTransferMethod -from core.plugin.manager.exc import PluginDaemonClientSideError -from core.plugin.manager.plugin import PluginInstallationManager +from core.plugin.impl.exc import PluginDaemonClientSideError 
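The new `_extract_text_from_vtt` helper merges consecutive captions from the same speaker before formatting them as `Speaker "text"` lines. A quick sketch of how the underlying `webvtt` parsing behaves on a small transcript; the cue content is invented for illustration, and the expected output is approximate.

```python
import webvtt

SAMPLE_VTT = """WEBVTT

00:00:00.000 --> 00:00:02.000
<v Alice>Hello there.

00:00:02.000 --> 00:00:04.000
<v Alice>How are you?

00:00:04.000 --> 00:00:06.000
<v Bob>Doing fine, thanks.
"""

for caption in webvtt.from_string(SAMPLE_VTT):
    print(caption.voice, repr(caption.text))
# Expected output (roughly):
#   Alice 'Hello there.'
#   Alice 'How are you?'
#   Bob 'Doing fine, thanks.'
# _extract_text_from_vtt would then merge the two Alice cues into one line:
#   Alice "Hello there. How are you?"
#   Bob "Doing fine, thanks."
```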
+from core.plugin.impl.plugin import PluginInstaller from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter from core.tools.errors import ToolInvokeError from core.tools.tool_engine import ToolEngine @@ -307,7 +307,7 @@ class ToolNode(BaseNode[ToolNodeData]): icon = tool_info.get("icon", "") dict_metadata = dict(message.message.metadata) if dict_metadata.get("provider"): - manager = PluginInstallationManager() + manager = PluginInstaller() plugins = manager.list_plugins(self.tenant_id) try: current_plugin = next( diff --git a/api/core/repository/__init__.py b/api/core/workflow/repository/__init__.py similarity index 61% rename from api/core/repository/__init__.py rename to api/core/workflow/repository/__init__.py index 253df1251d..d91506e72f 100644 --- a/api/core/repository/__init__.py +++ b/api/core/workflow/repository/__init__.py @@ -6,8 +6,8 @@ for accessing and manipulating data, regardless of the underlying storage mechanism. """ -from core.repository.repository_factory import RepositoryFactory -from core.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from core.workflow.repository.repository_factory import RepositoryFactory +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository __all__ = [ "RepositoryFactory", diff --git a/api/core/repository/repository_factory.py b/api/core/workflow/repository/repository_factory.py similarity index 97% rename from api/core/repository/repository_factory.py rename to api/core/workflow/repository/repository_factory.py index 7da7e49055..45d6f5d842 100644 --- a/api/core/repository/repository_factory.py +++ b/api/core/workflow/repository/repository_factory.py @@ -8,7 +8,7 @@ It does not contain any implementation details or dependencies on specific repos from collections.abc import Callable, Mapping from typing import Any, Literal, Optional, cast -from core.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository # Type for factory functions - takes a dict of parameters and returns any repository type RepositoryFactoryFunc = Callable[[Mapping[str, Any]], Any] diff --git a/api/core/repository/workflow_node_execution_repository.py b/api/core/workflow/repository/workflow_node_execution_repository.py similarity index 100% rename from api/core/repository/workflow_node_execution_repository.py rename to api/core/workflow/repository/workflow_node_execution_repository.py diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index 68f3c65a4b..18d4f4885d 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -20,7 +20,8 @@ if [[ "${MODE}" == "worker" ]]; then CONCURRENCY_OPTION="-c ${CELERY_WORKER_AMOUNT:-1}" fi - exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION --loglevel ${LOG_LEVEL:-INFO} \ + exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION \ + --max-tasks-per-child ${MAX_TASK_PRE_CHILD:-50} --loglevel ${LOG_LEVEL:-INFO} \ -Q ${CELERY_QUEUES:-dataset,mail,ops_trace,app_deletion} elif [[ "${MODE}" == "beat" ]]; then diff --git a/api/extensions/ext_commands.py b/api/extensions/ext_commands.py index be43f55ea7..ddc2158a02 100644 --- a/api/extensions/ext_commands.py +++ b/api/extensions/ext_commands.py @@ -5,6 +5,7 @@ def init_app(app: DifyApp): from commands import ( add_qdrant_index, clear_free_plan_tenant_expired_logs, + 
clear_orphaned_file_records, convert_to_agent_apps, create_tenant, extract_plugins, @@ -13,6 +14,7 @@ def init_app(app: DifyApp): install_plugins, migrate_data_for_plugin, old_metadata_migration, + remove_orphaned_files_on_storage, reset_email, reset_encrypt_key_pair, reset_password, @@ -36,6 +38,8 @@ def init_app(app: DifyApp): install_plugins, old_metadata_migration, clear_free_plan_tenant_expired_logs, + clear_orphaned_file_records, + remove_orphaned_files_on_storage, ] for cmd in cmds_to_register: app.cli.add_command(cmd) diff --git a/api/extensions/ext_otel.py b/api/extensions/ext_otel.py index a2edd832ec..be47fdc6d6 100644 --- a/api/extensions/ext_otel.py +++ b/api/extensions/ext_otel.py @@ -8,29 +8,6 @@ from typing import Union from celery.signals import worker_init # type: ignore from flask_login import user_loaded_from_request, user_logged_in # type: ignore -from opentelemetry import trace -from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.celery import CeleryInstrumentor -from opentelemetry.instrumentation.flask import FlaskInstrumentor -from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor -from opentelemetry.metrics import get_meter, get_meter_provider, set_meter_provider -from opentelemetry.propagate import set_global_textmap -from opentelemetry.propagators.b3 import B3Format -from opentelemetry.propagators.composite import CompositePropagator -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import ( - BatchSpanProcessor, - ConsoleSpanExporter, -) -from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio -from opentelemetry.semconv.resource import ResourceAttributes -from opentelemetry.trace import Span, get_current_span, get_tracer_provider, set_tracer_provider -from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator -from opentelemetry.trace.status import StatusCode from configs import dify_config from dify_app import DifyApp @@ -39,130 +16,189 @@ from dify_app import DifyApp @user_logged_in.connect @user_loaded_from_request.connect def on_user_loaded(_sender, user): - if user: - current_span = get_current_span() - if current_span: - current_span.set_attribute("service.tenant.id", user.current_tenant_id) - current_span.set_attribute("service.user.id", user.id) + if dify_config.ENABLE_OTEL: + from opentelemetry.trace import get_current_span + + if user: + current_span = get_current_span() + if current_span: + current_span.set_attribute("service.tenant.id", user.current_tenant_id) + current_span.set_attribute("service.user.id", user.id) def init_app(app: DifyApp): - if dify_config.ENABLE_OTEL: - setup_context_propagation() - # Initialize OpenTelemetry - # Follow Semantic Convertions 1.32.0 to define resource attributes - resource = Resource( - attributes={ - ResourceAttributes.SERVICE_NAME: dify_config.APPLICATION_NAME, - ResourceAttributes.SERVICE_VERSION: f"dify-{dify_config.CURRENT_VERSION}-{dify_config.COMMIT_SHA}", - ResourceAttributes.PROCESS_PID: os.getpid(), - ResourceAttributes.DEPLOYMENT_ENVIRONMENT: f"{dify_config.DEPLOY_ENV}-{dify_config.EDITION}", - ResourceAttributes.HOST_NAME: socket.gethostname(), - 
ResourceAttributes.HOST_ARCH: platform.machine(), - "custom.deployment.git_commit": dify_config.COMMIT_SHA, - ResourceAttributes.HOST_ID: platform.node(), - ResourceAttributes.OS_TYPE: platform.system().lower(), - ResourceAttributes.OS_DESCRIPTION: platform.platform(), - ResourceAttributes.OS_VERSION: platform.version(), - } - ) - sampler = ParentBasedTraceIdRatio(dify_config.OTEL_SAMPLING_RATE) - provider = TracerProvider(resource=resource, sampler=sampler) - set_tracer_provider(provider) - exporter: Union[OTLPSpanExporter, ConsoleSpanExporter] - metric_exporter: Union[OTLPMetricExporter, ConsoleMetricExporter] - if dify_config.OTEL_EXPORTER_TYPE == "otlp": - exporter = OTLPSpanExporter( - endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/traces", - headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, - ) - metric_exporter = OTLPMetricExporter( - endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/metrics", - headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, - ) - else: - # Fallback to console exporter - exporter = ConsoleSpanExporter() - metric_exporter = ConsoleMetricExporter() + def is_celery_worker(): + return "celery" in sys.argv[0].lower() - provider.add_span_processor( - BatchSpanProcessor( - exporter, - max_queue_size=dify_config.OTEL_MAX_QUEUE_SIZE, - schedule_delay_millis=dify_config.OTEL_BATCH_EXPORT_SCHEDULE_DELAY, - max_export_batch_size=dify_config.OTEL_MAX_EXPORT_BATCH_SIZE, - export_timeout_millis=dify_config.OTEL_BATCH_EXPORT_TIMEOUT, + def instrument_exception_logging(): + exception_handler = ExceptionLoggingHandler() + logging.getLogger().addHandler(exception_handler) + + def init_flask_instrumentor(app: DifyApp): + meter = get_meter("http_metrics", version=dify_config.CURRENT_VERSION) + _http_response_counter = meter.create_counter( + "http.server.response.count", description="Total number of HTTP responses by status code", unit="{response}" + ) + + def response_hook(span: Span, status: str, response_headers: list): + if span and span.is_recording(): + if status.startswith("2"): + span.set_status(StatusCode.OK) + else: + span.set_status(StatusCode.ERROR, status) + + status = status.split(" ")[0] + status_code = int(status) + status_class = f"{status_code // 100}xx" + _http_response_counter.add(1, {"status_code": status_code, "status_class": status_class}) + + instrumentor = FlaskInstrumentor() + if dify_config.DEBUG: + logging.info("Initializing Flask instrumentor") + instrumentor.instrument_app(app, response_hook=response_hook) + + def init_sqlalchemy_instrumentor(app: DifyApp): + with app.app_context(): + engines = list(app.extensions["sqlalchemy"].engines.values()) + SQLAlchemyInstrumentor().instrument(enable_commenter=True, engines=engines) + + def setup_context_propagation(): + # Configure propagators + set_global_textmap( + CompositePropagator( + [ + TraceContextTextMapPropagator(), # W3C trace context + B3Format(), # B3 propagation (used by many systems) + ] ) ) - reader = PeriodicExportingMetricReader( - metric_exporter, - export_interval_millis=dify_config.OTEL_METRIC_EXPORT_INTERVAL, - export_timeout_millis=dify_config.OTEL_METRIC_EXPORT_TIMEOUT, - ) - set_meter_provider(MeterProvider(resource=resource, metric_readers=[reader])) - if not is_celery_worker(): - init_flask_instrumentor(app) - CeleryInstrumentor(tracer_provider=get_tracer_provider(), meter_provider=get_meter_provider()).instrument() - init_sqlalchemy_instrumentor(app) - atexit.register(shutdown_tracer) + def shutdown_tracer(): + provider = trace.get_tracer_provider() + if 
hasattr(provider, "force_flush"): + provider.force_flush() -def is_celery_worker(): - return "celery" in sys.argv[0].lower() + class ExceptionLoggingHandler(logging.Handler): + """Custom logging handler that creates spans for logging.exception() calls""" + def emit(self, record): + try: + if record.exc_info: + tracer = get_tracer_provider().get_tracer("dify.exception.logging") + with tracer.start_as_current_span( + "log.exception", + attributes={ + "log.level": record.levelname, + "log.message": record.getMessage(), + "log.logger": record.name, + "log.file.path": record.pathname, + "log.file.line": record.lineno, + }, + ) as span: + span.set_status(StatusCode.ERROR) + span.record_exception(record.exc_info[1]) + span.set_attribute("exception.type", record.exc_info[0].__name__) + span.set_attribute("exception.message", str(record.exc_info[1])) + except Exception: + pass -def init_flask_instrumentor(app: DifyApp): - meter = get_meter("http_metrics", version=dify_config.CURRENT_VERSION) - _http_response_counter = meter.create_counter( - "http.server.response.count", description="Total number of HTTP responses by status code", unit="{response}" + from opentelemetry import trace + from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter + from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter + from opentelemetry.instrumentation.celery import CeleryInstrumentor + from opentelemetry.instrumentation.flask import FlaskInstrumentor + from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor + from opentelemetry.metrics import get_meter, get_meter_provider, set_meter_provider + from opentelemetry.propagate import set_global_textmap + from opentelemetry.propagators.b3 import B3Format + from opentelemetry.propagators.composite import CompositePropagator + from opentelemetry.sdk.metrics import MeterProvider + from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader + from opentelemetry.sdk.resources import Resource + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import ( + BatchSpanProcessor, + ConsoleSpanExporter, ) + from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio + from opentelemetry.semconv.resource import ResourceAttributes + from opentelemetry.trace import Span, get_tracer_provider, set_tracer_provider + from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator + from opentelemetry.trace.status import StatusCode - def response_hook(span: Span, status: str, response_headers: list): - if span and span.is_recording(): - if status.startswith("2"): - span.set_status(StatusCode.OK) - else: - span.set_status(StatusCode.ERROR, status) + setup_context_propagation() + # Initialize OpenTelemetry + # Follow Semantic Convertions 1.32.0 to define resource attributes + resource = Resource( + attributes={ + ResourceAttributes.SERVICE_NAME: dify_config.APPLICATION_NAME, + ResourceAttributes.SERVICE_VERSION: f"dify-{dify_config.CURRENT_VERSION}-{dify_config.COMMIT_SHA}", + ResourceAttributes.PROCESS_PID: os.getpid(), + ResourceAttributes.DEPLOYMENT_ENVIRONMENT: f"{dify_config.DEPLOY_ENV}-{dify_config.EDITION}", + ResourceAttributes.HOST_NAME: socket.gethostname(), + ResourceAttributes.HOST_ARCH: platform.machine(), + "custom.deployment.git_commit": dify_config.COMMIT_SHA, + ResourceAttributes.HOST_ID: platform.node(), + ResourceAttributes.OS_TYPE: platform.system().lower(), + ResourceAttributes.OS_DESCRIPTION: 
platform.platform(), + ResourceAttributes.OS_VERSION: platform.version(), + } + ) + sampler = ParentBasedTraceIdRatio(dify_config.OTEL_SAMPLING_RATE) + provider = TracerProvider(resource=resource, sampler=sampler) + set_tracer_provider(provider) + exporter: Union[OTLPSpanExporter, ConsoleSpanExporter] + metric_exporter: Union[OTLPMetricExporter, ConsoleMetricExporter] + if dify_config.OTEL_EXPORTER_TYPE == "otlp": + exporter = OTLPSpanExporter( + endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/traces", + headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, + ) + metric_exporter = OTLPMetricExporter( + endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/metrics", + headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, + ) + else: + # Fallback to console exporter + exporter = ConsoleSpanExporter() + metric_exporter = ConsoleMetricExporter() - status = status.split(" ")[0] - status_code = int(status) - status_class = f"{status_code // 100}xx" - _http_response_counter.add(1, {"status_code": status_code, "status_class": status_class}) - - instrumentor = FlaskInstrumentor() - if dify_config.DEBUG: - logging.info("Initializing Flask instrumentor") - instrumentor.instrument_app(app, response_hook=response_hook) - - -def init_sqlalchemy_instrumentor(app: DifyApp): - with app.app_context(): - engines = list(app.extensions["sqlalchemy"].engines.values()) - SQLAlchemyInstrumentor().instrument(enable_commenter=True, engines=engines) - - -def setup_context_propagation(): - # Configure propagators - set_global_textmap( - CompositePropagator( - [ - TraceContextTextMapPropagator(), # W3C trace context - B3Format(), # B3 propagation (used by many systems) - ] + provider.add_span_processor( + BatchSpanProcessor( + exporter, + max_queue_size=dify_config.OTEL_MAX_QUEUE_SIZE, + schedule_delay_millis=dify_config.OTEL_BATCH_EXPORT_SCHEDULE_DELAY, + max_export_batch_size=dify_config.OTEL_MAX_EXPORT_BATCH_SIZE, + export_timeout_millis=dify_config.OTEL_BATCH_EXPORT_TIMEOUT, ) ) + reader = PeriodicExportingMetricReader( + metric_exporter, + export_interval_millis=dify_config.OTEL_METRIC_EXPORT_INTERVAL, + export_timeout_millis=dify_config.OTEL_METRIC_EXPORT_TIMEOUT, + ) + set_meter_provider(MeterProvider(resource=resource, metric_readers=[reader])) + if not is_celery_worker(): + init_flask_instrumentor(app) + CeleryInstrumentor(tracer_provider=get_tracer_provider(), meter_provider=get_meter_provider()).instrument() + instrument_exception_logging() + init_sqlalchemy_instrumentor(app) + atexit.register(shutdown_tracer) + + +def is_enabled(): + return dify_config.ENABLE_OTEL @worker_init.connect(weak=False) def init_celery_worker(*args, **kwargs): - tracer_provider = get_tracer_provider() - metric_provider = get_meter_provider() - if dify_config.DEBUG: - logging.info("Initializing OpenTelemetry for Celery worker") - CeleryInstrumentor(tracer_provider=tracer_provider, meter_provider=metric_provider).instrument() + if dify_config.ENABLE_OTEL: + from opentelemetry.instrumentation.celery import CeleryInstrumentor + from opentelemetry.metrics import get_meter_provider + from opentelemetry.trace import get_tracer_provider - -def shutdown_tracer(): - provider = trace.get_tracer_provider() - if hasattr(provider, "force_flush"): - provider.force_flush() + tracer_provider = get_tracer_provider() + metric_provider = get_meter_provider() + if dify_config.DEBUG: + logging.info("Initializing OpenTelemetry for Celery worker") + CeleryInstrumentor(tracer_provider=tracer_provider, 
meter_provider=metric_provider).instrument() diff --git a/api/extensions/ext_otel_patch.py b/api/extensions/ext_otel_patch.py deleted file mode 100644 index 58309fe4d1..0000000000 --- a/api/extensions/ext_otel_patch.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Patch for OpenTelemetry context detach method to handle None tokens gracefully. - -This patch addresses the issue where OpenTelemetry's context.detach() method raises a TypeError -when called with a None token. The error occurs in the contextvars_context.py file where it tries -to call reset() on a None token. - -Related GitHub issue: https://github.com/langgenius/dify/issues/18496 - -Error being fixed: -``` -Traceback (most recent call last): - File "opentelemetry/context/__init__.py", line 154, in detach - _RUNTIME_CONTEXT.detach(token) - File "opentelemetry/context/contextvars_context.py", line 50, in detach - self._current_context.reset(token) # type: ignore - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -TypeError: expected an instance of Token, got None -``` - -Instead of modifying the third-party package directly, this patch monkey-patches the -context.detach method to gracefully handle None tokens. -""" - -import logging -from functools import wraps - -from opentelemetry import context - -logger = logging.getLogger(__name__) - -# Store the original detach method -original_detach = context.detach - - -# Create a patched version that handles None tokens -@wraps(original_detach) -def patched_detach(token): - """ - A patched version of context.detach that handles None tokens gracefully. - """ - if token is None: - logger.debug("Attempted to detach a None token, skipping") - return - - return original_detach(token) - - -def is_enabled(): - """ - Check if the extension is enabled. - Always enable this patch to prevent errors even when OpenTelemetry is disabled. - """ - return True - - -def init_app(app): - """ - Initialize the OpenTelemetry context patch. - """ - # Replace the original detach method with our patched version - context.detach = patched_detach - logger.info("OpenTelemetry context.detach patched to handle None tokens") diff --git a/api/extensions/ext_repositories.py b/api/extensions/ext_repositories.py index 27d8408ec1..b8cfea121b 100644 --- a/api/extensions/ext_repositories.py +++ b/api/extensions/ext_repositories.py @@ -4,8 +4,8 @@ Extension for initializing repositories. This extension registers repository implementations with the RepositoryFactory. 
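The OpenTelemetry rework above (and the deleted `ext_otel_patch` module) follows the extension convention visible in this patch: a module exposes `init_app(app)` plus an optional `is_enabled()` gate, and the refactor now also defers the heavy `opentelemetry` imports until `init_app` actually runs. A minimal sketch of that flag-gated, lazy-import shape for a hypothetical extension (the feature flag name is made up):

```python
import logging

from configs import dify_config
from dify_app import DifyApp

logger = logging.getLogger(__name__)


def is_enabled() -> bool:
    # Only initialize when the (hypothetical) feature flag is set.
    return getattr(dify_config, "ENABLE_MY_FEATURE", False)


def init_app(app: DifyApp) -> None:
    # Import the optional dependency lazily so disabled deployments never pay for it.
    from opentelemetry.trace import get_tracer_provider  # example of a deferred heavy import

    logger.info("Initializing my_feature extension")
    tracer_provider = get_tracer_provider()
    # ... register hooks on `app` / `tracer_provider` here ...
```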
""" +from core.repositories.repository_registry import register_repositories from dify_app import DifyApp -from repositories.repository_registry import register_repositories def init_app(_app: DifyApp) -> None: diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 4c811c66ba..bd35278544 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -102,6 +102,9 @@ class Storage: def delete(self, filename): return self.storage_runner.delete(filename) + def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]: + return self.storage_runner.scan(path, files=files, directories=directories) + storage = Storage() diff --git a/api/extensions/storage/base_storage.py b/api/extensions/storage/base_storage.py index 0dedd7ff8c..0393206e54 100644 --- a/api/extensions/storage/base_storage.py +++ b/api/extensions/storage/base_storage.py @@ -30,3 +30,11 @@ class BaseStorage(ABC): @abstractmethod def delete(self, filename): raise NotImplementedError + + def scan(self, path, files=True, directories=False) -> list[str]: + """ + Scan files and directories in the given path. + This method is implemented only in some storage backends. + If a storage backend doesn't support scanning, it will raise NotImplementedError. + """ + raise NotImplementedError("This storage backend doesn't support scanning") diff --git a/api/extensions/storage/opendal_storage.py b/api/extensions/storage/opendal_storage.py index ee8cfa9179..12e2738e9d 100644 --- a/api/extensions/storage/opendal_storage.py +++ b/api/extensions/storage/opendal_storage.py @@ -80,3 +80,20 @@ class OpenDALStorage(BaseStorage): logger.debug(f"file {filename} deleted") return logger.debug(f"file {filename} not found, skip delete") + + def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]: + if not self.exists(path): + raise FileNotFoundError("Path not found") + + all_files = self.op.scan(path=path) + if files and directories: + logger.debug(f"files and directories on {path} scanned") + return [f.path for f in all_files] + if files: + logger.debug(f"files on {path} scanned") + return [f.path for f in all_files if not f.path.endswith("/")] + elif directories: + logger.debug(f"directories on {path} scanned") + return [f.path for f in all_files if f.path.endswith("/")] + else: + raise ValueError("At least one of files or directories must be True") diff --git a/api/factories/agent_factory.py b/api/factories/agent_factory.py index 4b2d2cc769..4b12afb528 100644 --- a/api/factories/agent_factory.py +++ b/api/factories/agent_factory.py @@ -1,12 +1,12 @@ from core.agent.strategy.plugin import PluginAgentStrategy -from core.plugin.manager.agent import PluginAgentManager +from core.plugin.impl.agent import PluginAgentClient def get_plugin_agent_strategy( tenant_id: str, agent_strategy_provider_name: str, agent_strategy_name: str ) -> PluginAgentStrategy: # TODO: use contexts to cache the agent provider - manager = PluginAgentManager() + manager = PluginAgentClient() agent_provider = manager.fetch_agent_strategy_provider(tenant_id, agent_strategy_provider_name) for agent_strategy in agent_provider.declaration.strategies: if agent_strategy.identity.name == agent_strategy_name: diff --git a/api/fields/conversation_variable_fields.py b/api/fields/conversation_variable_fields.py index c6385efb5a..3aa3838def 100644 --- a/api/fields/conversation_variable_fields.py +++ b/api/fields/conversation_variable_fields.py @@ -19,3 +19,9 @@ paginated_conversation_variable_fields = { 
"has_more": fields.Boolean, "data": fields.List(fields.Nested(conversation_variable_fields), attribute="data"), } + +conversation_variable_infinite_scroll_pagination_fields = { + "limit": fields.Integer, + "has_more": fields.Boolean, + "data": fields.List(fields.Nested(conversation_variable_fields)), +} diff --git a/api/models/model.py b/api/models/model.py index 6577492d1b..901e92284a 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -3,8 +3,8 @@ import re import uuid from collections.abc import Mapping from datetime import datetime -from enum import Enum -from typing import TYPE_CHECKING, Optional +from enum import Enum, StrEnum +from typing import TYPE_CHECKING, Any, Literal, Optional, cast from core.plugin.entities.plugin import GenericProviderID from core.tools.entities.tool_entities import ToolProviderType @@ -13,9 +13,6 @@ from services.plugin.plugin_service import PluginService if TYPE_CHECKING: from models.workflow import Workflow -from enum import StrEnum -from typing import TYPE_CHECKING, Any, Literal, cast - import sqlalchemy as sa from flask import request from flask_login import UserMixin # type: ignore @@ -1015,7 +1012,9 @@ class Message(db.Model): # type: ignore[name-defined] sign_url = file_helpers.get_signed_file_url(upload_file_id) else: continue - + # if as_attachment is in the url, add it to the sign_url. + if "as_attachment" in url: + sign_url += "&as_attachment=true" re_sign_file_url_answer = re_sign_file_url_answer.replace(url, sign_url) return re_sign_file_url_answer diff --git a/api/models/workflow.py b/api/models/workflow.py index 5a67fa47a8..da60617de5 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -1,14 +1,12 @@ import json from collections.abc import Mapping, Sequence from datetime import UTC, datetime -from enum import Enum +from enum import Enum, StrEnum from typing import TYPE_CHECKING, Any, Optional, Self, Union from uuid import uuid4 if TYPE_CHECKING: from models.model import AppMode -from enum import StrEnum -from typing import TYPE_CHECKING import sqlalchemy as sa from sqlalchemy import Index, PrimaryKeyConstraint, func diff --git a/api/pyproject.toml b/api/pyproject.toml index 4992178423..72210b0774 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dify-api" -version = "1.2.0" +dynamic = ["version"] requires-python = ">=3.11,<3.13" dependencies = [ @@ -82,13 +82,19 @@ dependencies = [ "transformers~=4.35.0", "unstructured[docx,epub,md,ppt,pptx]~=0.16.1", "validators==0.21.0", + "weave~=0.51.34", "yarl~=1.18.3", + "webvtt-py~=0.5.1", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. 
+[tool.setuptools] +packages = [] + [tool.uv] default-groups = ["storage", "tools", "vdb"] +package = false [dependency-groups] diff --git a/api/services/agent_service.py b/api/services/agent_service.py index 0ff144052f..4c63611bb3 100644 --- a/api/services/agent_service.py +++ b/api/services/agent_service.py @@ -6,8 +6,8 @@ from flask_login import current_user # type: ignore import contexts from core.app.app_config.easy_ui_based_app.agent.manager import AgentConfigManager -from core.plugin.manager.agent import PluginAgentManager -from core.plugin.manager.exc import PluginDaemonClientSideError +from core.plugin.impl.agent import PluginAgentClient +from core.plugin.impl.exc import PluginDaemonClientSideError from core.tools.tool_manager import ToolManager from extensions.ext_database import db from models.account import Account @@ -161,7 +161,7 @@ class AgentService: """ List agent providers """ - manager = PluginAgentManager() + manager = PluginAgentClient() return manager.fetch_agent_strategy_providers(tenant_id) @classmethod @@ -169,7 +169,7 @@ class AgentService: """ Get agent provider """ - manager = PluginAgentManager() + manager = PluginAgentClient() try: return manager.fetch_agent_strategy_provider(tenant_id, provider_name) except PluginDaemonClientSideError as e: diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py index 2e2b729021..a2775fe6ad 100644 --- a/api/services/app_dsl_service.py +++ b/api/services/app_dsl_service.py @@ -40,7 +40,7 @@ IMPORT_INFO_REDIS_KEY_PREFIX = "app_import_info:" CHECK_DEPENDENCIES_REDIS_KEY_PREFIX = "app_check_dependencies:" IMPORT_INFO_REDIS_EXPIRY = 10 * 60 # 10 minutes DSL_MAX_SIZE = 10 * 1024 * 1024 # 10MB -CURRENT_DSL_VERSION = "0.1.5" +CURRENT_DSL_VERSION = "0.2.0" class ImportMode(StrEnum): @@ -77,13 +77,19 @@ def _check_version_compatibility(imported_version: str) -> ImportStatus: except version.InvalidVersion: return ImportStatus.FAILED - # Compare major version and minor version - if current_ver.major != imported_ver.major or current_ver.minor != imported_ver.minor: + # If imported version is newer than current, always return PENDING + if imported_ver > current_ver: return ImportStatus.PENDING - if current_ver.micro != imported_ver.micro: + # If imported version is older than current's major, return PENDING + if imported_ver.major < current_ver.major: + return ImportStatus.PENDING + + # If imported version is older than current's minor, return COMPLETED_WITH_WARNINGS + if imported_ver.minor < current_ver.minor: return ImportStatus.COMPLETED_WITH_WARNINGS + # If imported version equals or is older than current's micro, return COMPLETED return ImportStatus.COMPLETED diff --git a/api/services/conversation_service.py b/api/services/conversation_service.py index 6485cbf37d..afdaa49465 100644 --- a/api/services/conversation_service.py +++ b/api/services/conversation_service.py @@ -9,9 +9,14 @@ from core.app.entities.app_invoke_entities import InvokeFrom from core.llm_generator.llm_generator import LLMGenerator from extensions.ext_database import db from libs.infinite_scroll_pagination import InfiniteScrollPagination +from models import ConversationVariable from models.account import Account from models.model import App, Conversation, EndUser, Message -from services.errors.conversation import ConversationNotExistsError, LastConversationNotExistsError +from services.errors.conversation import ( + ConversationNotExistsError, + ConversationVariableNotExistsError, + LastConversationNotExistsError, +) from 
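Stepping back to the reworked `_check_version_compatibility` above: with `CURRENT_DSL_VERSION = "0.2.0"` as set in this diff, the new rules map imported DSL versions roughly as sketched below.

```python
# Expected outcomes of the reworked _check_version_compatibility above,
# assuming CURRENT_DSL_VERSION = "0.2.0" as set in this diff:
#
#   imported "0.3.0" or "1.0.0" -> newer than current        -> ImportStatus.PENDING
#   imported "0.1.5"            -> older minor (0.1 vs 0.2)  -> ImportStatus.COMPLETED_WITH_WARNINGS
#   imported "0.2.0"            -> same version              -> ImportStatus.COMPLETED
#   imported "not-a-version"    -> unparsable                -> ImportStatus.FAILED
```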
services.errors.message import MessageNotExistsError @@ -166,3 +171,50 @@ class ConversationService: conversation.is_deleted = True conversation.updated_at = datetime.now(UTC).replace(tzinfo=None) db.session.commit() + + @classmethod + def get_conversational_variable( + cls, + app_model: App, + conversation_id: str, + user: Optional[Union[Account, EndUser]], + limit: int, + last_id: Optional[str], + ) -> InfiniteScrollPagination: + conversation = cls.get_conversation(app_model, conversation_id, user) + + stmt = ( + select(ConversationVariable) + .where(ConversationVariable.app_id == app_model.id) + .where(ConversationVariable.conversation_id == conversation.id) + .order_by(ConversationVariable.created_at) + ) + + with Session(db.engine) as session: + if last_id: + last_variable = session.scalar(stmt.where(ConversationVariable.id == last_id)) + if not last_variable: + raise ConversationVariableNotExistsError() + + # Filter for variables created after the last_id + stmt = stmt.where(ConversationVariable.created_at > last_variable.created_at) + + # Apply limit to query + query_stmt = stmt.limit(limit) # Get one extra to check if there are more + rows = session.scalars(query_stmt).all() + + has_more = False + if len(rows) > limit: + has_more = True + rows = rows[:limit] # Remove the extra item + + variables = [ + { + "created_at": row.created_at, + "updated_at": row.updated_at, + **row.to_variable().model_dump(), + } + for row in rows + ] + + return InfiniteScrollPagination(variables, limit, has_more) diff --git a/api/services/errors/conversation.py b/api/services/errors/conversation.py index 139dd9a70a..f8051e3417 100644 --- a/api/services/errors/conversation.py +++ b/api/services/errors/conversation.py @@ -11,3 +11,7 @@ class ConversationNotExistsError(BaseServiceError): class ConversationCompletedError(Exception): pass + + +class ConversationVariableNotExistsError(BaseServiceError): + pass diff --git a/api/services/ops_service.py b/api/services/ops_service.py index 06b4732304..6b317212d1 100644 --- a/api/services/ops_service.py +++ b/api/services/ops_service.py @@ -67,7 +67,14 @@ class OpsService: new_decrypt_tracing_config.update({"project_url": project_url}) except Exception: new_decrypt_tracing_config.update({"project_url": "https://www.comet.com/opik/"}) - + if tracing_provider == "weave" and ( + "project_url" not in decrypt_tracing_config or not decrypt_tracing_config.get("project_url") + ): + try: + project_url = OpsTraceManager.get_trace_config_project_url(decrypt_tracing_config, tracing_provider) + new_decrypt_tracing_config.update({"project_url": project_url}) + except Exception: + new_decrypt_tracing_config.update({"project_url": "https://wandb.ai/"}) trace_config_data.tracing_config = new_decrypt_tracing_config return trace_config_data.to_dict() diff --git a/api/services/plugin/dependencies_analysis.py b/api/services/plugin/dependencies_analysis.py index 07e624b4e8..830d3a4769 100644 --- a/api/services/plugin/dependencies_analysis.py +++ b/api/services/plugin/dependencies_analysis.py @@ -1,7 +1,7 @@ from configs import dify_config from core.helper import marketplace from core.plugin.entities.plugin import ModelProviderID, PluginDependency, PluginInstallationSource, ToolProviderID -from core.plugin.manager.plugin import PluginInstallationManager +from core.plugin.impl.plugin import PluginInstaller class DependenciesAnalysisService: @@ -38,7 +38,7 @@ class DependenciesAnalysisService: for dependency in dependencies: 
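A usage sketch for the new `ConversationService.get_conversational_variable` above (not code from this diff): callers page through conversation variables by passing the id of the last item they received and stop when `has_more` is false. The `app_model` and `user` arguments are placeholders for objects obtained elsewhere, and this assumes the returned `InfiniteScrollPagination` exposes `data` and `has_more` (as the marshalling fields above suggest) and that each item includes its `id`, the cursor the query filters on.

```python
# Illustrative sketch only: cursor-style paging over conversation variables.
from services.conversation_service import ConversationService


def fetch_all_variables(app_model, conversation_id: str, user) -> list[dict]:
    variables: list[dict] = []
    last_id = None
    while True:
        page = ConversationService.get_conversational_variable(
            app_model, conversation_id, user, limit=20, last_id=last_id
        )
        variables.extend(page.data)
        if not page.has_more:
            return variables
        # Pass the id of the last variable as the cursor for the next page.
        last_id = page.data[-1]["id"]
```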
required_plugin_unique_identifiers.append(dependency.value.plugin_unique_identifier) - manager = PluginInstallationManager() + manager = PluginInstaller() # get leaked dependencies missing_plugins = manager.fetch_missing_dependencies(tenant_id, required_plugin_unique_identifiers) @@ -64,7 +64,7 @@ class DependenciesAnalysisService: Generate dependencies through the list of plugin ids """ dependencies = list(set(dependencies)) - manager = PluginInstallationManager() + manager = PluginInstaller() plugins = manager.fetch_plugin_installation_by_ids(tenant_id, dependencies) result = [] for plugin in plugins: diff --git a/api/services/plugin/endpoint_service.py b/api/services/plugin/endpoint_service.py index 35961345a8..11b8e0a3d9 100644 --- a/api/services/plugin/endpoint_service.py +++ b/api/services/plugin/endpoint_service.py @@ -1,10 +1,10 @@ -from core.plugin.manager.endpoint import PluginEndpointManager +from core.plugin.impl.endpoint import PluginEndpointClient class EndpointService: @classmethod def create_endpoint(cls, tenant_id: str, user_id: str, plugin_unique_identifier: str, name: str, settings: dict): - return PluginEndpointManager().create_endpoint( + return PluginEndpointClient().create_endpoint( tenant_id=tenant_id, user_id=user_id, plugin_unique_identifier=plugin_unique_identifier, @@ -14,7 +14,7 @@ class EndpointService: @classmethod def list_endpoints(cls, tenant_id: str, user_id: str, page: int, page_size: int): - return PluginEndpointManager().list_endpoints( + return PluginEndpointClient().list_endpoints( tenant_id=tenant_id, user_id=user_id, page=page, @@ -23,7 +23,7 @@ class EndpointService: @classmethod def list_endpoints_for_single_plugin(cls, tenant_id: str, user_id: str, plugin_id: str, page: int, page_size: int): - return PluginEndpointManager().list_endpoints_for_single_plugin( + return PluginEndpointClient().list_endpoints_for_single_plugin( tenant_id=tenant_id, user_id=user_id, plugin_id=plugin_id, @@ -33,7 +33,7 @@ class EndpointService: @classmethod def update_endpoint(cls, tenant_id: str, user_id: str, endpoint_id: str, name: str, settings: dict): - return PluginEndpointManager().update_endpoint( + return PluginEndpointClient().update_endpoint( tenant_id=tenant_id, user_id=user_id, endpoint_id=endpoint_id, @@ -43,7 +43,7 @@ class EndpointService: @classmethod def delete_endpoint(cls, tenant_id: str, user_id: str, endpoint_id: str): - return PluginEndpointManager().delete_endpoint( + return PluginEndpointClient().delete_endpoint( tenant_id=tenant_id, user_id=user_id, endpoint_id=endpoint_id, @@ -51,7 +51,7 @@ class EndpointService: @classmethod def enable_endpoint(cls, tenant_id: str, user_id: str, endpoint_id: str): - return PluginEndpointManager().enable_endpoint( + return PluginEndpointClient().enable_endpoint( tenant_id=tenant_id, user_id=user_id, endpoint_id=endpoint_id, @@ -59,7 +59,7 @@ class EndpointService: @classmethod def disable_endpoint(cls, tenant_id: str, user_id: str, endpoint_id: str): - return PluginEndpointManager().disable_endpoint( + return PluginEndpointClient().disable_endpoint( tenant_id=tenant_id, user_id=user_id, endpoint_id=endpoint_id, diff --git a/api/services/plugin/oauth_service.py b/api/services/plugin/oauth_service.py new file mode 100644 index 0000000000..461247419b --- /dev/null +++ b/api/services/plugin/oauth_service.py @@ -0,0 +1,7 @@ +from core.plugin.impl.base import BasePluginClient + + +class OAuthService(BasePluginClient): + @classmethod + def get_authorization_url(cls, tenant_id: str, user_id: str, provider_name: str) 
-> str: + return "1234567890" diff --git a/api/services/plugin/plugin_migration.py b/api/services/plugin/plugin_migration.py index ec9e0aa8dc..dbaaa7160e 100644 --- a/api/services/plugin/plugin_migration.py +++ b/api/services/plugin/plugin_migration.py @@ -17,7 +17,7 @@ from core.agent.entities import AgentToolEntity from core.helper import marketplace from core.plugin.entities.plugin import ModelProviderID, PluginInstallationSource, ToolProviderID from core.plugin.entities.plugin_daemon import PluginInstallTaskStatus -from core.plugin.manager.plugin import PluginInstallationManager +from core.plugin.impl.plugin import PluginInstaller from core.tools.entities.tool_entities import ToolProviderType from models.account import Tenant from models.engine import db @@ -331,7 +331,7 @@ class PluginMigration: """ Install plugins. """ - manager = PluginInstallationManager() + manager = PluginInstaller() plugins = cls.extract_unique_plugins(extracted_plugins) not_installed = [] @@ -426,7 +426,7 @@ class PluginMigration: """ Install plugins for a tenant. """ - manager = PluginInstallationManager() + manager = PluginInstaller() # download all the plugins and upload thread_pool = ThreadPoolExecutor(max_workers=10) diff --git a/api/services/plugin/plugin_service.py b/api/services/plugin/plugin_service.py index 96a07d36b9..be722a59ad 100644 --- a/api/services/plugin/plugin_service.py +++ b/api/services/plugin/plugin_service.py @@ -18,9 +18,9 @@ from core.plugin.entities.plugin import ( PluginInstallationSource, ) from core.plugin.entities.plugin_daemon import PluginInstallTask, PluginUploadResponse -from core.plugin.manager.asset import PluginAssetManager -from core.plugin.manager.debugging import PluginDebuggingManager -from core.plugin.manager.plugin import PluginInstallationManager +from core.plugin.impl.asset import PluginAssetManager +from core.plugin.impl.debugging import PluginDebuggingClient +from core.plugin.impl.plugin import PluginInstaller from extensions.ext_redis import redis_client logger = logging.getLogger(__name__) @@ -91,7 +91,7 @@ class PluginService: """ get the debugging key of the tenant """ - manager = PluginDebuggingManager() + manager = PluginDebuggingClient() return manager.get_debugging_key(tenant_id) @staticmethod @@ -106,7 +106,7 @@ class PluginService: """ list all plugins of the tenant """ - manager = PluginInstallationManager() + manager = PluginInstaller() plugins = manager.list_plugins(tenant_id) return plugins @@ -115,7 +115,7 @@ class PluginService: """ List plugin installations from ids """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.fetch_plugin_installation_by_ids(tenant_id, ids) @staticmethod @@ -133,7 +133,7 @@ class PluginService: """ check if the plugin unique identifier is already installed by other tenant """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.fetch_plugin_by_identifier(tenant_id, plugin_unique_identifier) @staticmethod @@ -141,7 +141,7 @@ class PluginService: """ Fetch plugin manifest """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.fetch_plugin_manifest(tenant_id, plugin_unique_identifier) @staticmethod @@ -149,12 +149,12 @@ class PluginService: """ Fetch plugin installation tasks """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.fetch_plugin_installation_tasks(tenant_id, page, page_size) @staticmethod def fetch_install_task(tenant_id: str, task_id: str) -> PluginInstallTask: - manager = 
PluginInstallationManager() + manager = PluginInstaller() return manager.fetch_plugin_installation_task(tenant_id, task_id) @staticmethod @@ -162,7 +162,7 @@ class PluginService: """ Delete a plugin installation task """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.delete_plugin_installation_task(tenant_id, task_id) @staticmethod @@ -172,7 +172,7 @@ class PluginService: """ Delete all plugin installation task items """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.delete_all_plugin_installation_task_items(tenant_id) @staticmethod @@ -180,7 +180,7 @@ class PluginService: """ Delete a plugin installation task item """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.delete_plugin_installation_task_item(tenant_id, task_id, identifier) @staticmethod @@ -190,11 +190,14 @@ class PluginService: """ Upgrade plugin with marketplace """ + if not dify_config.MARKETPLACE_ENABLED: + raise ValueError("marketplace is not enabled") + if original_plugin_unique_identifier == new_plugin_unique_identifier: raise ValueError("you should not upgrade plugin with the same plugin") # check if plugin pkg is already downloaded - manager = PluginInstallationManager() + manager = PluginInstaller() try: manager.fetch_plugin_manifest(tenant_id, new_plugin_unique_identifier) @@ -227,7 +230,7 @@ class PluginService: """ Upgrade plugin with github """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.upgrade_plugin( tenant_id, original_plugin_unique_identifier, @@ -247,7 +250,7 @@ class PluginService: returns: plugin_unique_identifier """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.upload_pkg(tenant_id, pkg, verify_signature) @staticmethod @@ -262,7 +265,7 @@ class PluginService: f"https://github.com/{repo}/releases/download/{version}/{package}", dify_config.PLUGIN_MAX_PACKAGE_SIZE ) - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.upload_pkg( tenant_id, pkg, @@ -276,12 +279,12 @@ class PluginService: """ Upload a plugin bundle and return the dependencies. 
""" - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.upload_bundle(tenant_id, bundle, verify_signature) @staticmethod def install_from_local_pkg(tenant_id: str, plugin_unique_identifiers: Sequence[str]): - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.install_from_identifiers( tenant_id, plugin_unique_identifiers, @@ -295,7 +298,7 @@ class PluginService: Install plugin from github release package files, returns plugin_unique_identifier """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.install_from_identifiers( tenant_id, [plugin_unique_identifier], @@ -316,7 +319,10 @@ class PluginService: """ Fetch marketplace package """ - manager = PluginInstallationManager() + if not dify_config.MARKETPLACE_ENABLED: + raise ValueError("marketplace is not enabled") + + manager = PluginInstaller() try: declaration = manager.fetch_plugin_manifest(tenant_id, plugin_unique_identifier) except Exception: @@ -333,7 +339,10 @@ class PluginService: Install plugin from marketplace package files, returns installation task id """ - manager = PluginInstallationManager() + if not dify_config.MARKETPLACE_ENABLED: + raise ValueError("marketplace is not enabled") + + manager = PluginInstaller() # check if already downloaded for plugin_unique_identifier in plugin_unique_identifiers: @@ -359,7 +368,7 @@ class PluginService: @staticmethod def uninstall(tenant_id: str, plugin_installation_id: str) -> bool: - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.uninstall(tenant_id, plugin_installation_id) @staticmethod @@ -367,5 +376,5 @@ class PluginService: """ Check if the tools exist """ - manager = PluginInstallationManager() + manager = PluginInstaller() return manager.check_tools_existence(tenant_id, provider_ids) diff --git a/api/services/tools/builtin_tools_manage_service.py b/api/services/tools/builtin_tools_manage_service.py index 075c60842b..3ccd14415d 100644 --- a/api/services/tools/builtin_tools_manage_service.py +++ b/api/services/tools/builtin_tools_manage_service.py @@ -8,7 +8,7 @@ from configs import dify_config from core.helper.position_helper import is_filtered from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.plugin import GenericProviderID, ToolProviderID -from core.plugin.manager.exc import PluginDaemonClientSideError +from core.plugin.impl.exc import PluginDaemonClientSideError from core.tools.builtin_tool.providers._positions import BuiltinToolProviderSort from core.tools.entities.api_entities import ToolApiEntity, ToolProviderApiEntity from core.tools.errors import ToolNotFoundError, ToolProviderCredentialValidationError, ToolProviderNotFoundError diff --git a/api/services/workflow_run_service.py b/api/services/workflow_run_service.py index 8b7213eefb..f7c4f500a8 100644 --- a/api/services/workflow_run_service.py +++ b/api/services/workflow_run_service.py @@ -2,8 +2,8 @@ import threading from typing import Optional import contexts -from core.repository import RepositoryFactory -from core.repository.workflow_node_execution_repository import OrderConfig +from core.workflow.repository import RepositoryFactory +from core.workflow.repository.workflow_node_execution_repository import OrderConfig from extensions.ext_database import db from libs.infinite_scroll_pagination import InfiniteScrollPagination from models.enums import WorkflowRunTriggeredFrom diff --git a/api/services/workflow_service.py 
b/api/services/workflow_service.py index 63e3791147..ebe65e5d5f 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -11,7 +11,6 @@ from sqlalchemy.orm import Session from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager from core.model_runtime.utils.encoders import jsonable_encoder -from core.repository import RepositoryFactory from core.variables import Variable from core.workflow.entities.node_entities import NodeRunResult from core.workflow.errors import WorkflowNodeRunFailedError @@ -22,6 +21,7 @@ from core.workflow.nodes.enums import ErrorStrategy from core.workflow.nodes.event import RunCompletedEvent from core.workflow.nodes.event.types import NodeEvent from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING +from core.workflow.repository import RepositoryFactory from core.workflow.workflow_entry import WorkflowEntry from events.app_event import app_draft_workflow_was_synced, app_published_workflow_was_updated from extensions.ext_database import db diff --git a/api/tasks/ops_trace_task.py b/api/tasks/ops_trace_task.py index 2b49e4bb23..2e77332ffe 100644 --- a/api/tasks/ops_trace_task.py +++ b/api/tasks/ops_trace_task.py @@ -44,7 +44,10 @@ def process_trace_tasks(file_info): trace_info = trace_type(**trace_info) trace_instance.trace(trace_info) logging.info(f"Processing trace tasks success, app_id: {app_id}") - except Exception: + except Exception as e: + logging.info( + f"error:\n\n\n{e}\n\n\n\n", + ) failed_key = f"{OPS_TRACE_FAILED_KEY}_{app_id}" redis_client.incr(failed_key) logging.info(f"Processing trace tasks failed, app_id: {app_id}") diff --git a/api/tasks/remove_app_and_related_data_task.py b/api/tasks/remove_app_and_related_data_task.py index cd8981abf6..dedf1c5334 100644 --- a/api/tasks/remove_app_and_related_data_task.py +++ b/api/tasks/remove_app_and_related_data_task.py @@ -7,7 +7,7 @@ from celery import shared_task # type: ignore from sqlalchemy import delete from sqlalchemy.exc import SQLAlchemyError -from core.repository import RepositoryFactory +from core.workflow.repository import RepositoryFactory from extensions.ext_database import db from models.dataset import AppDatasetJoin from models.model import ( diff --git a/api/tests/integration_tests/model_runtime/__mock/plugin_daemon.py b/api/tests/integration_tests/model_runtime/__mock/plugin_daemon.py index 6dfc01ab4c..e3c592b583 100644 --- a/api/tests/integration_tests/model_runtime/__mock/plugin_daemon.py +++ b/api/tests/integration_tests/model_runtime/__mock/plugin_daemon.py @@ -6,7 +6,7 @@ import pytest # import monkeypatch from _pytest.monkeypatch import MonkeyPatch -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient from tests.integration_tests.model_runtime.__mock.plugin_model import MockModelClass @@ -23,9 +23,9 @@ def mock_plugin_daemon( def unpatch() -> None: monkeypatch.undo() - monkeypatch.setattr(PluginModelManager, "invoke_llm", MockModelClass.invoke_llm) - monkeypatch.setattr(PluginModelManager, "fetch_model_providers", MockModelClass.fetch_model_providers) - monkeypatch.setattr(PluginModelManager, "get_model_schema", MockModelClass.get_model_schema) + monkeypatch.setattr(PluginModelClient, "invoke_llm", MockModelClass.invoke_llm) + monkeypatch.setattr(PluginModelClient, "fetch_model_providers", MockModelClass.fetch_model_providers) + 
monkeypatch.setattr(PluginModelClient, "get_model_schema", MockModelClass.get_model_schema) return unpatch diff --git a/api/tests/integration_tests/model_runtime/__mock/plugin_model.py b/api/tests/integration_tests/model_runtime/__mock/plugin_model.py index 50913662e2..d699866fb4 100644 --- a/api/tests/integration_tests/model_runtime/__mock/plugin_model.py +++ b/api/tests/integration_tests/model_runtime/__mock/plugin_model.py @@ -19,10 +19,10 @@ from core.model_runtime.entities.model_entities import ( ) from core.model_runtime.entities.provider_entities import ConfigurateMethod, ProviderEntity from core.plugin.entities.plugin_daemon import PluginModelProviderEntity -from core.plugin.manager.model import PluginModelManager +from core.plugin.impl.model import PluginModelClient -class MockModelClass(PluginModelManager): +class MockModelClass(PluginModelClient): def fetch_model_providers(self, tenant_id: str) -> Sequence[PluginModelProviderEntity]: """ Fetch model providers for the given tenant. @@ -232,7 +232,7 @@ class MockModelClass(PluginModelManager): ) def invoke_llm( - self: PluginModelManager, + self: PluginModelClient, *, tenant_id: str, user_id: str, diff --git a/api/tests/integration_tests/plugin/tools/test_fetch_all_tools.py b/api/tests/integration_tests/plugin/tools/test_fetch_all_tools.py index c6d836ed6d..b6d583e338 100644 --- a/api/tests/integration_tests/plugin/tools/test_fetch_all_tools.py +++ b/api/tests/integration_tests/plugin/tools/test_fetch_all_tools.py @@ -1,4 +1,4 @@ -from core.plugin.manager.tool import PluginToolManager +from core.plugin.impl.tool import PluginToolManager from tests.integration_tests.plugin.__mock.http import setup_http_mock diff --git a/api/tests/integration_tests/vdb/opensearch/test_opensearch.py b/api/tests/integration_tests/vdb/opensearch/test_opensearch.py index 35eed75c2f..2d44dd2924 100644 --- a/api/tests/integration_tests/vdb/opensearch/test_opensearch.py +++ b/api/tests/integration_tests/vdb/opensearch/test_opensearch.py @@ -23,13 +23,70 @@ def setup_mock_redis(): ext_redis.redis_client.lock = MagicMock(return_value=mock_redis_lock) +class TestOpenSearchConfig: + def test_to_opensearch_params(self): + config = OpenSearchConfig( + host="localhost", + port=9200, + secure=True, + user="admin", + password="password", + ) + + params = config.to_opensearch_params() + + assert params["hosts"] == [{"host": "localhost", "port": 9200}] + assert params["use_ssl"] is True + assert params["verify_certs"] is True + assert params["connection_class"].__name__ == "Urllib3HttpConnection" + assert params["http_auth"] == ("admin", "password") + + @patch("boto3.Session") + @patch("core.rag.datasource.vdb.opensearch.opensearch_vector.Urllib3AWSV4SignerAuth") + def test_to_opensearch_params_with_aws_managed_iam( + self, mock_aws_signer_auth: MagicMock, mock_boto_session: MagicMock + ): + mock_credentials = MagicMock() + mock_boto_session.return_value.get_credentials.return_value = mock_credentials + + mock_auth_instance = MagicMock() + mock_aws_signer_auth.return_value = mock_auth_instance + + aws_region = "ap-southeast-2" + aws_service = "aoss" + host = f"aoss-endpoint.{aws_region}.aoss.amazonaws.com" + port = 9201 + + config = OpenSearchConfig( + host=host, + port=port, + secure=True, + auth_method="aws_managed_iam", + aws_region=aws_region, + aws_service=aws_service, + ) + + params = config.to_opensearch_params() + + assert params["hosts"] == [{"host": host, "port": port}] + assert params["use_ssl"] is True + assert params["verify_certs"] is True + assert 
params["connection_class"].__name__ == "Urllib3HttpConnection" + assert params["http_auth"] is mock_auth_instance + + mock_aws_signer_auth.assert_called_once_with( + credentials=mock_credentials, region=aws_region, service=aws_service + ) + assert mock_boto_session.return_value.get_credentials.called + + class TestOpenSearchVector: def setup_method(self): self.collection_name = "test_collection" self.example_doc_id = "example_doc_id" self.vector = OpenSearchVector( collection_name=self.collection_name, - config=OpenSearchConfig(host="localhost", port=9200, user="admin", password="password", secure=False), + config=OpenSearchConfig(host="localhost", port=9200, secure=False, user="admin", password="password"), ) self.vector._client = MagicMock() diff --git a/api/tests/integration_tests/vdb/pyvastbase/__init__.py b/api/tests/integration_tests/vdb/pyvastbase/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/vdb/pyvastbase/test_vastbase_vector.py b/api/tests/integration_tests/vdb/pyvastbase/test_vastbase_vector.py new file mode 100644 index 0000000000..3d7873442b --- /dev/null +++ b/api/tests/integration_tests/vdb/pyvastbase/test_vastbase_vector.py @@ -0,0 +1,27 @@ +from core.rag.datasource.vdb.pyvastbase.vastbase_vector import VastbaseVector, VastbaseVectorConfig +from tests.integration_tests.vdb.test_vector_store import ( + AbstractVectorTest, + get_example_text, + setup_mock_redis, +) + + +class VastbaseVectorTest(AbstractVectorTest): + def __init__(self): + super().__init__() + self.vector = VastbaseVector( + collection_name=self.collection_name, + config=VastbaseVectorConfig( + host="localhost", + port=5434, + user="dify", + password="Difyai123456", + database="dify", + min_connection=1, + max_connection=5, + ), + ) + + +def test_vastbase_vector(setup_mock_redis): + VastbaseVectorTest().run_all_tests() diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py index 2a29ad3e41..f3dbd1836b 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py @@ -864,10 +864,11 @@ def test_condition_parallel_correct_output(mock_close, mock_remove, app): with patch.object(CodeNode, "_run", new=code_generator): generator = graph_engine.run() stream_content = "" - res_content = "VAT:\ndify 123" + wrong_content = ["Stamp Duty", "other"] for item in generator: if isinstance(item, NodeRunStreamChunkEvent): stream_content += f"{item.chunk_content}\n" if isinstance(item, GraphRunSucceededEvent): - assert item.outputs == {"answer": res_content} - assert stream_content == res_content + "\n" + assert item.outputs is not None + answer = item.outputs["answer"] + assert all(rc not in answer for rc in wrong_content) diff --git a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py index 36847f8a13..c16b453cba 100644 --- a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py +++ b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py @@ -8,9 +8,9 @@ import pytest from pytest_mock import MockerFixture from sqlalchemy.orm import Session, sessionmaker -from core.repository.workflow_node_execution_repository import OrderConfig +from 
core.repositories.workflow_node_execution.sqlalchemy_repository import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.repository.workflow_node_execution_repository import OrderConfig from models.workflow import WorkflowNodeExecution -from repositories.workflow_node_execution.sqlalchemy_repository import SQLAlchemyWorkflowNodeExecutionRepository @pytest.fixture @@ -80,7 +80,7 @@ def test_get_by_node_execution_id(repository, session, mocker: MockerFixture): """Test get_by_node_execution_id method.""" session_obj, _ = session # Set up mock - mock_select = mocker.patch("repositories.workflow_node_execution.sqlalchemy_repository.select") + mock_select = mocker.patch("core.repositories.workflow_node_execution.sqlalchemy_repository.select") mock_stmt = mocker.MagicMock() mock_select.return_value = mock_stmt mock_stmt.where.return_value = mock_stmt @@ -99,7 +99,7 @@ def test_get_by_workflow_run(repository, session, mocker: MockerFixture): """Test get_by_workflow_run method.""" session_obj, _ = session # Set up mock - mock_select = mocker.patch("repositories.workflow_node_execution.sqlalchemy_repository.select") + mock_select = mocker.patch("core.repositories.workflow_node_execution.sqlalchemy_repository.select") mock_stmt = mocker.MagicMock() mock_select.return_value = mock_stmt mock_stmt.where.return_value = mock_stmt @@ -120,7 +120,7 @@ def test_get_running_executions(repository, session, mocker: MockerFixture): """Test get_running_executions method.""" session_obj, _ = session # Set up mock - mock_select = mocker.patch("repositories.workflow_node_execution.sqlalchemy_repository.select") + mock_select = mocker.patch("core.repositories.workflow_node_execution.sqlalchemy_repository.select") mock_stmt = mocker.MagicMock() mock_select.return_value = mock_stmt mock_stmt.where.return_value = mock_stmt @@ -158,7 +158,7 @@ def test_clear(repository, session, mocker: MockerFixture): """Test clear method.""" session_obj, _ = session # Set up mock - mock_delete = mocker.patch("repositories.workflow_node_execution.sqlalchemy_repository.delete") + mock_delete = mocker.patch("core.repositories.workflow_node_execution.sqlalchemy_repository.delete") mock_stmt = mocker.MagicMock() mock_delete.return_value = mock_stmt mock_stmt.where.return_value = mock_stmt diff --git a/api/tests/unit_tests/utils/http_parser/test_oauth_convert_request_to_raw_data.py b/api/tests/unit_tests/utils/http_parser/test_oauth_convert_request_to_raw_data.py new file mode 100644 index 0000000000..f788a9756b --- /dev/null +++ b/api/tests/unit_tests/utils/http_parser/test_oauth_convert_request_to_raw_data.py @@ -0,0 +1,20 @@ +from werkzeug import Request +from werkzeug.datastructures import Headers +from werkzeug.test import EnvironBuilder + +from core.plugin.impl.oauth import OAuthHandler + + +def test_oauth_convert_request_to_raw_data(): + oauth_handler = OAuthHandler() + builder = EnvironBuilder( + method="GET", + path="/test", + headers=Headers({"Content-Type": "application/json"}), + ) + request = Request(builder.get_environ()) + raw_request_bytes = oauth_handler._convert_request_to_raw_data(request) + + assert b"GET /test HTTP/1.1" in raw_request_bytes + assert b"Content-Type: application/json" in raw_request_bytes + assert b"\r\n\r\n" in raw_request_bytes diff --git a/api/uv.lock b/api/uv.lock index 6c8699dd7c..4604041ac4 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1,12 +1,19 @@ version = 1 +revision = 1 requires-python = ">=3.11, <3.13" resolution-markers = [ - "python_full_version >= '3.12.4' and 
platform_python_implementation != 'PyPy'", - "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy'", - "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy'", - "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy'", - "python_full_version < '3.12' and platform_python_implementation != 'PyPy'", - "python_full_version < '3.12' and platform_python_implementation == 'PyPy'", + "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", + "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", + "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", + "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", + "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", + "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", + "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", + "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", + "python_full_version < '3.12' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", + "python_full_version < '3.12' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'", + "python_full_version < '3.12' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'", + "python_full_version < '3.12' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'", ] [[package]] @@ -627,7 +634,7 @@ name = "build" version = "1.2.2.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "os_name == 'nt'" }, + { name = "colorama", marker = "os_name == 'nt' and sys_platform != 'linux'" }, { name = "packaging" }, { name = "pyproject-hooks" }, ] @@ -1148,7 +1155,6 @@ wheels = [ [[package]] name = "dify-api" -version = "1.2.0" source = { virtual = "." 
} dependencies = [ { name = "authlib" }, @@ -1227,6 +1233,8 @@ dependencies = [ { name = "transformers" }, { name = "unstructured", extra = ["docx", "epub", "md", "ppt", "pptx"] }, { name = "validators" }, + { name = "weave" }, + { name = "webvtt-py" }, { name = "yarl" }, ] @@ -1396,6 +1404,8 @@ requires-dist = [ { name = "transformers", specifier = "~=4.35.0" }, { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" }, { name = "validators", specifier = "==0.21.0" }, + { name = "weave", specifier = "~=0.51.34" }, + { name = "webvtt-py", specifier = "~=0.5.1" }, { name = "yarl", specifier = "~=1.18.3" }, ] @@ -1487,6 +1497,15 @@ vdb = [ { name = "xinference-client", specifier = "~=1.2.2" }, ] +[[package]] +name = "diskcache" +version = "5.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550 }, +] + [[package]] name = "distro" version = "1.9.0" @@ -1496,6 +1515,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, ] +[[package]] +name = "docker-pycreds" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/e6/d1f6c00b7221e2d7c4b470132c931325c8b22c51ca62417e300f5ce16009/docker-pycreds-0.4.0.tar.gz", hash = "sha256:6ce3270bcaf404cc4c3e27e4b6c70d3521deae82fb508767870fdbf772d584d4", size = 8754 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/e8/f6bd1eee09314e7e6dee49cbe2c5e22314ccdb38db16c9fc72d2fa80d054/docker_pycreds-0.4.0-py2.py3-none-any.whl", hash = "sha256:7266112468627868005106ec19cd0d722702d2b7d5912a28e19b826c3d37af49", size = 8982 }, +] + [[package]] name = "docstring-parser" version = "0.16" @@ -1840,6 +1871,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/b2/5d20664ef6a077bec9f27f7a7ee761edc64946d0b1e293726a3d074a9a18/gevent-24.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:68bee86b6e1c041a187347ef84cf03a792f0b6c7238378bf6ba4118af11feaae", size = 1541631 }, ] +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794 }, +] + +[[package]] +name = "gitpython" +version = "3.1.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, +] + [[package]] name = "gmpy2" version = "2.2.1" @@ -2087,6 +2142,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/3a/1a7cac16438f4e5319a0c879416d5e5032c98c3db2874e6e5300b3b475e6/gotrue-2.11.4-py3-none-any.whl", hash = "sha256:712e5018acc00d93cfc6d7bfddc3114eb3c420ab03b945757a8ba38c5fc3caa8", size = 41106 }, ] +[[package]] +name = "gql" +version = "3.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "backoff" }, + { name = "graphql-core" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/ef/5298d9d628b6a54b3b810052cb5a935d324fe28d9bfdeb741733d5c2446b/gql-3.5.2.tar.gz", hash = "sha256:07e1325b820c8ba9478e95de27ce9f23250486e7e79113dbb7659a442dc13e74", size = 180502 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/71/b028b937992056e721bbf0371e13819fcca0dacde7b3c821f775ed903917/gql-3.5.2-py2.py3-none-any.whl", hash = "sha256:c830ffc38b3997b2a146317b27758305ab3d0da3bde607b49f34e32affb23ba2", size = 74346 }, +] + +[package.optional-dependencies] +aiohttp = [ + { name = "aiohttp" }, +] +requests = [ + { name = "requests" }, + { name = "requests-toolbelt" }, +] + +[[package]] +name = "graphql-core" +version = "3.2.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/9e/aa527fb09a9d7399d5d7d2aa2da490e4580707652d3b4fc156996ae88a5b/graphql-core-3.2.4.tar.gz", hash = "sha256:acbe2e800980d0e39b4685dd058c2f4042660b89ebca38af83020fd872ff1264", size = 504611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/cc72c4c658c6316f188a60bc4e5a91cd4ceaaa8c3e7e691ac9297e4e72c7/graphql_core-3.2.4-py3-none-any.whl", hash = "sha256:1604f2042edc5f3114f49cac9d77e25863be51b23a54a61a23245cf32f6476f0", size = 203179 }, +] + [[package]] name = "greenlet" version = "3.1.1" @@ -3815,6 +3903,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 }, ] +[[package]] +name = "platformdirs" +version = "4.3.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/2d/7d512a3913d60623e7eb945c6d1b4f0bddf1d0b7ada5225274c87e5b53d1/platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351", size = 21291 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/45/59578566b3275b8fd9157885918fcd0c4d74162928a5310926887b856a51/platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94", size = 18499 }, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -4084,8 +4181,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/cd/ed6e429fb0792ce368f66e83246264dd3a7a045b0b1e63043ed22a063ce5/pycryptodome-3.19.1-cp35-abi3-musllinux_1_1_x86_64.whl", 
hash = "sha256:7c9e222d0976f68d0cf6409cfea896676ddc1d98485d601e9508f90f60e2b0a2", size = 2144914 }, { url = "https://files.pythonhosted.org/packages/f6/23/b064bd4cfbf2cc5f25afcde0e7c880df5b20798172793137ba4b62d82e72/pycryptodome-3.19.1-cp35-abi3-win32.whl", hash = "sha256:4805e053571140cb37cf153b5c72cd324bb1e3e837cbe590a19f69b6cf85fd03", size = 1713105 }, { url = "https://files.pythonhosted.org/packages/7d/e0/ded1968a5257ab34216a0f8db7433897a2337d59e6d03be113713b346ea2/pycryptodome-3.19.1-cp35-abi3-win_amd64.whl", hash = "sha256:a470237ee71a1efd63f9becebc0ad84b88ec28e6784a2047684b693f458f41b7", size = 1749222 }, - { url = "https://files.pythonhosted.org/packages/1d/e3/0c9679cd66cf5604b1f070bdf4525a0c01a15187be287d8348b2eafb718e/pycryptodome-3.19.1-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:ed932eb6c2b1c4391e166e1a562c9d2f020bfff44a0e1b108f67af38b390ea89", size = 1629005 }, - { url = "https://files.pythonhosted.org/packages/13/75/0d63bf0daafd0580b17202d8a9dd57f28c8487f26146b3e2799b0c5a059c/pycryptodome-3.19.1-pp27-pypy_73-win32.whl", hash = "sha256:81e9d23c0316fc1b45d984a44881b220062336bbdc340aa9218e8d0656587934", size = 1697997 }, ] [[package]] @@ -4939,6 +5034,38 @@ flask = [ { name = "markupsafe" }, ] +[[package]] +name = "setproctitle" +version = "1.3.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/4d/6a840c8d2baa07b57329490e7094f90aac177a1d5226bc919046f1106860/setproctitle-1.3.5.tar.gz", hash = "sha256:1e6eaeaf8a734d428a95d8c104643b39af7d247d604f40a7bebcf3960a853c5e", size = 26737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/4a/9e0243c5df221102fb834a947f5753d9da06ad5f84e36b0e2e93f7865edb/setproctitle-1.3.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1c8dcc250872385f2780a5ea58050b58cbc8b6a7e8444952a5a65c359886c593", size = 17256 }, + { url = "https://files.pythonhosted.org/packages/c7/a1/76ad2ba6f5bd00609238e3d64eeded4598e742a5f25b5cc1a0efdae5f674/setproctitle-1.3.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ca82fae9eb4800231dd20229f06e8919787135a5581da245b8b05e864f34cc8b", size = 11893 }, + { url = "https://files.pythonhosted.org/packages/47/3a/75d11fedff5b21ba9a4c5fe3dfa5e596f831d094ef1896713a72e9e38833/setproctitle-1.3.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0424e1d33232322541cb36fb279ea5242203cd6f20de7b4fb2a11973d8e8c2ce", size = 31631 }, + { url = "https://files.pythonhosted.org/packages/5a/12/58220de5600e0ed2e5562297173187d863db49babb03491ffe9c101299bc/setproctitle-1.3.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fec8340ab543144d04a9d805d80a0aad73fdeb54bea6ff94e70d39a676ea4ec0", size = 32975 }, + { url = "https://files.pythonhosted.org/packages/fa/c4/fbb308680d83c1c7aa626950308318c6e6381a8273779163a31741f3c752/setproctitle-1.3.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eab441c89f181271ab749077dcc94045a423e51f2fb0b120a1463ef9820a08d0", size = 30126 }, + { url = "https://files.pythonhosted.org/packages/31/6e/baaf70bd9a881dd8c12cbccdd7ca0ff291024a37044a8245e942e12e7135/setproctitle-1.3.5-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c371550a2288901a0dcd84192691ebd3197a43c95f3e0b396ed6d1cedf5c6c", size = 31135 }, + { url = 
"https://files.pythonhosted.org/packages/a6/dc/d8ab6b1c3d844dc14f596e3cce76604570848f8a67ba6a3812775ed2c015/setproctitle-1.3.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:78288ff5f9c415c56595b2257ad218936dd9fa726b36341b373b31ca958590fe", size = 30874 }, + { url = "https://files.pythonhosted.org/packages/d4/84/62a359b3aa51228bd88f78b44ebb0256a5b96dd2487881c1e984a59b617d/setproctitle-1.3.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f1f13a25fc46731acab518602bb1149bfd8b5fabedf8290a7c0926d61414769d", size = 29893 }, + { url = "https://files.pythonhosted.org/packages/e2/d6/b3c52c03ee41e7f006e1a737e0db1c58d1dc28e258b83548e653d0c34f1c/setproctitle-1.3.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1534d6cd3854d035e40bf4c091984cbdd4d555d7579676d406c53c8f187c006f", size = 32293 }, + { url = "https://files.pythonhosted.org/packages/55/09/c0ba311879d9c05860503a7e2708ace85913b9a816786402a92c664fe930/setproctitle-1.3.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:62a01c76708daac78b9688ffb95268c57cb57fa90b543043cda01358912fe2db", size = 30247 }, + { url = "https://files.pythonhosted.org/packages/9e/43/cc7155461f0b5a48aebdb87d78239ff3a51ebda0905de478d9fa6ab92d9c/setproctitle-1.3.5-cp311-cp311-win32.whl", hash = "sha256:ea07f29735d839eaed985990a0ec42c8aecefe8050da89fec35533d146a7826d", size = 11476 }, + { url = "https://files.pythonhosted.org/packages/e7/57/6e937ac7aa52db69225f02db2cfdcb66ba1db6fdc65a4ddbdf78e214f72a/setproctitle-1.3.5-cp311-cp311-win_amd64.whl", hash = "sha256:ab3ae11e10d13d514d4a5a15b4f619341142ba3e18da48c40e8614c5a1b5e3c3", size = 12189 }, + { url = "https://files.pythonhosted.org/packages/2b/19/04755958495de57e4891de50f03e77b3fe9ca6716a86de00faa00ad0ee5a/setproctitle-1.3.5-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:523424b9be4dea97d95b8a584b183f35c7bab2d0a3d995b01febf5b8a8de90e4", size = 17250 }, + { url = "https://files.pythonhosted.org/packages/b9/3d/2ca9df5aa49b975296411dcbbe272cdb1c5e514c43b8be7d61751bb71a46/setproctitle-1.3.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b6ec1d86c1b4d7b5f2bdceadf213310cf24696b82480a2a702194b8a0bfbcb47", size = 11878 }, + { url = "https://files.pythonhosted.org/packages/36/d6/e90e23b4627e016a4f862d4f892be92c9765dd6bf1e27a48e52cd166d4a3/setproctitle-1.3.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea6c505264275a43e9b2acd2acfc11ac33caf52bc3167c9fced4418a810f6b1c", size = 31940 }, + { url = "https://files.pythonhosted.org/packages/15/13/167cdd55e00a8e10b36aad79646c3bf3c23fba0c08a9b8db9b74622c1b13/setproctitle-1.3.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b91e68e6685998e6353f296100ecabc313a6cb3e413d66a03d74b988b61f5ff", size = 33370 }, + { url = "https://files.pythonhosted.org/packages/9b/22/574a110527df133409a75053b7d6ff740993ccf30b8713d042f26840d351/setproctitle-1.3.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc1fda208ae3a2285ad27aeab44c41daf2328abe58fa3270157a739866779199", size = 30628 }, + { url = "https://files.pythonhosted.org/packages/52/79/78b05c7d792c9167b917acdab1773b1ff73b016560f45d8155be2baa1a82/setproctitle-1.3.5-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:828727d220e46f048b82289018300a64547b46aaed96bf8810c05fe105426b41", size = 31672 }, + { url = 
"https://files.pythonhosted.org/packages/b0/62/4509735be062129694751ac55d5e1fbb6d86fa46a8689b7d5e2c23dae5b0/setproctitle-1.3.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:83b016221cf80028b2947be20630faa14e3e72a403e35f0ba29550b4e856767b", size = 31378 }, + { url = "https://files.pythonhosted.org/packages/72/e7/b394c55934b89f00c2ef7d5e6f18cca5d8dfa26ef628700c4de0c85e3f3d/setproctitle-1.3.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6d8a411e752e794d052434139ca4234ffeceeb8d8d8ddc390a9051d7942b2726", size = 30370 }, + { url = "https://files.pythonhosted.org/packages/13/ee/e1f27bf52d2bec7060bb6311ab0ccede8de98ed5394e3a59e7a14a453fb5/setproctitle-1.3.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:50cfbf86b9c63a2c2903f1231f0a58edeb775e651ae1af84eec8430b0571f29b", size = 32875 }, + { url = "https://files.pythonhosted.org/packages/6e/08/13b561085d2de53b9becfa5578545d99114e9ff2aa3dc151bcaadf80b17e/setproctitle-1.3.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f3b5e2eacd572444770026c9dd3ddc7543ce427cdf452d40a408d1e95beefb30", size = 30903 }, + { url = "https://files.pythonhosted.org/packages/65/f0/6cd06fffff2553be7b0571447d0c0ef8b727ef44cc2d6a33452677a311c8/setproctitle-1.3.5-cp312-cp312-win32.whl", hash = "sha256:cf4e3ded98027de2596c6cc5bbd3302adfb3ca315c848f56516bb0b7e88de1e9", size = 11468 }, + { url = "https://files.pythonhosted.org/packages/c1/8c/e8a7cb568c4552618838941b332203bfc77ab0f2d67c1cb8f24dee0370ec/setproctitle-1.3.5-cp312-cp312-win_amd64.whl", hash = "sha256:f7a8c01ffd013dda2bed6e7d5cb59fbb609e72f805abf3ee98360f38f7758d9b", size = 12190 }, +] + [[package]] name = "setuptools" version = "78.1.0" @@ -4993,6 +5120,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -5876,6 +6012,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, ] +[[package]] +name = "uuid-utils" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/0a/cbdb2eb4845dafeb632d02a18f47b02f87f2ce4f25266f5e3c017976ce89/uuid_utils-0.10.0.tar.gz", hash = "sha256:5db0e1890e8f008657ffe6ded4d9459af724ab114cfe82af1557c87545301539", size = 18828 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/54/9d22fa16b19e5d1676eba510f08a9c458d96e2a62ff2c8ebad64251afb18/uuid_utils-0.10.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8d5a4508feefec62456cd6a41bcdde458d56827d908f226803b886d22a3d5e63", size = 
573006 }, + { url = "https://files.pythonhosted.org/packages/08/8e/f895c6e52aa603e521fbc13b8626ba5dd99b6e2f5a55aa96ba5b232f4c53/uuid_utils-0.10.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:dbefc2b9113f9dfe56bdae58301a2b3c53792221410d422826f3d1e3e6555fe7", size = 292543 }, + { url = "https://files.pythonhosted.org/packages/b6/58/cc4834f377a5e97d6e184408ad96d13042308de56643b6e24afe1f6f34df/uuid_utils-0.10.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffc49c33edf87d1ec8112a9b43e4cf55326877716f929c165a2cc307d31c73d5", size = 323340 }, + { url = "https://files.pythonhosted.org/packages/37/e3/6aeddf148f6a7dd7759621b000e8c85382ec83f52ae79b60842d1dc3ab6b/uuid_utils-0.10.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0636b6208f69d5a4e629707ad2a89a04dfa8d1023e1999181f6830646ca048a1", size = 329653 }, + { url = "https://files.pythonhosted.org/packages/0c/00/dd6c2164ace70b7b1671d9129267df331481d7d1e5f9c5e6a564f07953f6/uuid_utils-0.10.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bc06452856b724df9dedfc161c3582199547da54aeb81915ec2ed54f92d19b0", size = 365471 }, + { url = "https://files.pythonhosted.org/packages/b4/e7/0ab8080fcae5462a7b5e555c1cef3d63457baffb97a59b9bc7b005a3ecb1/uuid_utils-0.10.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:263b2589111c61decdd74a762e8f850c9e4386fb78d2cf7cb4dfc537054cda1b", size = 325844 }, + { url = "https://files.pythonhosted.org/packages/73/39/52d94e9ef75b03f44b39ffc6ac3167e93e74ef4d010a93d25589d9f48540/uuid_utils-0.10.0-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a558db48b7096de6b4d2d2210d82bba8586a6d55f99106b03bb7d01dc5c5bcd6", size = 344389 }, + { url = "https://files.pythonhosted.org/packages/7c/29/4824566f62666238290d99c62a58e4ab2a8b9cf2eccf94cebd9b3359131e/uuid_utils-0.10.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:807465067f3c892514230326ac71a79b28a8dfe2c88ecd2d5675fc844f3c76b5", size = 510078 }, + { url = "https://files.pythonhosted.org/packages/5e/8f/bbcc7130d652462c685f0d3bd26bb214b754215b476340885a4cb50fb89a/uuid_utils-0.10.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:57423d4a2b9d7b916de6dbd75ba85465a28f9578a89a97f7d3e098d9aa4e5d4a", size = 515937 }, + { url = "https://files.pythonhosted.org/packages/23/f8/34e0c00f5f188604d336713e6a020fcf53b10998e8ab24735a39ab076740/uuid_utils-0.10.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:76d8d660f18ff6b767e319b1b5f927350cd92eafa4831d7ef5b57fdd1d91f974", size = 494111 }, + { url = "https://files.pythonhosted.org/packages/1a/52/b7f0066cc90a7a9c28d54061ed195cd617fde822e5d6ac3ccc88509c3c44/uuid_utils-0.10.0-cp39-abi3-win32.whl", hash = "sha256:6c11a71489338837db0b902b75e1ba7618d5d29f05fde4f68b3f909177dbc226", size = 173520 }, + { url = "https://files.pythonhosted.org/packages/8b/15/f04f58094674d333974243fb45d2c740cf4b79186fb707168e57943c84a3/uuid_utils-0.10.0-cp39-abi3-win_amd64.whl", hash = "sha256:11c55ae64f6c0a7a0c741deae8ca2a4eaa11e9c09dbb7bec2099635696034cf7", size = 182965 }, +] + [[package]] name = "uuid6" version = "2024.7.10" @@ -5965,6 +6121,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/37/da/7ccbe82470dc27e1cfd0466dc637248be906eb8447c28a40c1c74cf617ee/volcengine_compat-1.0.156-py3-none-any.whl", hash = "sha256:4abc149a7601ebad8fa2d28fab50c7945145cf74daecb71bca797b0bdc82c5a5", size = 677272 }, ] +[[package]] +name = "wandb" +version = "0.18.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { 
name = "click" }, + { name = "docker-pycreds" }, + { name = "gitpython" }, + { name = "platformdirs" }, + { name = "protobuf" }, + { name = "psutil" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "sentry-sdk" }, + { name = "setproctitle" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/57/8a61979c40a7a0a5206ef3369ed474326135bf292f172019f35dca97a235/wandb-0.18.3.tar.gz", hash = "sha256:eb2574cea72bc908c6ce1b37edf7a889619e6e06e1b4714eecfe0662ded43c06", size = 8686381 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/4a/6fa1d584ecd69cea5b9943ec5cfa36276cbd567efa8709135a7e4ab89cfb/wandb-0.18.3-py3-none-any.whl", hash = "sha256:7da64f7da0ff7572439de10bfd45534e8811e71e78ac2ccc3b818f1c0f3a9aef", size = 5015658 }, + { url = "https://files.pythonhosted.org/packages/59/8f/deef595ca67833ea5aceb5da5fc10759a5e8f8bce85b17761b1614fa2ba9/wandb-0.18.3-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:6674d8a5c40c79065b9c7eb765136756d5ebc9457a5f9abc820a660fb23f8b67", size = 10081571 }, + { url = "https://files.pythonhosted.org/packages/06/85/b55642d095407369dd7ad1d8ea1e7f410d60fcdb6c29bcc9afb1e5522d51/wandb-0.18.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:741f566e409a2684d3047e4cc25e8e914d78196b901190937b24b6abb8b052e5", size = 10008319 }, + { url = "https://files.pythonhosted.org/packages/b4/53/5387afaab29876e669973b3bb5bda829e3c10e509caef59f614bf20c0106/wandb-0.18.3-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:8be5e877570b693001c52dcc2089e48e6a4dcbf15f3adf5c9349f95148b59d58", size = 10250633 }, + { url = "https://files.pythonhosted.org/packages/bd/79/2fa554283afa7259e296313160164947daf52e0d42b04d6ecf9c5af01e15/wandb-0.18.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d788852bd4739fa18de3918f309c3a955b5cef3247fae1c40df3a63af637e1a0", size = 12339454 }, + { url = "https://files.pythonhosted.org/packages/86/a6/11eaa16c96469b4d6fc0fb3271e70d5bbe2c3a93c15fc677de9a1aa4374a/wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab81424eb207d78239a8d69c90521a70074fb81e3709055484e43c76fe44dc08", size = 12970950 }, + { url = "https://files.pythonhosted.org/packages/13/dd/ccaa5a51e2557368300eec9e362b5688151e45a052e33017633baa3011a9/wandb-0.18.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2c91315b8b62423eae18577d66a4b4bb8e4341a7d5c849cb2963e3b3dff0bf6d", size = 13038220 }, + { url = "https://files.pythonhosted.org/packages/bc/6f/fabbf2161078556384ef48f3db89182773010cdd14900986004e702b85f5/wandb-0.18.3-py3-none-win32.whl", hash = "sha256:92a647dab783938ec87776a9fae8a13e72e6dad939c53e357cdea9d2570f0ad8", size = 12573298 }, + { url = "https://files.pythonhosted.org/packages/d8/7b/e94b46d620d26b2e1f486f2746febdcb6579be20f361355b40263ddd8262/wandb-0.18.3-py3-none-win_amd64.whl", hash = "sha256:29cac2cfa3124241fed22cfedc9a52e1500275ee9bbb0b428ce4bf63c4723bf0", size = 12573303 }, +] + [[package]] name = "watchfiles" version = "1.0.5" @@ -6011,6 +6197,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, ] +[[package]] +name = "weave" +version = "0.51.43" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "diskcache" }, + { name = "emoji" }, + { name = "gql", extra = ["aiohttp", "requests"] }, + { name = "jsonschema" }, + { name = 
"numpy" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "rich" }, + { name = "tenacity" }, + { name = "uuid-utils" }, + { name = "wandb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/b4/8fb1e21bc0b0442be9c4c5e4644847596cd75a35a313a5887f1eadda8da2/weave-0.51.43.tar.gz", hash = "sha256:bab4ba6f7ba33f1975e5f6399b7fc4ad6b25c0e2cd22d197bb9358a7b9596b91", size = 368936 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/40/1e374d3f1f8389a4228426b5a87aae7428a7eb74dfa633de98d86796eb41/weave-0.51.43-py3-none-any.whl", hash = "sha256:2e9faa0e21bd5a6fea363142891ee4f2e347951b98f0d7082acb0273432cb940", size = 473685 }, +] + [[package]] name = "weaviate-client" version = "3.21.0" @@ -6075,6 +6283,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/c8/d529f8a32ce40d98309f4470780631e971a5a842b60aec864833b3615786/websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b", size = 157416 }, ] +[[package]] +name = "webvtt-py" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/f6/7c9c964681fb148e0293e6860108d378e09ccab2218f9063fd3eb87f840a/webvtt-py-0.5.1.tar.gz", hash = "sha256:2040dd325277ddadc1e0c6cc66cbc4a1d9b6b49b24c57a0c3364374c3e8a3dc1", size = 55128 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/ed/aad7e0f5a462d679f7b4d2e0d8502c3096740c883b5bbed5103146480937/webvtt_py-0.5.1-py3-none-any.whl", hash = "sha256:9d517d286cfe7fc7825e9d4e2079647ce32f5678eb58e39ef544ffbb932610b7", size = 19802 }, +] + [[package]] name = "werkzeug" version = "3.1.3" diff --git a/docker/.env.example b/docker/.env.example index 0b80dccb37..7bff2975fb 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -39,6 +39,12 @@ APP_WEB_URL= # File preview or download Url prefix. # used to display File preview or download Url to the front-end or as Multi-model inputs; # Url is signed and has expiration time. +# Setting FILES_URL is required for file processing plugins. +# - For https://example.com, use FILES_URL=https://example.com +# - For http://example.com, use FILES_URL=http://example.com +# Recommendation: use a dedicated domain (e.g., https://upload.example.com). +# Alternatively, use http://:5001 or http://api:5001, +# ensuring port 5001 is externally accessible (see docker-compose.yaml). FILES_URL= # ------------------------------ @@ -406,6 +412,7 @@ QDRANT_GRPC_PORT=6334 # Milvus configuration. Only available when VECTOR_STORE is `milvus`. # The milvus uri. MILVUS_URI=http://host.docker.internal:19530 +MILVUS_DATABASE= MILVUS_TOKEN= MILVUS_USER= MILVUS_PASSWORD= @@ -441,6 +448,15 @@ PGVECTOR_MAX_CONNECTION=5 PGVECTOR_PG_BIGM=false PGVECTOR_PG_BIGM_VERSION=1.2-20240606 +# vastbase configurations, only available when VECTOR_STORE is `vastbase` +VASTBASE_HOST=vastbase +VASTBASE_PORT=5432 +VASTBASE_USER=dify +VASTBASE_PASSWORD=Difyai123456 +VASTBASE_DATABASE=dify +VASTBASE_MIN_CONNECTION=1 +VASTBASE_MAX_CONNECTION=5 + # pgvecto-rs configurations, only available when VECTOR_STORE is `pgvecto-rs` PGVECTO_RS_HOST=pgvecto-rs PGVECTO_RS_PORT=5432 @@ -510,9 +526,13 @@ RELYT_DATABASE=postgres # open search configuration, only available when VECTOR_STORE is `opensearch` OPENSEARCH_HOST=opensearch OPENSEARCH_PORT=9200 +OPENSEARCH_SECURE=true +OPENSEARCH_AUTH_METHOD=basic OPENSEARCH_USER=admin OPENSEARCH_PASSWORD=admin -OPENSEARCH_SECURE=true +# If using AWS managed IAM, e.g. 
Managed Cluster or OpenSearch Serverless +OPENSEARCH_AWS_REGION=ap-southeast-1 +OPENSEARCH_AWS_SERVICE=aoss # tencent vector configurations, only available when VECTOR_STORE is `tencent` TENCENT_VECTOR_DB_URL=http://127.0.0.1 @@ -553,6 +573,7 @@ VIKINGDB_SOCKET_TIMEOUT=30 LINDORM_URL=http://lindorm:30070 LINDORM_USERNAME=lindorm LINDORM_PASSWORD=lindorm +LINDORM_QUERY_TIMEOUT=1 # OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase` OCEANBASE_VECTOR_HOST=oceanbase diff --git a/docker/README.md b/docker/README.md index 38b11a677f..22dfe2c91c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -14,7 +14,6 @@ Welcome to the new `docker` directory for deploying Dify using Docker Compose. T - **Unified Vector Database Services**: All vector database services are now managed from a single Docker Compose file `docker-compose.yaml`. You can switch between different vector databases by setting the `VECTOR_STORE` environment variable in your `.env` file. - **Mandatory .env File**: A `.env` file is now required to run `docker compose up`. This file is crucial for configuring your deployment and for any custom settings to persist through upgrades. -- **Legacy Support**: Previous deployment files are now located in the `docker-legacy` directory and will no longer be maintained. ### How to Deploy Dify with `docker-compose.yaml` diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 377ff9c117..bfbfe6c19a 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:1.2.0 + image: langgenius/dify-api:1.3.1 restart: always environment: # Use the shared environment variables. @@ -31,7 +31,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:1.2.0 + image: langgenius/dify-api:1.3.1 restart: always environment: # Use the shared environment variables. @@ -57,7 +57,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:1.2.0 + image: langgenius/dify-web:1.3.1 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} @@ -142,7 +142,7 @@ services: # plugin daemon plugin_daemon: - image: langgenius/dify-plugin-daemon:0.0.7-local + image: langgenius/dify-plugin-daemon:0.0.9-local restart: always environment: # Use the shared environment variables. 
@@ -363,6 +363,30 @@ services: timeout: 3s retries: 30 + # get image from https://www.vastdata.com.cn/ + vastbase: + image: vastdata/vastbase-vector + profiles: + - vastbase + restart: always + environment: + - VB_DBCOMPATIBILITY=PG + - VB_DB=dify + - VB_USERNAME=dify + - VB_PASSWORD=Difyai123456 + ports: + - '5434:5432' + volumes: + - ./vastbase/lic:/home/vastbase/vastbase/lic + - ./vastbase/data:/home/vastbase/data + - ./vastbase/backup:/home/vastbase/backup + - ./vastbase/backup_log:/home/vastbase/backup_log + healthcheck: + test: [ 'CMD', 'pg_isready' ] + interval: 1s + timeout: 3s + retries: 30 + # pgvecto-rs vector store pgvecto-rs: image: tensorchord/pgvecto-rs:pg16-v0.3.0 diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml index 1702a5395f..01c7573a95 100644 --- a/docker/docker-compose.middleware.yaml +++ b/docker/docker-compose.middleware.yaml @@ -71,7 +71,7 @@ services: # plugin daemon plugin_daemon: - image: langgenius/dify-plugin-daemon:0.0.7-local + image: langgenius/dify-plugin-daemon:0.0.9-local restart: always env_file: - ./middleware.env diff --git a/docker/docker-compose.png b/docker/docker-compose.png index bdac113086..015d450236 100644 Binary files a/docker/docker-compose.png and b/docker/docker-compose.png differ diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 81fa651ed9..3ed0f60e96 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -138,6 +138,7 @@ x-shared-env: &shared-api-worker-env QDRANT_GRPC_ENABLED: ${QDRANT_GRPC_ENABLED:-false} QDRANT_GRPC_PORT: ${QDRANT_GRPC_PORT:-6334} MILVUS_URI: ${MILVUS_URI:-http://host.docker.internal:19530} + MILVUS_DATABASE: ${MILVUS_DATABASE:-} MILVUS_TOKEN: ${MILVUS_TOKEN:-} MILVUS_USER: ${MILVUS_USER:-} MILVUS_PASSWORD: ${MILVUS_PASSWORD:-} @@ -163,6 +164,13 @@ x-shared-env: &shared-api-worker-env PGVECTOR_MAX_CONNECTION: ${PGVECTOR_MAX_CONNECTION:-5} PGVECTOR_PG_BIGM: ${PGVECTOR_PG_BIGM:-false} PGVECTOR_PG_BIGM_VERSION: ${PGVECTOR_PG_BIGM_VERSION:-1.2-20240606} + VASTBASE_HOST: ${VASTBASE_HOST:-vastbase} + VASTBASE_PORT: ${VASTBASE_PORT:-5432} + VASTBASE_USER: ${VASTBASE_USER:-dify} + VASTBASE_PASSWORD: ${VASTBASE_PASSWORD:-Difyai123456} + VASTBASE_DATABASE: ${VASTBASE_DATABASE:-dify} + VASTBASE_MIN_CONNECTION: ${VASTBASE_MIN_CONNECTION:-1} + VASTBASE_MAX_CONNECTION: ${VASTBASE_MAX_CONNECTION:-5} PGVECTO_RS_HOST: ${PGVECTO_RS_HOST:-pgvecto-rs} PGVECTO_RS_PORT: ${PGVECTO_RS_PORT:-5432} PGVECTO_RS_USER: ${PGVECTO_RS_USER:-postgres} @@ -217,9 +225,12 @@ x-shared-env: &shared-api-worker-env RELYT_DATABASE: ${RELYT_DATABASE:-postgres} OPENSEARCH_HOST: ${OPENSEARCH_HOST:-opensearch} OPENSEARCH_PORT: ${OPENSEARCH_PORT:-9200} + OPENSEARCH_SECURE: ${OPENSEARCH_SECURE:-true} + OPENSEARCH_AUTH_METHOD: ${OPENSEARCH_AUTH_METHOD:-basic} OPENSEARCH_USER: ${OPENSEARCH_USER:-admin} OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin} - OPENSEARCH_SECURE: ${OPENSEARCH_SECURE:-true} + OPENSEARCH_AWS_REGION: ${OPENSEARCH_AWS_REGION:-ap-southeast-1} + OPENSEARCH_AWS_SERVICE: ${OPENSEARCH_AWS_SERVICE:-aoss} TENCENT_VECTOR_DB_URL: ${TENCENT_VECTOR_DB_URL:-http://127.0.0.1} TENCENT_VECTOR_DB_API_KEY: ${TENCENT_VECTOR_DB_API_KEY:-dify} TENCENT_VECTOR_DB_TIMEOUT: ${TENCENT_VECTOR_DB_TIMEOUT:-30} @@ -250,6 +261,7 @@ x-shared-env: &shared-api-worker-env LINDORM_URL: ${LINDORM_URL:-http://lindorm:30070} LINDORM_USERNAME: ${LINDORM_USERNAME:-lindorm} LINDORM_PASSWORD: ${LINDORM_PASSWORD:-lindorm} + LINDORM_QUERY_TIMEOUT: ${LINDORM_QUERY_TIMEOUT:-1} OCEANBASE_VECTOR_HOST: 
${OCEANBASE_VECTOR_HOST:-oceanbase} OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881} OCEANBASE_VECTOR_USER: ${OCEANBASE_VECTOR_USER:-root@test} @@ -479,7 +491,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:1.2.0 + image: langgenius/dify-api:1.3.1 restart: always environment: # Use the shared environment variables. @@ -508,7 +520,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:1.2.0 + image: langgenius/dify-api:1.3.1 restart: always environment: # Use the shared environment variables. @@ -534,7 +546,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:1.2.0 + image: langgenius/dify-web:1.3.1 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} @@ -619,7 +631,7 @@ services: # plugin daemon plugin_daemon: - image: langgenius/dify-plugin-daemon:0.0.7-local + image: langgenius/dify-plugin-daemon:0.0.9-local restart: always environment: # Use the shared environment variables. @@ -840,6 +852,30 @@ services: timeout: 3s retries: 30 + # get image from https://www.vastdata.com.cn/ + vastbase: + image: vastdata/vastbase-vector + profiles: + - vastbase + restart: always + environment: + - VB_DBCOMPATIBILITY=PG + - VB_DB=dify + - VB_USERNAME=dify + - VB_PASSWORD=Difyai123456 + ports: + - '5434:5432' + volumes: + - ./vastbase/lic:/home/vastbase/vastbase/lic + - ./vastbase/data:/home/vastbase/data + - ./vastbase/backup:/home/vastbase/backup + - ./vastbase/backup_log:/home/vastbase/backup_log + healthcheck: + test: [ 'CMD', 'pg_isready' ] + interval: 1s + timeout: 3s + retries: 30 + # pgvecto-rs vector store pgvecto-rs: image: tensorchord/pgvecto-rs:pg16-v0.3.0 diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config-popup.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config-popup.tsx index eb23da2ae0..0efc5082a4 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config-popup.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config-popup.tsx @@ -5,7 +5,7 @@ import { useTranslation } from 'react-i18next' import { useBoolean } from 'ahooks' import TracingIcon from './tracing-icon' import ProviderPanel from './provider-panel' -import type { LangFuseConfig, LangSmithConfig, OpikConfig } from './type' +import type { LangFuseConfig, LangSmithConfig, OpikConfig, WeaveConfig } from './type' import { TracingProvider } from './type' import ProviderConfigModal from './provider-config-modal' import Indicator from '@/app/components/header/indicator' @@ -26,7 +26,8 @@ export type PopupProps = { langSmithConfig: LangSmithConfig | null langFuseConfig: LangFuseConfig | null opikConfig: OpikConfig | null - onConfigUpdated: (provider: TracingProvider, payload: LangSmithConfig | LangFuseConfig | OpikConfig) => void + weaveConfig: WeaveConfig | null + onConfigUpdated: (provider: TracingProvider, payload: LangSmithConfig | LangFuseConfig | OpikConfig | WeaveConfig) => void onConfigRemoved: (provider: TracingProvider) => void } @@ -40,6 +41,7 @@ const ConfigPopup: FC = ({ langSmithConfig, langFuseConfig, opikConfig, + weaveConfig, onConfigUpdated, onConfigRemoved, }) => { @@ -63,7 +65,7 @@ const ConfigPopup: FC = ({ } }, [onChooseProvider]) - const handleConfigUpdated = useCallback((payload: LangSmithConfig | LangFuseConfig | OpikConfig) => { + const handleConfigUpdated = useCallback((payload: LangSmithConfig | LangFuseConfig | OpikConfig | 
WeaveConfig) => { onConfigUpdated(currentProvider!, payload) hideConfigModal() }, [currentProvider, hideConfigModal, onConfigUpdated]) @@ -73,8 +75,8 @@ const ConfigPopup: FC = ({ hideConfigModal() }, [currentProvider, hideConfigModal, onConfigRemoved]) - const providerAllConfigured = langSmithConfig && langFuseConfig && opikConfig - const providerAllNotConfigured = !langSmithConfig && !langFuseConfig && !opikConfig + const providerAllConfigured = langSmithConfig && langFuseConfig && opikConfig && weaveConfig + const providerAllNotConfigured = !langSmithConfig && !langFuseConfig && !opikConfig && !weaveConfig const switchContent = ( = ({ /> ) + const weavePanel = ( + + ) const configuredProviderPanel = () => { const configuredPanels: JSX.Element[] = [] - if (langSmithConfig) - configuredPanels.push(langSmithPanel) - if (langFuseConfig) configuredPanels.push(langfusePanel) + if (langSmithConfig) + configuredPanels.push(langSmithPanel) + if (opikConfig) configuredPanels.push(opikPanel) + if (weaveConfig) + configuredPanels.push(weavePanel) + return configuredPanels } const moreProviderPanel = () => { const notConfiguredPanels: JSX.Element[] = [] - if (!langSmithConfig) - notConfiguredPanels.push(langSmithPanel) - if (!langFuseConfig) notConfiguredPanels.push(langfusePanel) + if (!langSmithConfig) + notConfiguredPanels.push(langSmithPanel) + if (!opikConfig) notConfiguredPanels.push(opikPanel) + if (!weaveConfig) + notConfiguredPanels.push(weavePanel) + return notConfiguredPanels } @@ -158,7 +178,9 @@ const ConfigPopup: FC = ({ return langSmithConfig if (currentProvider === TracingProvider.langfuse) return langFuseConfig - return opikConfig + if (currentProvider === TracingProvider.opik) + return opikConfig + return weaveConfig } return ( @@ -199,9 +221,10 @@ const ConfigPopup: FC = ({ <>
{t(`${I18N_PREFIX}.configProviderTitle.${providerAllConfigured ? 'configured' : 'notConfigured'}`)}
-          {langSmithPanel}
           {langfusePanel}
+          {langSmithPanel}
           {opikPanel}
+          {weavePanel}
) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts index 0f3f280b30..5d3c4076bd 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts @@ -4,4 +4,5 @@ export const docURL = { [TracingProvider.langSmith]: 'https://docs.smith.langchain.com/', [TracingProvider.langfuse]: 'https://docs.langfuse.com', [TracingProvider.opik]: 'https://www.comet.com/docs/opik/tracing/integrations/dify#setup-instructions', + [TracingProvider.weave]: 'https://weave-docs.wandb.ai/', } diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/panel.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/panel.tsx index 160110f7ed..8575117c41 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/panel.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/panel.tsx @@ -7,12 +7,12 @@ import { import { useTranslation } from 'react-i18next' import { usePathname } from 'next/navigation' import { useBoolean } from 'ahooks' -import type { LangFuseConfig, LangSmithConfig, OpikConfig } from './type' +import type { LangFuseConfig, LangSmithConfig, OpikConfig, WeaveConfig } from './type' import { TracingProvider } from './type' import TracingIcon from './tracing-icon' import ConfigButton from './config-button' import cn from '@/utils/classnames' -import { LangfuseIcon, LangsmithIcon, OpikIcon } from '@/app/components/base/icons/src/public/tracing' +import { LangfuseIcon, LangsmithIcon, OpikIcon, WeaveIcon } from '@/app/components/base/icons/src/public/tracing' import Indicator from '@/app/components/header/indicator' import { fetchTracingConfig as doFetchTracingConfig, fetchTracingStatus, updateTracingStatus } from '@/service/apps' import type { TracingStatus } from '@/models/app' @@ -82,12 +82,15 @@ const Panel: FC = () => { ? LangfuseIcon : inUseTracingProvider === TracingProvider.opik ? OpikIcon - : LangsmithIcon + : inUseTracingProvider === TracingProvider.weave + ? 
WeaveIcon + : LangsmithIcon const [langSmithConfig, setLangSmithConfig] = useState(null) const [langFuseConfig, setLangFuseConfig] = useState(null) const [opikConfig, setOpikConfig] = useState(null) - const hasConfiguredTracing = !!(langSmithConfig || langFuseConfig || opikConfig) + const [weaveConfig, setWeaveConfig] = useState(null) + const hasConfiguredTracing = !!(langSmithConfig || langFuseConfig || opikConfig || weaveConfig) const fetchTracingConfig = async () => { const { tracing_config: langSmithConfig, has_not_configured: langSmithHasNotConfig } = await doFetchTracingConfig({ appId, provider: TracingProvider.langSmith }) @@ -99,6 +102,9 @@ const Panel: FC = () => { const { tracing_config: opikConfig, has_not_configured: OpikHasNotConfig } = await doFetchTracingConfig({ appId, provider: TracingProvider.opik }) if (!OpikHasNotConfig) setOpikConfig(opikConfig as OpikConfig) + const { tracing_config: weaveConfig, has_not_configured: weaveHasNotConfig } = await doFetchTracingConfig({ appId, provider: TracingProvider.weave }) + if (!weaveHasNotConfig) + setWeaveConfig(weaveConfig as WeaveConfig) } const handleTracingConfigUpdated = async (provider: TracingProvider) => { @@ -110,6 +116,8 @@ const Panel: FC = () => { setLangFuseConfig(tracing_config as LangFuseConfig) else if (provider === TracingProvider.opik) setOpikConfig(tracing_config as OpikConfig) + else if (provider === TracingProvider.weave) + setWeaveConfig(tracing_config as WeaveConfig) } const handleTracingConfigRemoved = (provider: TracingProvider) => { @@ -119,6 +127,8 @@ const Panel: FC = () => { setLangFuseConfig(null) else if (provider === TracingProvider.opik) setOpikConfig(null) + else if (provider === TracingProvider.weave) + setWeaveConfig(null) if (provider === inUseTracingProvider) { handleTracingStatusChange({ enabled: false, @@ -178,6 +188,7 @@ const Panel: FC = () => { langSmithConfig={langSmithConfig} langFuseConfig={langFuseConfig} opikConfig={opikConfig} + weaveConfig={weaveConfig} onConfigUpdated={handleTracingConfigUpdated} onConfigRemoved={handleTracingConfigRemoved} controlShowPopup={controlShowPopup} @@ -212,6 +223,7 @@ const Panel: FC = () => { langSmithConfig={langSmithConfig} langFuseConfig={langFuseConfig} opikConfig={opikConfig} + weaveConfig={weaveConfig} onConfigUpdated={handleTracingConfigUpdated} onConfigRemoved={handleTracingConfigRemoved} controlShowPopup={controlShowPopup} diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx index a7675c4a66..c0b52a9b10 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx @@ -4,7 +4,7 @@ import React, { useCallback, useState } from 'react' import { useTranslation } from 'react-i18next' import { useBoolean } from 'ahooks' import Field from './field' -import type { LangFuseConfig, LangSmithConfig, OpikConfig } from './type' +import type { LangFuseConfig, LangSmithConfig, OpikConfig, WeaveConfig } from './type' import { TracingProvider } from './type' import { docURL } from './config' import { @@ -22,10 +22,10 @@ import Divider from '@/app/components/base/divider' type Props = { appId: string type: TracingProvider - payload?: LangSmithConfig | LangFuseConfig | OpikConfig | null + payload?: LangSmithConfig | LangFuseConfig | OpikConfig | WeaveConfig | null 
onRemoved: () => void onCancel: () => void - onSaved: (payload: LangSmithConfig | LangFuseConfig | OpikConfig) => void + onSaved: (payload: LangSmithConfig | LangFuseConfig | OpikConfig | WeaveConfig) => void onChosen: (provider: TracingProvider) => void } @@ -50,6 +50,13 @@ const opikConfigTemplate = { workspace: '', } +const weaveConfigTemplate = { + api_key: '', + entity: '', + project: '', + endpoint: '', +} + const ProviderConfigModal: FC = ({ appId, type, @@ -63,7 +70,7 @@ const ProviderConfigModal: FC = ({ const isEdit = !!payload const isAdd = !isEdit const [isSaving, setIsSaving] = useState(false) - const [config, setConfig] = useState((() => { + const [config, setConfig] = useState((() => { if (isEdit) return payload @@ -73,7 +80,10 @@ const ProviderConfigModal: FC = ({ else if (type === TracingProvider.langfuse) return langFuseConfigTemplate - return opikConfigTemplate + else if (type === TracingProvider.opik) + return opikConfigTemplate + + return weaveConfigTemplate })()) const [isShowRemoveConfirm, { setTrue: showRemoveConfirm, @@ -127,6 +137,14 @@ const ProviderConfigModal: FC = ({ // const postData = config as OpikConfig } + if (type === TracingProvider.weave) { + const postData = config as WeaveConfig + if (!errorMessage && !postData.api_key) + errorMessage = t('common.errorMsg.fieldRequired', { field: 'API Key' }) + if (!errorMessage && !postData.project) + errorMessage = t('common.errorMsg.fieldRequired', { field: t(`${I18N_PREFIX}.project`) }) + } + return errorMessage }, [config, t, type]) const handleSave = useCallback(async () => { @@ -176,6 +194,40 @@ const ProviderConfigModal: FC = ({
+ {type === TracingProvider.weave && ( + <> + + + + + + )} {type === TracingProvider.langSmith && ( <> = ({ /> )} -
{ [TracingProvider.langSmith]: LangsmithIconBig, [TracingProvider.langfuse]: LangfuseIconBig, [TracingProvider.opik]: OpikIconBig, + [TracingProvider.weave]: WeaveIconBig, })[type] } diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/type.ts b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/type.ts index 982d01ffb3..386c58974e 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/type.ts +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/type.ts @@ -2,6 +2,7 @@ export enum TracingProvider { langSmith = 'langsmith', langfuse = 'langfuse', opik = 'opik', + weave = 'weave', } export type LangSmithConfig = { @@ -22,3 +23,10 @@ export type OpikConfig = { workspace: string url: string } + +export type WeaveConfig = { + api_key: string + entity: string + project: string + endpoint: string +} diff --git a/web/app/components/app/app-publisher/index.tsx b/web/app/components/app/app-publisher/index.tsx index 360741ab2e..d4357a0955 100644 --- a/web/app/components/app/app-publisher/index.tsx +++ b/web/app/components/app/app-publisher/index.tsx @@ -231,7 +231,7 @@ const AppPublisher = ({ > {t('workflow.common.runApp')} - {appDetail?.mode === 'workflow' + {appDetail?.mode === 'workflow' || appDetail?.mode === 'completion' ? ( = ({ }} > {t('appDebug.feature.dataSet.noDataSet')} - {t('appDebug.feature.dataSet.toCreate')} + {t('appDebug.feature.dataSet.toCreate')}
)} diff --git a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx index 645f6045f0..3170d33a82 100644 --- a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx +++ b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx @@ -4,7 +4,6 @@ import { useMount } from 'ahooks' import { useTranslation } from 'react-i18next' import { isEqual } from 'lodash-es' import { RiCloseLine } from '@remixicon/react' -import { BookOpenIcon } from '@heroicons/react/24/outline' import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development' import cn from '@/utils/classnames' import IndexMethodRadio from '@/app/components/datasets/settings/index-method-radio' @@ -223,10 +222,6 @@ const SettingsModal: FC = ({ className='resize-none' placeholder={t('datasetSettings.form.descPlaceholder') || ''} /> - - - {t('datasetSettings.form.descWrite')} -
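Taken together, the tracing changes above register Weave as a fourth provider next to LangSmith, Langfuse and Opik. As a minimal sketch (not part of the patch), the new config shape and the required-field check added in provider-config-modal.tsx can be exercised like this; the type and field names come from the diff, while the concrete values and the helper function are illustrative only:

```typescript
// Shape added in tracing/type.ts by this patch.
type WeaveConfig = {
  api_key: string
  entity: string
  project: string
  endpoint: string
}

// Enum value added in tracing/type.ts; the string is what the backend receives.
enum TracingProvider {
  weave = 'weave',
}

// Mirrors the checkValid branch added for Weave: api_key and project are
// mandatory, entity and endpoint may stay empty.
function validateWeaveConfig(config: WeaveConfig): string | null {
  if (!config.api_key)
    return 'API Key is required'
  if (!config.project)
    return 'Project is required'
  return null
}

// Illustrative payload only; real values come from the provider config modal.
const example: WeaveConfig = {
  api_key: 'your-wandb-api-key',
  entity: '',
  project: 'dify-traces',
  endpoint: '',
}

console.log(TracingProvider.weave, validateWeaveConfig(example)) // -> weave null
```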
diff --git a/web/app/components/app/create-app-dialog/app-list/index.tsx b/web/app/components/app/create-app-dialog/app-list/index.tsx index 702a07397d..0b0b325d9a 100644 --- a/web/app/components/app/create-app-dialog/app-list/index.tsx +++ b/web/app/components/app/create-app-dialog/app-list/index.tsx @@ -191,14 +191,16 @@ const Apps = ({
{!searchKeywords &&
- { setCurrCategory(category) }} onCreateFromBlank={onCreateFromBlank} /> + { setCurrCategory(category) }} onCreateFromBlank={onCreateFromBlank} />
}
{searchFilteredList && searchFilteredList.length > 0 && <>
{searchKeywords ?

{searchFilteredList.length > 1 ? t('app.newApp.foundResults', { count: searchFilteredList.length }) : t('app.newApp.foundResult', { count: searchFilteredList.length })}

- : } + :
+ +
}
void + current: AppCategories | string + categories: string[] + onClick?: (category: AppCategories | string) => void onCreateFromBlank?: () => void } -export default function Sidebar({ current, onClick, onCreateFromBlank }: SidebarProps) { +export default function Sidebar({ current, categories, onClick, onCreateFromBlank }: SidebarProps) { const { t } = useTranslation() return
-
    +
    -
    {t('app.newAppFromTemplate.byCategories')}
    +
    {t('app.newAppFromTemplate.byCategories')}
      - - - - - - + {categories.map(category => ())}
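The hunk above replaces the hard-coded list of CategoryItem elements with a categories.map over caller-supplied strings. A small sketch of the resulting label behaviour, assuming (as the AppCategoryLabel change further down suggests) that only the Recommended entry keeps a translated label while every other category string is shown verbatim; the helper and values below are illustrative, not code from the patch:

```typescript
// Only Recommended remains a named constant after the refactor.
enum AppCategories {
  RECOMMENDED = 'Recommended',
}

// Mirrors the new AppCategoryLabel: translate Recommended, pass other strings through.
function categoryLabel(category: AppCategories | string, t: (key: string) => string): string {
  return category === AppCategories.RECOMMENDED
    ? t('app.newAppFromTemplate.sidebar.Recommended')
    : category
}

// Hypothetical category list; at runtime it would come from the template data.
const categories: string[] = [AppCategories.RECOMMENDED, 'Agent', 'Workflow']
const t = (key: string) => key // stand-in for the i18n t() function

console.log(categories.map(c => categoryLabel(c, t)))
// -> ['app.newAppFromTemplate.sidebar.Recommended', 'Agent', 'Workflow']
```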
    @@ -45,47 +35,26 @@ export default function Sidebar({ current, onClick, onCreateFromBlank }: Sidebar type CategoryItemProps = { active: boolean - category: AppCategories - onClick?: (category: AppCategories) => void + category: AppCategories | string + onClick?: (category: AppCategories | string) => void } function CategoryItem({ category, active, onClick }: CategoryItemProps) { return
  • { onClick?.(category) }}> -
    - -
    + {category === AppCategories.RECOMMENDED &&
    + +
    }
  • } type AppCategoryLabelProps = { - category: AppCategories + category: AppCategories | string className?: string } export function AppCategoryLabel({ category, className }: AppCategoryLabelProps) { const { t } = useTranslation() - return {t(`app.newAppFromTemplate.sidebar.${category}`)} -} - -type AppCategoryIconProps = { - category: AppCategories -} -function AppCategoryIcon({ category }: AppCategoryIconProps) { - if (category === AppCategories.AGENT) - return - if (category === AppCategories.ASSISTANT) - return - if (category === AppCategories.HR) - return - if (category === AppCategories.PROGRAMMING) - return - if (category === AppCategories.RECOMMENDED) - return - if (category === AppCategories.WRITING) - return - if (category === AppCategories.WORKFLOW) - return - return + return {category === AppCategories.RECOMMENDED ? t('app.newAppFromTemplate.sidebar.Recommended') : category} } diff --git a/web/app/components/app/create-from-dsl-modal/dsl-confirm-modal.tsx b/web/app/components/app/create-from-dsl-modal/dsl-confirm-modal.tsx new file mode 100644 index 0000000000..e6aadaa326 --- /dev/null +++ b/web/app/components/app/create-from-dsl-modal/dsl-confirm-modal.tsx @@ -0,0 +1,46 @@ +import { useTranslation } from 'react-i18next' +import Modal from '@/app/components/base/modal' +import Button from '@/app/components/base/button' + +type DSLConfirmModalProps = { + versions?: { + importedVersion: string + systemVersion: string + } + onCancel: () => void + onConfirm: () => void + confirmDisabled?: boolean +} +const DSLConfirmModal = ({ + versions = { importedVersion: '', systemVersion: '' }, + onCancel, + onConfirm, + confirmDisabled = false, +}: DSLConfirmModalProps) => { + const { t } = useTranslation() + + return ( + onCancel()} + className='w-[480px]' + > +
    +
    {t('app.newApp.appCreateDSLErrorTitle')}
    +
    +
    {t('app.newApp.appCreateDSLErrorPart1')}
    +
    {t('app.newApp.appCreateDSLErrorPart2')}
    +
    +
    {t('app.newApp.appCreateDSLErrorPart3')}{versions.importedVersion}
    +
    {t('app.newApp.appCreateDSLErrorPart4')}{versions.systemVersion}
    +
    +
    +
    + + +
    +
    + ) +} + +export default DSLConfirmModal diff --git a/web/app/components/app/overview/embedded/index.tsx b/web/app/components/app/overview/embedded/index.tsx index d4e5dd8898..691b727b8e 100644 --- a/web/app/components/app/overview/embedded/index.tsx +++ b/web/app/components/app/overview/embedded/index.tsx @@ -29,7 +29,7 @@ const OPTION_MAP = { iframe: { getContent: (url: string, token: string) => `