diff --git a/.devcontainer/README.md b/.devcontainer/README.md index 2b18630a21..359e2e5aef 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -1,23 +1,26 @@ # Development with devcontainer + This project includes a devcontainer configuration that allows you to open the project in a container with a fully configured development environment. Both frontend and backend environments are initialized when the container is started. + ## GitHub Codespaces + [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/langgenius/dify) you can simply click the button above to open this project in GitHub Codespaces. For more info, check out the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/developing-online-with-codespaces/creating-a-codespace#creating-a-codespace). - ## VS Code Dev Containers + [![Open in Dev Containers](https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langgenius/dify) if you have VS Code installed, you can click the button above to open this project in VS Code Dev Containers. You can learn more in the [Dev Containers documentation](https://code.visualstudio.com/docs/devcontainers/containers). - ## Pros of Devcontainer + Unified Development Environment: By using devcontainers, you can ensure that all developers are developing in the same environment, reducing the occurrence of "it works on my machine" type of issues. Quick Start: New developers can set up their development environment in a few simple steps, without spending a lot of time on environment configuration. 
@@ -25,11 +28,13 @@ Quick Start: New developers can set up their development environment in a few si Isolation: Devcontainers isolate your project from your host operating system, reducing the chance of OS updates or other application installations impacting the development environment. ## Cons of Devcontainer + Learning Curve: For developers unfamiliar with Docker and VS Code, using devcontainers may be somewhat complex. Performance Impact: While usually minimal, programs running inside a devcontainer may be slightly slower than those running directly on the host. ## Troubleshooting + if you see such error message when you open this project in codespaces: ![Alt text](troubleshooting.png) diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh index c25bde87b0..39a653953e 100755 --- a/.devcontainer/post_create_command.sh +++ b/.devcontainer/post_create_command.sh @@ -1,6 +1,6 @@ #!/bin/bash -npm add -g pnpm@10.13.1 +npm add -g pnpm@10.15.0 cd web && pnpm install pipx install uv diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md index 47e2453f41..a59630d112 100644 --- a/.github/CODE_OF_CONDUCT.md +++ b/.github/CODE_OF_CONDUCT.md @@ -17,27 +17,25 @@ diverse, inclusive, and healthy community. 
Examples of behavior that contributes to a positive environment for our community include: -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience -* Focusing on what is best not just for us as individuals, but for the +- Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: -* The use of sexualized language or imagery, and sexual attention or +- The use of sexualized language or imagery, and sexual attention or advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a +- Other conduct which could reasonably be considered inappropriate in a professional setting ## Language Policy To facilitate clear and effective communication, all discussions, comments, documentation, and pull requests in this project should be conducted in English. This ensures that all contributors can participate and collaborate effectively. 
- - diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index f4a5f754e0..aa5a50918a 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,8 +1,8 @@ > [!IMPORTANT] > > 1. Make sure you have read our [contribution guidelines](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) -> 2. Ensure there is an associated issue and you have been assigned to it -> 3. Use the correct syntax to link this PR: `Fixes #<issue number>`. +> 1. Ensure there is an associated issue and you have been assigned to it +> 1. Use the correct syntax to link this PR: `Fixes #<issue number>`. ## Summary @@ -12,7 +12,7 @@ | Before | After | |--------|-------| -| ... | ... | +| ... | ... | ## Checklist diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index 9c3daddbfc..63d681e7ed 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -47,7 +47,16 @@ jobs: - name: Run Unit tests run: | uv run --project api bash dev/pytest/pytest_unit_tests.sh - + - name: Run ty check + run: | + cd api + uv add --dev ty + uv run ty check || true + - name: Run pyrefly check + run: | + cd api + uv add --dev pyrefly + uv run pyrefly check || true - name: Coverage Summary run: | set -x diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 152ff3b648..dada6229db 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -23,6 +23,10 @@ jobs: uv run ruff check --fix-only . # Format code uv run ruff format . - + - name: ast-grep + run: | + uvx --from ast-grep-cli sg --pattern 'db.session.query($WHATEVER).filter($HERE)' --rewrite 'db.session.query($WHATEVER).where($HERE)' -l py --update-all + - name: mdformat + run: | + uvx mdformat . 
- uses: autofix-ci/action@635ffb0c9798bd160680f18fd73371e355b85f27 - diff --git a/.vscode/README.md b/.vscode/README.md index 26516f0540..87b45787c3 100644 --- a/.vscode/README.md +++ b/.vscode/README.md @@ -4,10 +4,10 @@ This `launch.json.template` file provides various debug configurations for the D ## How to Use -1. **Create `launch.json`**: If you don't have one, create a file named `launch.json` inside the `.vscode` directory. -2. **Copy Content**: Copy the entire content from `launch.json.template` into your newly created `launch.json` file. -3. **Select Debug Configuration**: Go to the Run and Debug view in VS Code / Cursor (Ctrl+Shift+D or Cmd+Shift+D). -4. **Start Debugging**: Select the desired configuration from the dropdown menu and click the green play button. +1. **Create `launch.json`**: If you don't have one, create a file named `launch.json` inside the `.vscode` directory. +1. **Copy Content**: Copy the entire content from `launch.json.template` into your newly created `launch.json` file. +1. **Select Debug Configuration**: Go to the Run and Debug view in VS Code / Cursor (Ctrl+Shift+D or Cmd+Shift+D). +1. **Start Debugging**: Select the desired configuration from the dropdown menu and click the green play button. ## Tips diff --git a/CLAUDE.md b/CLAUDE.md index 7ce04382c9..fd437d7bf0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,6 +7,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Dify is an open-source platform for developing LLM applications with an intuitive interface combining agentic AI workflows, RAG pipelines, agent capabilities, and model management. 
The codebase consists of: + - **Backend API** (`/api`): Python Flask application with Domain-Driven Design architecture - **Frontend Web** (`/web`): Next.js 15 application with TypeScript and React 19 - **Docker deployment** (`/docker`): Containerized deployment configurations @@ -46,6 +47,7 @@ pnpm test # Run Jest tests ## Testing Guidelines ### Backend Testing + - Use `pytest` for all backend tests - Write tests first (TDD approach) - Test structure: Arrange-Act-Assert @@ -53,11 +55,13 @@ pnpm test # Run Jest tests ## Code Style Requirements ### Python + - Use type hints for all functions and class attributes - No `Any` types unless absolutely necessary - Implement special methods (`__repr__`, `__str__`) appropriately -### TypeScript/JavaScript +### TypeScript/JavaScript + - Strict TypeScript configuration - ESLint with Prettier integration - Avoid `any` type @@ -73,10 +77,11 @@ pnpm test # Run Jest tests ## Common Development Tasks ### Adding a New API Endpoint + 1. Create controller in `/api/controllers/` -2. Add service logic in `/api/services/` -3. Update routes in controller's `__init__.py` -4. Write tests in `/api/tests/` +1. Add service logic in `/api/services/` +1. Update routes in controller's `__init__.py` +1. 
Write tests in `/api/tests/` ## Project-Specific Conventions diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d4ba36485..fdc414b047 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -34,11 +34,11 @@ Don't forget to link an existing issue or open a new issue in the PR's descripti How we prioritize: - | Issue Type | Priority | - | ------------------------------------------------------------ | --------------- | - | Bugs in core functions (cloud service, cannot login, applications not working, security loopholes) | Critical | - | Non-critical bugs, performance boosts | Medium Priority | - | Minor fixes (typos, confusing but working UI) | Low Priority | +| Issue Type | Priority | +| ------------------------------------------------------------ | --------------- | +| Bugs in core functions (cloud service, cannot login, applications not working, security loopholes) | Critical | +| Non-critical bugs, performance boosts | Medium Priority | +| Minor fixes (typos, confusing but working UI) | Low Priority | ### Feature requests @@ -52,23 +52,25 @@ How we prioritize: How we prioritize: - | Feature Type | Priority | - | ------------------------------------------------------------ | --------------- | - | High-Priority Features as being labeled by a team member | High Priority | - | Popular feature requests from our [community feedback board](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Medium Priority | - | Non-core features and minor enhancements | Low Priority | - | Valuable but not immediate | Future-Feature | +| Feature Type | Priority | +| ------------------------------------------------------------ | --------------- | +| High-Priority Features as being labeled by a team member | High Priority | +| Popular feature requests from our [community feedback board](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Medium Priority | +| Non-core features and minor enhancements | Low Priority | +| Valuable but not immediate | 
Future-Feature | + ## Submitting your PR ### Pull Request Process 1. Fork the repository -2. Before you draft a PR, please create an issue to discuss the changes you want to make -3. Create a new branch for your changes -4. Please add tests for your changes accordingly -5. Ensure your code passes the existing tests -6. Please link the issue in the PR description, `fixes #<issue_number>` -7. Get merged! +1. Before you draft a PR, please create an issue to discuss the changes you want to make +1. Create a new branch for your changes +1. Please add tests for your changes accordingly +1. Ensure your code passes the existing tests +1. Please link the issue in the PR description, `fixes #<issue_number>` +1. Get merged! + ### Setup the project #### Frontend @@ -82,12 +84,14 @@ For setting up the backend service, kindly refer to our detailed [instructions]( #### Other things to note We recommend reviewing this document carefully before proceeding with the setup, as it contains essential information about: + - Prerequisites and dependencies - Installation steps - Configuration details - Common troubleshooting tips Feel free to reach out if you encounter any issues during the setup process. + ## Getting Help If you ever get stuck or get a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/8Tpq4AcN9c) for a quick chat. 
diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md index 69ae7071bb..c278c8fd7a 100644 --- a/CONTRIBUTING_CN.md +++ b/CONTRIBUTING_CN.md @@ -34,12 +34,11 @@ 优先级划分: - | 问题类型 | 优先级 | - | -------------------------------------------------- | ---------- | - | 核心功能 bug(云服务、登录失败、应用无法使用、安全漏洞) | 紧急 | - | 非关键 bug、性能优化 | 中等优先级 | - | 小修复(拼写错误、界面混乱但可用) | 低优先级 | - +| 问题类型 | 优先级 | +| -------------------------------------------------- | ---------- | +| 核心功能 bug(云服务、登录失败、应用无法使用、安全漏洞) | 紧急 | +| 非关键 bug、性能优化 | 中等优先级 | +| 小修复(拼写错误、界面混乱但可用) | 低优先级 | ### 功能请求 @@ -53,12 +52,12 @@ 优先级划分: - | 功能类型 | 优先级 | - | -------------------------------------------------- | ---------- | - | 被团队成员标记为高优先级的功能 | 高优先级 | - | 来自[社区反馈板](https://github.com/langgenius/dify/discussions/categories/feedbacks)的热门功能请求 | 中等优先级 | - | 非核心功能和小改进 | 低优先级 | - | 有价值但非紧急的功能 | 未来特性 | +| 功能类型 | 优先级 | +| -------------------------------------------------- | ---------- | +| 被团队成员标记为高优先级的功能 | 高优先级 | +| 来自[社区反馈板](https://github.com/langgenius/dify/discussions/categories/feedbacks)的热门功能请求 | 中等优先级 | +| 非核心功能和小改进 | 低优先级 | +| 有价值但非紧急的功能 | 未来特性 | ## 提交 PR @@ -67,12 +66,12 @@ ### PR 提交流程 1. Fork 本仓库 -2. 在提交 PR 之前,请先创建 issue 讨论你想要做的修改 -3. 为你的修改创建一个新的分支 -4. 请为你的修改添加相应的测试 -5. 确保你的代码能通过现有的测试 -6. 请在 PR 描述中关联相关 issue,格式为 `fixes #` -7. 等待合并! +1. 在提交 PR 之前,请先创建 issue 讨论你想要做的修改 +1. 为你的修改创建一个新的分支 +1. 请为你的修改添加相应的测试 +1. 确保你的代码能通过现有的测试 +1. 请在 PR 描述中关联相关 issue,格式为 `fixes #` +1. 等待合并! 
#### 前端 @@ -85,6 +84,7 @@ #### 其他注意事项 我们建议在开始设置之前仔细阅读本文档,因为它包含以下重要信息: + - 前置条件和依赖项 - 安装步骤 - 配置细节 diff --git a/CONTRIBUTING_DE.md b/CONTRIBUTING_DE.md index ddbf3abc55..f819e80bbb 100644 --- a/CONTRIBUTING_DE.md +++ b/CONTRIBUTING_DE.md @@ -32,11 +32,11 @@ Vergessen Sie nicht, in der PR-Beschreibung ein bestehendes Issue zu verlinken o Unsere Priorisierung: - | Fehlertyp | Priorität | - | ------------------------------------------------------------ | --------------- | - | Fehler in Kernfunktionen (Cloud-Service, Login nicht möglich, Anwendungen funktionieren nicht, Sicherheitslücken) | Kritisch | - | Nicht-kritische Fehler, Leistungsverbesserungen | Mittlere Priorität | - | Kleinere Korrekturen (Tippfehler, verwirrende aber funktionierende UI) | Niedrige Priorität | +| Fehlertyp | Priorität | +| ------------------------------------------------------------ | --------------- | +| Fehler in Kernfunktionen (Cloud-Service, Login nicht möglich, Anwendungen funktionieren nicht, Sicherheitslücken) | Kritisch | +| Nicht-kritische Fehler, Leistungsverbesserungen | Mittlere Priorität | +| Kleinere Korrekturen (Tippfehler, verwirrende aber funktionierende UI) | Niedrige Priorität | ### Feature-Anfragen @@ -50,24 +50,24 @@ Unsere Priorisierung: Unsere Priorisierung: - | Feature-Typ | Priorität | - | ------------------------------------------------------------ | --------------- | - | Hochprioritäre Features (durch Teammitglied gekennzeichnet) | Hohe Priorität | - | Beliebte Feature-Anfragen aus unserem [Community-Feedback-Board](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Mittlere Priorität | - | Nicht-Kernfunktionen und kleinere Verbesserungen | Niedrige Priorität | - | Wertvoll, aber nicht dringend | Zukunfts-Feature | +| Feature-Typ | Priorität | +| ------------------------------------------------------------ | --------------- | +| Hochprioritäre Features (durch Teammitglied gekennzeichnet) | Hohe Priorität | +| Beliebte Feature-Anfragen aus unserem 
[Community-Feedback-Board](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Mittlere Priorität | +| Nicht-Kernfunktionen und kleinere Verbesserungen | Niedrige Priorität | +| Wertvoll, aber nicht dringend | Zukunfts-Feature | ## Einreichen Ihres PRs ### Pull-Request-Prozess 1. Repository forken -2. Vor dem Erstellen eines PRs bitte ein Issue zur Diskussion der Änderungen erstellen -3. Einen neuen Branch für Ihre Änderungen erstellen -4. Tests für Ihre Änderungen hinzufügen -5. Sicherstellen, dass Ihr Code die bestehenden Tests besteht -6. Issue in der PR-Beschreibung verlinken (`fixes #`) -7. Auf den Merge warten! +1. Vor dem Erstellen eines PRs bitte ein Issue zur Diskussion der Änderungen erstellen +1. Einen neuen Branch für Ihre Änderungen erstellen +1. Tests für Ihre Änderungen hinzufügen +1. Sicherstellen, dass Ihr Code die bestehenden Tests besteht +1. Issue in der PR-Beschreibung verlinken (`fixes #`) +1. Auf den Merge warten! ### Projekt einrichten @@ -82,6 +82,7 @@ Für die Einrichtung des Backend-Service folgen Sie bitte unseren detaillierten #### Weitere Hinweise Wir empfehlen, dieses Dokument sorgfältig zu lesen, da es wichtige Informationen enthält über: + - Voraussetzungen und Abhängigkeiten - Installationsschritte - Konfigurationsdetails @@ -92,4 +93,3 @@ Bei Problemen während der Einrichtung können Sie sich gerne an uns wenden. ## Hilfe bekommen Wenn Sie beim Mitwirken Fragen haben oder nicht weiterkommen, stellen Sie Ihre Fragen einfach im entsprechenden GitHub Issue oder besuchen Sie unseren [Discord](https://discord.gg/8Tpq4AcN9c) für einen schnellen Austausch. 
- diff --git a/CONTRIBUTING_ES.md b/CONTRIBUTING_ES.md index 98cbb5b457..e19d958c65 100644 --- a/CONTRIBUTING_ES.md +++ b/CONTRIBUTING_ES.md @@ -34,11 +34,11 @@ No olvides vincular un issue existente o abrir uno nuevo en la descripción del Cómo priorizamos: - | Tipo de Issue | Prioridad | - | ------------------------------------------------------------ | --------------- | - | Errores en funciones principales (servicio en la nube, no poder iniciar sesión, aplicaciones que no funcionan, fallos de seguridad) | Crítica | - | Errores no críticos, mejoras de rendimiento | Prioridad Media | - | Correcciones menores (errores tipográficos, UI confusa pero funcional) | Prioridad Baja | +| Tipo de Issue | Prioridad | +| ------------------------------------------------------------ | --------------- | +| Errores en funciones principales (servicio en la nube, no poder iniciar sesión, aplicaciones que no funcionan, fallos de seguridad) | Crítica | +| Errores no críticos, mejoras de rendimiento | Prioridad Media | +| Correcciones menores (errores tipográficos, UI confusa pero funcional) | Prioridad Baja | ### Solicitudes de funcionalidades @@ -52,23 +52,25 @@ Cómo priorizamos: Cómo priorizamos: - | Tipo de Funcionalidad | Prioridad | - | ------------------------------------------------------------ | --------------- | - | Funcionalidades de alta prioridad etiquetadas por un miembro del equipo | Prioridad Alta | - | Solicitudes populares de funcionalidades de nuestro [tablero de comentarios de la comunidad](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Prioridad Media | - | Funcionalidades no principales y mejoras menores | Prioridad Baja | - | Valiosas pero no inmediatas | Futura-Funcionalidad | +| Tipo de Funcionalidad | Prioridad | +| ------------------------------------------------------------ | --------------- | +| Funcionalidades de alta prioridad etiquetadas por un miembro del equipo | Prioridad Alta | +| Solicitudes populares de funcionalidades de 
nuestro [tablero de comentarios de la comunidad](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Prioridad Media | +| Funcionalidades no principales y mejoras menores | Prioridad Baja | +| Valiosas pero no inmediatas | Futura-Funcionalidad | + ## Enviando tu PR ### Proceso de Pull Request 1. Haz un fork del repositorio -2. Antes de redactar un PR, por favor crea un issue para discutir los cambios que quieres hacer -3. Crea una nueva rama para tus cambios -4. Por favor añade pruebas para tus cambios en consecuencia -5. Asegúrate de que tu código pasa las pruebas existentes -6. Por favor vincula el issue en la descripción del PR, `fixes #` -7. ¡Fusiona tu código! +1. Antes de redactar un PR, por favor crea un issue para discutir los cambios que quieres hacer +1. Crea una nueva rama para tus cambios +1. Por favor añade pruebas para tus cambios en consecuencia +1. Asegúrate de que tu código pasa las pruebas existentes +1. Por favor vincula el issue en la descripción del PR, `fixes #` +1. ¡Fusiona tu código! + ### Configuración del proyecto #### Frontend @@ -82,12 +84,14 @@ Para configurar el servicio backend, por favor consulta nuestras [instrucciones #### Otras cosas a tener en cuenta Recomendamos revisar este documento cuidadosamente antes de proceder con la configuración, ya que contiene información esencial sobre: + - Requisitos previos y dependencias - Pasos de instalación - Detalles de configuración - Consejos comunes de solución de problemas No dudes en contactarnos si encuentras algún problema durante el proceso de configuración. + ## Obteniendo Ayuda -Si alguna vez te quedas atascado o tienes una pregunta urgente mientras contribuyes, simplemente envíanos tus consultas a través del issue relacionado de GitHub, o únete a nuestro [Discord](https://discord.gg/8Tpq4AcN9c) para una charla rápida. 
+Si alguna vez te quedas atascado o tienes una pregunta urgente mientras contribuyes, simplemente envíanos tus consultas a través del issue relacionado de GitHub, o únete a nuestro [Discord](https://discord.gg/8Tpq4AcN9c) para una charla rápida. diff --git a/CONTRIBUTING_FR.md b/CONTRIBUTING_FR.md index fc8410dfd6..335e943fcd 100644 --- a/CONTRIBUTING_FR.md +++ b/CONTRIBUTING_FR.md @@ -34,11 +34,11 @@ N'oubliez pas de lier un problème existant ou d'ouvrir un nouveau problème dan Comment nous priorisons : - | Type de Problème | Priorité | - | ------------------------------------------------------------ | --------------- | - | Bugs dans les fonctions principales (service cloud, impossibilité de se connecter, applications qui ne fonctionnent pas, failles de sécurité) | Critique | - | Bugs non critiques, améliorations de performance | Priorité Moyenne | - | Corrections mineures (fautes de frappe, UI confuse mais fonctionnelle) | Priorité Basse | +| Type de Problème | Priorité | +| ------------------------------------------------------------ | --------------- | +| Bugs dans les fonctions principales (service cloud, impossibilité de se connecter, applications qui ne fonctionnent pas, failles de sécurité) | Critique | +| Bugs non critiques, améliorations de performance | Priorité Moyenne | +| Corrections mineures (fautes de frappe, UI confuse mais fonctionnelle) | Priorité Basse | ### Demandes de fonctionnalités @@ -52,23 +52,25 @@ Comment nous priorisons : Comment nous priorisons : - | Type de Fonctionnalité | Priorité | - | ------------------------------------------------------------ | --------------- | - | Fonctionnalités hautement prioritaires étiquetées par un membre de l'équipe | Priorité Haute | - | Demandes populaires de fonctionnalités de notre [tableau de feedback communautaire](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Priorité Moyenne | - | Fonctionnalités non essentielles et améliorations mineures | Priorité Basse | - | Précieuses 
mais non immédiates | Fonctionnalité Future | +| Type de Fonctionnalité | Priorité | +| ------------------------------------------------------------ | --------------- | +| Fonctionnalités hautement prioritaires étiquetées par un membre de l'équipe | Priorité Haute | +| Demandes populaires de fonctionnalités de notre [tableau de feedback communautaire](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Priorité Moyenne | +| Fonctionnalités non essentielles et améliorations mineures | Priorité Basse | +| Précieuses mais non immédiates | Fonctionnalité Future | + ## Soumettre votre PR ### Processus de Pull Request 1. Forkez le dépôt -2. Avant de rédiger une PR, veuillez créer un problème pour discuter des changements que vous souhaitez apporter -3. Créez une nouvelle branche pour vos changements -4. Veuillez ajouter des tests pour vos changements en conséquence -5. Assurez-vous que votre code passe les tests existants -6. Veuillez lier le problème dans la description de la PR, `fixes #` -7. Faites fusionner votre code ! +1. Avant de rédiger une PR, veuillez créer un problème pour discuter des changements que vous souhaitez apporter +1. Créez une nouvelle branche pour vos changements +1. Veuillez ajouter des tests pour vos changements en conséquence +1. Assurez-vous que votre code passe les tests existants +1. Veuillez lier le problème dans la description de la PR, `fixes #` +1. Faites fusionner votre code ! + ### Configuration du projet #### Frontend @@ -82,12 +84,14 @@ Pour configurer le service backend, veuillez consulter nos [instructions détail #### Autres choses à noter Nous recommandons de revoir attentivement ce document avant de procéder à la configuration, car il contient des informations essentielles sur : + - Prérequis et dépendances - Étapes d'installation - Détails de configuration - Conseils courants de dépannage N'hésitez pas à nous contacter si vous rencontrez des problèmes pendant le processus de configuration. 
+ ## Obtenir de l'aide -Si jamais vous êtes bloqué ou avez une question urgente en contribuant, envoyez-nous simplement vos questions via le problème GitHub concerné, ou rejoignez notre [Discord](https://discord.gg/8Tpq4AcN9c) pour une discussion rapide. +Si jamais vous êtes bloqué ou avez une question urgente en contribuant, envoyez-nous simplement vos questions via le problème GitHub concerné, ou rejoignez notre [Discord](https://discord.gg/8Tpq4AcN9c) pour une discussion rapide. diff --git a/CONTRIBUTING_JA.md b/CONTRIBUTING_JA.md index e991d0263e..2d0d79fc16 100644 --- a/CONTRIBUTING_JA.md +++ b/CONTRIBUTING_JA.md @@ -34,11 +34,11 @@ PRの説明には、既存のイシューへのリンクを含めるか、新し 優先順位の付け方: - | 問題の種類 | 優先度 | - | ------------------------------------------------------------ | --------- | - | コア機能のバグ(クラウドサービス、ログイン不可、アプリケーション不具合、セキュリティ脆弱性) | 最重要 | - | 重要度の低いバグ、パフォーマンス改善 | 中程度 | - | 軽微な修正(タイプミス、分かりにくいが動作するUI) | 低 | +| 問題の種類 | 優先度 | +| ------------------------------------------------------------ | --------- | +| コア機能のバグ(クラウドサービス、ログイン不可、アプリケーション不具合、セキュリティ脆弱性) | 最重要 | +| 重要度の低いバグ、パフォーマンス改善 | 中程度 | +| 軽微な修正(タイプミス、分かりにくいが動作するUI) | 低 | ### 機能リクエスト @@ -52,24 +52,24 @@ PRの説明には、既存のイシューへのリンクを含めるか、新し 優先順位の付け方: - | 機能の種類 | 優先度 | - | ------------------------------------------------------------ | --------- | - | チームメンバーによって高優先度とラベル付けされた機能 | 高 | - | [コミュニティフィードボード](https://github.com/langgenius/dify/discussions/categories/feedbacks)での人気の機能リクエスト | 中程度 | - | 非コア機能と軽微な改善 | 低 | - | 価値はあるが緊急性の低いもの | 将来対応 | +| 機能の種類 | 優先度 | +| ------------------------------------------------------------ | --------- | +| チームメンバーによって高優先度とラベル付けされた機能 | 高 | +| [コミュニティフィードボード](https://github.com/langgenius/dify/discussions/categories/feedbacks)での人気の機能リクエスト | 中程度 | +| 非コア機能と軽微な改善 | 低 | +| 価値はあるが緊急性の低いもの | 将来対応 | ## PRの提出 ### プルリクエストのプロセス 1. リポジトリをフォークする -2. PRを作成する前に、変更内容についてイシューで議論する -3. 変更用の新しいブランチを作成する -4. 変更に応じたテストを追加する -5. 既存のテストをパスすることを確認する -6. PRの説明文にイシューをリンクする(`fixes #`) -7. マージ完了! +1. PRを作成する前に、変更内容についてイシューで議論する +1. 
変更用の新しいブランチを作成する +1. 変更に応じたテストを追加する +1. 既存のテストをパスすることを確認する +1. PRの説明文にイシューをリンクする(`fixes #`) +1. マージ完了! ### プロジェクトのセットアップ @@ -84,6 +84,7 @@ PRの説明には、既存のイシューへのリンクを含めるか、新し #### その他の注意点 セットアップを進める前に、以下の重要な情報が含まれているため、このドキュメントを注意深く確認することをお勧めします: + - 前提条件と依存関係 - インストール手順 - 設定の詳細 @@ -94,4 +95,3 @@ PRの説明には、既存のイシューへのリンクを含めるか、新し ## サポートを受ける 貢献中に行き詰まったり、緊急の質問がある場合は、関連するGitHubイシューで質問するか、[Discord](https://discord.gg/8Tpq4AcN9c)で気軽にチャットしてください。 - diff --git a/CONTRIBUTING_KR.md b/CONTRIBUTING_KR.md index 78d3f38c47..14b1c9a9ca 100644 --- a/CONTRIBUTING_KR.md +++ b/CONTRIBUTING_KR.md @@ -34,11 +34,11 @@ PR 설명에 기존 이슈를 연결하거나 새 이슈를 여는 것을 잊지 우선순위 결정 방법: - | 이슈 유형 | 우선순위 | - | ------------------------------------------------------------ | --------------- | - | 핵심 기능의 버그(클라우드 서비스, 로그인 불가, 애플리케이션 작동 불능, 보안 취약점) | 중대 | - | 비중요 버그, 성능 향상 | 중간 우선순위 | - | 사소한 수정(오타, 혼란스럽지만 작동하는 UI) | 낮은 우선순위 | +| 이슈 유형 | 우선순위 | +| ------------------------------------------------------------ | --------------- | +| 핵심 기능의 버그(클라우드 서비스, 로그인 불가, 애플리케이션 작동 불능, 보안 취약점) | 중대 | +| 비중요 버그, 성능 향상 | 중간 우선순위 | +| 사소한 수정(오타, 혼란스럽지만 작동하는 UI) | 낮은 우선순위 | ### 기능 요청 @@ -52,23 +52,25 @@ PR 설명에 기존 이슈를 연결하거나 새 이슈를 여는 것을 잊지 우선순위 결정 방법: - | 기능 유형 | 우선순위 | - | ------------------------------------------------------------ | --------------- | - | 팀 구성원에 의해 레이블이 지정된 고우선순위 기능 | 높은 우선순위 | - | 우리의 [커뮤니티 피드백 보드](https://github.com/langgenius/dify/discussions/categories/feedbacks)에서 인기 있는 기능 요청 | 중간 우선순위 | - | 비핵심 기능 및 사소한 개선 | 낮은 우선순위 | - | 가치 있지만 즉시 필요하지 않은 기능 | 미래 기능 | +| 기능 유형 | 우선순위 | +| ------------------------------------------------------------ | --------------- | +| 팀 구성원에 의해 레이블이 지정된 고우선순위 기능 | 높은 우선순위 | +| 우리의 [커뮤니티 피드백 보드](https://github.com/langgenius/dify/discussions/categories/feedbacks)에서 인기 있는 기능 요청 | 중간 우선순위 | +| 비핵심 기능 및 사소한 개선 | 낮은 우선순위 | +| 가치 있지만 즉시 필요하지 않은 기능 | 미래 기능 | + ## PR 제출하기 ### Pull Request 프로세스 1. 저장소를 포크하세요 -2. PR을 작성하기 전에, 변경하고자 하는 내용에 대해 논의하기 위한 이슈를 생성해 주세요 -3. 변경 사항을 위한 새 브랜치를 만드세요 -4. 
변경 사항에 대한 테스트를 적절히 추가해 주세요 -5. 코드가 기존 테스트를 통과하는지 확인하세요 -6. PR 설명에 이슈를 연결해 주세요, `fixes #<이슈_번호>` -7. 병합 완료! +1. PR을 작성하기 전에, 변경하고자 하는 내용에 대해 논의하기 위한 이슈를 생성해 주세요 +1. 변경 사항을 위한 새 브랜치를 만드세요 +1. 변경 사항에 대한 테스트를 적절히 추가해 주세요 +1. 코드가 기존 테스트를 통과하는지 확인하세요 +1. PR 설명에 이슈를 연결해 주세요, `fixes #<이슈_번호>` +1. 병합 완료! + ### 프로젝트 설정하기 #### 프론트엔드 @@ -82,12 +84,14 @@ PR 설명에 기존 이슈를 연결하거나 새 이슈를 여는 것을 잊지 #### 기타 참고 사항 설정을 진행하기 전에 이 문서를 주의 깊게 검토하는 것을 권장합니다. 다음과 같은 필수 정보가 포함되어 있습니다: + - 필수 조건 및 종속성 - 설치 단계 - 구성 세부 정보 - 일반적인 문제 해결 팁 설정 과정에서 문제가 발생하면 언제든지 연락해 주세요. + ## 도움 받기 -기여하는 동안 막히거나 긴급한 질문이 있으면, 관련 GitHub 이슈를 통해 질문을 보내거나, 빠른 대화를 위해 우리의 [Discord](https://discord.gg/8Tpq4AcN9c)에 참여하세요. +기여하는 동안 막히거나 긴급한 질문이 있으면, 관련 GitHub 이슈를 통해 질문을 보내거나, 빠른 대화를 위해 우리의 [Discord](https://discord.gg/8Tpq4AcN9c)에 참여하세요. diff --git a/CONTRIBUTING_PT.md b/CONTRIBUTING_PT.md index 7347fd7f9c..aeabcad51f 100644 --- a/CONTRIBUTING_PT.md +++ b/CONTRIBUTING_PT.md @@ -34,11 +34,11 @@ Não se esqueça de vincular um problema existente ou abrir um novo problema na Como priorizamos: - | Tipo de Problema | Prioridade | - | ------------------------------------------------------------ | --------------- | - | Bugs em funções centrais (serviço em nuvem, não conseguir fazer login, aplicações não funcionando, falhas de segurança) | Crítica | - | Bugs não críticos, melhorias de desempenho | Prioridade Média | - | Correções menores (erros de digitação, interface confusa mas funcional) | Prioridade Baixa | +| Tipo de Problema | Prioridade | +| ------------------------------------------------------------ | --------------- | +| Bugs em funções centrais (serviço em nuvem, não conseguir fazer login, aplicações não funcionando, falhas de segurança) | Crítica | +| Bugs não críticos, melhorias de desempenho | Prioridade Média | +| Correções menores (erros de digitação, interface confusa mas funcional) | Prioridade Baixa | ### Solicitações de recursos @@ -52,23 +52,25 @@ Como priorizamos: Como priorizamos: - | Tipo de Recurso | Prioridade | - 
| ------------------------------------------------------------ | --------------- | - | Recursos de alta prioridade conforme rotulado por um membro da equipe | Prioridade Alta | - | Solicitações populares de recursos do nosso [quadro de feedback da comunidade](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Prioridade Média | - | Recursos não essenciais e melhorias menores | Prioridade Baixa | - | Valiosos mas não imediatos | Recurso Futuro | +| Tipo de Recurso | Prioridade | +| ------------------------------------------------------------ | --------------- | +| Recursos de alta prioridade conforme rotulado por um membro da equipe | Prioridade Alta | +| Solicitações populares de recursos do nosso [quadro de feedback da comunidade](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Prioridade Média | +| Recursos não essenciais e melhorias menores | Prioridade Baixa | +| Valiosos mas não imediatos | Recurso Futuro | + ## Enviando seu PR ### Processo de Pull Request 1. Faça um fork do repositório -2. Antes de elaborar um PR, por favor crie um problema para discutir as mudanças que você quer fazer -3. Crie um novo branch para suas alterações -4. Por favor, adicione testes para suas alterações conforme apropriado -5. Certifique-se de que seu código passa nos testes existentes -6. Por favor, vincule o problema na descrição do PR, `fixes #` -7. Faça o merge do seu código! +1. Antes de elaborar um PR, por favor crie um problema para discutir as mudanças que você quer fazer +1. Crie um novo branch para suas alterações +1. Por favor, adicione testes para suas alterações conforme apropriado +1. Certifique-se de que seu código passa nos testes existentes +1. Por favor, vincule o problema na descrição do PR, `fixes #` +1. Faça o merge do seu código! 
+ ### Configurando o projeto #### Frontend @@ -82,12 +84,14 @@ Para configurar o serviço backend, por favor consulte nossas [instruções deta #### Outras coisas a observar Recomendamos revisar este documento cuidadosamente antes de prosseguir com a configuração, pois ele contém informações essenciais sobre: + - Pré-requisitos e dependências - Etapas de instalação - Detalhes de configuração - Dicas comuns de solução de problemas Sinta-se à vontade para entrar em contato se encontrar quaisquer problemas durante o processo de configuração. + ## Obtendo Ajuda -Se você ficar preso ou tiver uma dúvida urgente enquanto contribui, simplesmente envie suas perguntas através do problema relacionado no GitHub, ou entre no nosso [Discord](https://discord.gg/8Tpq4AcN9c) para uma conversa rápida. +Se você ficar preso ou tiver uma dúvida urgente enquanto contribui, simplesmente envie suas perguntas através do problema relacionado no GitHub, ou entre no nosso [Discord](https://discord.gg/8Tpq4AcN9c) para uma conversa rápida. 
diff --git a/CONTRIBUTING_TR.md b/CONTRIBUTING_TR.md index 681f05689b..d016802a53 100644 --- a/CONTRIBUTING_TR.md +++ b/CONTRIBUTING_TR.md @@ -34,11 +34,11 @@ PR açıklamasında mevcut bir sorunu bağlamayı veya yeni bir sorun açmayı u Nasıl önceliklendiriyoruz: - | Sorun Türü | Öncelik | - | ------------------------------------------------------------ | --------------- | - | Temel işlevlerdeki hatalar (bulut hizmeti, giriş yapamama, çalışmayan uygulamalar, güvenlik açıkları) | Kritik | - | Kritik olmayan hatalar, performans artışları | Orta Öncelik | - | Küçük düzeltmeler (yazım hataları, kafa karıştırıcı ama çalışan UI) | Düşük Öncelik | +| Sorun Türü | Öncelik | +| ------------------------------------------------------------ | --------------- | +| Temel işlevlerdeki hatalar (bulut hizmeti, giriş yapamama, çalışmayan uygulamalar, güvenlik açıkları) | Kritik | +| Kritik olmayan hatalar, performans artışları | Orta Öncelik | +| Küçük düzeltmeler (yazım hataları, kafa karıştırıcı ama çalışan UI) | Düşük Öncelik | ### Özellik İstekleri @@ -52,23 +52,25 @@ Nasıl önceliklendiriyoruz: Nasıl önceliklendiriyoruz: - | Özellik Türü | Öncelik | - | ------------------------------------------------------------ | --------------- | - | Bir ekip üyesi tarafından etiketlenen Yüksek Öncelikli Özellikler | Yüksek Öncelik | - | [Topluluk geri bildirim panosundan](https://github.com/langgenius/dify/discussions/categories/feedbacks) popüler özellik istekleri | Orta Öncelik | - | Temel olmayan özellikler ve küçük geliştirmeler | Düşük Öncelik | - | Değerli ama acil olmayan | Gelecek-Özellik | +| Özellik Türü | Öncelik | +| ------------------------------------------------------------ | --------------- | +| Bir ekip üyesi tarafından etiketlenen Yüksek Öncelikli Özellikler | Yüksek Öncelik | +| [Topluluk geri bildirim panosundan](https://github.com/langgenius/dify/discussions/categories/feedbacks) popüler özellik istekleri | Orta Öncelik | +| Temel olmayan özellikler ve küçük geliştirmeler | 
Düşük Öncelik | +| Değerli ama acil olmayan | Gelecek-Özellik | + ## PR'nizi Göndermek ### Pull Request Süreci 1. Depoyu fork edin -2. Bir PR taslağı oluşturmadan önce, yapmak istediğiniz değişiklikleri tartışmak için lütfen bir sorun oluşturun -3. Değişiklikleriniz için yeni bir dal oluşturun -4. Lütfen değişiklikleriniz için uygun testler ekleyin -5. Kodunuzun mevcut testleri geçtiğinden emin olun -6. Lütfen PR açıklamasında sorunu bağlayın, `fixes #` -7. Kodunuzu birleştirin! +1. Bir PR taslağı oluşturmadan önce, yapmak istediğiniz değişiklikleri tartışmak için lütfen bir sorun oluşturun +1. Değişiklikleriniz için yeni bir dal oluşturun +1. Lütfen değişiklikleriniz için uygun testler ekleyin +1. Kodunuzun mevcut testleri geçtiğinden emin olun +1. Lütfen PR açıklamasında sorunu bağlayın, `fixes #` +1. Kodunuzu birleştirin! + ### Projeyi Kurma #### Frontend @@ -82,12 +84,14 @@ Backend hizmetini kurmak için, lütfen `api/README.md` dosyasındaki detaylı [ #### Dikkat Edilecek Diğer Şeyler Kuruluma geçmeden önce bu belgeyi dikkatlice incelemenizi öneririz, çünkü şunlar hakkında temel bilgiler içerir: + - Ön koşullar ve bağımlılıklar - Kurulum adımları - Yapılandırma detayları - Yaygın sorun giderme ipuçları Kurulum süreci sırasında herhangi bir sorunla karşılaşırsanız bizimle iletişime geçmekten çekinmeyin. + ## Yardım Almak -Katkıda bulunurken takılırsanız veya yanıcı bir sorunuz olursa, sorularınızı ilgili GitHub sorunu aracılığıyla bize gönderin veya hızlı bir sohbet için [Discord'umuza](https://discord.gg/8Tpq4AcN9c) katılın. +Katkıda bulunurken takılırsanız veya acil bir sorunuz olursa, sorularınızı ilgili GitHub sorunu aracılığıyla bize gönderin veya hızlı bir sohbet için [Discord'umuza](https://discord.gg/8Tpq4AcN9c) katılın. 
diff --git a/CONTRIBUTING_TW.md b/CONTRIBUTING_TW.md index a61ea918c5..5c4d7022fe 100644 --- a/CONTRIBUTING_TW.md +++ b/CONTRIBUTING_TW.md @@ -22,7 +22,7 @@ ### 錯誤回報 -> [!IMPORTANT] +> [!IMPORTANT]\ > 提交錯誤回報時,請務必包含以下資訊: - 清晰明確的標題 @@ -34,15 +34,15 @@ 優先順序評估: - | 議題類型 | 優先級 | - | -------- | ------ | - | 核心功能錯誤(雲端服務、無法登入、應用程式無法運作、安全漏洞) | 緊急 | - | 非緊急錯誤、效能優化 | 中等 | - | 次要修正(拼字錯誤、介面混淆但可運作) | 低 | +| 議題類型 | 優先級 | +| -------- | ------ | +| 核心功能錯誤(雲端服務、無法登入、應用程式無法運作、安全漏洞) | 緊急 | +| 非緊急錯誤、效能優化 | 中等 | +| 次要修正(拼字錯誤、介面混淆但可運作) | 低 | ### 功能請求 -> [!NOTE] +> [!NOTE]\ > 提交功能請求時,請務必包含以下資訊: - 清晰明確的標題 @@ -52,24 +52,24 @@ 優先順序評估: - | 功能類型 | 優先級 | - | -------- | ------ | - | 團隊成員標記為高優先級的功能 | 高 | - | 來自[社群回饋板](https://github.com/langgenius/dify/discussions/categories/feedbacks)的熱門功能請求 | 中 | - | 非核心功能和小幅改進 | 低 | - | 有價值但非急迫的功能 | 未來功能 | +| 功能類型 | 優先級 | +| -------- | ------ | +| 團隊成員標記為高優先級的功能 | 高 | +| 來自[社群回饋板](https://github.com/langgenius/dify/discussions/categories/feedbacks)的熱門功能請求 | 中 | +| 非核心功能和小幅改進 | 低 | +| 有價值但非急迫的功能 | 未來功能 | ## 提交 PR ### PR 流程 1. Fork 專案 -2. 在開始撰寫 PR 前,請先建立議題討論你想做的更改 -3. 為你的更改建立新分支 -4. 請為你的更改新增相應的測試 -5. 確保你的程式碼通過現有測試 -6. 請在 PR 描述中連結相關議題,使用 `fixes #` -7. 等待合併! +1. 在開始撰寫 PR 前,請先建立議題討論你想做的更改 +1. 為你的更改建立新分支 +1. 請為你的更改新增相應的測試 +1. 確保你的程式碼通過現有測試 +1. 請在 PR 描述中連結相關議題,使用 `fixes #` +1. 等待合併! ### 專案設定 @@ -84,6 +84,7 @@ #### 其他注意事項 我們建議在開始設定前仔細閱讀此文件,因為它包含以下重要資訊: + - 前置需求和相依性 - 安裝步驟 - 設定細節 @@ -94,4 +95,3 @@ ## 尋求協助 如果你在貢獻過程中遇到困難或有急切的問題,可以透過相關的 GitHub 議題詢問,或加入我們的 [Discord](https://discord.gg/8Tpq4AcN9c) 進行即時交流。 - diff --git a/CONTRIBUTING_VI.md b/CONTRIBUTING_VI.md index 807054acce..2ad431296a 100644 --- a/CONTRIBUTING_VI.md +++ b/CONTRIBUTING_VI.md @@ -22,7 +22,7 @@ Hãy tham gia, đóng góp và cùng nhau xây dựng điều tuyệt vời! ### Báo cáo lỗi -> [!QUAN TRỌNG] +> [!QUAN TRỌNG]\ > Vui lòng đảm bảo cung cấp các thông tin sau khi gửi báo cáo lỗi: - Tiêu đề rõ ràng và mô tả @@ -34,11 +34,11 @@ Hãy tham gia, đóng góp và cùng nhau xây dựng điều tuyệt vời! 
Cách chúng tôi ưu tiên: - | Loại vấn đề | Mức độ ưu tiên | - | ----------- | -------------- | - | Lỗi trong các chức năng cốt lõi (dịch vụ đám mây, không thể đăng nhập, ứng dụng không hoạt động, lỗ hổng bảo mật) | Quan trọng | - | Lỗi không nghiêm trọng, cải thiện hiệu suất | Ưu tiên trung bình | - | Sửa lỗi nhỏ (lỗi chính tả, UI gây nhầm lẫn nhưng vẫn hoạt động) | Ưu tiên thấp | +| Loại vấn đề | Mức độ ưu tiên | +| ----------- | -------------- | +| Lỗi trong các chức năng cốt lõi (dịch vụ đám mây, không thể đăng nhập, ứng dụng không hoạt động, lỗ hổng bảo mật) | Quan trọng | +| Lỗi không nghiêm trọng, cải thiện hiệu suất | Ưu tiên trung bình | +| Sửa lỗi nhỏ (lỗi chính tả, UI gây nhầm lẫn nhưng vẫn hoạt động) | Ưu tiên thấp | ### Yêu cầu tính năng @@ -52,24 +52,24 @@ Cách chúng tôi ưu tiên: Cách chúng tôi ưu tiên: - | Loại tính năng | Mức độ ưu tiên | - | -------------- | -------------- | - | Tính năng ưu tiên cao được gắn nhãn bởi thành viên nhóm | Ưu tiên cao | - | Yêu cầu tính năng phổ biến từ [bảng phản hồi cộng đồng](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Ưu tiên trung bình | - | Tính năng không cốt lõi và cải tiến nhỏ | Ưu tiên thấp | - | Có giá trị nhưng không cấp bách | Tính năng tương lai | +| Loại tính năng | Mức độ ưu tiên | +| -------------- | -------------- | +| Tính năng ưu tiên cao được gắn nhãn bởi thành viên nhóm | Ưu tiên cao | +| Yêu cầu tính năng phổ biến từ [bảng phản hồi cộng đồng](https://github.com/langgenius/dify/discussions/categories/feedbacks) | Ưu tiên trung bình | +| Tính năng không cốt lõi và cải tiến nhỏ | Ưu tiên thấp | +| Có giá trị nhưng không cấp bách | Tính năng tương lai | ## Gửi PR của bạn ### Quy trình tạo Pull Request 1. Fork repository -2. Trước khi soạn PR, vui lòng tạo issue để thảo luận về các thay đổi bạn muốn thực hiện -3. Tạo nhánh mới cho các thay đổi của bạn -4. Vui lòng thêm test cho các thay đổi tương ứng -5. Đảm bảo code của bạn vượt qua các test hiện có -6. 
Vui lòng liên kết issue trong mô tả PR, `fixes #` -7. Được merge! +1. Trước khi soạn PR, vui lòng tạo issue để thảo luận về các thay đổi bạn muốn thực hiện +1. Tạo nhánh mới cho các thay đổi của bạn +1. Vui lòng thêm test cho các thay đổi tương ứng +1. Đảm bảo code của bạn vượt qua các test hiện có +1. Vui lòng liên kết issue trong mô tả PR, `fixes #` +1. Được merge! ### Thiết lập dự án @@ -84,6 +84,7 @@ Cách chúng tôi ưu tiên: #### Các điểm cần lưu ý khác Chúng tôi khuyến nghị xem xét kỹ tài liệu này trước khi tiến hành thiết lập, vì nó chứa thông tin thiết yếu về: + - Điều kiện tiên quyết và dependencies - Các bước cài đặt - Chi tiết cấu hình @@ -94,4 +95,3 @@ Chúng tôi khuyến nghị xem xét kỹ tài liệu này trước khi tiến h ## Nhận trợ giúp Nếu bạn bị mắc kẹt hoặc có câu hỏi cấp bách trong quá trình đóng góp, chỉ cần gửi câu hỏi của bạn thông qua issue GitHub liên quan, hoặc tham gia [Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi để trò chuyện nhanh. - diff --git a/README.md b/README.md index 80e44b0728..90da1d3def 100644 --- a/README.md +++ b/README.md @@ -107,74 +107,6 @@ Monitor and analyze application logs and performance over time. You could contin **7. Backend-as-a-Service**: All of Dify's offerings come with corresponding APIs, so you could effortlessly integrate Dify into your own business logic. -## Feature Comparison - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FeatureDify.AILangChainFlowiseOpenAI Assistants API
Programming ApproachAPI + App-orientedPython CodeApp-orientedAPI-oriented
Supported LLMsRich VarietyRich VarietyRich VarietyOpenAI-only
RAG Engine
Agent
Workflow
Observability
Enterprise Feature (SSO/Access control)
Local Deployment
- ## Using Dify - **Cloud
** @@ -185,7 +117,8 @@ All of Dify's offerings come with corresponding APIs, so you could effortlessly Use our [documentation](https://docs.dify.ai) for further references and more in-depth instructions. - **Dify for enterprise / organizations
** - We provide additional enterprise-centric features. [Log your questions for us through this chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) or [send us an email](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) to discuss enterprise needs.
+ We provide additional enterprise-centric features. [Log your questions for us through this chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) or [send us an email](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) to discuss enterprise needs.
+ > For startups and small businesses using AWS, check out [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one click. It's an affordable AMI offering with the option to create apps with custom logo and branding. ## Staying ahead @@ -230,16 +163,15 @@ Deploy Dify to AWS with [CDK](https://aws.amazon.com/cdk/) #### Using Alibaba Cloud Computing Nest -Quickly deploy Dify to Alibaba cloud with [Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) +Quickly deploy Dify to Alibaba cloud with [Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) #### Using Alibaba Cloud Data Management -One-Click deploy Dify to Alibaba Cloud with [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) +One-Click deploy Dify to Alibaba Cloud with [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) #### Deploy to AKS with Azure Devops Pipeline -One-Click deploy Dify to AKS with [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) - +One-Click deploy Dify to AKS with [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) ## Contributing diff --git a/README_AR.md b/README_AR.md index 9c8378d087..2451757ab5 100644 --- a/README_AR.md +++ b/README_AR.md @@ -52,7 +52,7 @@ مشروع Dify هو منصة تطوير تطبيقات الذكاء الصناعي مفتوحة المصدر. تجمع واجهته البديهية بين سير العمل الذكي بالذكاء الاصطناعي وخط أنابيب RAG وقدرات الوكيل وإدارة النماذج وميزات الملاحظة وأكثر من ذلك، مما يتيح لك الانتقال بسرعة من المرحلة التجريبية إلى الإنتاج. إليك قائمة بالميزات الأساسية:

-**1. سير العمل**: قم ببناء واختبار سير عمل الذكاء الاصطناعي القوي على قماش بصري، مستفيدًا من جميع الميزات التالية وأكثر. +**1. سير العمل**: قم ببناء واختبار سير عمل الذكاء الاصطناعي القوي على قماش بصري، مستفيدًا من جميع الميزات التالية وأكثر. **2. الدعم الشامل للنماذج**: تكامل سلس مع مئات من LLMs الخاصة / مفتوحة المصدر من عشرات من موفري التحليل والحلول المستضافة ذاتيًا، مما يغطي GPT و Mistral و Llama3 وأي نماذج متوافقة مع واجهة OpenAI API. يمكن العثور على قائمة كاملة بمزودي النموذج المدعومين [هنا](https://docs.dify.ai/getting-started/readme/model-providers). @@ -68,88 +68,20 @@ **7.الواجهة الخلفية (Backend) كخدمة**: تأتي جميع عروض Dify مع APIs مطابقة، حتى يمكنك دمج Dify بسهولة في منطق أعمالك الخاص. -## مقارنة الميزات - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
الميزةDify.AILangChainFlowiseOpenAI Assistants API
نهج البرمجةموجّه لـ تطبيق + واجهة برمجة تطبيق (API)برمجة Pythonموجه لتطبيقواجهة برمجة تطبيق (API)
LLMs المدعومةتنوع غنيتنوع غنيتنوع غنيفقط OpenAI
محرك RAG
الوكيل
سير العمل
الملاحظة
ميزات الشركات (SSO / مراقبة الوصول)
نشر محلي
- ## استخدام Dify - **سحابة
** -نحن نستضيف [خدمة Dify Cloud](https://dify.ai) لأي شخص لتجربتها بدون أي إعدادات. توفر كل قدرات النسخة التي تمت استضافتها ذاتيًا، وتتضمن 200 أمر GPT-4 مجانًا في خطة الصندوق الرملي. + نحن نستضيف [خدمة Dify Cloud](https://dify.ai) لأي شخص لتجربتها بدون أي إعدادات. توفر كل قدرات النسخة التي تمت استضافتها ذاتيًا، وتتضمن 200 أمر GPT-4 مجانًا في خطة الصندوق الرملي. - **استضافة ذاتية لنسخة المجتمع Dify
** -ابدأ سريعًا في تشغيل Dify في بيئتك باستخدام [دليل البدء السريع](#البدء السريع). -استخدم [توثيقنا](https://docs.dify.ai) للمزيد من المراجع والتعليمات الأعمق. + ابدأ سريعًا في تشغيل Dify في بيئتك باستخدام [دليل البدء السريع](#البداية-السريعة). + استخدم [توثيقنا](https://docs.dify.ai) للمزيد من المراجع والتعليمات الأعمق. - **مشروع Dify للشركات / المؤسسات
** -نحن نوفر ميزات إضافية مركزة على الشركات. [جدول اجتماع معنا](https://cal.com/guchenhe/30min) أو [أرسل لنا بريدًا إلكترونيًا](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) لمناقشة احتياجات الشركات.
+ نحن نوفر ميزات إضافية مركزة على الشركات. [جدول اجتماع معنا](https://cal.com/guchenhe/30min) أو [أرسل لنا بريدًا إلكترونيًا](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) لمناقشة احتياجات الشركات.
> بالنسبة للشركات الناشئة والشركات الصغيرة التي تستخدم خدمات AWS، تحقق من [Dify Premium على AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) ونشرها في شبكتك الخاصة على AWS VPC بنقرة واحدة. إنها عرض AMI بأسعار معقولة مع خيار إنشاء تطبيقات بشعار وعلامة تجارية مخصصة. -> + ## البقاء قدمًا قم بإضافة نجمة إلى Dify على GitHub وتلق تنبيهًا فوريًا بالإصدارات الجديدة. @@ -157,11 +89,11 @@ ![نجمنا](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) ## البداية السريعة -> + > قبل تثبيت Dify، تأكد من أن جهازك يلبي الحد الأدنى من متطلبات النظام التالية: > ->- معالج >= 2 نواة ->- ذاكرة وصول عشوائي (RAM) >= 4 جيجابايت +> - معالج >= 2 نواة +> - ذاكرة وصول عشوائي (RAM) >= 4 جيجابايت
@@ -212,8 +144,9 @@ docker compose up -d - [AWS CDK بواسطة @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) #### استخدام Alibaba Cloud للنشر - [بسرعة نشر Dify إلى سحابة علي بابا مع عش الحوسبة السحابية علي بابا](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) - + +[بسرعة نشر Dify إلى سحابة علي بابا مع عش الحوسبة السحابية علي بابا](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) + #### استخدام Alibaba Cloud Data Management للنشر انشر ​​Dify على علي بابا كلاود بنقرة واحدة باستخدام [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) @@ -222,7 +155,6 @@ docker compose up -d انشر Dify على AKS بنقرة واحدة باستخدام [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) - ## المساهمة لأولئك الذين يرغبون في المساهمة، انظر إلى [دليل المساهمة](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) لدينا. @@ -237,6 +169,7 @@ docker compose up -d ## المجتمع والاتصال + - [مناقشة GitHub](https://github.com/langgenius/dify/discussions). الأفضل لـ: مشاركة التعليقات وطرح الأسئلة. - [المشكلات على GitHub](https://github.com/langgenius/dify/issues). الأفضل لـ: الأخطاء التي تواجهها في استخدام Dify.AI، واقتراحات الميزات. انظر [دليل المساهمة](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). - [Discord](https://discord.gg/FngNHpbcY7). الأفضل لـ: مشاركة تطبيقاتك والترفيه مع المجتمع. 
diff --git a/README_BN.md b/README_BN.md index a31aafdf56..ef24dea171 100644 --- a/README_BN.md +++ b/README_BN.md @@ -56,133 +56,67 @@ ডিফাই একটি ওপেন-সোর্স LLM অ্যাপ ডেভেলপমেন্ট প্ল্যাটফর্ম। এটি ইন্টুইটিভ ইন্টারফেস, এজেন্টিক AI ওয়ার্কফ্লো, RAG পাইপলাইন, এজেন্ট ক্যাপাবিলিটি, মডেল ম্যানেজমেন্ট, মনিটরিং সুবিধা এবং আরও অনেক কিছু একত্রিত করে, যা দ্রুত প্রোটোটাইপ থেকে প্রোডাকশন পর্যন্ত নিয়ে যেতে সহায়তা করে। ## কুইক স্টার্ট + +> ডিফাই ইনস্টল করার আগে, নিশ্চিত করুন যে আপনার মেশিন নিম্নলিখিত ন্যূনতম কনফিগারেশনের প্রয়োজনীয়তা পূরন করে : > -> ডিফাই ইনস্টল করার আগে, নিশ্চিত করুন যে আপনার মেশিন নিম্নলিখিত ন্যূনতম কনফিগারেশনের প্রয়োজনীয়তা পূরন করে : -> ->- সিপিউ >= 2 কোর ->- র‍্যাম >= 4 জিবি +> - সিপিউ >= 2 কোর +> - র‍্যাম >= 4 জিবি
ডিফাই সার্ভার চালু করার সবচেয়ে সহজ উপায় [docker compose](docker/docker-compose.yaml) মাধ্যমে। নিম্নলিখিত কমান্ডগুলো ব্যবহার করে ডিফাই চালানোর আগে, নিশ্চিত করুন যে আপনার মেশিনে [Docker](https://docs.docker.com/get-docker/) এবং [Docker Compose](https://docs.docker.com/compose/install/) ইনস্টল করা আছে : + ```bash cd dify cd docker cp .env.example .env docker compose up -d ``` + চালানোর পর, আপনি আপনার ব্রাউজারে [http://localhost/install](http://localhost/install)-এ ডিফাই ড্যাশবোর্ডে অ্যাক্সেস করতে পারেন এবং ইনিশিয়ালাইজেশন প্রক্রিয়া শুরু করতে পারেন। #### সাহায্যের খোঁজে -ডিফাই সেট আপ করতে সমস্যা হলে দয়া করে আমাদের [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs) দেখুন। যদি তবুও সমস্যা থেকে থাকে, তাহলে [কমিউনিটি এবং আমাদের](#community--contact) সাথে যোগাযোগ করুন। +ডিফাই সেট আপ করতে সমস্যা হলে দয়া করে আমাদের [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs) দেখুন। যদি তবুও সমস্যা থেকে থাকে, তাহলে [কমিউনিটি এবং আমাদের](#community--contact) সাথে যোগাযোগ করুন। > যদি আপনি ডিফাইতে অবদান রাখতে বা অতিরিক্ত উন্নয়ন করতে চান, আমাদের [সোর্স কোড থেকে ডিপ্লয়মেন্টের গাইড](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code) দেখুন। ## প্রধান ফিচারসমূহ **১. ওয়ার্কফ্লো**: - ভিজ্যুয়াল ক্যানভাসে AI ওয়ার্কফ্লো তৈরি এবং পরীক্ষা করুন, নিম্নলিখিত সব ফিচার এবং তার বাইরেও আরও অনেক কিছু ব্যবহার করে। +ভিজ্যুয়াল ক্যানভাসে AI ওয়ার্কফ্লো তৈরি এবং পরীক্ষা করুন, নিম্নলিখিত সব ফিচার এবং তার বাইরেও আরও অনেক কিছু ব্যবহার করে। -**২. মডেল সাপোর্ট**: - GPT, Mistral, Llama3, এবং যেকোনো OpenAI API-সামঞ্জস্যপূর্ণ মডেলসহ, কয়েক ডজন ইনফারেন্স প্রদানকারী এবং সেল্ফ-হোস্টেড সমাধান থেকে শুরু করে প্রোপ্রাইটরি/ওপেন-সোর্স LLM-এর সাথে সহজে ইন্টিগ্রেশন। সমর্থিত মডেল প্রদানকারীদের একটি সম্পূর্ণ তালিকা পাওয়া যাবে [এখানে](https://docs.dify.ai/getting-started/readme/model-providers)। +**২. 
মডেল সাপোর্ট**: +GPT, Mistral, Llama3, এবং যেকোনো OpenAI API-সামঞ্জস্যপূর্ণ মডেলসহ, কয়েক ডজন ইনফারেন্স প্রদানকারী এবং সেল্ফ-হোস্টেড সমাধান থেকে শুরু করে প্রোপ্রাইটরি/ওপেন-সোর্স LLM-এর সাথে সহজে ইন্টিগ্রেশন। সমর্থিত মডেল প্রদানকারীদের একটি সম্পূর্ণ তালিকা পাওয়া যাবে [এখানে](https://docs.dify.ai/getting-started/readme/model-providers)। ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) -**3. প্রম্পট IDE**: - প্রম্পট তৈরি, মডেলের পারফরম্যান্স তুলনা এবং চ্যাট-বেজড অ্যাপে টেক্সট-টু-স্পিচের মতো বৈশিষ্ট্য যুক্ত করার জন্য ইন্টুইটিভ ইন্টারফেস। +**3. প্রম্পট IDE**: +প্রম্পট তৈরি, মডেলের পারফরম্যান্স তুলনা এবং চ্যাট-বেজড অ্যাপে টেক্সট-টু-স্পিচের মতো বৈশিষ্ট্য যুক্ত করার জন্য ইন্টুইটিভ ইন্টারফেস। **4. RAG পাইপলাইন**: - ডকুমেন্ট ইনজেশন থেকে শুরু করে রিট্রিভ পর্যন্ত সবকিছুই বিস্তৃত RAG ক্যাপাবিলিটির আওতাভুক্ত। PDF, PPT এবং অন্যান্য সাধারণ ডকুমেন্ট ফর্ম্যাট থেকে টেক্সট এক্সট্রাকশনের জন্য আউট-অফ-বক্স সাপোর্ট। +ডকুমেন্ট ইনজেশন থেকে শুরু করে রিট্রিভ পর্যন্ত সবকিছুই বিস্তৃত RAG ক্যাপাবিলিটির আওতাভুক্ত। PDF, PPT এবং অন্যান্য সাধারণ ডকুমেন্ট ফর্ম্যাট থেকে টেক্সট এক্সট্রাকশনের জন্য আউট-অফ-বক্স সাপোর্ট। -**5. এজেন্ট ক্যাপাবিলিটি**: - LLM ফাংশন কলিং বা ReAct উপর ভিত্তি করে এজেন্ট ডিফাইন করতে পারেন এবং এজেন্টের জন্য পূর্ব-নির্মিত বা কাস্টম টুলস যুক্ত করতে পারেন। Dify AI এজেন্টদের জন্য 50+ বিল্ট-ইন টুলস সরবরাহ করে, যেমন Google Search, DALL·E, Stable Diffusion এবং WolframAlpha। +**5. এজেন্ট ক্যাপাবিলিটি**: +LLM ফাংশন কলিং বা ReAct উপর ভিত্তি করে এজেন্ট ডিফাইন করতে পারেন এবং এজেন্টের জন্য পূর্ব-নির্মিত বা কাস্টম টুলস যুক্ত করতে পারেন। Dify AI এজেন্টদের জন্য 50+ বিল্ট-ইন টুলস সরবরাহ করে, যেমন Google Search, DALL·E, Stable Diffusion এবং WolframAlpha। -**6. এলএলএম-অপ্স**: - সময়ের সাথে সাথে অ্যাপ্লিকেশন লগ এবং পারফরম্যান্স মনিটর এবং বিশ্লেষণ করুন। প্রডাকশন ডেটা এবং annotation এর উপর ভিত্তি করে প্রম্পট, ডেটাসেট এবং মডেলগুলিকে ক্রমাগত উন্নত করতে পারেন। +**6. 
এলএলএম-অপ্স**: +সময়ের সাথে সাথে অ্যাপ্লিকেশন লগ এবং পারফরম্যান্স মনিটর এবং বিশ্লেষণ করুন। প্রডাকশন ডেটা এবং annotation এর উপর ভিত্তি করে প্রম্পট, ডেটাসেট এবং মডেলগুলিকে ক্রমাগত উন্নত করতে পারেন। **7. ব্যাকএন্ড-অ্যাজ-এ-সার্ভিস**: - ডিফাই-এর সমস্ত অফার সংশ্লিষ্ট API-সহ আছে, যাতে আপনি অনায়াসে ডিফাইকে আপনার নিজস্ব বিজনেস লজিকে ইন্টেগ্রেট করতে পারেন। +ডিফাই-এর সমস্ত অফার সংশ্লিষ্ট API-সহ আছে, যাতে আপনি অনায়াসে ডিফাইকে আপনার নিজস্ব বিজনেস লজিকে ইন্টেগ্রেট করতে পারেন। -## বৈশিষ্ট্য তুলনা - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
বৈশিষ্ট্যDify.AILangChainFlowiseOpenAI Assistants API
প্রোগ্রামিং পদ্ধতিAPI + App-orientedPython CodeApp-orientedAPI-oriented
সাপোর্টেড LLMsRich VarietyRich VarietyRich VarietyOpenAI-only
RAG ইঞ্জিন
এজেন্ট
ওয়ার্কফ্লো
অবজার্ভেবল
এন্টারপ্রাইজ ফিচার (SSO/Access control)
লোকাল ডেপ্লয়মেন্ট
- -## ডিফাই-এর ব্যবহার +## ডিফাই-এর ব্যবহার - **ক্লাউড
** -জিরো সেটাপে ব্যবহার করতে আমাদের [Dify Cloud](https://dify.ai) সার্ভিসটি ব্যবহার করতে পারেন। এখানে সেল্ফহোস্টিং-এর সকল ফিচার ও ক্যাপাবিলিটিসহ স্যান্ডবক্সে ২০০ জিপিটি-৪ কল ফ্রি পাবেন। + জিরো সেটাপে ব্যবহার করতে আমাদের [Dify Cloud](https://dify.ai) সার্ভিসটি ব্যবহার করতে পারেন। এখানে সেল্ফহোস্টিং-এর সকল ফিচার ও ক্যাপাবিলিটিসহ স্যান্ডবক্সে ২০০ জিপিটি-৪ কল ফ্রি পাবেন। - **সেল্ফহোস্টিং ডিফাই কমিউনিটি সংস্করণ
** -সেল্ফহোস্ট করতে এই [স্টার্টার গাইড](#quick-start) ব্যবহার করে দ্রুত আপনার এনভায়রনমেন্টে ডিফাই চালান। -আরো ইন-ডেপথ রেফারেন্সের জন্য [ডকুমেন্টেশন](https://docs.dify.ai) দেখেন। + সেল্ফহোস্ট করতে এই [স্টার্টার গাইড](#quick-start) ব্যবহার করে দ্রুত আপনার এনভায়রনমেন্টে ডিফাই চালান। + আরো ইন-ডেপথ রেফারেন্সের জন্য [ডকুমেন্টেশন](https://docs.dify.ai) দেখেন। - **এন্টারপ্রাইজ / প্রতিষ্ঠানের জন্য Dify
** -আমরা এন্টারপ্রাইজ/প্রতিষ্ঠান-কেন্দ্রিক সেবা প্রদান করে থাকি । [এই চ্যাটবটের মাধ্যমে আপনার প্রশ্নগুলি আমাদের জন্য লগ করুন।](https://udify.app/chat/22L1zSxg6yW1cWQg) অথবা [আমাদের ইমেল পাঠান](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) আপনার চাহিদা সম্পর্কে আলোচনা করার জন্য।
+ আমরা এন্টারপ্রাইজ/প্রতিষ্ঠান-কেন্দ্রিক সেবা প্রদান করে থাকি । [এই চ্যাটবটের মাধ্যমে আপনার প্রশ্নগুলি আমাদের জন্য লগ করুন।](https://udify.app/chat/22L1zSxg6yW1cWQg) অথবা [আমাদের ইমেল পাঠান](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) আপনার চাহিদা সম্পর্কে আলোচনা করার জন্য।
> AWS ব্যবহারকারী স্টার্টআপ এবং ছোট ব্যবসার জন্য, [AWS মার্কেটপ্লেসে Dify Premium](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) দেখুন এবং এক-ক্লিকের মাধ্যমে এটি আপনার নিজস্ব AWS VPC-তে ডিপ্লয় করুন। এটি একটি সাশ্রয়ী মূল্যের AMI অফার, যাতে কাস্টম লোগো এবং ব্র্যান্ডিং সহ অ্যাপ তৈরির সুবিধা আছে। @@ -194,10 +128,10 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন ## Advanced Setup -যদি আপনার কনফিগারেশনটি কাস্টমাইজ করার প্রয়োজন হয়, তাহলে অনুগ্রহ করে আমাদের [.env.example](docker/.env.example) ফাইল দেখুন এবং আপনার `.env` ফাইলে সংশ্লিষ্ট মানগুলি আপডেট করুন। এছাড়াও, আপনার নির্দিষ্ট এনভায়রনমেন্ট এবং প্রয়োজনীয়তার উপর ভিত্তি করে আপনাকে `docker-compose.yaml` ফাইলে সমন্বয় করতে হতে পারে, যেমন ইমেজ ভার্সন পরিবর্তন করা, পোর্ট ম্যাপিং করা, অথবা ভলিউম মাউন্ট করা। +যদি আপনার কনফিগারেশনটি কাস্টমাইজ করার প্রয়োজন হয়, তাহলে অনুগ্রহ করে আমাদের [.env.example](docker/.env.example) ফাইল দেখুন এবং আপনার `.env` ফাইলে সংশ্লিষ্ট মানগুলি আপডেট করুন। এছাড়াও, আপনার নির্দিষ্ট এনভায়রনমেন্ট এবং প্রয়োজনীয়তার উপর ভিত্তি করে আপনাকে `docker-compose.yaml` ফাইলে সমন্বয় করতে হতে পারে, যেমন ইমেজ ভার্সন পরিবর্তন করা, পোর্ট ম্যাপিং করা, অথবা ভলিউম মাউন্ট করা। যেকোনো পরিবর্তন করার পর, অনুগ্রহ করে `docker-compose up -d` পুনরায় চালান। ভেরিয়েবলের সম্পূর্ণ তালিকা [এখানে] (https://docs.dify.ai/getting-started/install-self-hosted/environments) খুঁজে পেতে পারেন। -যদি আপনি একটি হাইলি এভেইলেবল সেটআপ কনফিগার করতে চান, তাহলে কমিউনিটি [Helm Charts](https://helm.sh/) এবং YAML ফাইল রয়েছে যা Dify কে Kubernetes-এ ডিপ্লয় করার প্রক্রিয়া বর্ণনা করে। +যদি আপনি একটি হাইলি এভেইলেবল সেটআপ কনফিগার করতে চান, তাহলে কমিউনিটি [Helm Charts](https://helm.sh/) এবং YAML ফাইল রয়েছে যা Dify কে Kubernetes-এ ডিপ্লয় করার প্রক্রিয়া বর্ণনা করে। - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify) - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm) @@ -206,7 +140,6 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন - [YAML file by 
@wyy-holding](https://github.com/wyy-holding/dify-k8s) - [🚀 নতুন! YAML ফাইলসমূহ (Dify v1.6.0 সমর্থিত) তৈরি করেছেন @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes) - #### টেরাফর্ম ব্যবহার করে ডিপ্লয় [terraform](https://www.terraform.io/) ব্যবহার করে এক ক্লিকেই ক্লাউড প্ল্যাটফর্মে Dify ডিপ্লয় করুন। @@ -230,17 +163,16 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন #### Alibaba Cloud ব্যবহার করে ডিপ্লয় - [Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) +[Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) #### Alibaba Cloud Data Management ব্যবহার করে ডিপ্লয় - [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) +[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) - #### AKS-এ ডিপ্লয় করার জন্য Azure Devops Pipeline ব্যবহার +#### AKS-এ ডিপ্লয় করার জন্য Azure Devops Pipeline ব্যবহার [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) ব্যবহার করে Dify কে AKS-এ এক ক্লিকে ডিপ্লয় করুন - ## Contributing যারা কোড অবদান রাখতে চান, তাদের জন্য আমাদের [অবদান নির্দেশিকা] দেখুন (https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)। @@ -251,9 +183,9 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন ## কমিউনিটি এবং যোগাযোগ - [GitHub Discussion](https://github.com/langgenius/dify/discussions) ফিডব্যাক এবং প্রতিক্রিয়া জানানোর মাধ্যম। -- [GitHub Issues](https://github.com/langgenius/dify/issues). 
Dify.AI ব্যবহার করে আপনি যেসব বাগের সম্মুখীন হন এবং ফিচার প্রস্তাবনা। আমাদের [অবদান নির্দেশিকা](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) দেখুন। -- [Discord](https://discord.gg/FngNHpbcY7) আপনার এপ্লিকেশন শেয়ার এবং কমিউনিটি আড্ডার মাধ্যম। -- [X(Twitter)](https://twitter.com/dify_ai) আপনার এপ্লিকেশন শেয়ার এবং কমিউনিটি আড্ডার মাধ্যম। +- [GitHub Issues](https://github.com/langgenius/dify/issues). Dify.AI ব্যবহার করে আপনি যেসব বাগের সম্মুখীন হন এবং ফিচার প্রস্তাবনা। আমাদের [অবদান নির্দেশিকা](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) দেখুন। +- [Discord](https://discord.gg/FngNHpbcY7) আপনার এপ্লিকেশন শেয়ার এবং কমিউনিটি আড্ডার মাধ্যম। +- [X(Twitter)](https://twitter.com/dify_ai) আপনার এপ্লিকেশন শেয়ার এবং কমিউনিটি আড্ডার মাধ্যম। **অবদানকারীদের তালিকা** @@ -265,7 +197,7 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন [![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) -## নিরাপত্তা বিষয়ক +## নিরাপত্তা বিষয়ক আপনার গোপনীয়তা রক্ষা করতে, অনুগ্রহ করে GitHub-এ নিরাপত্তা সংক্রান্ত সমস্যা পোস্ট করা এড়িয়ে চলুন। পরিবর্তে, আপনার প্রশ্নগুলি ঠিকানায় পাঠান এবং আমরা আপনাকে আরও বিস্তারিত উত্তর প্রদান করব। diff --git a/README_CN.md b/README_CN.md index 0698693429..2949b38867 100644 --- a/README_CN.md +++ b/README_CN.md @@ -48,8 +48,7 @@ README in বাংলা - -# +#
langgenius%2Fdify | 趋势转变 @@ -58,109 +57,41 @@ Dify 是一个开源的 LLM 应用开发平台。其直观的界面结合了 AI 工作流、RAG 管道、Agent、模型管理、可观测性功能等,让您可以快速从原型到生产。以下是其核心功能列表:

-**1. 工作流**: - 在画布上构建和测试功能强大的 AI 工作流程,利用以下所有功能以及更多功能。 +**1. 工作流**: +在画布上构建和测试功能强大的 AI 工作流程,利用以下所有功能以及更多功能。 -**2. 全面的模型支持**: - 与数百种专有/开源 LLMs 以及数十种推理提供商和自托管解决方案无缝集成,涵盖 GPT、Mistral、Llama3 以及任何与 OpenAI API 兼容的模型。完整的支持模型提供商列表可在[此处](https://docs.dify.ai/getting-started/readme/model-providers)找到。 +**2. 全面的模型支持**: +与数百种专有/开源 LLMs 以及数十种推理提供商和自托管解决方案无缝集成,涵盖 GPT、Mistral、Llama3 以及任何与 OpenAI API 兼容的模型。完整的支持模型提供商列表可在[此处](https://docs.dify.ai/getting-started/readme/model-providers)找到。 ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. Prompt IDE**: +用于制作提示、比较模型性能以及向基于聊天的应用程序添加其他功能(如文本转语音)的直观界面。 -**3. Prompt IDE**: - 用于制作提示、比较模型性能以及向基于聊天的应用程序添加其他功能(如文本转语音)的直观界面。 +**4. RAG Pipeline**: +广泛的 RAG 功能,涵盖从文档摄入到检索的所有内容,支持从 PDF、PPT 和其他常见文档格式中提取文本的开箱即用的支持。 -**4. RAG Pipeline**: - 广泛的 RAG 功能,涵盖从文档摄入到检索的所有内容,支持从 PDF、PPT 和其他常见文档格式中提取文本的开箱即用的支持。 +**5. Agent 智能体**: +您可以基于 LLM 函数调用或 ReAct 定义 Agent,并为 Agent 添加预构建或自定义工具。Dify 为 AI Agent 提供了 50 多种内置工具,如谷歌搜索、DALL·E、Stable Diffusion 和 WolframAlpha 等。 -**5. Agent 智能体**: - 您可以基于 LLM 函数调用或 ReAct 定义 Agent,并为 Agent 添加预构建或自定义工具。Dify 为 AI Agent 提供了 50 多种内置工具,如谷歌搜索、DALL·E、Stable Diffusion 和 WolframAlpha 等。 +**6. LLMOps**: +随时间监视和分析应用程序日志和性能。您可以根据生产数据和标注持续改进提示、数据集和模型。 -**6. LLMOps**: - 随时间监视和分析应用程序日志和性能。您可以根据生产数据和标注持续改进提示、数据集和模型。 - -**7. 后端即服务**: - 所有 Dify 的功能都带有相应的 API,因此您可以轻松地将 Dify 集成到自己的业务逻辑中。 - - -## 功能比较 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
功能Dify.AILangChainFlowiseOpenAI Assistant API
编程方法API + 应用程序导向Python 代码应用程序导向API 导向
支持的 LLMs丰富多样丰富多样丰富多样仅限 OpenAI
RAG 引擎
Agent
工作流
可观测性
企业功能(SSO/访问控制)
本地部署
+**7. 后端即服务**: +所有 Dify 的功能都带有相应的 API,因此您可以轻松地将 Dify 集成到自己的业务逻辑中。 ## 使用 Dify - **云
** -我们提供[ Dify 云服务](https://dify.ai),任何人都可以零设置尝试。它提供了自部署版本的所有功能,并在沙盒计划中包含 200 次免费的 GPT-4 调用。 + 我们提供[ Dify 云服务](https://dify.ai),任何人都可以零设置尝试。它提供了自部署版本的所有功能,并在沙盒计划中包含 200 次免费的 GPT-4 调用。 - **自托管 Dify 社区版
** -使用这个[入门指南](#快速启动)快速在您的环境中运行 Dify。 -使用我们的[文档](https://docs.dify.ai)进行进一步的参考和更深入的说明。 + 使用这个[入门指南](#%E5%BF%AB%E9%80%9F%E5%90%AF%E5%8A%A8)快速在您的环境中运行 Dify。 + 使用我们的[文档](https://docs.dify.ai)进行进一步的参考和更深入的说明。 - **面向企业/组织的 Dify
** -我们提供额外的面向企业的功能。[给我们发送电子邮件](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry)讨论企业需求。
+ 我们提供额外的面向企业的功能。[给我们发送电子邮件](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry)讨论企业需求。
+ > 对于使用 AWS 的初创公司和中小型企业,请查看 [AWS Marketplace 上的 Dify 高级版](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6),并使用一键部署到您自己的 AWS VPC。它是一个价格实惠的 AMI 产品,提供了使用自定义徽标和品牌创建应用程序的选项。 ## 保持领先 @@ -199,30 +130,35 @@ docker compose up -d 使用 [Helm Chart](https://helm.sh/) 版本或者 Kubernetes 资源清单(YAML),可以在 Kubernetes 上部署 Dify。 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify) + - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm) + - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts) + - [YAML 文件 by @Winson-030](https://github.com/Winson-030/dify-kubernetes) + - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s) - [🚀 NEW! YAML 文件 (支持 Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes) - - #### 使用 Terraform 部署 使用 [terraform](https://www.terraform.io/) 一键将 Dify 部署到云平台 ##### Azure Global + - [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### 使用 AWS CDK 部署 使用 [CDK](https://aws.amazon.com/cdk/) 将 Dify 部署到 AWS -##### AWS +##### AWS + - [AWS CDK by @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK by @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -242,7 +178,6 @@ docker compose up -d [![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) - ## Contributing 对于那些想要贡献代码的人,请参阅我们的[贡献指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。 @@ -262,10 +197,10 @@ docker compose up -d - [GitHub Discussion](https://github.com/langgenius/dify/discussions). 
👉:分享您的应用程序并与社区交流。 - [GitHub Issues](https://github.com/langgenius/dify/issues)。👉:使用 Dify.AI 时遇到的错误和问题,请参阅[贡献指南](CONTRIBUTING.md)。 -- [电子邮件支持](mailto:hello@dify.ai?subject=[GitHub]Questions%20About%20Dify)。👉:关于使用 Dify.AI 的问题。 +- [电子邮件支持](mailto:hello@dify.ai?subject=%5BGitHub%5DQuestions%20About%20Dify)。👉:关于使用 Dify.AI 的问题。 - [Discord](https://discord.gg/FngNHpbcY7)。👉:分享您的应用程序并与社区交流。 - [X(Twitter)](https://twitter.com/dify_ai)。👉:分享您的应用程序并与社区交流。 -- [商业许可](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry)。👉:有关商业用途许可 Dify.AI 的商业咨询。 +- [商业许可](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry)。👉:有关商业用途许可 Dify.AI 的商业咨询。 ## 安全问题 diff --git a/README_DE.md b/README_DE.md index 392cc7885e..a593a12abf 100644 --- a/README_DE.md +++ b/README_DE.md @@ -56,10 +56,11 @@ Dify ist eine Open-Source-Plattform zur Entwicklung von LLM-Anwendungen. Ihre intuitive Benutzeroberfläche vereint agentenbasierte KI-Workflows, RAG-Pipelines, Agentenfunktionen, Modellverwaltung, Überwachungsfunktionen und mehr, sodass Sie schnell von einem Prototyp in die Produktion übergehen können. ## Schnellstart + > Bevor Sie Dify installieren, stellen Sie sicher, dass Ihr System die folgenden Mindestanforderungen erfüllt: -> ->- CPU >= 2 Core ->- RAM >= 4 GiB +> +> - CPU >= 2 Core +> - RAM >= 4 GiB
@@ -75,115 +76,48 @@ docker compose up -d Nachdem Sie den Server gestartet haben, können Sie über Ihren Browser auf das Dify Dashboard unter [http://localhost/install](http://localhost/install) zugreifen und den Initialisierungsprozess starten. #### Hilfe suchen + Bitte beachten Sie unsere [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs), wenn Sie Probleme bei der Einrichtung von Dify haben. Wenden Sie sich an [die Community und uns](#community--contact), falls weiterhin Schwierigkeiten auftreten. > Wenn Sie zu Dify beitragen oder zusätzliche Entwicklungen durchführen möchten, lesen Sie bitte unseren [Leitfaden zur Bereitstellung aus dem Quellcode](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code). ## Wesentliche Merkmale -**1. Workflow**: - Erstellen und testen Sie leistungsstarke KI-Workflows auf einer visuellen Oberfläche, wobei Sie alle der folgenden Funktionen und darüber hinaus nutzen können. -**2. Umfassende Modellunterstützung**: - Nahtlose Integration mit Hunderten von proprietären und Open-Source-LLMs von Dutzenden Inferenzanbietern und selbstgehosteten Lösungen, die GPT, Mistral, Llama3 und alle mit der OpenAI API kompatiblen Modelle abdecken. Eine vollständige Liste der unterstützten Modellanbieter finden Sie [hier](https://docs.dify.ai/getting-started/readme/model-providers). +**1. Workflow**: +Erstellen und testen Sie leistungsstarke KI-Workflows auf einer visuellen Oberfläche, wobei Sie alle der folgenden Funktionen und darüber hinaus nutzen können. +**2. Umfassende Modellunterstützung**: +Nahtlose Integration mit Hunderten von proprietären und Open-Source-LLMs von Dutzenden Inferenzanbietern und selbstgehosteten Lösungen, die GPT, Mistral, Llama3 und alle mit der OpenAI API kompatiblen Modelle abdecken. Eine vollständige Liste der unterstützten Modellanbieter finden Sie [hier](https://docs.dify.ai/getting-started/readme/model-providers). 
![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. Prompt IDE**: +Intuitive Benutzeroberfläche zum Erstellen von Prompts, zum Vergleichen der Modellleistung und zum Hinzufügen zusätzlicher Funktionen wie Text-to-Speech in einer chatbasierten Anwendung. -**3. Prompt IDE**: - Intuitive Benutzeroberfläche zum Erstellen von Prompts, zum Vergleichen der Modellleistung und zum Hinzufügen zusätzlicher Funktionen wie Text-to-Speech in einer chatbasierten Anwendung. +**4. RAG Pipeline**: +Umfassende RAG-Funktionalitäten, die alles von der Dokumenteneinlesung bis zur -abfrage abdecken, mit sofort einsatzbereiter Unterstützung für die Textextraktion aus PDFs, PPTs und anderen gängigen Dokumentformaten. -**4. RAG Pipeline**: - Umfassende RAG-Funktionalitäten, die alles von der Dokumenteneinlesung bis zur -abfrage abdecken, mit sofort einsatzbereiter Unterstützung für die Textextraktion aus PDFs, PPTs und anderen gängigen Dokumentformaten. +**5. Fähigkeiten des Agenten**: +Sie können Agenten basierend auf LLM Function Calling oder ReAct definieren und vorgefertigte oder benutzerdefinierte Tools für den Agenten hinzufügen. Dify stellt über 50 integrierte Tools für KI-Agenten bereit, wie zum Beispiel Google Search, DALL·E, Stable Diffusion und WolframAlpha. -**5. Fähigkeiten des Agenten**: - Sie können Agenten basierend auf LLM Function Calling oder ReAct definieren und vorgefertigte oder benutzerdefinierte Tools für den Agenten hinzufügen. Dify stellt über 50 integrierte Tools für KI-Agenten bereit, wie zum Beispiel Google Search, DALL·E, Stable Diffusion und WolframAlpha. +**6. LLMOps**: +Überwachen und analysieren Sie Anwendungsprotokolle und die Leistung im Laufe der Zeit. Sie können kontinuierlich Prompts, Datensätze und Modelle basierend auf Produktionsdaten und Annotationen verbessern. -**6. LLMOps**: - Überwachen und analysieren Sie Anwendungsprotokolle und die Leistung im Laufe der Zeit. 
Sie können kontinuierlich Prompts, Datensätze und Modelle basierend auf Produktionsdaten und Annotationen verbessern. - -**7. Backend-as-a-Service**: - Alle Dify-Angebote kommen mit entsprechenden APIs, sodass Sie Dify mühelos in Ihre eigene Geschäftslogik integrieren können. - -## Vergleich der Merkmale - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FeatureDify.AILangChainFlowiseOpenAI Assistants API
Programming ApproachAPI + App-orientedPython CodeApp-orientedAPI-oriented
Supported LLMsRich VarietyRich VarietyRich VarietyOpenAI-only
RAG Engine
Agent
Workflow
Observability
Enterprise Feature (SSO/Access control)
Local Deployment
+**7. Backend-as-a-Service**: +Alle Dify-Angebote kommen mit entsprechenden APIs, sodass Sie Dify mühelos in Ihre eigene Geschäftslogik integrieren können. ## Dify verwenden - **Cloud
** -Wir hosten einen [Dify Cloud](https://dify.ai)-Service, den jeder ohne Einrichtung ausprobieren kann. Er bietet alle Funktionen der selbstgehosteten Version und beinhaltet 200 kostenlose GPT-4-Aufrufe im Sandbox-Plan. + Wir hosten einen [Dify Cloud](https://dify.ai)-Service, den jeder ohne Einrichtung ausprobieren kann. Er bietet alle Funktionen der selbstgehosteten Version und beinhaltet 200 kostenlose GPT-4-Aufrufe im Sandbox-Plan. - **Selbstgehostete Dify Community Edition
** -Starten Sie Dify schnell in Ihrer Umgebung mit diesem [Schnellstart-Leitfaden](#quick-start). Nutzen Sie unsere [Dokumentation](https://docs.dify.ai) für weiterführende Informationen und detaillierte Anweisungen. + Starten Sie Dify schnell in Ihrer Umgebung mit diesem [Schnellstart-Leitfaden](#schnellstart). Nutzen Sie unsere [Dokumentation](https://docs.dify.ai) für weiterführende Informationen und detaillierte Anweisungen. - **Dify für Unternehmen / Organisationen
** -Wir bieten zusätzliche, unternehmensspezifische Funktionen. [Über diesen Chatbot können Sie uns Ihre Fragen mitteilen](https://udify.app/chat/22L1zSxg6yW1cWQg) oder [senden Sie uns eine E-Mail](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry), um Ihre unternehmerischen Bedürfnisse zu besprechen.
- > Für Startups und kleine Unternehmen, die AWS nutzen, schauen Sie sich [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) an und stellen Sie es mit nur einem Klick in Ihrer eigenen AWS VPC bereit. Es handelt sich um ein erschwingliches AMI-Angebot mit der Option, Apps mit individuellem Logo und Branding zu erstellen. + Wir bieten zusätzliche, unternehmensspezifische Funktionen. [Über diesen Chatbot können Sie uns Ihre Fragen mitteilen](https://udify.app/chat/22L1zSxg6yW1cWQg) oder [senden Sie uns eine E-Mail](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry), um Ihre unternehmerischen Bedürfnisse zu besprechen.
+ > Für Startups und kleine Unternehmen, die AWS nutzen, schauen Sie sich [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) an und stellen Sie es mit nur einem Klick in Ihrer eigenen AWS VPC bereit. Es handelt sich um ein erschwingliches AMI-Angebot mit der Option, Apps mit individuellem Logo und Branding zu erstellen. ## Immer einen Schritt voraus @@ -191,7 +125,6 @@ Star Dify auf GitHub und lassen Sie sich sofort über neue Releases benachrichti ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - ## Erweiterte Einstellungen Falls Sie die Konfiguration anpassen müssen, lesen Sie bitte die Kommentare in unserer [.env.example](docker/.env.example)-Datei und aktualisieren Sie die entsprechenden Werte in Ihrer `.env`-Datei. Zusätzlich müssen Sie eventuell Anpassungen an der `docker-compose.yaml`-Datei vornehmen, wie zum Beispiel das Ändern von Image-Versionen, Portzuordnungen oder Volumen-Mounts, je nach Ihrer spezifischen Einsatzumgebung und Ihren Anforderungen. Nachdem Sie Änderungen vorgenommen haben, starten Sie `docker-compose up -d` erneut. Eine vollständige Liste der verfügbaren Umgebungsvariablen finden Sie [hier](https://docs.dify.ai/getting-started/install-self-hosted/environments). @@ -210,20 +143,23 @@ Falls Sie eine hochverfügbare Konfiguration einrichten möchten, gibt es von de Stellen Sie Dify mit nur einem Klick mithilfe von [terraform](https://www.terraform.io/) auf einer Cloud-Plattform bereit. 
##### Azure Global + - [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### Verwendung von AWS CDK für die Bereitstellung Bereitstellung von Dify auf AWS mit [CDK](https://aws.amazon.com/cdk/) -##### AWS +##### AWS + - [AWS CDK by @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK by @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) -#### Alibaba Cloud +#### Alibaba Cloud [Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) @@ -235,20 +171,18 @@ Ein-Klick-Bereitstellung von Dify in der Alibaba Cloud mit [Alibaba Cloud Data M Stellen Sie Dify mit einem Klick in AKS bereit, indem Sie [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) verwenden - ## Contributing Falls Sie Code beitragen möchten, lesen Sie bitte unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Gleichzeitig bitten wir Sie, Dify zu unterstützen, indem Sie es in den sozialen Medien teilen und auf Veranstaltungen und Konferenzen präsentieren. - > Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen – außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c). ## Gemeinschaft & Kontakt -* [GitHub Discussion](https://github.com/langgenius/dify/discussions). Am besten geeignet für: den Austausch von Feedback und das Stellen von Fragen. -* [GitHub Issues](https://github.com/langgenius/dify/issues). 
Am besten für: Fehler, auf die Sie bei der Verwendung von Dify.AI stoßen, und Funktionsvorschläge. Siehe unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). -* [Discord](https://discord.gg/FngNHpbcY7). Am besten geeignet für: den Austausch von Bewerbungen und den Austausch mit der Community. -* [X(Twitter)](https://twitter.com/dify_ai). Am besten geeignet für: den Austausch von Bewerbungen und den Austausch mit der Community. +- [GitHub Discussion](https://github.com/langgenius/dify/discussions). Am besten geeignet für: den Austausch von Feedback und das Stellen von Fragen. +- [GitHub Issues](https://github.com/langgenius/dify/issues). Am besten für: Fehler, auf die Sie bei der Verwendung von Dify.AI stoßen, und Funktionsvorschläge. Siehe unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +- [Discord](https://discord.gg/FngNHpbcY7). Am besten geeignet für: das Teilen Ihrer Anwendungen und den Austausch mit der Community. +- [X(Twitter)](https://twitter.com/dify_ai). Am besten geeignet für: das Teilen Ihrer Anwendungen und den Austausch mit der Community. **Mitwirkende** @@ -260,7 +194,6 @@ Falls Sie Code beitragen möchten, lesen Sie bitte unseren [Contribution Guide]( [![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) - ## Offenlegung der Sicherheit Um Ihre Privatsphäre zu schützen, vermeiden Sie es bitte, Sicherheitsprobleme auf GitHub zu posten. Schicken Sie Ihre Fragen stattdessen an security@dify.ai und wir werden Ihnen eine ausführlichere Antwort geben. @@ -268,4 +201,3 @@ Um Ihre Privatsphäre zu schützen, vermeiden Sie es bitte, Sicherheitsprobleme ## Lizenz Dieses Repository steht unter der [Dify Open Source License](LICENSE), die im Wesentlichen Apache 2.0 mit einigen zusätzlichen Einschränkungen ist. 
- diff --git a/README_ES.md b/README_ES.md index 859da5bfd7..c7a18dc675 100644 --- a/README_ES.md +++ b/README_ES.md @@ -48,7 +48,7 @@ README in বাংলা

-# +#

langgenius%2Fdify | Trendshift @@ -56,111 +56,42 @@ Dify es una plataforma de desarrollo de aplicaciones de LLM de código abierto. Su interfaz intuitiva combina flujo de trabajo de IA, pipeline RAG, capacidades de agente, gestión de modelos, características de observabilidad y más, lo que le permite pasar rápidamente de un prototipo a producción. Aquí hay una lista de las características principales:

-**1. Flujo de trabajo**: - Construye y prueba potentes flujos de trabajo de IA en un lienzo visual, aprovechando todas las siguientes características y más. +**1. Flujo de trabajo**: +Construye y prueba potentes flujos de trabajo de IA en un lienzo visual, aprovechando todas las siguientes características y más. -**2. Soporte de modelos completo**: - Integración perfecta con cientos de LLMs propietarios / de código abierto de docenas de proveedores de inferencia y soluciones auto-alojadas, que cubren GPT, Mistral, Llama3 y cualquier modelo compatible con la API de OpenAI. Se puede encontrar una lista completa de proveedores de modelos admitidos [aquí](https://docs.dify.ai/getting-started/readme/model-providers). +**2. Soporte de modelos completo**: +Integración perfecta con cientos de LLMs propietarios / de código abierto de docenas de proveedores de inferencia y soluciones auto-alojadas, que cubren GPT, Mistral, Llama3 y cualquier modelo compatible con la API de OpenAI. Se puede encontrar una lista completa de proveedores de modelos admitidos [aquí](https://docs.dify.ai/getting-started/readme/model-providers). ![proveedores-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. IDE de prompt**: +Interfaz intuitiva para crear prompts, comparar el rendimiento del modelo y agregar características adicionales como texto a voz a una aplicación basada en chat. -**3. IDE de prompt**: - Interfaz intuitiva para crear prompts, comparar el rendimiento del modelo y agregar características adicionales como texto a voz a una aplicación basada en chat. +**4. Pipeline RAG**: +Amplias capacidades de RAG que cubren todo, desde la ingestión de documentos hasta la recuperación, con soporte listo para usar para la extracción de texto de PDF, PPT y otros formatos de documento comunes. -**4. 
Pipeline RAG**: - Amplias capacidades de RAG que cubren todo, desde la ingestión de documentos hasta la recuperación, con soporte listo para usar para la extracción de texto de PDF, PPT y otros formatos de documento comunes. +**5. Capacidades de agente**: +Puedes definir agentes basados en LLM Function Calling o ReAct, y agregar herramientas preconstruidas o personalizadas para el agente. Dify proporciona más de 50 herramientas integradas para agentes de IA, como Búsqueda de Google, DALL·E, Difusión Estable y WolframAlpha. -**5. Capacidades de agente**: - Puedes definir agentes basados en LLM Function Calling o ReAct, y agregar herramientas preconstruidas o personalizadas para el agente. Dify proporciona más de 50 herramientas integradas para agentes de IA, como Búsqueda de Google, DALL·E, Difusión Estable y WolframAlpha. +**6. LLMOps**: +Supervisa y analiza registros de aplicaciones y rendimiento a lo largo del tiempo. Podrías mejorar continuamente prompts, conjuntos de datos y modelos basados en datos de producción y anotaciones. -**6. LLMOps**: - Supervisa y analiza registros de aplicaciones y rendimiento a lo largo del tiempo. Podrías mejorar continuamente prompts, conjuntos de datos y modelos basados en datos de producción y anotaciones. - -**7. Backend como servicio**: - Todas las ofertas de Dify vienen con APIs correspondientes, por lo que podrías integrar Dify sin esfuerzo en tu propia lógica empresarial. - - -## Comparación de características - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CaracterísticaDify.AILangChainFlowiseAPI de Asistentes de OpenAI
Enfoque de programaciónAPI + orientado a la aplicaciónCódigo PythonOrientado a la aplicaciónOrientado a la API
LLMs admitidosGran variedadGran variedadGran variedadSolo OpenAI
Motor RAG
Agente
Flujo de trabajo
Observabilidad
Característica empresarial (SSO/Control de acceso)
Implementación local
+**7. Backend como servicio**: +Todas las ofertas de Dify vienen con APIs correspondientes, por lo que podrías integrar Dify sin esfuerzo en tu propia lógica empresarial. ## Usando Dify - **Nube
** -Hospedamos un servicio [Dify Cloud](https://dify.ai) para que cualquiera lo pruebe sin configuración. Proporciona todas las capacidades de la versión autoimplementada e incluye 200 llamadas gratuitas a GPT-4 en el plan sandbox. + Hospedamos un servicio [Dify Cloud](https://dify.ai) para que cualquiera lo pruebe sin configuración. Proporciona todas las capacidades de la versión autoimplementada e incluye 200 llamadas gratuitas a GPT-4 en el plan sandbox. - **Auto-alojamiento de Dify Community Edition
** -Pon rápidamente Dify en funcionamiento en tu entorno con esta [guía de inicio rápido](#quick-start). -Usa nuestra [documentación](https://docs.dify.ai) para más referencias e instrucciones más detalladas. + Pon rápidamente Dify en funcionamiento en tu entorno con esta [guía de inicio rápido](#inicio-rápido). + Usa nuestra [documentación](https://docs.dify.ai) para más referencias e instrucciones más detalladas. - **Dify para Empresas / Organizaciones
** -Proporcionamos características adicionales centradas en la empresa. [Envíanos un correo electrónico](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) para discutir las necesidades empresariales.
- > Para startups y pequeñas empresas que utilizan AWS, echa un vistazo a [Dify Premium en AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e impleméntalo en tu propio VPC de AWS con un clic. Es una AMI asequible que ofrece la opción de crear aplicaciones con logotipo y marca personalizados. + Proporcionamos características adicionales centradas en la empresa. [Envíanos un correo electrónico](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) para discutir las necesidades empresariales.
+ > Para startups y pequeñas empresas que utilizan AWS, echa un vistazo a [Dify Premium en AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e impleméntalo en tu propio VPC de AWS con un clic. Es una AMI asequible que ofrece la opción de crear aplicaciones con logotipo y marca personalizados. ## Manteniéndote al tanto @@ -168,13 +99,12 @@ Dale estrella a Dify en GitHub y serás notificado instantáneamente de las nuev ![danos estrella](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## Inicio Rápido + > Antes de instalar Dify, asegúrate de que tu máquina cumpla con los siguientes requisitos mínimos del sistema: -> ->- CPU >= 2 núcleos ->- RAM >= 4GB +> +> - CPU >= 2 núcleos +> - RAM >= 4GB
@@ -210,16 +140,19 @@ Si desea configurar una configuración de alta disponibilidad, la comunidad prop Despliega Dify en una plataforma en la nube con un solo clic utilizando [terraform](https://www.terraform.io/) ##### Azure Global + - [Azure Terraform por @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform por @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### Usando AWS CDK para el Despliegue Despliegue Dify en AWS usando [CDK](https://aws.amazon.com/cdk/) -##### AWS +##### AWS + - [AWS CDK por @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK por @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -235,13 +168,11 @@ Despliega Dify en Alibaba Cloud con un solo clic con [Alibaba Cloud Data Managem Implementa Dify en AKS con un clic usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) - ## Contribuir -Para aquellos que deseen contribuir con código, consulten nuestra [Guía de contribución](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +Para aquellos que deseen contribuir con código, consulten nuestra [Guía de contribución](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Al mismo tiempo, considera apoyar a Dify compartiéndolo en redes sociales y en eventos y conferencias. - > Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c). 
**Contribuidores** @@ -252,15 +183,22 @@ Al mismo tiempo, considera apoyar a Dify compartiéndolo en redes sociales y en ## Comunidad y Contacto -* [Discusión en GitHub](https://github.com/langgenius/dify/discussions). Lo mejor para: compartir comentarios y hacer preguntas. -* [Reporte de problemas en GitHub](https://github.com/langgenius/dify/issues). Lo mejor para: errores que encuentres usando Dify.AI y propuestas de características. Consulta nuestra [Guía de contribución](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). -* [Discord](https://discord.gg/FngNHpbcY7). Lo mejor para: compartir tus aplicaciones y pasar el rato con la comunidad. -* [X(Twitter)](https://twitter.com/dify_ai). Lo mejor para: compartir tus aplicaciones y pasar el rato con la comunidad. +- [Discusión en GitHub](https://github.com/langgenius/dify/discussions). Lo mejor para: compartir comentarios y hacer preguntas. +- [Reporte de problemas en GitHub](https://github.com/langgenius/dify/issues). Lo mejor para: errores que encuentres usando Dify.AI y propuestas de características. Consulta nuestra [Guía de contribución](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +- [Discord](https://discord.gg/FngNHpbcY7). Lo mejor para: compartir tus aplicaciones y pasar el rato con la comunidad. +- [X(Twitter)](https://twitter.com/dify_ai). Lo mejor para: compartir tus aplicaciones y pasar el rato con la comunidad. ## Historial de Estrellas [![Gráfico de Historial de Estrellas](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) +## Divulgación de Seguridad + +Para proteger tu privacidad, evita publicar problemas de seguridad en GitHub. En su lugar, envía tus preguntas a security@dify.ai y te proporcionaremos una respuesta más detallada. + +## Licencia + +Este repositorio está disponible bajo la [Licencia de Código Abierto de Dify](LICENSE), que es esencialmente Apache 2.0 con algunas restricciones adicionales. 
## Divulgación de Seguridad @@ -269,10 +207,3 @@ Para proteger tu privacidad, evita publicar problemas de seguridad en GitHub. En ## Licencia Este repositorio está disponible bajo la [Licencia de Código Abierto de Dify](LICENSE), que es esencialmente Apache 2.0 con algunas restricciones adicionales. -## Divulgación de Seguridad - -Para proteger tu privacidad, evita publicar problemas de seguridad en GitHub. En su lugar, envía tus preguntas a security@dify.ai y te proporcionaremos una respuesta más detallada. - -## Licencia - -Este repositorio está disponible bajo la [Licencia de Código Abierto de Dify](LICENSE), que es esencialmente Apache 2.0 con algunas restricciones adicionales. diff --git a/README_FR.md b/README_FR.md index fcadad419b..316d50c929 100644 --- a/README_FR.md +++ b/README_FR.md @@ -48,7 +48,7 @@ README in বাংলা

-# +#

langgenius%2Fdify | Trendshift @@ -56,111 +56,42 @@ Dify est une plateforme de développement d'applications LLM open source. Son interface intuitive combine un flux de travail d'IA, un pipeline RAG, des capacités d'agent, une gestion de modèles, des fonctionnalités d'observabilité, et plus encore, vous permettant de passer rapidement du prototype à la production. Voici une liste des fonctionnalités principales:

-**1. Flux de travail** : - Construisez et testez des flux de travail d'IA puissants sur un canevas visuel, en utilisant toutes les fonctionnalités suivantes et plus encore. +**1. Flux de travail** : +Construisez et testez des flux de travail d'IA puissants sur un canevas visuel, en utilisant toutes les fonctionnalités suivantes et plus encore. -**2. Prise en charge complète des modèles** : - Intégration transparente avec des centaines de LLM propriétaires / open source provenant de dizaines de fournisseurs d'inférence et de solutions auto-hébergées, couvrant GPT, Mistral, Llama3, et tous les modèles compatibles avec l'API OpenAI. Une liste complète des fournisseurs de modèles pris en charge se trouve [ici](https://docs.dify.ai/getting-started/readme/model-providers). +**2. Prise en charge complète des modèles** : +Intégration transparente avec des centaines de LLM propriétaires / open source provenant de dizaines de fournisseurs d'inférence et de solutions auto-hébergées, couvrant GPT, Mistral, Llama3, et tous les modèles compatibles avec l'API OpenAI. Une liste complète des fournisseurs de modèles pris en charge se trouve [ici](https://docs.dify.ai/getting-started/readme/model-providers). ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. IDE de prompt** : +Interface intuitive pour créer des prompts, comparer les performances des modèles et ajouter des fonctionnalités supplémentaires telles que la synthèse vocale à une application basée sur des chats. -**3. IDE de prompt** : - Interface intuitive pour créer des prompts, comparer les performances des modèles et ajouter des fonctionnalités supplémentaires telles que la synthèse vocale à une application basée sur des chats. +**4. Pipeline RAG** : +Des capacités RAG étendues qui couvrent tout, de l'ingestion de documents à la récupération, avec un support prêt à l'emploi pour l'extraction de texte à partir de PDF, PPT et autres formats de document courants. 
-**4. Pipeline RAG** : - Des capacités RAG étendues qui couvrent tout, de l'ingestion de documents à la récupération, avec un support prêt à l'emploi pour l'extraction de texte à partir de PDF, PPT et autres formats de document courants. +**5. Capacités d'agent** : +Vous pouvez définir des agents basés sur l'appel de fonction LLM ou ReAct, et ajouter des outils pré-construits ou personnalisés pour l'agent. Dify fournit plus de 50 outils intégrés pour les agents d'IA, tels que la recherche Google, DALL·E, Stable Diffusion et WolframAlpha. -**5. Capacités d'agent** : - Vous pouvez définir des agents basés sur l'appel de fonction LLM ou ReAct, et ajouter des outils pré-construits ou personnalisés pour l'agent. Dify fournit plus de 50 outils intégrés pour les agents d'IA, tels que la recherche Google, DALL·E, Stable Diffusion et WolframAlpha. +**6. LLMOps** : +Surveillez et analysez les journaux d'application et les performances au fil du temps. Vous pouvez continuellement améliorer les prompts, les ensembles de données et les modèles en fonction des données de production et des annotations. -**6. LLMOps** : - Surveillez et analysez les journaux d'application et les performances au fil du temps. Vous pouvez continuellement améliorer les prompts, les ensembles de données et les modèles en fonction des données de production et des annotations. - -**7. Backend-as-a-Service** : - Toutes les offres de Dify sont accompagnées d'API correspondantes, vous permettant d'intégrer facilement Dify dans votre propre logique métier. - - -## Comparaison des fonctionnalités - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FonctionnalitéDify.AILangChainFlowiseOpenAI Assistants API
Approche de programmationAPI + ApplicationCode PythonApplicationAPI
LLMs pris en chargeGrande variétéGrande variétéGrande variétéUniquement OpenAI
Moteur RAG
Agent
Flux de travail
Observabilité
Fonctionnalité d'entreprise (SSO/Contrôle d'accès)
Déploiement local
+**7. Backend-as-a-Service** : +Toutes les offres de Dify sont accompagnées d'API correspondantes, vous permettant d'intégrer facilement Dify dans votre propre logique métier. ## Utiliser Dify - **Cloud
** -Nous hébergeons un service [Dify Cloud](https://dify.ai) pour que tout le monde puisse l'essayer sans aucune configuration. Il fournit toutes les capacités de la version auto-hébergée et comprend 200 appels GPT-4 gratuits dans le plan bac à sable. + Nous hébergeons un service [Dify Cloud](https://dify.ai) pour que tout le monde puisse l'essayer sans aucune configuration. Il fournit toutes les capacités de la version auto-hébergée et comprend 200 appels GPT-4 gratuits dans le plan bac à sable. - **Auto-hébergement Dify Community Edition
** -Lancez rapidement Dify dans votre environnement avec ce [guide de démarrage](#quick-start). -Utilisez notre [documentation](https://docs.dify.ai) pour plus de références et des instructions plus détaillées. + Lancez rapidement Dify dans votre environnement avec ce [guide de démarrage](#démarrage-rapide). + Utilisez notre [documentation](https://docs.dify.ai) pour plus de références et des instructions plus détaillées. - **Dify pour les entreprises / organisations
** -Nous proposons des fonctionnalités supplémentaires adaptées aux entreprises. [Envoyez-nous un e-mail](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) pour discuter des besoins de l'entreprise.
- > Pour les startups et les petites entreprises utilisant AWS, consultez [Dify Premium sur AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) et déployez-le dans votre propre VPC AWS en un clic. C'est une offre AMI abordable avec la possibilité de créer des applications avec un logo et une marque personnalisés. + Nous proposons des fonctionnalités supplémentaires adaptées aux entreprises. [Envoyez-nous un e-mail](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) pour discuter des besoins de l'entreprise.
+ > Pour les startups et les petites entreprises utilisant AWS, consultez [Dify Premium sur AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) et déployez-le dans votre propre VPC AWS en un clic. C'est une offre AMI abordable avec la possibilité de créer des applications avec un logo et une marque personnalisés. ## Rester en avance @@ -168,13 +99,12 @@ Mettez une étoile à Dify sur GitHub et soyez instantanément informé des nouv ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## Démarrage rapide + > Avant d'installer Dify, assurez-vous que votre machine répond aux exigences système minimales suivantes: -> ->- CPU >= 2 cœurs ->- RAM >= 4 Go +> +> - CPU >= 2 cœurs +> - RAM >= 4 Go
@@ -208,16 +138,19 @@ Si vous souhaitez configurer une configuration haute disponibilité, la communau Déployez Dify sur une plateforme cloud en un clic en utilisant [terraform](https://www.terraform.io/) ##### Azure Global + - [Azure Terraform par @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform par @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### Utilisation d'AWS CDK pour le déploiement Déployez Dify sur AWS en utilisant [CDK](https://aws.amazon.com/cdk/) -##### AWS +##### AWS + - [AWS CDK par @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK par @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -233,13 +166,11 @@ Déployez Dify en un clic sur Alibaba Cloud avec [Alibaba Cloud Data Management] Déployez Dify sur AKS en un clic en utilisant [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) - ## Contribuer -Pour ceux qui souhaitent contribuer du code, consultez notre [Guide de contribution](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +Pour ceux qui souhaitent contribuer du code, consultez notre [Guide de contribution](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Dans le même temps, veuillez envisager de soutenir Dify en le partageant sur les réseaux sociaux et lors d'événements et de conférences. - > Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c). 
**Contributeurs** @@ -250,15 +181,22 @@ Dans le même temps, veuillez envisager de soutenir Dify en le partageant sur le ## Communauté & Contact -* [Discussion GitHub](https://github.com/langgenius/dify/discussions). Meilleur pour: partager des commentaires et poser des questions. -* [Problèmes GitHub](https://github.com/langgenius/dify/issues). Meilleur pour: les bogues que vous rencontrez en utilisant Dify.AI et les propositions de fonctionnalités. Consultez notre [Guide de contribution](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). -* [Discord](https://discord.gg/FngNHpbcY7). Meilleur pour: partager vos applications et passer du temps avec la communauté. -* [X(Twitter)](https://twitter.com/dify_ai). Meilleur pour: partager vos applications et passer du temps avec la communauté. +- [Discussion GitHub](https://github.com/langgenius/dify/discussions). Meilleur pour: partager des commentaires et poser des questions. +- [Problèmes GitHub](https://github.com/langgenius/dify/issues). Meilleur pour: les bogues que vous rencontrez en utilisant Dify.AI et les propositions de fonctionnalités. Consultez notre [Guide de contribution](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +- [Discord](https://discord.gg/FngNHpbcY7). Meilleur pour: partager vos applications et passer du temps avec la communauté. +- [X(Twitter)](https://twitter.com/dify_ai). Meilleur pour: partager vos applications et passer du temps avec la communauté. ## Historique des étoiles [![Graphique de l'historique des étoiles](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) +## Divulgation de sécurité + +Pour protéger votre vie privée, veuillez éviter de publier des problèmes de sécurité sur GitHub. Au lieu de cela, envoyez vos questions à security@dify.ai et nous vous fournirons une réponse plus détaillée. 
+ +## Licence + +Ce référentiel est disponible sous la [Licence open source Dify](LICENSE), qui est essentiellement l'Apache 2.0 avec quelques restrictions supplémentaires. ## Divulgation de sécurité @@ -267,10 +205,3 @@ Pour protéger votre vie privée, veuillez éviter de publier des problèmes de ## Licence Ce référentiel est disponible sous la [Licence open source Dify](LICENSE), qui est essentiellement l'Apache 2.0 avec quelques restrictions supplémentaires. -## Divulgation de sécurité - -Pour protéger votre vie privée, veuillez éviter de publier des problèmes de sécurité sur GitHub. Au lieu de cela, envoyez vos questions à security@dify.ai et nous vous fournirons une réponse plus détaillée. - -## Licence - -Ce référentiel est disponible sous la [Licence open source Dify](LICENSE), qui est essentiellement l'Apache 2.0 avec quelques restrictions supplémentaires. diff --git a/README_JA.md b/README_JA.md index 6ddc30789c..785706a88a 100644 --- a/README_JA.md +++ b/README_JA.md @@ -48,7 +48,7 @@ README in বাংলা

-# +#

langgenius%2Fdify | Trendshift @@ -58,110 +58,41 @@ DifyはオープンソースのLLMアプリケーション開発プラットフ

**1. ワークフロー**: - 強力なAIワークフローをビジュアルキャンバス上で構築し、テストできます。すべての機能、および以下の機能を使用できます。 +強力なAIワークフローをビジュアルキャンバス上で構築し、テストできます。すべての機能、および以下の機能を使用できます。 **2. 総合的なモデルサポート**: - 数百ものプロプライエタリ/オープンソースのLLMと、数十もの推論プロバイダーおよびセルフホスティングソリューションとのシームレスな統合を提供します。GPT、Mistral、Llama3、OpenAI APIと互換性のあるすべてのモデルを統合されています。サポートされているモデルプロバイダーの完全なリストは[こちら](https://docs.dify.ai/getting-started/readme/model-providers)をご覧ください。 +数百ものプロプライエタリ/オープンソースのLLMと、数十もの推論プロバイダーおよびセルフホスティングソリューションとのシームレスな統合を提供します。GPT、Mistral、Llama3、OpenAI APIと互換性のあるすべてのモデルを統合されています。サポートされているモデルプロバイダーの完全なリストは[こちら](https://docs.dify.ai/getting-started/readme/model-providers)をご覧ください。 ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) - **3. プロンプトIDE**: - プロンプトの作成、モデルパフォーマンスの比較が行え、チャットベースのアプリに音声合成などの機能も追加できます。 +プロンプトの作成、モデルパフォーマンスの比較が行え、チャットベースのアプリに音声合成などの機能も追加できます。 **4. RAGパイプライン**: - ドキュメントの取り込みから検索までをカバーする広範なRAG機能ができます。ほかにもPDF、PPT、その他の一般的なドキュメントフォーマットからのテキスト抽出のサポートも提供します。 +ドキュメントの取り込みから検索までをカバーする広範なRAG機能ができます。ほかにもPDF、PPT、その他の一般的なドキュメントフォーマットからのテキスト抽出のサポートも提供します。 **5. エージェント機能**: - LLM Function CallingやReActに基づくエージェントの定義が可能で、AIエージェント用のプリビルトまたはカスタムツールを追加できます。Difyには、Google検索、DALL·E、Stable Diffusion、WolframAlphaなどのAIエージェント用の50以上の組み込みツールが提供します。 +LLM Function CallingやReActに基づくエージェントの定義が可能で、AIエージェント用のプリビルトまたはカスタムツールを追加できます。Difyには、Google検索、DALL·E、Stable Diffusion、WolframAlphaなどのAIエージェント用の50以上の組み込みツールが提供します。 **6. LLMOps**: - アプリケーションのログやパフォーマンスを監視と分析し、生産のデータと注釈に基づいて、プロンプト、データセット、モデルを継続的に改善できます。 +アプリケーションのログやパフォーマンスを監視と分析し、生産のデータと注釈に基づいて、プロンプト、データセット、モデルを継続的に改善できます。 **7. Backend-as-a-Service**: - すべての機能はAPIを提供されており、Difyを自分のビジネスロジックに簡単に統合できます。 - - -## 機能比較 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
機能Dify.AILangChainFlowiseOpenAI Assistants API
プログラミングアプローチAPI + アプリ指向Pythonコードアプリ指向API指向
サポートされているLLMバラエティ豊かバラエティ豊かバラエティ豊かOpenAIのみ
RAGエンジン
エージェント
ワークフロー
観測性
エンタープライズ機能(SSO/アクセス制御)
ローカル展開
+すべての機能はAPIを提供されており、Difyを自分のビジネスロジックに簡単に統合できます。 ## Difyの使用方法 - **クラウド
** -[こちら](https://dify.ai)のDify Cloudサービスを利用して、セットアップ不要で試すことができます。サンドボックスプランには、200回のGPT-4呼び出しが無料で含まれています。 + [こちら](https://dify.ai)のDify Cloudサービスを利用して、セットアップ不要で試すことができます。サンドボックスプランには、200回のGPT-4呼び出しが無料で含まれています。 - **Dify Community Editionのセルフホスティング
** -この[スタートガイド](#クイックスタート)を使用して、ローカル環境でDifyを簡単に実行できます。 -詳しくは[ドキュメント](https://docs.dify.ai)をご覧ください。 + この[スタートガイド](#%E3%82%AF%E3%82%A4%E3%83%83%E3%82%AF%E3%82%B9%E3%82%BF%E3%83%BC%E3%83%88)を使用して、ローカル環境でDifyを簡単に実行できます。 + 詳しくは[ドキュメント](https://docs.dify.ai)をご覧ください。 - **企業/組織向けのDify
** -企業中心の機能を提供しています。[メールを送信](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry)して企業のニーズについて相談してください。
- > AWSを使用しているスタートアップ企業や中小企業の場合は、[AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6)のDify Premiumをチェックして、ワンクリックで自分のAWS VPCにデプロイできます。さらに、手頃な価格のAMIオファリングとして、ロゴやブランディングをカスタマイズしてアプリケーションを作成するオプションがあります。 + 企業中心の機能を提供しています。[メールを送信](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry)して企業のニーズについて相談してください。
+ > AWSを使用しているスタートアップ企業や中小企業の場合は、[AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6)のDify Premiumをチェックして、ワンクリックで自分のAWS VPCにデプロイできます。さらに、手頃な価格のAMIオファリングとして、ロゴやブランディングをカスタマイズしてアプリケーションを作成するオプションがあります。 ## 最新の情報を入手 @@ -169,13 +100,12 @@ GitHub上でDifyにスターを付けることで、Difyに関する新しいニ ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## クイックスタート + > Difyをインストールする前に、お使いのマシンが以下の最小システム要件を満たしていることを確認してください: > ->- CPU >= 2コア ->- RAM >= 4GB +> - CPU >= 2コア +> - RAM >= 4GB
@@ -209,9 +139,11 @@ docker compose up -d [terraform](https://www.terraform.io/) を使用して、ワンクリックでDifyをクラウドプラットフォームにデプロイします ##### Azure Global + - [@nikawangによるAzure Terraform](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [@sotazumによるGoogle Cloud Terraform](https://github.com/DeNA/dify-google-cloud-terraform) #### AWS CDK を使用したデプロイ @@ -219,26 +151,27 @@ docker compose up -d [CDK](https://aws.amazon.com/cdk/) を使用して、DifyをAWSにデプロイします ##### AWS + - [@KevinZhaoによるAWS CDK (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [@tmokmssによるAWS CDK (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) #### Alibaba Cloud + [Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) #### Alibaba Cloud Data Management + [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) を利用して、DifyをAlibaba Cloudへワンクリックでデプロイできます #### AKSへのデプロイにAzure Devops Pipelineを使用 [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)を使用してDifyをAKSにワンクリックでデプロイ - ## 貢献 コードに貢献したい方は、[Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)を参照してください。 同時に、DifyをSNSやイベント、カンファレンスで共有してサポートしていただけると幸いです。 - > Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。 **貢献者** @@ -249,12 +182,10 @@ docker compose up -d ## コミュニティ & お問い合わせ -* [GitHub Discussion](https://github.com/langgenius/dify/discussions). 主に: フィードバックの共有や質問。 -* [GitHub Issues](https://github.com/langgenius/dify/issues). 主に: Dify.AIを使用する際に発生するエラーや問題については、[貢献ガイド](CONTRIBUTING_JA.md)を参照してください -* [Discord](https://discord.gg/FngNHpbcY7). 
主に: アプリケーションの共有やコミュニティとの交流。 -* [X(Twitter)](https://twitter.com/dify_ai). 主に: アプリケーションの共有やコミュニティとの交流。 - - +- [GitHub Discussion](https://github.com/langgenius/dify/discussions). 主に: フィードバックの共有や質問。 +- [GitHub Issues](https://github.com/langgenius/dify/issues). 主に: Dify.AIを使用する際に発生するエラーや問題については、[貢献ガイド](CONTRIBUTING_JA.md)を参照してください +- [Discord](https://discord.gg/FngNHpbcY7). 主に: アプリケーションの共有やコミュニティとの交流。 +- [X(Twitter)](https://twitter.com/dify_ai). 主に: アプリケーションの共有やコミュニティとの交流。 ## ライセンス diff --git a/README_KL.md b/README_KL.md index 7232da8003..93da9a6140 100644 --- a/README_KL.md +++ b/README_KL.md @@ -48,7 +48,7 @@ README in বাংলা

-# +#

langgenius%2Fdify | Trendshift @@ -56,111 +56,42 @@ Dify is an open-source LLM app development platform. Its intuitive interface combines AI workflow, RAG pipeline, agent capabilities, model management, observability features and more, letting you quickly go from prototype to production. Here's a list of the core features:

-**1. Workflow**: - Build and test powerful AI workflows on a visual canvas, leveraging all the following features and beyond. +**1. Workflow**: +Build and test powerful AI workflows on a visual canvas, leveraging all the following features and beyond. -**2. Comprehensive model support**: - Seamless integration with hundreds of proprietary / open-source LLMs from dozens of inference providers and self-hosted solutions, covering GPT, Mistral, Llama3, and any OpenAI API-compatible models. A full list of supported model providers can be found [here](https://docs.dify.ai/getting-started/readme/model-providers). +**2. Comprehensive model support**: +Seamless integration with hundreds of proprietary / open-source LLMs from dozens of inference providers and self-hosted solutions, covering GPT, Mistral, Llama3, and any OpenAI API-compatible models. A full list of supported model providers can be found [here](https://docs.dify.ai/getting-started/readme/model-providers). ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. Prompt IDE**: +Intuitive interface for crafting prompts, comparing model performance, and adding additional features such as text-to-speech to a chat-based app. -**3. Prompt IDE**: - Intuitive interface for crafting prompts, comparing model performance, and adding additional features such as text-to-speech to a chat-based app. +**4. RAG Pipeline**: +Extensive RAG capabilities that cover everything from document ingestion to retrieval, with out-of-box support for text extraction from PDFs, PPTs, and other common document formats. -**4. RAG Pipeline**: - Extensive RAG capabilities that cover everything from document ingestion to retrieval, with out-of-box support for text extraction from PDFs, PPTs, and other common document formats. +**5. Agent capabilities**: +You can define agents based on LLM Function Calling or ReAct, and add pre-built or custom tools for the agent. 
Dify provides 50+ built-in tools for AI agents, such as Google Search, DALL·E, Stable Diffusion and WolframAlpha. -**5. Agent capabilities**: - You can define agents based on LLM Function Calling or ReAct, and add pre-built or custom tools for the agent. Dify provides 50+ built-in tools for AI agents, such as Google Search, DALL·E, Stable Diffusion and WolframAlpha. +**6. LLMOps**: +Monitor and analyze application logs and performance over time. You could continuously improve prompts, datasets, and models based on production data and annotations. -**6. LLMOps**: - Monitor and analyze application logs and performance over time. You could continuously improve prompts, datasets, and models based on production data and annotations. - -**7. Backend-as-a-Service**: - All of Dify's offerings come with corresponding APIs, so you could effortlessly integrate Dify into your own business logic. - - -## Feature Comparison - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FeatureDify.AILangChainFlowiseOpenAI Assistants API
Programming ApproachAPI + App-orientedPython CodeApp-orientedAPI-oriented
Supported LLMsRich VarietyRich VarietyRich VarietyOpenAI-only
RAG Engine
Agent
Workflow
Observability
Enterprise Feature (SSO/Access control)
Local Deployment
+**7. Backend-as-a-Service**: +All of Dify's offerings come with corresponding APIs, so you could effortlessly integrate Dify into your own business logic. ## Using Dify - **Cloud
** -We host a [Dify Cloud](https://dify.ai) service for anyone to try with zero setup. It provides all the capabilities of the self-deployed version, and includes 200 free GPT-4 calls in the sandbox plan. + We host a [Dify Cloud](https://dify.ai) service for anyone to try with zero setup. It provides all the capabilities of the self-deployed version, and includes 200 free GPT-4 calls in the sandbox plan. - **Self-hosting Dify Community Edition
** -Quickly get Dify running in your environment with this [starter guide](#quick-start). -Use our [documentation](https://docs.dify.ai) for further references and more in-depth instructions. + Quickly get Dify running in your environment with this [starter guide](#quick-start). + Use our [documentation](https://docs.dify.ai) for further references and more in-depth instructions. - **Dify for Enterprise / Organizations
** -We provide additional enterprise-centric features. [Send us an email](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) to discuss enterprise needs.
- > For startups and small businesses using AWS, check out [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one-click. It's an affordable AMI offering with the option to create apps with custom logo and branding. + We provide additional enterprise-centric features. [Send us an email](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) to discuss enterprise needs.
+ > For startups and small businesses using AWS, check out [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one-click. It's an affordable AMI offering with the option to create apps with custom logo and branding. ## Staying ahead @@ -168,13 +99,12 @@ Star Dify on GitHub and be instantly notified of new releases. ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## Quick Start + > Before installing Dify, make sure your machine meets the following minimum system requirements: -> ->- CPU >= 2 Core ->- RAM >= 4GB +> +> - CPU >= 2 Core +> - RAM >= 4GB
@@ -208,16 +138,19 @@ If you'd like to configure a highly-available setup, there are community-contrib wa'logh nIqHom neH ghun deployment toy'wI' [terraform](https://www.terraform.io/) lo'laH. ##### Azure Global + - [Azure Terraform mung @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform qachlot @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### AWS CDK atorlugh pilersitsineq wa'logh nIqHom neH ghun deployment toy'wI' [CDK](https://aws.amazon.com/cdk/) lo'laH. -##### AWS +##### AWS + - [AWS CDK qachlot @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK qachlot @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -233,13 +166,11 @@ wa'logh nIqHom neH ghun deployment toy'wI' [CDK](https://aws.amazon.com/cdk/) lo [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) lo'laH Dify AKS 'e' wa'DIch click 'e' Deploy - ## Contributing -For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). At the same time, please consider supporting Dify by sharing it on social media and at events and conferences. - > We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). 
**Contributors** @@ -250,18 +181,18 @@ At the same time, please consider supporting Dify by sharing it on social media ## Community & Contact -* [GitHub Discussion](https://github.com/langgenius/dify/discussions +- [GitHub Discussion](https://github.com/langgenius/dify/discussions ). Best for: sharing feedback and asking questions. -* [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). -* [Discord](https://discord.gg/FngNHpbcY7). Best for: sharing your applications and hanging out with the community. -* [X(Twitter)](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community. + +- [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +- [Discord](https://discord.gg/FngNHpbcY7). Best for: sharing your applications and hanging out with the community. +- [X(Twitter)](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community. ## Star History [![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) - ## Security Disclosure To protect your privacy, please avoid posting security issues on GitHub. Instead, send your questions to security@dify.ai and we will provide you with a more detailed answer. diff --git a/README_KR.md b/README_KR.md index 74010d43ed..3b58339e12 100644 --- a/README_KR.md +++ b/README_KR.md @@ -48,99 +48,30 @@ README in বাংলা

- - Dify는 오픈 소스 LLM 앱 개발 플랫폼입니다. 직관적인 인터페이스를 통해 AI 워크플로우, RAG 파이프라인, 에이전트 기능, 모델 관리, 관찰 기능 등을 결합하여 프로토타입에서 프로덕션까지 빠르게 전환할 수 있습니다. 주요 기능 목록은 다음과 같습니다:

+Dify는 오픈 소스 LLM 앱 개발 플랫폼입니다. 직관적인 인터페이스를 통해 AI 워크플로우, RAG 파이프라인, 에이전트 기능, 모델 관리, 관찰 기능 등을 결합하여 프로토타입에서 프로덕션까지 빠르게 전환할 수 있습니다. 주요 기능 목록은 다음과 같습니다:

**1. 워크플로우**: - 다음 기능들을 비롯한 다양한 기능을 활용하여 시각적 캔버스에서 강력한 AI 워크플로우를 구축하고 테스트하세요. +다음 기능들을 비롯한 다양한 기능을 활용하여 시각적 캔버스에서 강력한 AI 워크플로우를 구축하고 테스트하세요. -**2. 포괄적인 모델 지원:**: +**2. 포괄적인 모델 지원:**: 수십 개의 추론 제공업체와 자체 호스팅 솔루션에서 제공하는 수백 개의 독점 및 오픈 소스 LLM과 원활하게 통합되며, GPT, Mistral, Llama3 및 모든 OpenAI API 호환 모델을 포함합니다. 지원되는 모델 제공업체의 전체 목록은 [여기](https://docs.dify.ai/getting-started/readme/model-providers)에서 확인할 수 있습니다. ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) - **3. 통합 개발환경**: - 프롬프트를 작성하고, 모델 성능을 비교하며, 텍스트-음성 변환과 같은 추가 기능을 채팅 기반 앱에 추가할 수 있는 직관적인 인터페이스를 제공합니다. +프롬프트를 작성하고, 모델 성능을 비교하며, 텍스트-음성 변환과 같은 추가 기능을 채팅 기반 앱에 추가할 수 있는 직관적인 인터페이스를 제공합니다. -**4. RAG 파이프라인**: - 문서 수집부터 검색까지 모든 것을 다루며, PDF, PPT 및 기타 일반적인 문서 형식에서 텍스트 추출을 위한 기본 지원이 포함되어 있는 광범위한 RAG 기능을 제공합니다. +**4. RAG 파이프라인**: +문서 수집부터 검색까지 모든 것을 다루며, PDF, PPT 및 기타 일반적인 문서 형식에서 텍스트 추출을 위한 기본 지원이 포함되어 있는 광범위한 RAG 기능을 제공합니다. **5. 에이전트 기능**: - LLM 함수 호출 또는 ReAct를 기반으로 에이전트를 정의하고 에이전트에 대해 사전 구축된 도구나 사용자 정의 도구를 추가할 수 있습니다. Dify는 Google Search, DALL·E, Stable Diffusion, WolframAlpha 등 AI 에이전트를 위한 50개 이상의 내장 도구를 제공합니다. +LLM 함수 호출 또는 ReAct를 기반으로 에이전트를 정의하고 에이전트에 대해 사전 구축된 도구나 사용자 정의 도구를 추가할 수 있습니다. Dify는 Google Search, DALL·E, Stable Diffusion, WolframAlpha 등 AI 에이전트를 위한 50개 이상의 내장 도구를 제공합니다. **6. LLMOps**: - 시간 경과에 따른 애플리케이션 로그와 성능을 모니터링하고 분석합니다. 생산 데이터와 주석을 기반으로 프롬프트, 데이터세트, 모델을 지속적으로 개선할 수 있습니다. +시간 경과에 따른 애플리케이션 로그와 성능을 모니터링하고 분석합니다. 생산 데이터와 주석을 기반으로 프롬프트, 데이터세트, 모델을 지속적으로 개선할 수 있습니다. **7. Backend-as-a-Service**: - Dify의 모든 제품에는 해당 API가 함께 제공되므로 Dify를 자신의 비즈니스 로직에 쉽게 통합할 수 있습니다. - -## 기능 비교 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
기능Dify.AILangChainFlowiseOpenAI Assistants API
프로그래밍 접근 방식API + 앱 중심Python 코드앱 중심API 중심
지원되는 LLMs다양한 종류다양한 종류다양한 종류OpenAI 전용
RAG 엔진
에이전트
워크플로우
가시성
기업용 기능 (SSO/접근 제어)
로컬 배포
+Dify의 모든 제품에는 해당 API가 함께 제공되므로 Dify를 자신의 비즈니스 로직에 쉽게 통합할 수 있습니다. ## Dify 사용하기 @@ -148,27 +79,26 @@ 우리는 누구나 설정이 필요 없이 사용해 볼 수 있도록 [Dify 클라우드](https://dify.ai) 서비스를 호스팅합니다. 이는 자체 배포 버전의 모든 기능을 제공하며, 샌드박스 플랜에서 무료로 200회의 GPT-4 호출을 포함합니다. - **셀프-호스팅 Dify 커뮤니티 에디션
** - 환경에서 Dify를 빠르게 실행하려면 이 [스타터 가이드를](#quick-start) 참조하세요. + 환경에서 Dify를 빠르게 실행하려면 이 [스타터 가이드를](#quick-start) 참조하세요. 추가 참조 및 더 심층적인 지침은 [문서](https://docs.dify.ai)를 사용하세요. - **기업 / 조직을 위한 Dify
** - 우리는 추가적인 기업 중심 기능을 제공합니다. 잡거나 [이메일 보내기](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry)를 통해 기업 요구 사항을 논의하십시오.
+ 우리는 추가적인 기업 중심 기능을 제공합니다. [이메일 보내기](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry)를 통해 기업 요구 사항을 논의하십시오.
+ > AWS를 사용하는 스타트업 및 중소기업의 경우 [AWS Marketplace에서 Dify Premium](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6)을 확인하고 한 번의 클릭으로 자체 AWS VPC에 배포하십시오. 맞춤형 로고와 브랜딩이 포함된 앱을 생성할 수 있는 옵션이 포함된 저렴한 AMI 제품입니다. - - ## 앞서가기 GitHub에서 Dify에 별표를 찍어 새로운 릴리스를 즉시 알림 받으세요. ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## 빠른 시작 ->Dify를 설치하기 전에 컴퓨터가 다음과 같은 최소 시스템 요구 사항을 충족하는지 확인하세요 : ->- CPU >= 2 Core ->- RAM >= 4GB + +> Dify를 설치하기 전에 컴퓨터가 다음과 같은 최소 시스템 요구 사항을 충족하는지 확인하세요 : +> +> - CPU >= 2 Core +> - RAM >= 4GB
@@ -202,16 +132,19 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했 [terraform](https://www.terraform.io/)을 사용하여 단 한 번의 클릭으로 Dify를 클라우드 플랫폼에 배포하십시오 ##### Azure Global + - [nikawang의 Azure Terraform](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [sotazum의 Google Cloud Terraform](https://github.com/DeNA/dify-google-cloud-terraform) #### AWS CDK를 사용한 배포 [CDK](https://aws.amazon.com/cdk/)를 사용하여 AWS에 Dify 배포 -##### AWS +##### AWS + - [KevinZhao의 AWS CDK (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [tmokmss의 AWS CDK (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -227,14 +160,12 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했 [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)을 사용하여 Dify를 AKS에 원클릭으로 배포 - ## 기여 코드에 기여하고 싶은 분들은 [기여 가이드](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)를 참조하세요. 동시에 Dify를 소셜 미디어와 행사 및 컨퍼런스에 공유하여 지원하는 것을 고려해 주시기 바랍니다. - -> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요. +> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요. **기여자** @@ -244,17 +175,15 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했 ## 커뮤니티 & 연락처 -* [GitHub 토론](https://github.com/langgenius/dify/discussions). 피드백 공유 및 질문하기에 적합합니다. -* [GitHub 이슈](https://github.com/langgenius/dify/issues). Dify.AI 사용 중 발견한 버그와 기능 제안에 적합합니다. [기여 가이드](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)를 참조하세요. -* [디스코드](https://discord.gg/FngNHpbcY7). 애플리케이션 공유 및 커뮤니티와 소통하기에 적합합니다. -* [트위터](https://twitter.com/dify_ai). 애플리케이션 공유 및 커뮤니티와 소통하기에 적합합니다. 
- +- [GitHub 토론](https://github.com/langgenius/dify/discussions). 피드백 공유 및 질문하기에 적합합니다. +- [GitHub 이슈](https://github.com/langgenius/dify/issues). Dify.AI 사용 중 발견한 버그와 기능 제안에 적합합니다. [기여 가이드](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)를 참조하세요. +- [디스코드](https://discord.gg/FngNHpbcY7). 애플리케이션 공유 및 커뮤니티와 소통하기에 적합합니다. +- [트위터](https://twitter.com/dify_ai). 애플리케이션 공유 및 커뮤니티와 소통하기에 적합합니다. ## Star 히스토리 [![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) - ## 보안 공개 개인정보 보호를 위해 보안 문제를 GitHub에 게시하지 마십시오. 대신 security@dify.ai로 질문을 보내주시면 더 자세한 답변을 드리겠습니다. diff --git a/README_PT.md b/README_PT.md index f9e3ef7f4b..ec2e4245f6 100644 --- a/README_PT.md +++ b/README_PT.md @@ -1,4 +1,5 @@ ![cover-v5-optimized](./images/GitHub_README_if.png) +

📌 Introduzindo o Dify Workflow com Upload de Arquivo: Recrie o Podcast Google NotebookLM

@@ -55,111 +56,42 @@ Dify é uma plataforma de desenvolvimento de aplicativos LLM de código aberto. Sua interface intuitiva combina workflow de IA, pipeline RAG, capacidades de agente, gerenciamento de modelos, recursos de observabilidade e muito mais, permitindo que você vá rapidamente do protótipo à produção. Aqui está uma lista das principais funcionalidades:

-**1. Workflow**: - Construa e teste workflows poderosos de IA em uma interface visual, aproveitando todos os recursos a seguir e muito mais. +**1. Workflow**: +Construa e teste workflows poderosos de IA em uma interface visual, aproveitando todos os recursos a seguir e muito mais. -**2. Suporte abrangente a modelos**: - Integração perfeita com centenas de LLMs proprietários e de código aberto de diversas provedoras e soluções auto-hospedadas, abrangendo GPT, Mistral, Llama3 e qualquer modelo compatível com a API da OpenAI. A lista completa de provedores suportados pode ser encontrada [aqui](https://docs.dify.ai/getting-started/readme/model-providers). +**2. Suporte abrangente a modelos**: +Integração perfeita com centenas de LLMs proprietários e de código aberto de diversas provedoras e soluções auto-hospedadas, abrangendo GPT, Mistral, Llama3 e qualquer modelo compatível com a API da OpenAI. A lista completa de provedores suportados pode ser encontrada [aqui](https://docs.dify.ai/getting-started/readme/model-providers). ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. IDE de Prompt**: +Interface intuitiva para criação de prompts, comparação de desempenho de modelos e adição de recursos como conversão de texto para fala em um aplicativo baseado em chat. -**3. IDE de Prompt**: - Interface intuitiva para criação de prompts, comparação de desempenho de modelos e adição de recursos como conversão de texto para fala em um aplicativo baseado em chat. +**4. Pipeline RAG**: +Extensas capacidades de RAG que cobrem desde a ingestão de documentos até a recuperação, com suporte nativo para extração de texto de PDFs, PPTs e outros formatos de documentos comuns. -**4. Pipeline RAG**: - Extensas capacidades de RAG que cobrem desde a ingestão de documentos até a recuperação, com suporte nativo para extração de texto de PDFs, PPTs e outros formatos de documentos comuns. +**5. 
Capacidades de agente**: +Você pode definir agentes com base em LLM Function Calling ou ReAct e adicionar ferramentas pré-construídas ou personalizadas para o agente. O Dify oferece mais de 50 ferramentas integradas para agentes de IA, como Google Search, DALL·E, Stable Diffusion e WolframAlpha. -**5. Capacidades de agente**: - Você pode definir agentes com base em LLM Function Calling ou ReAct e adicionar ferramentas pré-construídas ou personalizadas para o agente. O Dify oferece mais de 50 ferramentas integradas para agentes de IA, como Google Search, DALL·E, Stable Diffusion e WolframAlpha. +**6. LLMOps**: +Monitore e analise os registros e o desempenho do aplicativo ao longo do tempo. É possível melhorar continuamente prompts, conjuntos de dados e modelos com base nos dados de produção e anotações. -**6. LLMOps**: - Monitore e analise os registros e o desempenho do aplicativo ao longo do tempo. É possível melhorar continuamente prompts, conjuntos de dados e modelos com base nos dados de produção e anotações. - -**7. Backend como Serviço**: - Todas os recursos do Dify vêm com APIs correspondentes, permitindo que você integre o Dify sem esforço na lógica de negócios da sua empresa. - - -## Comparação de recursos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
RecursoDify.AILangChainFlowiseOpenAI Assistants API
Abordagem de ProgramaçãoOrientada a API + AplicativoCódigo PythonOrientada a AplicativoOrientada a API
LLMs SuportadosVariedade RicaVariedade RicaVariedade RicaApenas OpenAI
RAG Engine
Agente
Workflow
Observabilidade
Recursos Empresariais (SSO/Controle de Acesso)
Implantação Local
+**7. Backend como Serviço**: +Todas os recursos do Dify vêm com APIs correspondentes, permitindo que você integre o Dify sem esforço na lógica de negócios da sua empresa. ## Usando o Dify - **Nuvem
** -Oferecemos o serviço [Dify Cloud](https://dify.ai) para qualquer pessoa experimentar sem nenhuma configuração. Ele fornece todas as funcionalidades da versão auto-hospedada, incluindo 200 chamadas GPT-4 gratuitas no plano sandbox. + Oferecemos o serviço [Dify Cloud](https://dify.ai) para qualquer pessoa experimentar sem nenhuma configuração. Ele fornece todas as funcionalidades da versão auto-hospedada, incluindo 200 chamadas GPT-4 gratuitas no plano sandbox. - **Auto-hospedagem do Dify Community Edition
** -Configure rapidamente o Dify no seu ambiente com este [guia inicial](#quick-start). -Use nossa [documentação](https://docs.dify.ai) para referências adicionais e instruções mais detalhadas. + Configure rapidamente o Dify no seu ambiente com este [guia inicial](#quick-start). + Use nossa [documentação](https://docs.dify.ai) para referências adicionais e instruções mais detalhadas. - **Dify para empresas/organizações
** -Oferecemos recursos adicionais voltados para empresas. [Envie suas perguntas através deste chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) ou [envie-nos um e-mail](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) para discutir necessidades empresariais.
- > Para startups e pequenas empresas que utilizam AWS, confira o [Dify Premium no AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e implemente no seu próprio AWS VPC com um clique. É uma oferta AMI acessível com a opção de criar aplicativos com logotipo e marca personalizados. + Oferecemos recursos adicionais voltados para empresas. [Envie suas perguntas através deste chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) ou [envie-nos um e-mail](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) para discutir necessidades empresariais.
+ > Para startups e pequenas empresas que utilizam AWS, confira o [Dify Premium no AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e implemente no seu próprio AWS VPC com um clique. É uma oferta AMI acessível com a opção de criar aplicativos com logotipo e marca personalizados. ## Mantendo-se atualizado @@ -167,13 +99,12 @@ Dê uma estrela no Dify no GitHub e seja notificado imediatamente sobre novos la ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## Início rápido + > Antes de instalar o Dify, certifique-se de que sua máquina atenda aos seguintes requisitos mínimos de sistema: -> ->- CPU >= 2 Núcleos ->- RAM >= 4 GiB +> +> - CPU >= 2 Núcleos +> - RAM >= 4 GiB
@@ -207,16 +138,19 @@ Se deseja configurar uma instalação de alta disponibilidade, há [Helm Charts] Implante o Dify na Plataforma Cloud com um único clique usando [terraform](https://www.terraform.io/) ##### Azure Global + - [Azure Terraform por @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform por @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### Usando AWS CDK para Implantação Implante o Dify na AWS usando [CDK](https://aws.amazon.com/cdk/) -##### AWS +##### AWS + - [AWS CDK por @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK por @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -232,10 +166,9 @@ Implante o Dify na Alibaba Cloud com um clique usando o [Alibaba Cloud Data Mana Implante o Dify no AKS com um clique usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) - ## Contribuindo -Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em eventos e conferências. > Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c). @@ -248,10 +181,10 @@ Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em ## Comunidade e contato -* [Discussões no GitHub](https://github.com/langgenius/dify/discussions). 
Melhor para: compartilhar feedback e fazer perguntas. -* [Problemas no GitHub](https://github.com/langgenius/dify/issues). Melhor para: relatar bugs encontrados no Dify.AI e propor novos recursos. Veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). -* [Discord](https://discord.gg/FngNHpbcY7). Melhor para: compartilhar suas aplicações e interagir com a comunidade. -* [X(Twitter)](https://twitter.com/dify_ai). Melhor para: compartilhar suas aplicações e interagir com a comunidade. +- [Discussões no GitHub](https://github.com/langgenius/dify/discussions). Melhor para: compartilhar feedback e fazer perguntas. +- [Problemas no GitHub](https://github.com/langgenius/dify/issues). Melhor para: relatar bugs encontrados no Dify.AI e propor novos recursos. Veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +- [Discord](https://discord.gg/FngNHpbcY7). Melhor para: compartilhar suas aplicações e interagir com a comunidade. +- [X(Twitter)](https://twitter.com/dify_ai). Melhor para: compartilhar suas aplicações e interagir com a comunidade. ## Histórico de estrelas diff --git a/README_SI.md b/README_SI.md index ac16df798b..c20dc3484f 100644 --- a/README_SI.md +++ b/README_SI.md @@ -50,14 +50,14 @@ README in বাংলা

- -Dify je odprtokodna platforma za razvoj aplikacij LLM. Njegov intuitivni vmesnik združuje agentski potek dela z umetno inteligenco, cevovod RAG, zmogljivosti agentov, upravljanje modelov, funkcije opazovanja in več, kar vam omogoča hiter prehod od prototipa do proizvodnje. +Dify je odprtokodna platforma za razvoj aplikacij LLM. Njegov intuitivni vmesnik združuje agentski potek dela z umetno inteligenco, cevovod RAG, zmogljivosti agentov, upravljanje modelov, funkcije opazovanja in več, kar vam omogoča hiter prehod od prototipa do proizvodnje. ## Hitri začetek + > Preden namestite Dify, se prepričajte, da vaša naprava izpolnjuje naslednje minimalne sistemske zahteve: -> ->- CPU >= 2 Core ->- RAM >= 4 GiB +> +> - CPU >= 2 Core +> - RAM >= 4 GiB
@@ -73,116 +73,48 @@ docker compose up -d Po zagonu lahko dostopate do nadzorne plošče Dify v brskalniku na [http://localhost/install](http://localhost/install) in začnete postopek inicializacije. #### Iskanje pomoči + Prosimo, glejte naša pogosta vprašanja [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs) če naletite na težave pri nastavitvi Dify. Če imate še vedno težave, se obrnite na [skupnost ali nas](#community--contact). > Če želite prispevati k Difyju ali narediti dodaten razvoj, glejte naš vodnik za [uvajanje iz izvorne kode](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code) ## Ključne značilnosti -**1. Potek dela**: - Zgradite in preizkusite zmogljive poteke dela AI na vizualnem platnu, pri čemer izkoristite vse naslednje funkcije in več. -**2. Celovita podpora za modele**: - Brezhibna integracija s stotinami lastniških/odprtokodnih LLM-jev ducatov ponudnikov sklepanja in samostojnih rešitev, ki pokrivajo GPT, Mistral, Llama3 in vse modele, združljive z API-jem OpenAI. Celoten seznam podprtih ponudnikov modelov najdete [tukaj](https://docs.dify.ai/getting-started/readme/model-providers). +**1. Potek dela**: +Zgradite in preizkusite zmogljive poteke dela AI na vizualnem platnu, pri čemer izkoristite vse naslednje funkcije in več. + +**2. Celovita podpora za modele**: +Brezhibna integracija s stotinami lastniških/odprtokodnih LLM-jev ducatov ponudnikov sklepanja in samostojnih rešitev, ki pokrivajo GPT, Mistral, Llama3 in vse modele, združljive z API-jem OpenAI. Celoten seznam podprtih ponudnikov modelov najdete [tukaj](https://docs.dify.ai/getting-started/readme/model-providers). ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. Prompt IDE**: +intuitivni vmesnik za ustvarjanje pozivov, primerjavo zmogljivosti modela in dodajanje dodatnih funkcij, kot je pretvorba besedila v govor, aplikaciji, ki temelji na klepetu. -**3. 
Prompt IDE**: - intuitivni vmesnik za ustvarjanje pozivov, primerjavo zmogljivosti modela in dodajanje dodatnih funkcij, kot je pretvorba besedila v govor, aplikaciji, ki temelji na klepetu. +**4. RAG Pipeline**: +E Obsežne zmogljivosti RAG, ki pokrivajo vse od vnosa dokumenta do priklica, s podporo za ekstrakcijo besedila iz datotek PDF, PPT in drugih običajnih formatov dokumentov. -**4. RAG Pipeline**: - E Obsežne zmogljivosti RAG, ki pokrivajo vse od vnosa dokumenta do priklica, s podporo za ekstrakcijo besedila iz datotek PDF, PPT in drugih običajnih formatov dokumentov. +**5. Agent capabilities**: +definirate lahko agente, ki temeljijo na klicanju funkcij LLM ali ReAct, in dodate vnaprej izdelana orodja ali orodja po meri za agenta. Dify ponuja več kot 50 vgrajenih orodij za agente AI, kot so Google Search, DALL·E, Stable Diffusion in WolframAlpha. -**5. Agent capabilities**: - definirate lahko agente, ki temeljijo na klicanju funkcij LLM ali ReAct, in dodate vnaprej izdelana orodja ali orodja po meri za agenta. Dify ponuja več kot 50 vgrajenih orodij za agente AI, kot so Google Search, DALL·E, Stable Diffusion in WolframAlpha. +**6. LLMOps**: +Spremljajte in analizirajte dnevnike aplikacij in učinkovitost skozi čas. Pozive, nabore podatkov in modele lahko nenehno izboljšujete na podlagi proizvodnih podatkov in opomb. -**6. LLMOps**: - Spremljajte in analizirajte dnevnike aplikacij in učinkovitost skozi čas. Pozive, nabore podatkov in modele lahko nenehno izboljšujete na podlagi proizvodnih podatkov in opomb. - -**7. Backend-as-a-Service**: - AVse ponudbe Difyja so opremljene z ustreznimi API-ji, tako da lahko Dify brez težav integrirate v svojo poslovno logiko. - -## Primerjava Funkcij - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FunkcijaDify.AILangChainFlowiseOpenAI Assistants API
Programski pristopAPI + usmerjeno v aplikacijePython kodaUsmerjeno v aplikacijeUsmerjeno v API
Podprti LLM-jiBogata izbiraBogata izbiraBogata izbiraSamo OpenAI
RAG pogon
Agent
Potek dela
Spremljanje
Funkcija za podjetja (SSO/nadzor dostopa)
Lokalna namestitev
+**7. Backend-as-a-Service**: +Vse ponudbe Difyja so opremljene z ustreznimi API-ji, tako da lahko Dify brez težav integrirate v svojo poslovno logiko. ## Uporaba Dify - **Cloud
** -Gostimo storitev Dify Cloud za vsakogar, ki jo lahko preizkusite brez nastavitev. Zagotavlja vse zmožnosti različice za samostojno namestitev in vključuje 200 brezplačnih klicev GPT-4 v načrtu peskovnika. + Gostimo storitev Dify Cloud za vsakogar, ki jo lahko preizkusite brez nastavitev. Zagotavlja vse zmožnosti različice za samostojno namestitev in vključuje 200 brezplačnih klicev GPT-4 v načrtu peskovnika. - **Self-hosting Dify Community Edition
** -Hitro zaženite Dify v svojem okolju s tem [začetnim vodnikom](#quick-start) . Za dodatne reference in podrobnejša navodila uporabite našo [dokumentacijo](https://docs.dify.ai) . - + Hitro zaženite Dify v svojem okolju s tem [začetnim vodnikom](#hitri-začetek). Za dodatne reference in podrobnejša navodila uporabite našo [dokumentacijo](https://docs.dify.ai). - **Dify za podjetja/organizacije
** -Ponujamo dodatne funkcije, osredotočene na podjetja. Zabeležite svoja vprašanja prek tega klepetalnega robota ali nam pošljite e-pošto, da se pogovorimo o potrebah podjetja.
- > Za novoustanovljena podjetja in mala podjetja, ki uporabljajo AWS, si oglejte Dify Premium na AWS Marketplace in ga z enim klikom uvedite v svoj AWS VPC. To je cenovno ugodna ponudba AMI z možnostjo ustvarjanja aplikacij z logotipom in blagovno znamko po meri. + Ponujamo dodatne funkcije, osredotočene na podjetja. Zabeležite svoja vprašanja prek tega klepetalnega robota ali nam pošljite e-pošto, da se pogovorimo o potrebah podjetja.
+ > Za novoustanovljena podjetja in mala podjetja, ki uporabljajo AWS, si oglejte Dify Premium na AWS Marketplace in ga z enim klikom uvedite v svoj AWS VPC. To je cenovno ugodna ponudba AMI z možnostjo ustvarjanja aplikacij z logotipom in blagovno znamko po meri. ## Staying ahead @@ -190,7 +122,6 @@ Star Dify on GitHub and be instantly notified of new releases. ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - ## Napredne nastavitve Če morate prilagoditi konfiguracijo, si oglejte komentarje v naši datoteki .env.example in posodobite ustrezne vrednosti v svoji .env datoteki. Poleg tega boste morda morali prilagoditi docker-compose.yamlsamo datoteko, na primer spremeniti različice slike, preslikave vrat ali namestitve nosilca, glede na vaše specifično okolje in zahteve za uvajanje. Po kakršnih koli spremembah ponovno zaženite docker-compose up -d. Celoten seznam razpoložljivih spremenljivk okolja najdete tukaj . @@ -208,16 +139,19 @@ Star Dify on GitHub and be instantly notified of new releases. 
namestite Dify v Cloud Platform z enim klikom z uporabo [terraform](https://www.terraform.io/) ##### Azure Global + - [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### Uporaba AWS CDK za uvajanje Uvedite Dify v AWS z uporabo [CDK](https://aws.amazon.com/cdk/) -##### AWS +##### AWS + - [AWS CDK by @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK by @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -233,21 +167,18 @@ Z enim klikom namestite Dify na Alibaba Cloud z [Alibaba Cloud Data Management]( Z enim klikom namestite Dify v AKS z uporabo [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) - ## Prispevam -Za tiste, ki bi radi prispevali kodo, si oglejte naš vodnik za prispevke . Hkrati vas prosimo, da podprete Dify tako, da ga delite na družbenih medijih ter na dogodkih in konferencah. - - +Za tiste, ki bi radi prispevali kodo, si oglejte naš vodnik za prispevke . Hkrati vas prosimo, da podprete Dify tako, da ga delite na družbenih medijih ter na dogodkih in konferencah. > Iščemo sodelavce za pomoč pri prevajanju Difyja v jezike, ki niso mandarinščina ali angleščina. Če želite pomagati, si oglejte i18n README za več informacij in nam pustite komentar v global-userskanalu našega strežnika skupnosti Discord . ## Skupnost in stik -* [GitHub Discussion](https://github.com/langgenius/dify/discussions). Najboljše za: izmenjavo povratnih informacij in postavljanje vprašanj. -* [GitHub Issues](https://github.com/langgenius/dify/issues). Najboljše za: hrošče, na katere naletite pri uporabi Dify.AI, in predloge funkcij. Oglejte si naš [vodnik za prispevke](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). -* [Discord](https://discord.gg/FngNHpbcY7). 
Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. -* [X(Twitter)](https://twitter.com/dify_ai). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. +- [GitHub Discussion](https://github.com/langgenius/dify/discussions). Najboljše za: izmenjavo povratnih informacij in postavljanje vprašanj. +- [GitHub Issues](https://github.com/langgenius/dify/issues). Najboljše za: hrošče, na katere naletite pri uporabi Dify.AI, in predloge funkcij. Oglejte si naš [vodnik za prispevke](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). +- [Discord](https://discord.gg/FngNHpbcY7). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. +- [X(Twitter)](https://twitter.com/dify_ai). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. **Contributors** @@ -259,7 +190,6 @@ Za tiste, ki bi radi prispevali kodo, si oglejte naš vodnik za prispevke . Hkra [![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) - ## Varnostno razkritje Zaradi zaščite vaše zasebnosti se izogibajte objavljanju varnostnih vprašanj na GitHub. Namesto tega pošljite vprašanja na security@dify.ai in zagotovili vam bomo podrobnejši odgovor. diff --git a/README_TR.md b/README_TR.md index 8065ec908c..510b112e68 100644 --- a/README_TR.md +++ b/README_TR.md @@ -48,11 +48,10 @@ README in বাংলা

- Dify, açık kaynaklı bir LLM uygulama geliştirme platformudur. Sezgisel arayüzü, AI iş akışı, RAG pipeline'ı, ajan yetenekleri, model yönetimi, gözlemlenebilirlik özellikleri ve daha fazlasını birleştirerek, prototipten üretime hızlıca geçmenizi sağlar. İşte temel özelliklerin bir listesi:

-**1. Workflow**: +**1. Workflow**: Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edin, aşağıdaki tüm özellikleri ve daha fazlasını kullanarak. **2. Kapsamlı model desteği**: @@ -60,101 +59,33 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. Prompt IDE**: +Komut istemlerini oluşturmak, model performansını karşılaştırmak ve sohbet tabanlı uygulamalara metin-konuşma gibi ek özellikler eklemek için kullanıcı dostu bir arayüz. -**3. Prompt IDE**: - Komut istemlerini oluşturmak, model performansını karşılaştırmak ve sohbet tabanlı uygulamalara metin-konuşma gibi ek özellikler eklemek için kullanıcı dostu bir arayüz. +**4. RAG Pipeline**: +Belge alımından bilgi çekmeye kadar geniş kapsamlı RAG yetenekleri. PDF'ler, PPT'ler ve diğer yaygın belge formatlarından metin çıkarma için hazır destek sunar. -**4. RAG Pipeline**: - Belge alımından bilgi çekmeye kadar geniş kapsamlı RAG yetenekleri. PDF'ler, PPT'ler ve diğer yaygın belge formatlarından metin çıkarma için hazır destek sunar. +**5. Ajan yetenekleri**: +LLM Fonksiyon Çağırma veya ReAct'a dayalı ajanlar tanımlayabilir ve bu ajanlara önceden hazırlanmış veya özel araçlar ekleyebilirsiniz. Dify, AI ajanları için Google Arama, DALL·E, Stable Diffusion ve WolframAlpha gibi 50'den fazla yerleşik araç sağlar. -**5. Ajan yetenekleri**: - LLM Fonksiyon Çağırma veya ReAct'a dayalı ajanlar tanımlayabilir ve bu ajanlara önceden hazırlanmış veya özel araçlar ekleyebilirsiniz. Dify, AI ajanları için Google Arama, DALL·E, Stable Diffusion ve WolframAlpha gibi 50'den fazla yerleşik araç sağlar. +**6. LLMOps**: +Uygulama loglarını ve performans metriklerini zaman içinde izleme ve analiz etme imkanı. Üretim ortamından elde edilen verilere ve kullanıcı geri bildirimlerine dayanarak, prompt'ları, veri setlerini ve modelleri sürekli olarak optimize edebilirsiniz. 
Bu sayede, AI uygulamanızın performansını ve doğruluğunu sürekli olarak artırabilirsiniz. -**6. LLMOps**: - Uygulama loglarını ve performans metriklerini zaman içinde izleme ve analiz etme imkanı. Üretim ortamından elde edilen verilere ve kullanıcı geri bildirimlerine dayanarak, prompt'ları, veri setlerini ve modelleri sürekli olarak optimize edebilirsiniz. Bu sayede, AI uygulamanızın performansını ve doğruluğunu sürekli olarak artırabilirsiniz. - -**7. Hizmet Olarak Backend**: - Dify'ın tüm özellikleri ilgili API'lerle birlikte gelir, böylece Dify'ı kendi iş mantığınıza kolayca entegre edebilirsiniz. - - -## Özellik karşılaştırması - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ÖzellikDify.AILangChainFlowiseOpenAI Assistants API
Programlama YaklaşımıAPI + Uygulama odaklıPython KoduUygulama odaklıAPI odaklı
Desteklenen LLM'lerZengin ÇeşitlilikZengin ÇeşitlilikZengin ÇeşitlilikYalnızca OpenAI
RAG Motoru
Ajan
İş Akışı
Gözlemlenebilirlik
Kurumsal Özellikler (SSO/Erişim kontrolü)
Yerel Dağıtım
+**7. Hizmet Olarak Backend**: +Dify'ın tüm özellikleri ilgili API'lerle birlikte gelir, böylece Dify'ı kendi iş mantığınıza kolayca entegre edebilirsiniz. ## Dify'ı Kullanma - **Cloud
** -Herkesin sıfır kurulumla denemesi için bir [Dify Cloud](https://dify.ai) hizmeti sunuyoruz. Bu hizmet, kendi kendine dağıtılan versiyonun tüm yeteneklerini sağlar ve sandbox planında 200 ücretsiz GPT-4 çağrısı içerir. + Herkesin sıfır kurulumla denemesi için bir [Dify Cloud](https://dify.ai) hizmeti sunuyoruz. Bu hizmet, kendi kendine dağıtılan versiyonun tüm yeteneklerini sağlar ve sandbox planında 200 ücretsiz GPT-4 çağrısı içerir. - **Dify Topluluk Sürümünü Kendi Sunucunuzda Barındırma
** -Bu [başlangıç kılavuzu](#quick-start) ile Dify'ı kendi ortamınızda hızlıca çalıştırın. -Daha fazla referans ve detaylı talimatlar için [dokümantasyonumuzu](https://docs.dify.ai) kullanın. + Bu [başlangıç kılavuzu](#hızlı-başlangıç) ile Dify'ı kendi ortamınızda hızlıca çalıştırın. + Daha fazla referans ve detaylı talimatlar için [dokümantasyonumuzu](https://docs.dify.ai) kullanın. - **Kurumlar / organizasyonlar için Dify
** -Ek kurumsal odaklı özellikler sunuyoruz. Kurumsal ihtiyaçları görüşmek için [bize bir e-posta gönderin](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry).
+ Ek kurumsal odaklı özellikler sunuyoruz. Kurumsal ihtiyaçları görüşmek için [bize bir e-posta gönderin](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry).
+ > AWS kullanan startuplar ve küçük işletmeler için, [AWS Marketplace'deki Dify Premium'a](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) göz atın ve tek tıklamayla kendi AWS VPC'nize dağıtın. Bu, özel logo ve marka ile uygulamalar oluşturma seçeneğine sahip uygun fiyatlı bir AMI teklifdir. ## Güncel Kalma @@ -163,13 +94,12 @@ GitHub'da Dify'a yıldız verin ve yeni sürümlerden anında haberdar olun. ![bizi-yıldızlayın](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## Hızlı başlangıç + > Dify'ı kurmadan önce, makinenizin aşağıdaki minimum sistem gereksinimlerini karşıladığından emin olun: -> ->- CPU >= 2 Çekirdek ->- RAM >= 4GB +> +> - CPU >= 2 Çekirdek +> - RAM >= 4GB
Dify sunucusunu başlatmanın en kolay yolu, [docker-compose.yml](docker/docker-compose.yaml) dosyamızı çalıştırmaktır. Kurulum komutunu çalıştırmadan önce, makinenizde [Docker](https://docs.docker.com/get-docker/) ve [Docker Compose](https://docs.docker.com/compose/install/)'un kurulu olduğundan emin olun: @@ -201,16 +131,19 @@ Yüksek kullanılabilirliğe sahip bir kurulum yapılandırmak isterseniz, Dify' Dify'ı bulut platformuna tek tıklamayla dağıtın [terraform](https://www.terraform.io/) kullanarak ##### Azure Global + - [Azure Terraform tarafından @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform tarafından @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### AWS CDK ile Dağıtım [CDK](https://aws.amazon.com/cdk/) kullanarak Dify'ı AWS'ye dağıtın -##### AWS +##### AWS + - [AWS CDK tarafından @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK tarafından @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) @@ -226,7 +159,6 @@ Dify'ı bulut platformuna tek tıklamayla dağıtın [terraform](https://www.ter [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) kullanarak Dify'ı tek tıkla AKS'ye dağıtın - ## Katkıda Bulunma Kod katkısında bulunmak isteyenler için [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakabilirsiniz. @@ -242,10 +174,10 @@ Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda p ## Topluluk & iletişim -* [GitHub Tartışmaları](https://github.com/langgenius/dify/discussions). En uygun: geri bildirim paylaşmak ve soru sormak için. -* [GitHub Sorunları](https://github.com/langgenius/dify/issues). En uygun: Dify.AI kullanırken karşılaştığınız hatalar ve özellik önerileri için. [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakın. -* [Discord](https://discord.gg/FngNHpbcY7). 
En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için. -* [X(Twitter)](https://twitter.com/dify_ai). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için. +- [GitHub Tartışmaları](https://github.com/langgenius/dify/discussions). En uygun: geri bildirim paylaşmak ve soru sormak için. +- [GitHub Sorunları](https://github.com/langgenius/dify/issues). En uygun: Dify.AI kullanırken karşılaştığınız hatalar ve özellik önerileri için. [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakın. +- [Discord](https://discord.gg/FngNHpbcY7). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için. +- [X(Twitter)](https://twitter.com/dify_ai). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için. ## Star history diff --git a/README_TW.md b/README_TW.md index c36027183c..35a01fa16a 100644 --- a/README_TW.md +++ b/README_TW.md @@ -106,85 +106,18 @@ docker compose up -d **7. 後端即服務**: Dify 的所有功能都提供相應的 API,因此您可以輕鬆地將 Dify 整合到您自己的業務邏輯中。 -## 功能比較 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
功能Dify.AILangChainFlowiseOpenAI Assistants API
程式設計方法API + 應用導向Python 代碼應用導向API 導向
支援的 LLM 模型豐富多樣豐富多樣豐富多樣僅限 OpenAI
RAG 引擎
代理功能
工作流程
可觀察性
企業級功能 (SSO/存取控制)
本地部署
- ## 使用 Dify - **雲端服務
** 我們提供 [Dify Cloud](https://dify.ai) 服務,任何人都可以零配置嘗試。它提供與自部署版本相同的所有功能,並在沙盒計劃中包含 200 次免費 GPT-4 調用。 - **自託管 Dify 社區版
** - 使用這份[快速指南](#快速開始)在您的環境中快速運行 Dify。 + 使用這份[快速指南](#%E5%BF%AB%E9%80%9F%E9%96%8B%E5%A7%8B)在您的環境中快速運行 Dify。 使用我們的[文檔](https://docs.dify.ai)獲取更多參考和深入指導。 - **企業/組織版 Dify
** - 我們提供額外的企業中心功能。[通過這個聊天機器人記錄您的問題](https://udify.app/chat/22L1zSxg6yW1cWQg)或[發送電子郵件給我們](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry)討論企業需求。
+ 我們提供額外的企業中心功能。[通過這個聊天機器人記錄您的問題](https://udify.app/chat/22L1zSxg6yW1cWQg)或[發送電子郵件給我們](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry)討論企業需求。
+ > 對於使用 AWS 的初創企業和小型企業,請查看 [AWS Marketplace 上的 Dify Premium](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6),並一鍵部署到您自己的 AWS VPC。這是一個經濟實惠的 AMI 產品,可選擇使用自定義徽標和品牌創建應用。 ## 保持領先 @@ -238,7 +171,6 @@ Dify 的所有功能都提供相應的 API,因此您可以輕鬆地將 Dify 使用[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) 將 Dify 一鍵部署到 AKS - ## 貢獻 對於想要貢獻程式碼的開發者,請參閱我們的[貢獻指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。 diff --git a/README_VI.md b/README_VI.md index 958a70114a..f161b20f9d 100644 --- a/README_VI.md +++ b/README_VI.md @@ -48,115 +48,45 @@ README in বাংলা

- Dify là một nền tảng phát triển ứng dụng LLM mã nguồn mở. Giao diện trực quan kết hợp quy trình làm việc AI, mô hình RAG, khả năng tác nhân, quản lý mô hình, tính năng quan sát và hơn thế nữa, cho phép bạn nhanh chóng chuyển từ nguyên mẫu sang sản phẩm. Đây là danh sách các tính năng cốt lõi:

-**1. Quy trình làm việc**: - Xây dựng và kiểm tra các quy trình làm việc AI mạnh mẽ trên một canvas trực quan, tận dụng tất cả các tính năng sau đây và hơn thế nữa. +**1. Quy trình làm việc**: +Xây dựng và kiểm tra các quy trình làm việc AI mạnh mẽ trên một canvas trực quan, tận dụng tất cả các tính năng sau đây và hơn thế nữa. -**2. Hỗ trợ mô hình toàn diện**: - Tích hợp liền mạch với hàng trăm mô hình LLM độc quyền / mã nguồn mở từ hàng chục nhà cung cấp suy luận và giải pháp tự lưu trữ, bao gồm GPT, Mistral, Llama3, và bất kỳ mô hình tương thích API OpenAI nào. Danh sách đầy đủ các nhà cung cấp mô hình được hỗ trợ có thể được tìm thấy [tại đây](https://docs.dify.ai/getting-started/readme/model-providers). +**2. Hỗ trợ mô hình toàn diện**: +Tích hợp liền mạch với hàng trăm mô hình LLM độc quyền / mã nguồn mở từ hàng chục nhà cung cấp suy luận và giải pháp tự lưu trữ, bao gồm GPT, Mistral, Llama3, và bất kỳ mô hình tương thích API OpenAI nào. Danh sách đầy đủ các nhà cung cấp mô hình được hỗ trợ có thể được tìm thấy [tại đây](https://docs.dify.ai/getting-started/readme/model-providers). ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) +**3. IDE Prompt**: +Giao diện trực quan để tạo prompt, so sánh hiệu suất mô hình và thêm các tính năng bổ sung như chuyển văn bản thành giọng nói cho một ứng dụng dựa trên trò chuyện. -**3. IDE Prompt**: - Giao diện trực quan để tạo prompt, so sánh hiệu suất mô hình và thêm các tính năng bổ sung như chuyển văn bản thành giọng nói cho một ứng dụng dựa trên trò chuyện. +**4. Mô hình RAG**: +Khả năng RAG mở rộng bao gồm mọi thứ từ nhập tài liệu đến truy xuất, với hỗ trợ sẵn có cho việc trích xuất văn bản từ PDF, PPT và các định dạng tài liệu phổ biến khác. -**4. Mô hình RAG**: - Khả năng RAG mở rộng bao gồm mọi thứ từ nhập tài liệu đến truy xuất, với hỗ trợ sẵn có cho việc trích xuất văn bản từ PDF, PPT và các định dạng tài liệu phổ biến khác. +**5. 
Khả năng tác nhân**: +Bạn có thể định nghĩa các tác nhân dựa trên LLM Function Calling hoặc ReAct, và thêm các công cụ được xây dựng sẵn hoặc tùy chỉnh cho tác nhân. Dify cung cấp hơn 50 công cụ tích hợp sẵn cho các tác nhân AI, như Google Search, DALL·E, Stable Diffusion và WolframAlpha. -**5. Khả năng tác nhân**: - Bạn có thể định nghĩa các tác nhân dựa trên LLM Function Calling hoặc ReAct, và thêm các công cụ được xây dựng sẵn hoặc tùy chỉnh cho tác nhân. Dify cung cấp hơn 50 công cụ tích hợp sẵn cho các tác nhân AI, như Google Search, DALL·E, Stable Diffusion và WolframAlpha. +**6. LLMOps**: +Giám sát và phân tích nhật ký và hiệu suất ứng dụng theo thời gian. Bạn có thể liên tục cải thiện prompt, bộ dữ liệu và mô hình dựa trên dữ liệu sản xuất và chú thích. -**6. LLMOps**: - Giám sát và phân tích nhật ký và hiệu suất ứng dụng theo thời gian. Bạn có thể liên tục cải thiện prompt, bộ dữ liệu và mô hình dựa trên dữ liệu sản xuất và chú thích. - -**7. Backend-as-a-Service**: - Tất cả các dịch vụ của Dify đều đi kèm với các API tương ứng, vì vậy bạn có thể dễ dàng tích hợp Dify vào logic kinh doanh của riêng mình. - - -## So sánh tính năng - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Tính năngDify.AILangChainFlowiseOpenAI Assistants API
Phương pháp lập trìnhHướng API + Ứng dụngMã PythonHướng ứng dụngHướng API
LLMs được hỗ trợĐa dạng phong phúĐa dạng phong phúĐa dạng phong phúChỉ OpenAI
RAG Engine
Agent
Quy trình làm việc
Khả năng quan sát
Tính năng doanh nghiệp (SSO/Kiểm soát truy cập)
Triển khai cục bộ
+**7. Backend-as-a-Service**: +Tất cả các dịch vụ của Dify đều đi kèm với các API tương ứng, vì vậy bạn có thể dễ dàng tích hợp Dify vào logic kinh doanh của riêng mình. ## Sử dụng Dify - **Cloud
** -Chúng tôi lưu trữ dịch vụ [Dify Cloud](https://dify.ai) cho bất kỳ ai muốn thử mà không cần cài đặt. Nó cung cấp tất cả các khả năng của phiên bản tự triển khai và bao gồm 200 lượt gọi GPT-4 miễn phí trong gói sandbox. + Chúng tôi lưu trữ dịch vụ [Dify Cloud](https://dify.ai) cho bất kỳ ai muốn thử mà không cần cài đặt. Nó cung cấp tất cả các khả năng của phiên bản tự triển khai và bao gồm 200 lượt gọi GPT-4 miễn phí trong gói sandbox. - **Tự triển khai Dify Community Edition
** -Nhanh chóng chạy Dify trong môi trường của bạn với [hướng dẫn bắt đầu](#quick-start) này. -Sử dụng [tài liệu](https://docs.dify.ai) của chúng tôi để tham khảo thêm và nhận hướng dẫn chi tiết hơn. + Nhanh chóng chạy Dify trong môi trường của bạn với [hướng dẫn bắt đầu](#bắt-đầu-nhanh) này. + Sử dụng [tài liệu](https://docs.dify.ai) của chúng tôi để tham khảo thêm và nhận hướng dẫn chi tiết hơn. - **Dify cho doanh nghiệp / tổ chức
** -Chúng tôi cung cấp các tính năng bổ sung tập trung vào doanh nghiệp. [Ghi lại câu hỏi của bạn cho chúng tôi thông qua chatbot này](https://udify.app/chat/22L1zSxg6yW1cWQg) hoặc [gửi email cho chúng tôi](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) để thảo luận về nhu cầu doanh nghiệp.
- > Đối với các công ty khởi nghiệp và doanh nghiệp nhỏ sử dụng AWS, hãy xem [Dify Premium trên AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) và triển khai nó vào AWS VPC của riêng bạn chỉ với một cú nhấp chuột. Đây là một AMI giá cả phải chăng với tùy chọn tạo ứng dụng với logo và thương hiệu tùy chỉnh. + Chúng tôi cung cấp các tính năng bổ sung tập trung vào doanh nghiệp. [Ghi lại câu hỏi của bạn cho chúng tôi thông qua chatbot này](https://udify.app/chat/22L1zSxg6yW1cWQg) hoặc [gửi email cho chúng tôi](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) để thảo luận về nhu cầu doanh nghiệp.
+ > Đối với các công ty khởi nghiệp và doanh nghiệp nhỏ sử dụng AWS, hãy xem [Dify Premium trên AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) và triển khai nó vào AWS VPC của riêng bạn chỉ với một cú nhấp chuột. Đây là một AMI giá cả phải chăng với tùy chọn tạo ứng dụng với logo và thương hiệu tùy chỉnh. ## Luôn cập nhật @@ -164,13 +94,12 @@ Yêu thích Dify trên GitHub và được thông báo ngay lập tức về cá ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) - - ## Bắt đầu nhanh + > Trước khi cài đặt Dify, hãy đảm bảo máy của bạn đáp ứng các yêu cầu hệ thống tối thiểu sau: -> ->- CPU >= 2 Core ->- RAM >= 4GB +> +> - CPU >= 2 Core +> - RAM >= 4GB
@@ -203,20 +132,22 @@ Nếu bạn muốn cấu hình một cài đặt có độ sẵn sàng cao, có Triển khai Dify lên nền tảng đám mây với một cú nhấp chuột bằng cách sử dụng [terraform](https://www.terraform.io/) ##### Azure Global + - [Azure Terraform bởi @nikawang](https://github.com/nikawang/dify-azure-terraform) ##### Google Cloud + - [Google Cloud Terraform bởi @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) #### Sử dụng AWS CDK để Triển khai Triển khai Dify trên AWS bằng [CDK](https://aws.amazon.com/cdk/) -##### AWS +##### AWS + - [AWS CDK bởi @KevinZhao (EKS based)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK bởi @tmokmss (ECS based)](https://github.com/aws-samples/dify-self-hosted-on-aws) - #### Alibaba Cloud [Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) @@ -229,13 +160,11 @@ Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng Triển khai Dify lên AKS chỉ với một cú nhấp chuột bằng [Azure Devops Pipeline Helm Chart bởi @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) - ## Đóng góp -Đối với những người muốn đóng góp mã, xem [Hướng dẫn Đóng góp](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) của chúng tôi. +Đối với những người muốn đóng góp mã, xem [Hướng dẫn Đóng góp](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) của chúng tôi. Đồng thời, vui lòng xem xét hỗ trợ Dify bằng cách chia sẻ nó trên mạng xã hội và tại các sự kiện và hội nghị. - > Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi. 
**Người đóng góp** @@ -246,10 +175,10 @@ Triển khai Dify lên AKS chỉ với một cú nhấp chuột bằng [Azure De ## Cộng đồng & liên hệ -* [Thảo luận GitHub](https://github.com/langgenius/dify/discussions). Tốt nhất cho: chia sẻ phản hồi và đặt câu hỏi. -* [Vấn đề GitHub](https://github.com/langgenius/dify/issues). Tốt nhất cho: lỗi bạn gặp phải khi sử dụng Dify.AI và đề xuất tính năng. Xem [Hướng dẫn Đóng góp](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) của chúng tôi. -* [Discord](https://discord.gg/FngNHpbcY7). Tốt nhất cho: chia sẻ ứng dụng của bạn và giao lưu với cộng đồng. -* [X(Twitter)](https://twitter.com/dify_ai). Tốt nhất cho: chia sẻ ứng dụng của bạn và giao lưu với cộng đồng. +- [Thảo luận GitHub](https://github.com/langgenius/dify/discussions). Tốt nhất cho: chia sẻ phản hồi và đặt câu hỏi. +- [Vấn đề GitHub](https://github.com/langgenius/dify/issues). Tốt nhất cho: lỗi bạn gặp phải khi sử dụng Dify.AI và đề xuất tính năng. Xem [Hướng dẫn Đóng góp](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) của chúng tôi. +- [Discord](https://discord.gg/FngNHpbcY7). Tốt nhất cho: chia sẻ ứng dụng của bạn và giao lưu với cộng đồng. +- [X(Twitter)](https://twitter.com/dify_ai). Tốt nhất cho: chia sẻ ứng dụng của bạn và giao lưu với cộng đồng. 
## Lịch sử Yêu thích diff --git a/api/.env.example b/api/.env.example index 3c30872422..3052dbfe2b 100644 --- a/api/.env.example +++ b/api/.env.example @@ -478,6 +478,13 @@ API_WORKFLOW_NODE_EXECUTION_REPOSITORY=repositories.sqlalchemy_api_workflow_node # API workflow run repository implementation API_WORKFLOW_RUN_REPOSITORY=repositories.sqlalchemy_api_workflow_run_repository.DifyAPISQLAlchemyWorkflowRunRepository +# Workflow log cleanup configuration +# Enable automatic cleanup of workflow run logs to manage database size +WORKFLOW_LOG_CLEANUP_ENABLED=true +# Number of days to retain workflow run logs (default: 30 days) +WORKFLOW_LOG_RETENTION_DAYS=30 +# Batch size for workflow log cleanup operations (default: 100) +WORKFLOW_LOG_CLEANUP_BATCH_SIZE=100 # App configuration APP_MAX_EXECUTION_TIME=1200 diff --git a/api/README.md b/api/README.md index b5298edf92..8309a0e69b 100644 --- a/api/README.md +++ b/api/README.md @@ -3,7 +3,7 @@ ## Usage > [!IMPORTANT] -> +> > In the v1.3.0 release, `poetry` has been replaced with > [`uv`](https://docs.astral.sh/uv/) as the package manager > for Dify API backend service. @@ -20,25 +20,29 @@ cd ../api ``` -2. Copy `.env.example` to `.env` +1. Copy `.env.example` to `.env` ```cli - cp .env.example .env + cp .env.example .env ``` -3. Generate a `SECRET_KEY` in the `.env` file. + +1. Generate a `SECRET_KEY` in the `.env` file. bash for Linux + ```bash for Linux sed -i "/^SECRET_KEY=/c\SECRET_KEY=$(openssl rand -base64 42)" .env ``` + bash for Mac + ```bash for Mac secret_key=$(openssl rand -base64 42) sed -i '' "/^SECRET_KEY=/c\\ SECRET_KEY=${secret_key}" .env ``` -4. Create environment. +1. Create environment. Dify API service uses [UV](https://docs.astral.sh/uv/) to manage dependencies. First, you need to add the uv package manager, if you don't have it already. @@ -49,13 +53,13 @@ brew install uv ``` -5. Install dependencies +1. Install dependencies ```bash uv sync --dev ``` -6. Run migrate +1. 
Run migrate Before the first launch, migrate the database to the latest version. @@ -63,24 +67,27 @@ uv run flask db upgrade ``` -7. Start backend +1. Start backend ```bash uv run flask run --host 0.0.0.0 --port=5001 --debug ``` -8. Start Dify [web](../web) service. -9. Setup your application by visiting `http://localhost:3000`. -10. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service. +1. Start Dify [web](../web) service. - ```bash - uv run celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage - ``` +1. Set up your application by visiting `http://localhost:3000`. - Addition, if you want to debug the celery scheduled tasks, you can use the following command in another terminal: - ```bash - uv run celery -A app.celery beat - ``` +1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service. + +```bash +uv run celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation +``` + +Additionally, if you want to debug the celery scheduled tasks, you can use the following command in another terminal: + +```bash +uv run celery -A app.celery beat +``` ## Testing @@ -90,9 +97,16 @@ uv sync --dev ``` -2. Run the tests locally with mocked system environment variables in `tool.pytest_env` section in `pyproject.toml` +1.
Run the tests locally with mocked system environment variables in `tool.pytest_env` section in `pyproject.toml`. For more details, see [CLAUDE.md](../CLAUDE.md) - ```bash - uv run -P api bash dev/pytest/pytest_all_tests.sh + ```cli + uv run --project api pytest # Run all tests + uv run --project api pytest tests/unit_tests/ # Unit tests only + uv run --project api pytest tests/integration_tests/ # Integration tests + + # Code quality + ./dev/reformat # Run all formatters and linters + uv run --project api ruff check --fix ./ # Fix linting issues + uv run --project api ruff format ./ # Format code + uv run --project api mypy . # Type checking ``` - diff --git a/api/app_factory.py b/api/app_factory.py index 032d6b17fc..8a0417dd72 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -5,6 +5,8 @@ from configs import dify_config from contexts.wrapper import RecyclableContextVar from dify_app import DifyApp +logger = logging.getLogger(__name__) + # ---------------------------- # Application Factory Function @@ -32,7 +34,7 @@ def create_app() -> DifyApp: initialize_extensions(app) end_time = time.perf_counter() if dify_config.DEBUG: - logging.info("Finished create_app (%s ms)", round((end_time - start_time) * 1000, 2)) + logger.info("Finished create_app (%s ms)", round((end_time - start_time) * 1000, 2)) return app @@ -93,14 +95,14 @@ def initialize_extensions(app: DifyApp): is_enabled = ext.is_enabled() if hasattr(ext, "is_enabled") else True if not is_enabled: if dify_config.DEBUG: - logging.info("Skipped %s", short_name) + logger.info("Skipped %s", short_name) continue start_time = time.perf_counter() ext.init_app(app) end_time = time.perf_counter() if dify_config.DEBUG: - logging.info("Loaded %s (%s ms)", short_name, round((end_time - start_time) * 1000, 2)) + logger.info("Loaded %s (%s ms)", short_name, round((end_time - start_time) * 1000, 2)) def create_migrations_app(): diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index
0b2f99aece..2bccc4b7a0 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -968,6 +968,14 @@ class AccountConfig(BaseSettings): ) +class WorkflowLogConfig(BaseSettings): + WORKFLOW_LOG_CLEANUP_ENABLED: bool = Field(default=True, description="Enable workflow run log cleanup") + WORKFLOW_LOG_RETENTION_DAYS: int = Field(default=30, description="Retention days for workflow run logs") + WORKFLOW_LOG_CLEANUP_BATCH_SIZE: int = Field( + default=100, description="Batch size for workflow run log cleanup operations" + ) + + class FeatureConfig( # place the configs in alphabet order AppExecutionConfig, @@ -1003,5 +1011,6 @@ class FeatureConfig( HostedServiceConfig, CeleryBeatConfig, CeleryScheduleTasksConfig, + WorkflowLogConfig, ): pass diff --git a/api/controllers/common/fields.py b/api/controllers/common/fields.py index 3466eea1f6..df9de825de 100644 --- a/api/controllers/common/fields.py +++ b/api/controllers/common/fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from libs.helper import AppIconUrlField @@ -10,6 +10,12 @@ parameters__system_parameters = { "workflow_file_upload_limit": fields.Integer, } + +def build_system_parameters_model(api_or_ns: Api | Namespace): + """Build the system parameters model for the API or Namespace.""" + return api_or_ns.model("SystemParameters", parameters__system_parameters) + + parameters_fields = { "opening_statement": fields.String, "suggested_questions": fields.Raw, @@ -25,6 +31,14 @@ parameters_fields = { "system_parameters": fields.Nested(parameters__system_parameters), } + +def build_parameters_model(api_or_ns: Api | Namespace): + """Build the parameters model for the API or Namespace.""" + copied_fields = parameters_fields.copy() + copied_fields["system_parameters"] = fields.Nested(build_system_parameters_model(api_or_ns)) + return api_or_ns.model("Parameters", copied_fields) + + site_fields = { "title": fields.String, "chat_color_theme": 
fields.String, @@ -41,3 +55,8 @@ site_fields = { "show_workflow_steps": fields.Boolean, "use_icon_as_answer_icon": fields.Boolean, } + + +def build_site_model(api_or_ns: Api | Namespace): + """Build the site model for the API or Namespace.""" + return api_or_ns.model("Site", site_fields) diff --git a/api/controllers/common/helpers.py b/api/controllers/common/helpers.py index 008f1f0f7a..6a5197635e 100644 --- a/api/controllers/common/helpers.py +++ b/api/controllers/common/helpers.py @@ -1,3 +1,4 @@ +import contextlib import mimetypes import os import platform @@ -65,10 +66,8 @@ def guess_file_info_from_response(response: httpx.Response): # Use python-magic to guess MIME type if still unknown or generic if mimetype == "application/octet-stream" and magic is not None: - try: + with contextlib.suppress(magic.MagicException): mimetype = magic.from_buffer(response.content[:1024], mime=True) - except magic.MagicException: - pass extension = os.path.splitext(filename)[1] diff --git a/api/controllers/console/admin.py b/api/controllers/console/admin.py index 8a55197fb6..7e5c28200a 100644 --- a/api/controllers/console/admin.py +++ b/api/controllers/console/admin.py @@ -1,7 +1,7 @@ from functools import wraps from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from sqlalchemy import select from sqlalchemy.orm import Session from werkzeug.exceptions import NotFound, Unauthorized diff --git a/api/controllers/console/apikey.py b/api/controllers/console/apikey.py index d7500c415c..401e88709a 100644 --- a/api/controllers/console/apikey.py +++ b/api/controllers/console/apikey.py @@ -1,8 +1,8 @@ -from typing import Any +from typing import Any, Optional -import flask_restful +import flask_restx from flask_login import current_user -from flask_restful import Resource, fields, marshal_with +from flask_restx import Resource, fields, marshal_with from sqlalchemy import select from sqlalchemy.orm import Session from 
werkzeug.exceptions import Forbidden @@ -40,7 +40,7 @@ def _get_resource(resource_id, tenant_id, resource_model): ).scalar_one_or_none() if resource is None: - flask_restful.abort(404, message=f"{resource_model.__name__} not found.") + flask_restx.abort(404, message=f"{resource_model.__name__} not found.") return resource @@ -49,7 +49,7 @@ class BaseApiKeyListResource(Resource): method_decorators = [account_initialization_required, login_required, setup_required] resource_type: str | None = None - resource_model: Any = None + resource_model: Optional[Any] = None resource_id_field: str | None = None token_prefix: str | None = None max_keys = 10 @@ -81,7 +81,7 @@ class BaseApiKeyListResource(Resource): ) if current_key_count >= self.max_keys: - flask_restful.abort( + flask_restx.abort( 400, message=f"Cannot create more than {self.max_keys} API keys for this resource type.", code="max_keys_exceeded", @@ -102,7 +102,7 @@ class BaseApiKeyResource(Resource): method_decorators = [account_initialization_required, login_required, setup_required] resource_type: str | None = None - resource_model: Any = None + resource_model: Optional[Any] = None resource_id_field: str | None = None def delete(self, resource_id, api_key_id): @@ -126,7 +126,7 @@ class BaseApiKeyResource(Resource): ) if key is None: - flask_restful.abort(404, message="API key not found") + flask_restx.abort(404, message="API key not found") db.session.query(ApiToken).where(ApiToken.id == api_key_id).delete() db.session.commit() diff --git a/api/controllers/console/app/advanced_prompt_template.py b/api/controllers/console/app/advanced_prompt_template.py index c228743fa5..c6cb6f6e3a 100644 --- a/api/controllers/console/app/advanced_prompt_template.py +++ b/api/controllers/console/app/advanced_prompt_template.py @@ -1,4 +1,4 @@ -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console import api from controllers.console.wraps import 
account_initialization_required, setup_required diff --git a/api/controllers/console/app/agent.py b/api/controllers/console/app/agent.py index d433415894..a964154207 100644 --- a/api/controllers/console/app/agent.py +++ b/api/controllers/console/app/agent.py @@ -1,4 +1,4 @@ -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console import api from controllers.console.app.wraps import get_app_model diff --git a/api/controllers/console/app/annotation.py b/api/controllers/console/app/annotation.py index 493a9a52e2..37d23ccd9f 100644 --- a/api/controllers/console/app/annotation.py +++ b/api/controllers/console/app/annotation.py @@ -1,6 +1,8 @@ +from typing import Literal + from flask import request from flask_login import current_user -from flask_restful import Resource, marshal, marshal_with, reqparse +from flask_restx import Resource, marshal, marshal_with, reqparse from werkzeug.exceptions import Forbidden from controllers.common.errors import NoFileUploadedError, TooManyFilesError @@ -24,7 +26,7 @@ class AnnotationReplyActionApi(Resource): @login_required @account_initialization_required @cloud_edition_billing_resource_check("annotation") - def post(self, app_id, action): + def post(self, app_id, action: Literal["enable", "disable"]): if not current_user.is_editor: raise Forbidden() @@ -38,8 +40,6 @@ class AnnotationReplyActionApi(Resource): result = AppAnnotationService.enable_app_annotation(args, app_id) elif action == "disable": result = AppAnnotationService.disable_app_annotation(app_id) - else: - raise ValueError("Unsupported annotation reply action") return result, 200 diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py index 1cc13d669c..a6eb86122d 100644 --- a/api/controllers/console/app/app.py +++ b/api/controllers/console/app/app.py @@ -2,7 +2,7 @@ import uuid from typing import cast from flask_login import current_user -from flask_restful import Resource, inputs, 
marshal, marshal_with, reqparse +from flask_restx import Resource, inputs, marshal, marshal_with, reqparse from sqlalchemy import select from sqlalchemy.orm import Session from werkzeug.exceptions import BadRequest, Forbidden, abort diff --git a/api/controllers/console/app/app_import.py b/api/controllers/console/app/app_import.py index 9ffb94e9f9..aee93a8814 100644 --- a/api/controllers/console/app/app_import.py +++ b/api/controllers/console/app/app_import.py @@ -1,7 +1,7 @@ from typing import cast from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from sqlalchemy.orm import Session from werkzeug.exceptions import Forbidden diff --git a/api/controllers/console/app/audio.py b/api/controllers/console/app/audio.py index 665cf1aede..ea1869a587 100644 --- a/api/controllers/console/app/audio.py +++ b/api/controllers/console/app/audio.py @@ -1,7 +1,7 @@ import logging from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import InternalServerError import services diff --git a/api/controllers/console/app/completion.py b/api/controllers/console/app/completion.py index ad94112f05..bd5e7d0924 100644 --- a/api/controllers/console/app/completion.py +++ b/api/controllers/console/app/completion.py @@ -2,7 +2,7 @@ import logging import flask_login from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import InternalServerError, NotFound import services diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py index 6ddae6fad5..06f0218771 100644 --- a/api/controllers/console/app/conversation.py +++ b/api/controllers/console/app/conversation.py @@ -2,8 +2,8 @@ from datetime import datetime import pytz # pip install pytz from flask_login import current_user -from 
flask_restful import Resource, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import Resource, marshal_with, reqparse +from flask_restx.inputs import int_range from sqlalchemy import func, or_ from sqlalchemy.orm import joinedload from werkzeug.exceptions import Forbidden, NotFound @@ -24,6 +24,8 @@ from libs.helper import DatetimeString from libs.login import login_required from models import Conversation, EndUser, Message, MessageAnnotation from models.model import AppMode +from services.conversation_service import ConversationService +from services.errors.conversation import ConversationNotExistsError class CompletionConversationApi(Resource): @@ -46,7 +48,9 @@ class CompletionConversationApi(Resource): parser.add_argument("limit", type=int_range(1, 100), default=20, location="args") args = parser.parse_args() - query = db.select(Conversation).where(Conversation.app_id == app_model.id, Conversation.mode == "completion") + query = db.select(Conversation).where( + Conversation.app_id == app_model.id, Conversation.mode == "completion", Conversation.is_deleted.is_(False) + ) if args["keyword"]: query = query.join(Message, Message.conversation_id == Conversation.id).where( @@ -119,18 +123,11 @@ class CompletionConversationDetailApi(Resource): raise Forbidden() conversation_id = str(conversation_id) - conversation = ( - db.session.query(Conversation) - .where(Conversation.id == conversation_id, Conversation.app_id == app_model.id) - .first() - ) - - if not conversation: + try: + ConversationService.delete(app_model, conversation_id, current_user) + except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - conversation.is_deleted = True - db.session.commit() - return {"result": "success"}, 204 @@ -171,7 +168,7 @@ class ChatConversationApi(Resource): .subquery() ) - query = db.select(Conversation).where(Conversation.app_id == app_model.id) + query = db.select(Conversation).where(Conversation.app_id == 
app_model.id, Conversation.is_deleted.is_(False)) if args["keyword"]: keyword_filter = f"%{args['keyword']}%" @@ -284,18 +281,11 @@ class ChatConversationDetailApi(Resource): raise Forbidden() conversation_id = str(conversation_id) - conversation = ( - db.session.query(Conversation) - .where(Conversation.id == conversation_id, Conversation.app_id == app_model.id) - .first() - ) - - if not conversation: + try: + ConversationService.delete(app_model, conversation_id, current_user) + except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - conversation.is_deleted = True - db.session.commit() - return {"result": "success"}, 204 diff --git a/api/controllers/console/app/conversation_variables.py b/api/controllers/console/app/conversation_variables.py index d49f433ba1..5ca4c33f87 100644 --- a/api/controllers/console/app/conversation_variables.py +++ b/api/controllers/console/app/conversation_variables.py @@ -1,4 +1,4 @@ -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from sqlalchemy import select from sqlalchemy.orm import Session diff --git a/api/controllers/console/app/generator.py b/api/controllers/console/app/generator.py index b46292305b..497fd53df7 100644 --- a/api/controllers/console/app/generator.py +++ b/api/controllers/console/app/generator.py @@ -1,7 +1,7 @@ from collections.abc import Sequence from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console import api from controllers.console.app.error import ( @@ -12,6 +12,8 @@ from controllers.console.app.error import ( ) from controllers.console.wraps import account_initialization_required, setup_required from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError +from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider +from 
core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider from core.llm_generator.llm_generator import LLMGenerator from core.model_runtime.errors.invoke import InvokeError from libs.login import login_required @@ -123,13 +125,20 @@ class InstructionGenerateApi(Resource): parser.add_argument("model_config", type=dict, required=True, nullable=False, location="json") parser.add_argument("ideal_output", type=str, required=False, default="", location="json") args = parser.parse_args() - + code_template = ( + Python3CodeProvider.get_default_code() + if args["language"] == "python" + else (JavascriptCodeProvider.get_default_code()) + if args["language"] == "javascript" + else "" + ) try: - if args["current"] == "" and args["node_id"] != "": # Generate from nothing for a workflow node + # Generate from nothing for a workflow node + if (args["current"] == code_template or args["current"] == "") and args["node_id"] != "": from models import App, db from services.workflow_service import WorkflowService - app = db.session.query(App).filter(App.id == args["flow_id"]).first() + app = db.session.query(App).where(App.id == args["flow_id"]).first() if not app: return {"error": f"app {args['flow_id']} not found"}, 400 workflow = WorkflowService().get_draft_workflow(app_model=app) diff --git a/api/controllers/console/app/mcp_server.py b/api/controllers/console/app/mcp_server.py index 2344fd5acb..541803e539 100644 --- a/api/controllers/console/app/mcp_server.py +++ b/api/controllers/console/app/mcp_server.py @@ -2,7 +2,7 @@ import json from enum import StrEnum from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from werkzeug.exceptions import NotFound from controllers.console import api diff --git a/api/controllers/console/app/message.py b/api/controllers/console/app/message.py index 680ac4a64c..57cc825fe9 100644 --- a/api/controllers/console/app/message.py 
+++ b/api/controllers/console/app/message.py @@ -1,8 +1,8 @@ import logging from flask_login import current_user -from flask_restful import Resource, fields, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import Resource, fields, marshal_with, reqparse +from flask_restx.inputs import int_range from werkzeug.exceptions import Forbidden, InternalServerError, NotFound from controllers.console import api diff --git a/api/controllers/console/app/model_config.py b/api/controllers/console/app/model_config.py index 029138fb6b..52ff9b923d 100644 --- a/api/controllers/console/app/model_config.py +++ b/api/controllers/console/app/model_config.py @@ -3,7 +3,7 @@ from typing import cast from flask import request from flask_login import current_user -from flask_restful import Resource +from flask_restx import Resource from controllers.console import api from controllers.console.app.wraps import get_app_model diff --git a/api/controllers/console/app/ops_trace.py b/api/controllers/console/app/ops_trace.py index 978c02412c..74c2867c2f 100644 --- a/api/controllers/console/app/ops_trace.py +++ b/api/controllers/console/app/ops_trace.py @@ -1,4 +1,4 @@ -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import BadRequest from controllers.console import api diff --git a/api/controllers/console/app/site.py b/api/controllers/console/app/site.py index 03418f1dd2..778ce92da6 100644 --- a/api/controllers/console/app/site.py +++ b/api/controllers/console/app/site.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from werkzeug.exceptions import Forbidden, NotFound from constants.languages import supported_language diff --git a/api/controllers/console/app/statistic.py b/api/controllers/console/app/statistic.py index 343b7acd7b..27e405af38 100644 --- 
a/api/controllers/console/app/statistic.py +++ b/api/controllers/console/app/statistic.py @@ -5,7 +5,7 @@ import pytz import sqlalchemy as sa from flask import jsonify from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console import api from controllers.console.app.wraps import get_app_model diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index c58301b300..8dcffb1666 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -4,7 +4,7 @@ from collections.abc import Sequence from typing import cast from flask import abort, request -from flask_restful import Resource, inputs, marshal_with, reqparse +from flask_restx import Resource, inputs, marshal_with, reqparse from sqlalchemy.orm import Session from werkzeug.exceptions import Forbidden, InternalServerError, NotFound diff --git a/api/controllers/console/app/workflow_app_log.py b/api/controllers/console/app/workflow_app_log.py index 310146a5e7..8d8cdc93cf 100644 --- a/api/controllers/console/app/workflow_app_log.py +++ b/api/controllers/console/app/workflow_app_log.py @@ -1,6 +1,6 @@ from dateutil.parser import isoparse -from flask_restful import Resource, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import Resource, marshal_with, reqparse +from flask_restx.inputs import int_range from sqlalchemy.orm import Session from controllers.console import api diff --git a/api/controllers/console/app/workflow_draft_variable.py b/api/controllers/console/app/workflow_draft_variable.py index 414c07ef50..4e625db24d 100644 --- a/api/controllers/console/app/workflow_draft_variable.py +++ b/api/controllers/console/app/workflow_draft_variable.py @@ -2,7 +2,7 @@ import logging from typing import Any, NoReturn from flask import Response -from flask_restful import Resource, fields, inputs, marshal, marshal_with, 
reqparse +from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse from sqlalchemy.orm import Session from werkzeug.exceptions import Forbidden diff --git a/api/controllers/console/app/workflow_run.py b/api/controllers/console/app/workflow_run.py index 9099700213..dccbfd8648 100644 --- a/api/controllers/console/app/workflow_run.py +++ b/api/controllers/console/app/workflow_run.py @@ -1,8 +1,8 @@ from typing import cast from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import Resource, marshal_with, reqparse +from flask_restx.inputs import int_range from controllers.console import api from controllers.console.app.wraps import get_app_model diff --git a/api/controllers/console/app/workflow_statistic.py b/api/controllers/console/app/workflow_statistic.py index 7f80afd83b..7cef175c14 100644 --- a/api/controllers/console/app/workflow_statistic.py +++ b/api/controllers/console/app/workflow_statistic.py @@ -5,7 +5,7 @@ import pytz import sqlalchemy as sa from flask import jsonify from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console import api from controllers.console.app.wraps import get_app_model diff --git a/api/controllers/console/auth/activate.py b/api/controllers/console/auth/activate.py index 2562fb5eb8..e82e403ec2 100644 --- a/api/controllers/console/auth/activate.py +++ b/api/controllers/console/auth/activate.py @@ -1,5 +1,5 @@ from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from constants.languages import supported_language from controllers.console import api diff --git a/api/controllers/console/auth/data_source_bearer_auth.py b/api/controllers/console/auth/data_source_bearer_auth.py index b8c3c8f012..796e6916cc 100644 --- 
a/api/controllers/console/auth/data_source_bearer_auth.py +++ b/api/controllers/console/auth/data_source_bearer_auth.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden from controllers.console import api diff --git a/api/controllers/console/auth/data_source_oauth.py b/api/controllers/console/auth/data_source_oauth.py index 4940b48754..d4cf20549a 100644 --- a/api/controllers/console/auth/data_source_oauth.py +++ b/api/controllers/console/auth/data_source_oauth.py @@ -3,7 +3,7 @@ import logging import requests from flask import current_app, redirect, request from flask_login import current_user -from flask_restful import Resource +from flask_restx import Resource from werkzeug.exceptions import Forbidden from configs import dify_config diff --git a/api/controllers/console/auth/forgot_password.py b/api/controllers/console/auth/forgot_password.py index 3bbe3177fc..ede0696854 100644 --- a/api/controllers/console/auth/forgot_password.py +++ b/api/controllers/console/auth/forgot_password.py @@ -2,7 +2,7 @@ import base64 import secrets from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from sqlalchemy import select from sqlalchemy.orm import Session diff --git a/api/controllers/console/auth/login.py b/api/controllers/console/auth/login.py index 5f2a24322d..a5ad6a1cd7 100644 --- a/api/controllers/console/auth/login.py +++ b/api/controllers/console/auth/login.py @@ -2,7 +2,7 @@ from typing import cast import flask_login from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse import services from configs import dify_config @@ -221,7 +221,7 @@ class EmailCodeLoginApi(Resource): email=user_email, name=user_email, interface_language=languages[0] ) except WorkSpaceNotAllowedCreateError: - return NotAllowedCreateWorkspace() 
+ raise NotAllowedCreateWorkspace() except AccountRegisterError as are: raise AccountInFreezeError() except WorkspacesLimitExceededError: diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py index 4a6cb99390..3c76394cf9 100644 --- a/api/controllers/console/auth/oauth.py +++ b/api/controllers/console/auth/oauth.py @@ -3,7 +3,7 @@ from typing import Optional import requests from flask import current_app, redirect, request -from flask_restful import Resource +from flask_restx import Resource from sqlalchemy import select from sqlalchemy.orm import Session from werkzeug.exceptions import Unauthorized diff --git a/api/controllers/console/billing/billing.py b/api/controllers/console/billing/billing.py index 4b0c82ae6c..8ebb745a60 100644 --- a/api/controllers/console/billing/billing.py +++ b/api/controllers/console/billing/billing.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console import api from controllers.console.wraps import account_initialization_required, only_edition_cloud, setup_required diff --git a/api/controllers/console/billing/compliance.py b/api/controllers/console/billing/compliance.py index 9679632ac7..4bc073f679 100644 --- a/api/controllers/console/billing/compliance.py +++ b/api/controllers/console/billing/compliance.py @@ -1,6 +1,6 @@ from flask import request from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from libs.helper import extract_remote_ip from libs.login import login_required diff --git a/api/controllers/console/datasets/data_source.py b/api/controllers/console/datasets/data_source.py index 39f8ab5787..6083a53bec 100644 --- a/api/controllers/console/datasets/data_source.py +++ b/api/controllers/console/datasets/data_source.py @@ -2,7 +2,7 @@ import json from flask import request from flask_login import 
current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from sqlalchemy import select from sqlalchemy.orm import Session from werkzeug.exceptions import NotFound diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 2befd2a651..a23536f82e 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -1,7 +1,7 @@ -import flask_restful +import flask_restx from flask import request from flask_login import current_user -from flask_restful import Resource, marshal, marshal_with, reqparse +from flask_restx import Resource, marshal, marshal_with, reqparse from werkzeug.exceptions import Forbidden, NotFound import services @@ -589,7 +589,7 @@ class DatasetApiKeyApi(Resource): ) if current_key_count >= self.max_keys: - flask_restful.abort( + flask_restx.abort( 400, message=f"Cannot create more than {self.max_keys} API keys for this resource type.", code="max_keys_exceeded", @@ -629,7 +629,7 @@ class DatasetApiDeleteApi(Resource): ) if key is None: - flask_restful.abort(404, message="API key not found") + flask_restx.abort(404, message="API key not found") db.session.query(ApiToken).where(ApiToken.id == api_key_id).delete() db.session.commit() diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 4e0955bd43..f823ed603b 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -1,10 +1,10 @@ import logging from argparse import ArgumentTypeError -from typing import cast +from typing import Literal, cast from flask import request from flask_login import current_user -from flask_restful import Resource, marshal, marshal_with, reqparse +from flask_restx import Resource, marshal, marshal_with, reqparse from sqlalchemy import asc, desc, select from 
werkzeug.exceptions import Forbidden, NotFound @@ -758,7 +758,7 @@ class DocumentProcessingApi(DocumentResource): @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") - def patch(self, dataset_id, document_id, action): + def patch(self, dataset_id, document_id, action: Literal["pause", "resume"]): dataset_id = str(dataset_id) document_id = str(document_id) document = self.get_document(dataset_id, document_id) @@ -784,8 +784,6 @@ class DocumentProcessingApi(DocumentResource): document.paused_at = None document.is_paused = False db.session.commit() - else: - raise InvalidActionError() return {"result": "success"}, 200 @@ -840,7 +838,7 @@ class DocumentStatusApi(DocumentResource): @account_initialization_required @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") - def patch(self, dataset_id, action): + def patch(self, dataset_id, action: Literal["enable", "disable", "archive", "un_archive"]): dataset_id = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id) if dataset is None: diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 8c429044d7..463fd2d7ec 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -2,7 +2,7 @@ import uuid from flask import request from flask_login import current_user -from flask_restful import Resource, marshal, reqparse +from flask_restx import Resource, marshal, reqparse from sqlalchemy import select from werkzeug.exceptions import Forbidden, NotFound @@ -584,7 +584,12 @@ class ChildChunkUpdateApi(Resource): child_chunk_id = str(child_chunk_id) child_chunk = ( db.session.query(ChildChunk) - .where(ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id) + .where( + ChildChunk.id == str(child_chunk_id), + ChildChunk.tenant_id == 
current_user.current_tenant_id, + ChildChunk.segment_id == segment.id, + ChildChunk.document_id == document_id, + ) .first() ) if not child_chunk: @@ -633,7 +638,12 @@ class ChildChunkUpdateApi(Resource): child_chunk_id = str(child_chunk_id) child_chunk = ( db.session.query(ChildChunk) - .where(ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id) + .where( + ChildChunk.id == str(child_chunk_id), + ChildChunk.tenant_id == current_user.current_tenant_id, + ChildChunk.segment_id == segment.id, + ChildChunk.document_id == document_id, + ) .first() ) if not child_chunk: diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index cf9081e154..043f39f623 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -1,6 +1,6 @@ from flask import request from flask_login import current_user -from flask_restful import Resource, marshal, reqparse +from flask_restx import Resource, marshal, reqparse from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services diff --git a/api/controllers/console/datasets/hit_testing.py b/api/controllers/console/datasets/hit_testing.py index fba5d4c0f3..2ad192571b 100644 --- a/api/controllers/console/datasets/hit_testing.py +++ b/api/controllers/console/datasets/hit_testing.py @@ -1,4 +1,4 @@ -from flask_restful import Resource +from flask_restx import Resource from controllers.console import api from controllers.console.datasets.hit_testing_base import DatasetsHitTestingBase diff --git a/api/controllers/console/datasets/hit_testing_base.py b/api/controllers/console/datasets/hit_testing_base.py index 3b4c076863..304674db5f 100644 --- a/api/controllers/console/datasets/hit_testing_base.py +++ b/api/controllers/console/datasets/hit_testing_base.py @@ -1,7 +1,7 @@ import logging from flask_login import current_user -from flask_restful import marshal, reqparse +from flask_restx 
import marshal, reqparse from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services.dataset_service diff --git a/api/controllers/console/datasets/metadata.py b/api/controllers/console/datasets/metadata.py index 65f76fb402..6aa309f930 100644 --- a/api/controllers/console/datasets/metadata.py +++ b/api/controllers/console/datasets/metadata.py @@ -1,5 +1,7 @@ +from typing import Literal + from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from werkzeug.exceptions import NotFound from controllers.console import api @@ -100,7 +102,7 @@ class DatasetMetadataBuiltInFieldActionApi(Resource): @login_required @account_initialization_required @enterprise_license_required - def post(self, dataset_id, action): + def post(self, dataset_id, action: Literal["enable", "disable"]): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: diff --git a/api/controllers/console/datasets/upload_file.py b/api/controllers/console/datasets/upload_file.py index 9b456c771d..617dbcaff2 100644 --- a/api/controllers/console/datasets/upload_file.py +++ b/api/controllers/console/datasets/upload_file.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource +from flask_restx import Resource from werkzeug.exceptions import NotFound from controllers.console import api @@ -39,7 +39,7 @@ class UploadFileApi(Resource): data_source_info = document.data_source_info_dict if data_source_info and "upload_file_id" in data_source_info: file_id = data_source_info["upload_file_id"] - upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first() + upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first() if not upload_file: raise NotFound("UploadFile not found.") else: diff --git a/api/controllers/console/datasets/website.py 
b/api/controllers/console/datasets/website.py index fcdc91ec67..bdaa268462 100644 --- a/api/controllers/console/datasets/website.py +++ b/api/controllers/console/datasets/website.py @@ -1,4 +1,4 @@ -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console import api from controllers.console.datasets.error import WebsiteCrawlError diff --git a/api/controllers/console/explore/audio.py b/api/controllers/console/explore/audio.py index d564a00a76..2a4d5be82f 100644 --- a/api/controllers/console/explore/audio.py +++ b/api/controllers/console/explore/audio.py @@ -65,7 +65,7 @@ class ChatAudioApi(InstalledAppResource): class ChatTextApi(InstalledAppResource): def post(self, installed_app): - from flask_restful import reqparse + from flask_restx import reqparse app_model = installed_app.app try: diff --git a/api/controllers/console/explore/completion.py b/api/controllers/console/explore/completion.py index 4842fefc57..b444a2a197 100644 --- a/api/controllers/console/explore/completion.py +++ b/api/controllers/console/explore/completion.py @@ -1,7 +1,7 @@ import logging from flask_login import current_user -from flask_restful import reqparse +from flask_restx import reqparse from werkzeug.exceptions import InternalServerError, NotFound import services diff --git a/api/controllers/console/explore/conversation.py b/api/controllers/console/explore/conversation.py index d7c161cc6d..a8d46954b5 100644 --- a/api/controllers/console/explore/conversation.py +++ b/api/controllers/console/explore/conversation.py @@ -1,6 +1,6 @@ from flask_login import current_user -from flask_restful import marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import marshal_with, reqparse +from flask_restx.inputs import int_range from sqlalchemy.orm import Session from werkzeug.exceptions import NotFound diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py 
index ad62bd6e08..3ccedd654b 100644 --- a/api/controllers/console/explore/installed_app.py +++ b/api/controllers/console/explore/installed_app.py @@ -3,7 +3,7 @@ from typing import Any from flask import request from flask_login import current_user -from flask_restful import Resource, inputs, marshal_with, reqparse +from flask_restx import Resource, inputs, marshal_with, reqparse from sqlalchemy import and_ from werkzeug.exceptions import BadRequest, Forbidden, NotFound diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index de95a9e7b0..6df3bca762 100644 --- a/api/controllers/console/explore/message.py +++ b/api/controllers/console/explore/message.py @@ -1,8 +1,8 @@ import logging from flask_login import current_user -from flask_restful import marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import marshal_with, reqparse +from flask_restx.inputs import int_range from werkzeug.exceptions import InternalServerError, NotFound from controllers.console.app.error import ( diff --git a/api/controllers/console/explore/parameter.py b/api/controllers/console/explore/parameter.py index a1280d91d1..c368744759 100644 --- a/api/controllers/console/explore/parameter.py +++ b/api/controllers/console/explore/parameter.py @@ -1,4 +1,4 @@ -from flask_restful import marshal_with +from flask_restx import marshal_with from controllers.common import fields from controllers.console import api diff --git a/api/controllers/console/explore/recommended_app.py b/api/controllers/console/explore/recommended_app.py index ce85f495aa..62f9350b71 100644 --- a/api/controllers/console/explore/recommended_app.py +++ b/api/controllers/console/explore/recommended_app.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource, fields, marshal_with, reqparse +from flask_restx import Resource, fields, marshal_with, reqparse from constants.languages import languages from controllers.console 
import api diff --git a/api/controllers/console/explore/saved_message.py b/api/controllers/console/explore/saved_message.py index 339e7007a0..5353dbcad5 100644 --- a/api/controllers/console/explore/saved_message.py +++ b/api/controllers/console/explore/saved_message.py @@ -1,6 +1,6 @@ from flask_login import current_user -from flask_restful import fields, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import fields, marshal_with, reqparse +from flask_restx.inputs import int_range from werkzeug.exceptions import NotFound from controllers.console import api diff --git a/api/controllers/console/explore/workflow.py b/api/controllers/console/explore/workflow.py index 3f625e6609..3d872fc1fc 100644 --- a/api/controllers/console/explore/workflow.py +++ b/api/controllers/console/explore/workflow.py @@ -1,6 +1,6 @@ import logging -from flask_restful import reqparse +from flask_restx import reqparse from werkzeug.exceptions import InternalServerError from controllers.console.app.error import ( diff --git a/api/controllers/console/explore/wraps.py b/api/controllers/console/explore/wraps.py index de97fb149e..e86103184a 100644 --- a/api/controllers/console/explore/wraps.py +++ b/api/controllers/console/explore/wraps.py @@ -1,7 +1,7 @@ from functools import wraps from flask_login import current_user -from flask_restful import Resource +from flask_restx import Resource from werkzeug.exceptions import NotFound from controllers.console.explore.error import AppAccessDeniedError diff --git a/api/controllers/console/extension.py b/api/controllers/console/extension.py index 07a241ef86..e157041c35 100644 --- a/api/controllers/console/extension.py +++ b/api/controllers/console/extension.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from constants import HIDDEN_VALUE from controllers.console import api diff --git 
a/api/controllers/console/feature.py b/api/controllers/console/feature.py index 70ab4ff865..6236832d39 100644 --- a/api/controllers/console/feature.py +++ b/api/controllers/console/feature.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource +from flask_restx import Resource from libs.login import login_required from services.feature_service import FeatureService diff --git a/api/controllers/console/files.py b/api/controllers/console/files.py index a87d270e9c..101a49a32e 100644 --- a/api/controllers/console/files.py +++ b/api/controllers/console/files.py @@ -2,7 +2,7 @@ from typing import Literal from flask import request from flask_login import current_user -from flask_restful import Resource, marshal_with +from flask_restx import Resource, marshal_with from werkzeug.exceptions import Forbidden import services diff --git a/api/controllers/console/init_validate.py b/api/controllers/console/init_validate.py index b19e331d2e..2a37b1708a 100644 --- a/api/controllers/console/init_validate.py +++ b/api/controllers/console/init_validate.py @@ -1,7 +1,7 @@ import os from flask import session -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from sqlalchemy import select from sqlalchemy.orm import Session diff --git a/api/controllers/console/ping.py b/api/controllers/console/ping.py index cd28cc946e..1a53a2347e 100644 --- a/api/controllers/console/ping.py +++ b/api/controllers/console/ping.py @@ -1,4 +1,4 @@ -from flask_restful import Resource +from flask_restx import Resource from controllers.console import api diff --git a/api/controllers/console/remote_files.py b/api/controllers/console/remote_files.py index c356113c40..73014cfc97 100644 --- a/api/controllers/console/remote_files.py +++ b/api/controllers/console/remote_files.py @@ -3,7 +3,7 @@ from typing import cast import httpx from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx 
import Resource, marshal_with, reqparse import services from controllers.common import helpers diff --git a/api/controllers/console/setup.py b/api/controllers/console/setup.py index e1f19a87a3..8e230496f0 100644 --- a/api/controllers/console/setup.py +++ b/api/controllers/console/setup.py @@ -1,5 +1,5 @@ from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from configs import dify_config from libs.helper import StrLen, email, extract_remote_ip diff --git a/api/controllers/console/tag/tags.py b/api/controllers/console/tag/tags.py index cb5dedca21..c45e7dbb26 100644 --- a/api/controllers/console/tag/tags.py +++ b/api/controllers/console/tag/tags.py @@ -1,11 +1,11 @@ from flask import request from flask_login import current_user -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from werkzeug.exceptions import Forbidden from controllers.console import api from controllers.console.wraps import account_initialization_required, setup_required -from fields.tag_fields import tag_fields +from fields.tag_fields import dataset_tag_fields from libs.login import login_required from models.model import Tag from services.tag_service import TagService @@ -21,7 +21,7 @@ class TagListApi(Resource): @setup_required @login_required @account_initialization_required - @marshal_with(tag_fields) + @marshal_with(dataset_tag_fields) def get(self): tag_type = request.args.get("type", type=str, default="") keyword = request.args.get("keyword", default=None, type=str) diff --git a/api/controllers/console/version.py b/api/controllers/console/version.py index 894785abc8..96cf627b65 100644 --- a/api/controllers/console/version.py +++ b/api/controllers/console/version.py @@ -2,7 +2,7 @@ import json import logging import requests -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from packaging import version from configs import 
dify_config diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 4d5357cd18..5b2828dbab 100644 --- a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -1,7 +1,9 @@ +from datetime import datetime + import pytz from flask import request from flask_login import current_user -from flask_restful import Resource, fields, marshal_with, reqparse +from flask_restx import Resource, fields, marshal_with, reqparse from sqlalchemy import select from sqlalchemy.orm import Session @@ -327,6 +329,9 @@ class EducationVerifyApi(Resource): class EducationApi(Resource): status_fields = { "result": fields.Boolean, + "is_student": fields.Boolean, + "expire_at": TimestampField, + "allow_refresh": fields.Boolean, } @setup_required @@ -354,7 +359,11 @@ class EducationApi(Resource): def get(self): account = current_user - return BillingService.EducationIdentity.is_active(account.id) + res = BillingService.EducationIdentity.status(account.id) + # convert expire_at to UTC timestamp from isoformat + if res and "expire_at" in res: + res["expire_at"] = datetime.fromisoformat(res["expire_at"]).astimezone(pytz.utc) + return res class EducationAutoCompleteApi(Resource): diff --git a/api/controllers/console/workspace/agent_providers.py b/api/controllers/console/workspace/agent_providers.py index 88c37767e3..08bab6fcb5 100644 --- a/api/controllers/console/workspace/agent_providers.py +++ b/api/controllers/console/workspace/agent_providers.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource +from flask_restx import Resource from controllers.console import api from controllers.console.wraps import account_initialization_required, setup_required diff --git a/api/controllers/console/workspace/endpoint.py b/api/controllers/console/workspace/endpoint.py index eb53dcb16e..96e873d42b 100644 --- a/api/controllers/console/workspace/endpoint.py +++ 
b/api/controllers/console/workspace/endpoint.py @@ -1,5 +1,5 @@ from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden from controllers.console import api diff --git a/api/controllers/console/workspace/load_balancing_config.py b/api/controllers/console/workspace/load_balancing_config.py index b4eb5e246b..2a54511bf0 100644 --- a/api/controllers/console/workspace/load_balancing_config.py +++ b/api/controllers/console/workspace/load_balancing_config.py @@ -1,4 +1,4 @@ -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden from controllers.console import api diff --git a/api/controllers/console/workspace/members.py b/api/controllers/console/workspace/members.py index f7424923b9..f018fada3a 100644 --- a/api/controllers/console/workspace/members.py +++ b/api/controllers/console/workspace/members.py @@ -2,7 +2,7 @@ from urllib import parse from flask import request from flask_login import current_user -from flask_restful import Resource, abort, marshal_with, reqparse +from flask_restx import Resource, abort, marshal_with, reqparse import services from configs import dify_config diff --git a/api/controllers/console/workspace/model_providers.py b/api/controllers/console/workspace/model_providers.py index ff0fcbda6e..281783b3d7 100644 --- a/api/controllers/console/workspace/model_providers.py +++ b/api/controllers/console/workspace/model_providers.py @@ -2,7 +2,7 @@ import io from flask import send_file from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden from controllers.console import api diff --git a/api/controllers/console/workspace/models.py b/api/controllers/console/workspace/models.py index 514d1084c4..b8dddb91dd 100644 --- 
a/api/controllers/console/workspace/models.py +++ b/api/controllers/console/workspace/models.py @@ -1,7 +1,7 @@ import logging from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden from controllers.console import api diff --git a/api/controllers/console/workspace/plugin.py b/api/controllers/console/workspace/plugin.py index 09846d5c94..fd5421fa64 100644 --- a/api/controllers/console/workspace/plugin.py +++ b/api/controllers/console/workspace/plugin.py @@ -2,7 +2,7 @@ import io from flask import request, send_file from flask_login import current_user -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden from configs import dify_config diff --git a/api/controllers/console/workspace/tool_providers.py b/api/controllers/console/workspace/tool_providers.py index 8c8b73b45d..854ba7ac45 100644 --- a/api/controllers/console/workspace/tool_providers.py +++ b/api/controllers/console/workspace/tool_providers.py @@ -3,7 +3,7 @@ from urllib.parse import urlparse from flask import make_response, redirect, request, send_file from flask_login import current_user -from flask_restful import ( +from flask_restx import ( Resource, reqparse, ) diff --git a/api/controllers/console/workspace/workspace.py b/api/controllers/console/workspace/workspace.py index f4f0078da7..fb89f6bbbd 100644 --- a/api/controllers/console/workspace/workspace.py +++ b/api/controllers/console/workspace/workspace.py @@ -2,7 +2,7 @@ import logging from flask import request from flask_login import current_user -from flask_restful import Resource, fields, inputs, marshal, marshal_with, reqparse +from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse from sqlalchemy import select from werkzeug.exceptions import Unauthorized diff --git a/api/controllers/console/wraps.py 
b/api/controllers/console/wraps.py index d862dac373..d3fd1d52e5 100644 --- a/api/controllers/console/wraps.py +++ b/api/controllers/console/wraps.py @@ -1,3 +1,4 @@ +import contextlib import json import os import time @@ -178,7 +179,7 @@ def cloud_edition_billing_rate_limit_check(resource: str): def cloud_utm_record(view): @wraps(view) def decorated(*args, **kwargs): - try: + with contextlib.suppress(Exception): features = FeatureService.get_features(current_user.current_tenant_id) if features.billing.enabled: @@ -187,8 +188,7 @@ def cloud_utm_record(view): if utm_info: utm_info_dict: dict = json.loads(utm_info) OperationService.record_utm(current_user.current_tenant_id, utm_info_dict) - except Exception as e: - pass + return view(*args, **kwargs) return decorated diff --git a/api/controllers/files/__init__.py b/api/controllers/files/__init__.py index d4c3245708..282a181997 100644 --- a/api/controllers/files/__init__.py +++ b/api/controllers/files/__init__.py @@ -1,9 +1,20 @@ from flask import Blueprint +from flask_restx import Namespace from libs.external_api import ExternalApi -bp = Blueprint("files", __name__) -api = ExternalApi(bp) +bp = Blueprint("files", __name__, url_prefix="/files") +api = ExternalApi( + bp, + version="1.0", + title="Files API", + description="API for file operations including upload and preview", + doc="/docs", # Enable Swagger UI at /files/docs +) + +files_ns = Namespace("files", description="File operations") from . 
import image_preview, tool_files, upload + +api.add_namespace(files_ns) diff --git a/api/controllers/files/image_preview.py b/api/controllers/files/image_preview.py index 91f7b27d1d..48baac6556 100644 --- a/api/controllers/files/image_preview.py +++ b/api/controllers/files/image_preview.py @@ -1,16 +1,17 @@ from urllib.parse import quote from flask import Response, request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import NotFound import services from controllers.common.errors import UnsupportedFileTypeError -from controllers.files import api +from controllers.files import files_ns from services.account_service import TenantService from services.file_service import FileService +@files_ns.route("//image-preview") class ImagePreviewApi(Resource): """ Deprecated @@ -39,6 +40,7 @@ class ImagePreviewApi(Resource): return Response(generator, mimetype=mimetype) +@files_ns.route("//file-preview") class FilePreviewApi(Resource): def get(self, file_id): file_id = str(file_id) @@ -94,6 +96,7 @@ class FilePreviewApi(Resource): return response +@files_ns.route("/workspaces//webapp-logo") class WorkspaceWebappLogoApi(Resource): def get(self, workspace_id): workspace_id = str(workspace_id) @@ -112,8 +115,3 @@ class WorkspaceWebappLogoApi(Resource): raise UnsupportedFileTypeError() return Response(generator, mimetype=mimetype) - - -api.add_resource(ImagePreviewApi, "/files//image-preview") -api.add_resource(FilePreviewApi, "/files//file-preview") -api.add_resource(WorkspaceWebappLogoApi, "/files/workspaces//webapp-logo") diff --git a/api/controllers/files/tool_files.py b/api/controllers/files/tool_files.py index d9c4e50511..faa9b733c2 100644 --- a/api/controllers/files/tool_files.py +++ b/api/controllers/files/tool_files.py @@ -1,17 +1,18 @@ from urllib.parse import quote from flask import Response -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from 
werkzeug.exceptions import Forbidden, NotFound from controllers.common.errors import UnsupportedFileTypeError -from controllers.files import api +from controllers.files import files_ns from core.tools.signature import verify_tool_file_signature from core.tools.tool_file_manager import ToolFileManager from models import db as global_db -class ToolFilePreviewApi(Resource): +@files_ns.route("/tools/.") +class ToolFileApi(Resource): def get(self, file_id, extension): file_id = str(file_id) @@ -52,6 +53,3 @@ class ToolFilePreviewApi(Resource): response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}" return response - - -api.add_resource(ToolFilePreviewApi, "/files/tools/.") diff --git a/api/controllers/files/upload.py b/api/controllers/files/upload.py index bcc72d131c..7a2b3b0428 100644 --- a/api/controllers/files/upload.py +++ b/api/controllers/files/upload.py @@ -1,7 +1,9 @@ from mimetypes import guess_extension +from typing import Optional -from flask import request -from flask_restful import Resource, marshal_with +from flask_restx import Resource, reqparse +from flask_restx.api import HTTPStatus +from werkzeug.datastructures import FileStorage from werkzeug.exceptions import Forbidden import services @@ -10,39 +12,76 @@ from controllers.common.errors import ( UnsupportedFileTypeError, ) from controllers.console.wraps import setup_required -from controllers.files import api +from controllers.files import files_ns from controllers.inner_api.plugin.wraps import get_user from core.file.helpers import verify_plugin_file_signature from core.tools.tool_file_manager import ToolFileManager -from fields.file_fields import file_fields +from fields.file_fields import build_file_model + +# Define parser for both documentation and validation +upload_parser = reqparse.RequestParser() +upload_parser.add_argument("file", location="files", type=FileStorage, required=True, help="File to upload") +upload_parser.add_argument( + "timestamp", type=str, 
required=True, location="args", help="Unix timestamp for signature verification" +) +upload_parser.add_argument( + "nonce", type=str, required=True, location="args", help="Random string for signature verification" +) +upload_parser.add_argument( + "sign", type=str, required=True, location="args", help="HMAC signature for request validation" +) +upload_parser.add_argument("tenant_id", type=str, required=True, location="args", help="Tenant identifier") +upload_parser.add_argument("user_id", type=str, required=False, location="args", help="User identifier") +@files_ns.route("/upload/for-plugin") class PluginUploadFileApi(Resource): @setup_required - @marshal_with(file_fields) + @files_ns.expect(upload_parser) + @files_ns.doc("upload_plugin_file") + @files_ns.doc(description="Upload a file for plugin usage with signature verification") + @files_ns.doc( + responses={ + 201: "File uploaded successfully", + 400: "Invalid request parameters", + 403: "Forbidden - Invalid signature or missing parameters", + 413: "File too large", + 415: "Unsupported file type", + } + ) + @files_ns.marshal_with(build_file_model(files_ns), code=HTTPStatus.CREATED) def post(self): - # get file from request - file = request.files["file"] + """Upload a file for plugin usage. - timestamp = request.args.get("timestamp") - nonce = request.args.get("nonce") - sign = request.args.get("sign") - tenant_id = request.args.get("tenant_id") - if not tenant_id: - raise Forbidden("Invalid request.") + Accepts a file upload with signature verification for security. + The file must be accompanied by valid timestamp, nonce, and signature parameters. 
- user_id = request.args.get("user_id") + Returns: + dict: File metadata including ID, URLs, and properties + int: HTTP status code (201 for success) + + Raises: + Forbidden: Invalid signature or missing required parameters + FileTooLargeError: File exceeds size limit + UnsupportedFileTypeError: File type not supported + """ + # Parse and validate all arguments + args = upload_parser.parse_args() + + file: FileStorage = args["file"] + timestamp: str = args["timestamp"] + nonce: str = args["nonce"] + sign: str = args["sign"] + tenant_id: str = args["tenant_id"] + user_id: Optional[str] = args.get("user_id") user = get_user(tenant_id, user_id) - filename = file.filename - mimetype = file.mimetype + filename: Optional[str] = file.filename + mimetype: Optional[str] = file.mimetype if not filename or not mimetype: raise Forbidden("Invalid request.") - if not timestamp or not nonce or not sign: - raise Forbidden("Invalid request.") - if not verify_plugin_file_signature( filename=filename, mimetype=mimetype, @@ -88,6 +127,3 @@ class PluginUploadFileApi(Resource): raise FileTooLargeError(file_too_large_error.description) except services.errors.file.UnsupportedFileTypeError: raise UnsupportedFileTypeError() - - -api.add_resource(PluginUploadFileApi, "/files/upload/for-plugin") diff --git a/api/controllers/inner_api/mail.py b/api/controllers/inner_api/mail.py index ce3373d65c..80bbc360de 100644 --- a/api/controllers/inner_api/mail.py +++ b/api/controllers/inner_api/mail.py @@ -1,27 +1,38 @@ -from flask_restful import ( - Resource, # type: ignore - reqparse, -) +from flask_restx import Resource, reqparse from controllers.console.wraps import setup_required from controllers.inner_api import api -from controllers.inner_api.wraps import enterprise_inner_api_only -from services.enterprise.mail_service import DifyMail, EnterpriseMailService +from controllers.inner_api.wraps import billing_inner_api_only, enterprise_inner_api_only +from tasks.mail_inner_task import 
send_inner_email_task + +_mail_parser = reqparse.RequestParser() +_mail_parser.add_argument("to", type=str, action="append", required=True) +_mail_parser.add_argument("subject", type=str, required=True) +_mail_parser.add_argument("body", type=str, required=True) +_mail_parser.add_argument("substitutions", type=dict, required=False) -class EnterpriseMail(Resource): - @setup_required - @enterprise_inner_api_only +class BaseMail(Resource): + """Shared logic for sending an inner email.""" + def post(self): - parser = reqparse.RequestParser() - parser.add_argument("to", type=str, action="append", required=True) - parser.add_argument("subject", type=str, required=True) - parser.add_argument("body", type=str, required=True) - parser.add_argument("substitutions", type=dict, required=False) - args = parser.parse_args() - - EnterpriseMailService.send_mail(DifyMail(**args)) + args = _mail_parser.parse_args() + send_inner_email_task.delay( + to=args["to"], + subject=args["subject"], + body=args["body"], + substitutions=args["substitutions"], + ) return {"message": "success"}, 200 +class EnterpriseMail(BaseMail): + method_decorators = [setup_required, enterprise_inner_api_only] + + +class BillingMail(BaseMail): + method_decorators = [setup_required, billing_inner_api_only] + + api.add_resource(EnterpriseMail, "/enterprise/mail") +api.add_resource(BillingMail, "/billing/mail") diff --git a/api/controllers/inner_api/plugin/plugin.py b/api/controllers/inner_api/plugin/plugin.py index 5dfe41eb6b..9b8d9457f0 100644 --- a/api/controllers/inner_api/plugin/plugin.py +++ b/api/controllers/inner_api/plugin/plugin.py @@ -1,4 +1,4 @@ -from flask_restful import Resource +from flask_restx import Resource from controllers.console.wraps import setup_required from controllers.inner_api import api diff --git a/api/controllers/inner_api/plugin/wraps.py b/api/controllers/inner_api/plugin/wraps.py index b533614d4d..89b4ac7506 100644 --- a/api/controllers/inner_api/plugin/wraps.py +++ 
b/api/controllers/inner_api/plugin/wraps.py @@ -4,7 +4,7 @@ from typing import Optional from flask import current_app, request from flask_login import user_logged_in -from flask_restful import reqparse +from flask_restx import reqparse from pydantic import BaseModel from sqlalchemy.orm import Session diff --git a/api/controllers/inner_api/workspace/workspace.py b/api/controllers/inner_api/workspace/workspace.py index 77568b75f1..1c26416080 100644 --- a/api/controllers/inner_api/workspace/workspace.py +++ b/api/controllers/inner_api/workspace/workspace.py @@ -1,6 +1,6 @@ import json -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from controllers.console.wraps import setup_required from controllers.inner_api import api diff --git a/api/controllers/inner_api/wraps.py b/api/controllers/inner_api/wraps.py index 9e7b3d4f29..c5aa318f58 100644 --- a/api/controllers/inner_api/wraps.py +++ b/api/controllers/inner_api/wraps.py @@ -10,6 +10,22 @@ from extensions.ext_database import db from models.model import EndUser +def billing_inner_api_only(view): + @wraps(view) + def decorated(*args, **kwargs): + if not dify_config.INNER_API: + abort(404) + + # get header 'X-Inner-Api-Key' + inner_api_key = request.headers.get("X-Inner-Api-Key") + if not inner_api_key or inner_api_key != dify_config.INNER_API_KEY: + abort(401) + + return view(*args, **kwargs) + + return decorated + + def enterprise_inner_api_only(view): @wraps(view) def decorated(*args, **kwargs): diff --git a/api/controllers/mcp/__init__.py b/api/controllers/mcp/__init__.py index 1b3e0a5621..1f5dae74e8 100644 --- a/api/controllers/mcp/__init__.py +++ b/api/controllers/mcp/__init__.py @@ -1,8 +1,20 @@ from flask import Blueprint +from flask_restx import Namespace from libs.external_api import ExternalApi bp = Blueprint("mcp", __name__, url_prefix="/mcp") -api = ExternalApi(bp) + +api = ExternalApi( + bp, + version="1.0", + title="MCP API", + description="API for Model Context 
Protocol operations", + doc="/docs", # Enable Swagger UI at /mcp/docs +) + +mcp_ns = Namespace("mcp", description="MCP operations") from . import mcp + +api.add_namespace(mcp_ns) diff --git a/api/controllers/mcp/mcp.py b/api/controllers/mcp/mcp.py index 87d678796f..fc19749011 100644 --- a/api/controllers/mcp/mcp.py +++ b/api/controllers/mcp/mcp.py @@ -1,8 +1,10 @@ -from flask_restful import Resource, reqparse +from typing import Optional, Union + +from flask_restx import Resource, reqparse from pydantic import ValidationError from controllers.console.app.mcp_server import AppMCPServerStatus -from controllers.mcp import api +from controllers.mcp import mcp_ns from core.app.app_config.entities import VariableEntity from core.mcp import types from core.mcp.server.streamable_http import MCPServerStreamableHTTPRequestHandler @@ -13,22 +15,58 @@ from libs import helper from models.model import App, AppMCPServer, AppMode +def int_or_str(value): + """Validate that a value is either an integer or string.""" + if isinstance(value, (int, str)): + return value + else: + return None + + +# Define parser for both documentation and validation +mcp_request_parser = reqparse.RequestParser() +mcp_request_parser.add_argument( + "jsonrpc", type=str, required=True, location="json", help="JSON-RPC version (should be '2.0')" +) +mcp_request_parser.add_argument("method", type=str, required=True, location="json", help="The method to invoke") +mcp_request_parser.add_argument("params", type=dict, required=False, location="json", help="Parameters for the method") +mcp_request_parser.add_argument( + "id", type=int_or_str, required=False, location="json", help="Request ID for tracking responses" +) + + +@mcp_ns.route("/server//mcp") class MCPAppApi(Resource): - def post(self, server_code): - def int_or_str(value): - if isinstance(value, (int, str)): - return value - else: - return None + @mcp_ns.expect(mcp_request_parser) + @mcp_ns.doc("handle_mcp_request") + @mcp_ns.doc(description="Handle 
Model Context Protocol (MCP) requests for a specific server") + @mcp_ns.doc(params={"server_code": "Unique identifier for the MCP server"}) + @mcp_ns.doc( + responses={ + 200: "MCP response successfully processed", + 400: "Invalid MCP request or parameters", + 404: "Server or app not found", + } + ) + def post(self, server_code: str): + """Handle MCP requests for a specific server. - parser = reqparse.RequestParser() - parser.add_argument("jsonrpc", type=str, required=True, location="json") - parser.add_argument("method", type=str, required=True, location="json") - parser.add_argument("params", type=dict, required=False, location="json") - parser.add_argument("id", type=int_or_str, required=False, location="json") - args = parser.parse_args() + Processes JSON-RPC formatted requests according to the Model Context Protocol specification. + Validates the server status and associated app before processing the request. - request_id = args.get("id") + Args: + server_code: Unique identifier for the MCP server + + Returns: + dict: JSON-RPC response from the MCP handler + + Raises: + ValidationError: Invalid request format or parameters + """ + # Parse and validate all arguments + args = mcp_request_parser.parse_args() + + request_id: Optional[Union[int, str]] = args.get("id") server = db.session.query(AppMCPServer).where(AppMCPServer.server_code == server_code).first() if not server: @@ -99,6 +137,3 @@ class MCPAppApi(Resource): mcp_server_handler = MCPServerStreamableHTTPRequestHandler(app, request, converted_user_input_form) response = mcp_server_handler.handle() return helper.compact_generate_response(response) - - -api.add_resource(MCPAppApi, "/server//mcp") diff --git a/api/controllers/service_api/__init__.py b/api/controllers/service_api/__init__.py index b26f29d98d..aaa3c8f9a1 100644 --- a/api/controllers/service_api/__init__.py +++ b/api/controllers/service_api/__init__.py @@ -1,11 +1,23 @@ from flask import Blueprint +from flask_restx import Namespace from 
libs.external_api import ExternalApi bp = Blueprint("service_api", __name__, url_prefix="/v1") -api = ExternalApi(bp) + +api = ExternalApi( + bp, + version="1.0", + title="Service API", + description="API for application services", + doc="/docs", # Enable Swagger UI at /v1/docs +) + +service_api_ns = Namespace("service_api", description="Service operations") from . import index from .app import annotation, app, audio, completion, conversation, file, file_preview, message, site, workflow from .dataset import dataset, document, hit_testing, metadata, segment, upload_file from .workspace import models + +api.add_namespace(service_api_ns) diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py index 9b22c535f4..6bc94af8c1 100644 --- a/api/controllers/service_api/app/annotation.py +++ b/api/controllers/service_api/app/annotation.py @@ -1,38 +1,73 @@ +from typing import Literal + from flask import request -from flask_restful import Resource, marshal, marshal_with, reqparse +from flask_restx import Api, Namespace, Resource, fields, reqparse +from flask_restx.api import HTTPStatus from werkzeug.exceptions import Forbidden -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.wraps import validate_app_token from extensions.ext_redis import redis_client -from fields.annotation_fields import ( - annotation_fields, -) +from fields.annotation_fields import annotation_fields, build_annotation_model from libs.login import current_user from models.model import App from services.annotation_service import AppAnnotationService +# Define parsers for annotation API +annotation_create_parser = reqparse.RequestParser() +annotation_create_parser.add_argument("question", required=True, type=str, location="json", help="Annotation question") +annotation_create_parser.add_argument("answer", required=True, type=str, location="json", help="Annotation answer") 
+annotation_reply_action_parser = reqparse.RequestParser() +annotation_reply_action_parser.add_argument( + "score_threshold", required=True, type=float, location="json", help="Score threshold for annotation matching" +) +annotation_reply_action_parser.add_argument( + "embedding_provider_name", required=True, type=str, location="json", help="Embedding provider name" +) +annotation_reply_action_parser.add_argument( + "embedding_model_name", required=True, type=str, location="json", help="Embedding model name" +) + + +@service_api_ns.route("/apps/annotation-reply/") class AnnotationReplyActionApi(Resource): + @service_api_ns.expect(annotation_reply_action_parser) + @service_api_ns.doc("annotation_reply_action") + @service_api_ns.doc(description="Enable or disable annotation reply feature") + @service_api_ns.doc(params={"action": "Action to perform: 'enable' or 'disable'"}) + @service_api_ns.doc( + responses={ + 200: "Action completed successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_app_token - def post(self, app_model: App, action): - parser = reqparse.RequestParser() - parser.add_argument("score_threshold", required=True, type=float, location="json") - parser.add_argument("embedding_provider_name", required=True, type=str, location="json") - parser.add_argument("embedding_model_name", required=True, type=str, location="json") - args = parser.parse_args() + def post(self, app_model: App, action: Literal["enable", "disable"]): + """Enable or disable annotation reply feature.""" + args = annotation_reply_action_parser.parse_args() if action == "enable": result = AppAnnotationService.enable_app_annotation(args, app_model.id) elif action == "disable": result = AppAnnotationService.disable_app_annotation(app_model.id) - else: - raise ValueError("Unsupported annotation reply action") return result, 200 +@service_api_ns.route("/apps/annotation-reply//status/") class AnnotationReplyActionStatusApi(Resource): + 
@service_api_ns.doc("get_annotation_reply_action_status") + @service_api_ns.doc(description="Get the status of an annotation reply action job") + @service_api_ns.doc(params={"action": "Action type", "job_id": "Job ID"}) + @service_api_ns.doc( + responses={ + 200: "Job status retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Job not found", + } + ) @validate_app_token def get(self, app_model: App, job_id, action): + """Get the status of an annotation reply action job.""" job_id = str(job_id) app_annotation_job_key = f"{action}_app_annotation_job_{str(job_id)}" cache_result = redis_client.get(app_annotation_job_key) @@ -48,60 +83,111 @@ class AnnotationReplyActionStatusApi(Resource): return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200 +# Define annotation list response model +annotation_list_fields = { + "data": fields.List(fields.Nested(annotation_fields)), + "has_more": fields.Boolean, + "limit": fields.Integer, + "total": fields.Integer, + "page": fields.Integer, +} + + +def build_annotation_list_model(api_or_ns: Api | Namespace): + """Build the annotation list model for the API or Namespace.""" + copied_annotation_list_fields = annotation_list_fields.copy() + copied_annotation_list_fields["data"] = fields.List(fields.Nested(build_annotation_model(api_or_ns))) + return api_or_ns.model("AnnotationList", copied_annotation_list_fields) + + +@service_api_ns.route("/apps/annotations") class AnnotationListApi(Resource): + @service_api_ns.doc("list_annotations") + @service_api_ns.doc(description="List annotations for the application") + @service_api_ns.doc( + responses={ + 200: "Annotations retrieved successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_app_token + @service_api_ns.marshal_with(build_annotation_list_model(service_api_ns)) def get(self, app_model: App): + """List annotations for the application.""" page = request.args.get("page", default=1, type=int) limit = 
request.args.get("limit", default=20, type=int) keyword = request.args.get("keyword", default="", type=str) annotation_list, total = AppAnnotationService.get_annotation_list_by_app_id(app_model.id, page, limit, keyword) - response = { - "data": marshal(annotation_list, annotation_fields), + return { + "data": annotation_list, "has_more": len(annotation_list) == limit, "limit": limit, "total": total, "page": page, } - return response, 200 + @service_api_ns.expect(annotation_create_parser) + @service_api_ns.doc("create_annotation") + @service_api_ns.doc(description="Create a new annotation") + @service_api_ns.doc( + responses={ + 201: "Annotation created successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_app_token - @marshal_with(annotation_fields) + @service_api_ns.marshal_with(build_annotation_model(service_api_ns), code=HTTPStatus.CREATED) def post(self, app_model: App): - parser = reqparse.RequestParser() - parser.add_argument("question", required=True, type=str, location="json") - parser.add_argument("answer", required=True, type=str, location="json") - args = parser.parse_args() + """Create a new annotation.""" + args = annotation_create_parser.parse_args() annotation = AppAnnotationService.insert_app_annotation_directly(args, app_model.id) - return annotation + return annotation, 201 +@service_api_ns.route("/apps/annotations/") class AnnotationUpdateDeleteApi(Resource): + @service_api_ns.expect(annotation_create_parser) + @service_api_ns.doc("update_annotation") + @service_api_ns.doc(description="Update an existing annotation") + @service_api_ns.doc(params={"annotation_id": "Annotation ID"}) + @service_api_ns.doc( + responses={ + 200: "Annotation updated successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Annotation not found", + } + ) @validate_app_token - @marshal_with(annotation_fields) + @service_api_ns.marshal_with(build_annotation_model(service_api_ns)) def put(self, 
app_model: App, annotation_id): + """Update an existing annotation.""" if not current_user.is_editor: raise Forbidden() annotation_id = str(annotation_id) - parser = reqparse.RequestParser() - parser.add_argument("question", required=True, type=str, location="json") - parser.add_argument("answer", required=True, type=str, location="json") - args = parser.parse_args() + args = annotation_create_parser.parse_args() annotation = AppAnnotationService.update_app_annotation_directly(args, app_model.id, annotation_id) return annotation + @service_api_ns.doc("delete_annotation") + @service_api_ns.doc(description="Delete an annotation") + @service_api_ns.doc(params={"annotation_id": "Annotation ID"}) + @service_api_ns.doc( + responses={ + 204: "Annotation deleted successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Annotation not found", + } + ) @validate_app_token def delete(self, app_model: App, annotation_id): + """Delete an annotation.""" if not current_user.is_editor: raise Forbidden() annotation_id = str(annotation_id) AppAnnotationService.delete_app_annotation(app_model.id, annotation_id) return {"result": "success"}, 204 - - -api.add_resource(AnnotationReplyActionApi, "/apps/annotation-reply/") -api.add_resource(AnnotationReplyActionStatusApi, "/apps/annotation-reply//status/") -api.add_resource(AnnotationListApi, "/apps/annotations") -api.add_resource(AnnotationUpdateDeleteApi, "/apps/annotations/") diff --git a/api/controllers/service_api/app/app.py b/api/controllers/service_api/app/app.py index 89222d5e83..2dbeed1d68 100644 --- a/api/controllers/service_api/app/app.py +++ b/api/controllers/service_api/app/app.py @@ -1,7 +1,7 @@ -from flask_restful import Resource, marshal_with +from flask_restx import Resource -from controllers.common import fields -from controllers.service_api import api +from controllers.common.fields import build_parameters_model +from controllers.service_api import service_api_ns 
from controllers.service_api.app.error import AppUnavailableError from controllers.service_api.wraps import validate_app_token from core.app.app_config.common.parameters_mapping import get_parameters_from_feature_dict @@ -9,13 +9,26 @@ from models.model import App, AppMode from services.app_service import AppService +@service_api_ns.route("/parameters") class AppParameterApi(Resource): """Resource for app variables.""" + @service_api_ns.doc("get_app_parameters") + @service_api_ns.doc(description="Retrieve application input parameters and configuration") + @service_api_ns.doc( + responses={ + 200: "Parameters retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Application not found", + } + ) @validate_app_token - @marshal_with(fields.parameters_fields) + @service_api_ns.marshal_with(build_parameters_model(service_api_ns)) def get(self, app_model: App): - """Retrieve app parameters.""" + """Retrieve app parameters. + + Returns the input form parameters and configuration for the application. + """ if app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}: workflow = app_model.workflow if workflow is None: @@ -35,17 +48,43 @@ class AppParameterApi(Resource): return get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form) +@service_api_ns.route("/meta") class AppMetaApi(Resource): + @service_api_ns.doc("get_app_meta") + @service_api_ns.doc(description="Get application metadata") + @service_api_ns.doc( + responses={ + 200: "Metadata retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Application not found", + } + ) @validate_app_token def get(self, app_model: App): - """Get app meta""" + """Get app metadata. + + Returns metadata about the application including configuration and settings. 
+ """ return AppService().get_app_meta(app_model) +@service_api_ns.route("/info") class AppInfoApi(Resource): + @service_api_ns.doc("get_app_info") + @service_api_ns.doc(description="Get basic application information") + @service_api_ns.doc( + responses={ + 200: "Application info retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Application not found", + } + ) @validate_app_token def get(self, app_model: App): - """Get app information""" + """Get app information. + + Returns basic information about the application including name, description, tags, and mode. + """ tags = [tag.name for tag in app_model.tags] return { "name": app_model.name, @@ -54,8 +93,3 @@ class AppInfoApi(Resource): "mode": app_model.mode, "author_name": app_model.author_name, } - - -api.add_resource(AppParameterApi, "/parameters") -api.add_resource(AppMetaApi, "/meta") -api.add_resource(AppInfoApi, "/info") diff --git a/api/controllers/service_api/app/audio.py b/api/controllers/service_api/app/audio.py index 848863cf1b..61b3020a5f 100644 --- a/api/controllers/service_api/app/audio.py +++ b/api/controllers/service_api/app/audio.py @@ -1,11 +1,11 @@ import logging from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import InternalServerError import services -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.app.error import ( AppUnavailableError, AudioTooLargeError, @@ -30,9 +30,26 @@ from services.errors.audio import ( ) +@service_api_ns.route("/audio-to-text") class AudioApi(Resource): + @service_api_ns.doc("audio_to_text") + @service_api_ns.doc(description="Convert audio to text using speech-to-text") + @service_api_ns.doc( + responses={ + 200: "Audio successfully transcribed", + 400: "Bad request - no audio or invalid audio", + 401: "Unauthorized - invalid API token", + 413: "Audio file too large", + 415: 
"Unsupported audio type", + 500: "Internal server error", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM)) def post(self, app_model: App, end_user: EndUser): + """Convert audio to text using speech-to-text. + + Accepts an audio file upload and returns the transcribed text. + """ file = request.files["file"] try: @@ -65,16 +82,35 @@ class AudioApi(Resource): raise InternalServerError() +# Define parser for text-to-audio API +text_to_audio_parser = reqparse.RequestParser() +text_to_audio_parser.add_argument("message_id", type=str, required=False, location="json", help="Message ID") +text_to_audio_parser.add_argument("voice", type=str, location="json", help="Voice to use for TTS") +text_to_audio_parser.add_argument("text", type=str, location="json", help="Text to convert to audio") +text_to_audio_parser.add_argument("streaming", type=bool, location="json", help="Enable streaming response") + + +@service_api_ns.route("/text-to-audio") class TextApi(Resource): + @service_api_ns.expect(text_to_audio_parser) + @service_api_ns.doc("text_to_audio") + @service_api_ns.doc(description="Convert text to audio using text-to-speech") + @service_api_ns.doc( + responses={ + 200: "Text successfully converted to audio", + 400: "Bad request - invalid parameters", + 401: "Unauthorized - invalid API token", + 500: "Internal server error", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) def post(self, app_model: App, end_user: EndUser): + """Convert text to audio using text-to-speech. + + Converts the provided text to audio using the specified voice. 
+ """ try: - parser = reqparse.RequestParser() - parser.add_argument("message_id", type=str, required=False, location="json") - parser.add_argument("voice", type=str, location="json") - parser.add_argument("text", type=str, location="json") - parser.add_argument("streaming", type=bool, location="json") - args = parser.parse_args() + args = text_to_audio_parser.parse_args() message_id = args.get("message_id", None) text = args.get("text", None) @@ -108,7 +144,3 @@ class TextApi(Resource): except Exception as e: logging.exception("internal server error.") raise InternalServerError() - - -api.add_resource(AudioApi, "/audio-to-text") -api.add_resource(TextApi, "/text-to-audio") diff --git a/api/controllers/service_api/app/completion.py b/api/controllers/service_api/app/completion.py index ea57f04850..dddb75d593 100644 --- a/api/controllers/service_api/app/completion.py +++ b/api/controllers/service_api/app/completion.py @@ -1,11 +1,11 @@ import logging from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from werkzeug.exceptions import BadRequest, InternalServerError, NotFound import services -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.app.error import ( AppUnavailableError, CompletionRequestError, @@ -33,21 +33,68 @@ from services.app_generate_service import AppGenerateService from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError from services.errors.llm import InvokeRateLimitError +# Define parser for completion API +completion_parser = reqparse.RequestParser() +completion_parser.add_argument( + "inputs", type=dict, required=True, location="json", help="Input parameters for completion" +) +completion_parser.add_argument("query", type=str, location="json", default="", help="The query string") +completion_parser.add_argument("files", type=list, required=False, location="json", 
help="List of file attachments") +completion_parser.add_argument( + "response_mode", type=str, choices=["blocking", "streaming"], location="json", help="Response mode" +) +completion_parser.add_argument( + "retriever_from", type=str, required=False, default="dev", location="json", help="Retriever source" +) +# Define parser for chat API +chat_parser = reqparse.RequestParser() +chat_parser.add_argument("inputs", type=dict, required=True, location="json", help="Input parameters for chat") +chat_parser.add_argument("query", type=str, required=True, location="json", help="The chat query") +chat_parser.add_argument("files", type=list, required=False, location="json", help="List of file attachments") +chat_parser.add_argument( + "response_mode", type=str, choices=["blocking", "streaming"], location="json", help="Response mode" +) +chat_parser.add_argument("conversation_id", type=uuid_value, location="json", help="Existing conversation ID") +chat_parser.add_argument( + "retriever_from", type=str, required=False, default="dev", location="json", help="Retriever source" +) +chat_parser.add_argument( + "auto_generate_name", + type=bool, + required=False, + default=True, + location="json", + help="Auto generate conversation name", +) +chat_parser.add_argument("workflow_id", type=str, required=False, location="json", help="Workflow ID for advanced chat") + + +@service_api_ns.route("/completion-messages") class CompletionApi(Resource): + @service_api_ns.expect(completion_parser) + @service_api_ns.doc("create_completion") + @service_api_ns.doc(description="Create a completion for the given prompt") + @service_api_ns.doc( + responses={ + 200: "Completion created successfully", + 400: "Bad request - invalid parameters", + 401: "Unauthorized - invalid API token", + 404: "Conversation not found", + 500: "Internal server error", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) def post(self, app_model: App, end_user: EndUser): + 
"""Create a completion for the given prompt. + + This endpoint generates a completion based on the provided inputs and query. + Supports both blocking and streaming response modes. + """ if app_model.mode != "completion": raise AppUnavailableError() - parser = reqparse.RequestParser() - parser.add_argument("inputs", type=dict, required=True, location="json") - parser.add_argument("query", type=str, location="json", default="") - parser.add_argument("files", type=list, required=False, location="json") - parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") - parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json") - - args = parser.parse_args() + args = completion_parser.parse_args() external_trace_id = get_external_trace_id(request) if external_trace_id: args["external_trace_id"] = external_trace_id @@ -88,9 +135,21 @@ class CompletionApi(Resource): raise InternalServerError() +@service_api_ns.route("/completion-messages//stop") class CompletionStopApi(Resource): + @service_api_ns.doc("stop_completion") + @service_api_ns.doc(description="Stop a running completion task") + @service_api_ns.doc(params={"task_id": "The ID of the task to stop"}) + @service_api_ns.doc( + responses={ + 200: "Task stopped successfully", + 401: "Unauthorized - invalid API token", + 404: "Task not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) - def post(self, app_model: App, end_user: EndUser, task_id): + def post(self, app_model: App, end_user: EndUser, task_id: str): + """Stop a running completion task.""" if app_model.mode != "completion": raise AppUnavailableError() @@ -99,23 +158,33 @@ class CompletionStopApi(Resource): return {"result": "success"}, 200 +@service_api_ns.route("/chat-messages") class ChatApi(Resource): + @service_api_ns.expect(chat_parser) + @service_api_ns.doc("create_chat_message") + @service_api_ns.doc(description="Send 
a message in a chat conversation") + @service_api_ns.doc( + responses={ + 200: "Message sent successfully", + 400: "Bad request - invalid parameters or workflow issues", + 401: "Unauthorized - invalid API token", + 404: "Conversation or workflow not found", + 429: "Rate limit exceeded", + 500: "Internal server error", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) def post(self, app_model: App, end_user: EndUser): + """Send a message in a chat conversation. + + This endpoint handles chat messages for chat, agent chat, and advanced chat applications. + Supports conversation management and both blocking and streaming response modes. + """ app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() - parser = reqparse.RequestParser() - parser.add_argument("inputs", type=dict, required=True, location="json") - parser.add_argument("query", type=str, required=True, location="json") - parser.add_argument("files", type=list, required=False, location="json") - parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") - parser.add_argument("conversation_id", type=uuid_value, location="json") - parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json") - parser.add_argument("auto_generate_name", type=bool, required=False, default=True, location="json") - parser.add_argument("workflow_id", type=str, required=False, location="json") - args = parser.parse_args() + args = chat_parser.parse_args() external_trace_id = get_external_trace_id(request) if external_trace_id: @@ -159,9 +228,21 @@ class ChatApi(Resource): raise InternalServerError() +@service_api_ns.route("/chat-messages//stop") class ChatStopApi(Resource): + @service_api_ns.doc("stop_chat_message") + @service_api_ns.doc(description="Stop a running chat message generation") + 
@service_api_ns.doc(params={"task_id": "The ID of the task to stop"}) + @service_api_ns.doc( + responses={ + 200: "Task stopped successfully", + 401: "Unauthorized - invalid API token", + 404: "Task not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) - def post(self, app_model: App, end_user: EndUser, task_id): + def post(self, app_model: App, end_user: EndUser, task_id: str): + """Stop a running chat message generation.""" app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() @@ -169,9 +250,3 @@ class ChatStopApi(Resource): AppQueueManager.set_stop_flag(task_id, InvokeFrom.SERVICE_API, end_user.id) return {"result": "success"}, 200 - - -api.add_resource(CompletionApi, "/completion-messages") -api.add_resource(CompletionStopApi, "/completion-messages//stop") -api.add_resource(ChatApi, "/chat-messages") -api.add_resource(ChatStopApi, "/chat-messages//stop") diff --git a/api/controllers/service_api/app/conversation.py b/api/controllers/service_api/app/conversation.py index 073307ac4a..4860bf3a79 100644 --- a/api/controllers/service_api/app/conversation.py +++ b/api/controllers/service_api/app/conversation.py @@ -1,48 +1,97 @@ -from flask_restful import Resource, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import Resource, reqparse +from flask_restx.inputs import int_range from sqlalchemy.orm import Session from werkzeug.exceptions import BadRequest, NotFound import services -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.app.error import NotChatAppError from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db from fields.conversation_fields import ( - 
conversation_delete_fields, - conversation_infinite_scroll_pagination_fields, - simple_conversation_fields, + build_conversation_delete_model, + build_conversation_infinite_scroll_pagination_model, + build_simple_conversation_model, ) from fields.conversation_variable_fields import ( - conversation_variable_fields, - conversation_variable_infinite_scroll_pagination_fields, + build_conversation_variable_infinite_scroll_pagination_model, + build_conversation_variable_model, ) from libs.helper import uuid_value from models.model import App, AppMode, EndUser from services.conversation_service import ConversationService +# Define parsers for conversation APIs +conversation_list_parser = reqparse.RequestParser() +conversation_list_parser.add_argument( + "last_id", type=uuid_value, location="args", help="Last conversation ID for pagination" +) +conversation_list_parser.add_argument( + "limit", + type=int_range(1, 100), + required=False, + default=20, + location="args", + help="Number of conversations to return", +) +conversation_list_parser.add_argument( + "sort_by", + type=str, + choices=["created_at", "-created_at", "updated_at", "-updated_at"], + required=False, + default="-updated_at", + location="args", + help="Sort order for conversations", +) +conversation_rename_parser = reqparse.RequestParser() +conversation_rename_parser.add_argument("name", type=str, required=False, location="json", help="New conversation name") +conversation_rename_parser.add_argument( + "auto_generate", type=bool, required=False, default=False, location="json", help="Auto-generate conversation name" +) + +conversation_variables_parser = reqparse.RequestParser() +conversation_variables_parser.add_argument( + "last_id", type=uuid_value, location="args", help="Last variable ID for pagination" +) +conversation_variables_parser.add_argument( + "limit", type=int_range(1, 100), required=False, default=20, location="args", help="Number of variables to return" +) + +conversation_variable_update_parser 
= reqparse.RequestParser() +# using lambda is for passing the already-typed value without modification +# if no lambda, it will be converted to string +# the string cannot be converted using json.loads +conversation_variable_update_parser.add_argument( + "value", required=True, location="json", type=lambda x: x, help="New value for the conversation variable" +) + + +@service_api_ns.route("/conversations") class ConversationApi(Resource): + @service_api_ns.expect(conversation_list_parser) + @service_api_ns.doc("list_conversations") + @service_api_ns.doc(description="List all conversations for the current user") + @service_api_ns.doc( + responses={ + 200: "Conversations retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Last conversation not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) - @marshal_with(conversation_infinite_scroll_pagination_fields) + @service_api_ns.marshal_with(build_conversation_infinite_scroll_pagination_model(service_api_ns)) def get(self, app_model: App, end_user: EndUser): + """List all conversations for the current user. + + Supports pagination using last_id and limit parameters. 
+ """ app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() - parser = reqparse.RequestParser() - parser.add_argument("last_id", type=uuid_value, location="args") - parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") - parser.add_argument( - "sort_by", - type=str, - choices=["created_at", "-created_at", "updated_at", "-updated_at"], - required=False, - default="-updated_at", - location="args", - ) - args = parser.parse_args() + args = conversation_list_parser.parse_args() try: with Session(db.engine) as session: @@ -59,10 +108,22 @@ class ConversationApi(Resource): raise NotFound("Last Conversation Not Exists.") +@service_api_ns.route("/conversations/") class ConversationDetailApi(Resource): + @service_api_ns.doc("delete_conversation") + @service_api_ns.doc(description="Delete a specific conversation") + @service_api_ns.doc(params={"c_id": "Conversation ID"}) + @service_api_ns.doc( + responses={ + 204: "Conversation deleted successfully", + 401: "Unauthorized - invalid API token", + 404: "Conversation not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) - @marshal_with(conversation_delete_fields) + @service_api_ns.marshal_with(build_conversation_delete_model(service_api_ns), code=204) def delete(self, app_model: App, end_user: EndUser, c_id): + """Delete a specific conversation.""" app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() @@ -76,20 +137,30 @@ class ConversationDetailApi(Resource): return {"result": "success"}, 204 +@service_api_ns.route("/conversations//name") class ConversationRenameApi(Resource): + @service_api_ns.expect(conversation_rename_parser) + @service_api_ns.doc("rename_conversation") + @service_api_ns.doc(description="Rename a conversation or auto-generate a name") + 
@service_api_ns.doc(params={"c_id": "Conversation ID"}) + @service_api_ns.doc( + responses={ + 200: "Conversation renamed successfully", + 401: "Unauthorized - invalid API token", + 404: "Conversation not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) - @marshal_with(simple_conversation_fields) + @service_api_ns.marshal_with(build_simple_conversation_model(service_api_ns)) def post(self, app_model: App, end_user: EndUser, c_id): + """Rename a conversation or auto-generate a name.""" app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() conversation_id = str(c_id) - parser = reqparse.RequestParser() - parser.add_argument("name", type=str, required=False, location="json") - parser.add_argument("auto_generate", type=bool, required=False, default=False, location="json") - args = parser.parse_args() + args = conversation_rename_parser.parse_args() try: return ConversationService.rename(app_model, conversation_id, end_user, args["name"], args["auto_generate"]) @@ -97,10 +168,26 @@ class ConversationRenameApi(Resource): raise NotFound("Conversation Not Exists.") +@service_api_ns.route("/conversations//variables") class ConversationVariablesApi(Resource): + @service_api_ns.expect(conversation_variables_parser) + @service_api_ns.doc("list_conversation_variables") + @service_api_ns.doc(description="List all variables for a conversation") + @service_api_ns.doc(params={"c_id": "Conversation ID"}) + @service_api_ns.doc( + responses={ + 200: "Variables retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Conversation not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) - @marshal_with(conversation_variable_infinite_scroll_pagination_fields) + @service_api_ns.marshal_with(build_conversation_variable_infinite_scroll_pagination_model(service_api_ns)) def get(self, 
app_model: App, end_user: EndUser, c_id): + """List all variables for a conversation. + + Conversational variables are only available for chat applications. + """ # conversational variable only for chat app app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -108,10 +195,7 @@ class ConversationVariablesApi(Resource): conversation_id = str(c_id) - parser = reqparse.RequestParser() - parser.add_argument("last_id", type=uuid_value, location="args") - parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") - args = parser.parse_args() + args = conversation_variables_parser.parse_args() try: return ConversationService.get_conversational_variable( @@ -121,11 +205,28 @@ class ConversationVariablesApi(Resource): raise NotFound("Conversation Not Exists.") +@service_api_ns.route("/conversations//variables/") class ConversationVariableDetailApi(Resource): + @service_api_ns.expect(conversation_variable_update_parser) + @service_api_ns.doc("update_conversation_variable") + @service_api_ns.doc(description="Update a conversation variable's value") + @service_api_ns.doc(params={"c_id": "Conversation ID", "variable_id": "Variable ID"}) + @service_api_ns.doc( + responses={ + 200: "Variable updated successfully", + 400: "Bad request - type mismatch", + 401: "Unauthorized - invalid API token", + 404: "Conversation or variable not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) - @marshal_with(conversation_variable_fields) + @service_api_ns.marshal_with(build_conversation_variable_model(service_api_ns)) def put(self, app_model: App, end_user: EndUser, c_id, variable_id): - """Update a conversation variable's value""" + """Update a conversation variable's value. + + Allows updating the value of a specific conversation variable. + The value must match the variable's expected type. 
+ """ app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() @@ -133,12 +234,7 @@ class ConversationVariableDetailApi(Resource): conversation_id = str(c_id) variable_id = str(variable_id) - parser = reqparse.RequestParser() - # using lambda is for passing the already-typed value without modification - # if no lambda, it will be converted to string - # the string cannot be converted using json.loads - parser.add_argument("value", required=True, location="json", type=lambda x: x) - args = parser.parse_args() + args = conversation_variable_update_parser.parse_args() try: return ConversationService.update_conversation_variable( @@ -150,15 +246,3 @@ class ConversationVariableDetailApi(Resource): raise NotFound("Conversation Variable Not Exists.") except services.errors.conversation.ConversationVariableTypeMismatchError as e: raise BadRequest(str(e)) - - -api.add_resource(ConversationRenameApi, "/conversations//name", endpoint="conversation_name") -api.add_resource(ConversationApi, "/conversations") -api.add_resource(ConversationDetailApi, "/conversations/", endpoint="conversation_detail") -api.add_resource(ConversationVariablesApi, "/conversations//variables", endpoint="conversation_variables") -api.add_resource( - ConversationVariableDetailApi, - "/conversations//variables/", - endpoint="conversation_variable_detail", - methods=["PUT"], -) diff --git a/api/controllers/service_api/app/file.py b/api/controllers/service_api/app/file.py index 37153ca5db..05f27545b3 100644 --- a/api/controllers/service_api/app/file.py +++ b/api/controllers/service_api/app/file.py @@ -1,5 +1,6 @@ from flask import request -from flask_restful import Resource, marshal_with +from flask_restx import Resource +from flask_restx.api import HTTPStatus import services from controllers.common.errors import ( @@ -9,17 +10,33 @@ from controllers.common.errors import ( TooManyFilesError, UnsupportedFileTypeError, ) 
-from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token -from fields.file_fields import file_fields +from fields.file_fields import build_file_model from models.model import App, EndUser from services.file_service import FileService +@service_api_ns.route("/files/upload") class FileApi(Resource): + @service_api_ns.doc("upload_file") + @service_api_ns.doc(description="Upload a file for use in conversations") + @service_api_ns.doc( + responses={ + 201: "File uploaded successfully", + 400: "Bad request - no file or invalid file", + 401: "Unauthorized - invalid API token", + 413: "File too large", + 415: "Unsupported file type", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM)) - @marshal_with(file_fields) + @service_api_ns.marshal_with(build_file_model(service_api_ns), code=HTTPStatus.CREATED) def post(self, app_model: App, end_user: EndUser): + """Upload a file for use in conversations. + + Accepts a single file upload via multipart/form-data. 
+ """ # check file if "file" not in request.files: raise NoFileUploadedError() @@ -47,6 +64,3 @@ class FileApi(Resource): raise UnsupportedFileTypeError() return upload_file, 201 - - -api.add_resource(FileApi, "/files/upload") diff --git a/api/controllers/service_api/app/file_preview.py b/api/controllers/service_api/app/file_preview.py index 57141033d1..84d80ea101 100644 --- a/api/controllers/service_api/app/file_preview.py +++ b/api/controllers/service_api/app/file_preview.py @@ -2,9 +2,9 @@ import logging from urllib.parse import quote from flask import Response -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.app.error import ( FileAccessDeniedError, FileNotFoundError, @@ -17,6 +17,14 @@ from models.model import App, EndUser, Message, MessageFile, UploadFile logger = logging.getLogger(__name__) +# Define parser for file preview API +file_preview_parser = reqparse.RequestParser() +file_preview_parser.add_argument( + "as_attachment", type=bool, required=False, default=False, location="args", help="Download as attachment" +) + + +@service_api_ns.route("/files//preview") class FilePreviewApi(Resource): """ Service API File Preview endpoint @@ -25,33 +33,30 @@ class FilePreviewApi(Resource): Files can only be accessed if they belong to messages within the requesting app's context. 
""" + @service_api_ns.expect(file_preview_parser) + @service_api_ns.doc("preview_file") + @service_api_ns.doc(description="Preview or download a file uploaded via Service API") + @service_api_ns.doc(params={"file_id": "UUID of the file to preview"}) + @service_api_ns.doc( + responses={ + 200: "File retrieved successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - file access denied", + 404: "File not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) def get(self, app_model: App, end_user: EndUser, file_id: str): """ - Preview/Download a file that was uploaded via Service API + Preview/Download a file that was uploaded via Service API. - Args: - app_model: The authenticated app model - end_user: The authenticated end user (optional) - file_id: UUID of the file to preview - - Query Parameters: - user: Optional user identifier - as_attachment: Boolean, whether to download as attachment (default: false) - - Returns: - Stream response with file content - - Raises: - FileNotFoundError: File does not exist - FileAccessDeniedError: File access denied (not owned by app) + Provides secure file preview/download functionality. + Files can only be accessed if they belong to messages within the requesting app's context. 
""" file_id = str(file_id) # Parse query parameters - parser = reqparse.RequestParser() - parser.add_argument("as_attachment", type=bool, required=False, default=False, location="args") - args = parser.parse_args() + args = file_preview_parser.parse_args() # Validate file ownership and get file objects message_file, upload_file = self._validate_file_ownership(file_id, app_model.id) @@ -180,7 +185,3 @@ class FilePreviewApi(Resource): response.headers["Cache-Control"] = "public, max-age=3600" # Cache for 1 hour return response - - -# Register the API endpoint -api.add_resource(FilePreviewApi, "/files//preview") diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index a4f95cb1cb..ad3fac7009 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -1,17 +1,17 @@ import json import logging -from flask_restful import Resource, fields, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import Api, Namespace, Resource, fields, reqparse +from flask_restx.inputs import int_range from werkzeug.exceptions import BadRequest, InternalServerError, NotFound import services -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.app.error import NotChatAppError from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token from core.app.entities.app_invoke_entities import InvokeFrom -from fields.conversation_fields import message_file_fields -from fields.message_fields import agent_thought_fields, feedback_fields +from fields.conversation_fields import build_message_file_model +from fields.message_fields import build_agent_thought_model, build_feedback_model from fields.raws import FilesContainedField from libs.helper import TimestampField, uuid_value from models.model import App, AppMode, EndUser @@ -22,8 +22,37 @@ from services.errors.message import ( ) 
from services.message_service import MessageService +# Define parsers for message APIs +message_list_parser = reqparse.RequestParser() +message_list_parser.add_argument( + "conversation_id", required=True, type=uuid_value, location="args", help="Conversation ID" +) +message_list_parser.add_argument("first_id", type=uuid_value, location="args", help="First message ID for pagination") +message_list_parser.add_argument( + "limit", type=int_range(1, 100), required=False, default=20, location="args", help="Number of messages to return" +) -class MessageListApi(Resource): +message_feedback_parser = reqparse.RequestParser() +message_feedback_parser.add_argument( + "rating", type=str, choices=["like", "dislike", None], location="json", help="Feedback rating" +) +message_feedback_parser.add_argument("content", type=str, location="json", help="Feedback content") + +feedback_list_parser = reqparse.RequestParser() +feedback_list_parser.add_argument("page", type=int, default=1, location="args", help="Page number") +feedback_list_parser.add_argument( + "limit", type=int_range(1, 101), required=False, default=20, location="args", help="Number of feedbacks per page" +) + + +def build_message_model(api_or_ns: Api | Namespace): + """Build the message model for the API or Namespace.""" + # First build the nested models + feedback_model = build_feedback_model(api_or_ns) + agent_thought_model = build_agent_thought_model(api_or_ns) + message_file_model = build_message_file_model(api_or_ns) + + # Then build the message fields with nested models message_fields = { "id": fields.String, "conversation_id": fields.String, @@ -31,37 +60,58 @@ class MessageListApi(Resource): "inputs": FilesContainedField, "query": fields.String, "answer": fields.String(attribute="re_sign_file_url_answer"), - "message_files": fields.List(fields.Nested(message_file_fields)), - "feedback": fields.Nested(feedback_fields, attribute="user_feedback", allow_null=True), + "message_files": 
fields.List(fields.Nested(message_file_model)), + "feedback": fields.Nested(feedback_model, attribute="user_feedback", allow_null=True), "retriever_resources": fields.Raw( attribute=lambda obj: json.loads(obj.message_metadata).get("retriever_resources", []) if obj.message_metadata else [] ), "created_at": TimestampField, - "agent_thoughts": fields.List(fields.Nested(agent_thought_fields)), + "agent_thoughts": fields.List(fields.Nested(agent_thought_model)), "status": fields.String, "error": fields.String, } + return api_or_ns.model("Message", message_fields) + + +def build_message_infinite_scroll_pagination_model(api_or_ns: Api | Namespace): + """Build the message infinite scroll pagination model for the API or Namespace.""" + # Build the nested message model first + message_model = build_message_model(api_or_ns) message_infinite_scroll_pagination_fields = { "limit": fields.Integer, "has_more": fields.Boolean, - "data": fields.List(fields.Nested(message_fields)), + "data": fields.List(fields.Nested(message_model)), } + return api_or_ns.model("MessageInfiniteScrollPagination", message_infinite_scroll_pagination_fields) + +@service_api_ns.route("/messages") +class MessageListApi(Resource): + @service_api_ns.expect(message_list_parser) + @service_api_ns.doc("list_messages") + @service_api_ns.doc(description="List messages in a conversation") + @service_api_ns.doc( + responses={ + 200: "Messages retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Conversation or first message not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) - @marshal_with(message_infinite_scroll_pagination_fields) + @service_api_ns.marshal_with(build_message_infinite_scroll_pagination_model(service_api_ns)) def get(self, app_model: App, end_user: EndUser): + """List messages in a conversation. + + Retrieves messages with pagination support using first_id. 
+ """ app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() - parser = reqparse.RequestParser() - parser.add_argument("conversation_id", required=True, type=uuid_value, location="args") - parser.add_argument("first_id", type=uuid_value, location="args") - parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") - args = parser.parse_args() + args = message_list_parser.parse_args() try: return MessageService.pagination_by_first_id( @@ -73,15 +123,28 @@ class MessageListApi(Resource): raise NotFound("First Message Not Exists.") +@service_api_ns.route("/messages//feedbacks") class MessageFeedbackApi(Resource): + @service_api_ns.expect(message_feedback_parser) + @service_api_ns.doc("create_message_feedback") + @service_api_ns.doc(description="Submit feedback for a message") + @service_api_ns.doc(params={"message_id": "Message ID"}) + @service_api_ns.doc( + responses={ + 200: "Feedback submitted successfully", + 401: "Unauthorized - invalid API token", + 404: "Message not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) def post(self, app_model: App, end_user: EndUser, message_id): + """Submit feedback for a message. + + Allows users to rate messages as like/dislike and provide optional feedback content. 
+ """ message_id = str(message_id) - parser = reqparse.RequestParser() - parser.add_argument("rating", type=str, choices=["like", "dislike", None], location="json") - parser.add_argument("content", type=str, location="json") - args = parser.parse_args() + args = message_feedback_parser.parse_args() try: MessageService.create_feedback( @@ -97,21 +160,48 @@ class MessageFeedbackApi(Resource): return {"result": "success"} +@service_api_ns.route("/app/feedbacks") class AppGetFeedbacksApi(Resource): + @service_api_ns.expect(feedback_list_parser) + @service_api_ns.doc("get_app_feedbacks") + @service_api_ns.doc(description="Get all feedbacks for the application") + @service_api_ns.doc( + responses={ + 200: "Feedbacks retrieved successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_app_token def get(self, app_model: App): - """Get All Feedbacks of an app""" - parser = reqparse.RequestParser() - parser.add_argument("page", type=int, default=1, location="args") - parser.add_argument("limit", type=int_range(1, 101), required=False, default=20, location="args") - args = parser.parse_args() + """Get all feedbacks for the application. + + Returns paginated list of all feedback submitted for messages in this app. 
+ """ + args = feedback_list_parser.parse_args() feedbacks = MessageService.get_all_messages_feedbacks(app_model, page=args["page"], limit=args["limit"]) return {"data": feedbacks} +@service_api_ns.route("/messages//suggested") class MessageSuggestedApi(Resource): + @service_api_ns.doc("get_suggested_questions") + @service_api_ns.doc(description="Get suggested follow-up questions for a message") + @service_api_ns.doc(params={"message_id": "Message ID"}) + @service_api_ns.doc( + responses={ + 200: "Suggested questions retrieved successfully", + 400: "Suggested questions feature is disabled", + 401: "Unauthorized - invalid API token", + 404: "Message not found", + 500: "Internal server error", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY, required=True)) def get(self, app_model: App, end_user: EndUser, message_id): + """Get suggested follow-up questions for a message. + + Returns AI-generated follow-up questions based on the message content. + """ message_id = str(message_id) app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -130,9 +220,3 @@ class MessageSuggestedApi(Resource): raise InternalServerError() return {"result": "success", "data": questions} - - -api.add_resource(MessageListApi, "/messages") -api.add_resource(MessageFeedbackApi, "/messages//feedbacks") -api.add_resource(MessageSuggestedApi, "/messages//suggested") -api.add_resource(AppGetFeedbacksApi, "/app/feedbacks") diff --git a/api/controllers/service_api/app/site.py b/api/controllers/service_api/app/site.py index c157b39f6b..9f8324a84e 100644 --- a/api/controllers/service_api/app/site.py +++ b/api/controllers/service_api/app/site.py @@ -1,30 +1,41 @@ -from flask_restful import Resource, marshal_with +from flask_restx import Resource from werkzeug.exceptions import Forbidden -from controllers.common import fields -from controllers.service_api import api +from controllers.common.fields 
import build_site_model +from controllers.service_api import service_api_ns from controllers.service_api.wraps import validate_app_token from extensions.ext_database import db from models.account import TenantStatus from models.model import App, Site +@service_api_ns.route("/site") class AppSiteApi(Resource): """Resource for app sites.""" + @service_api_ns.doc("get_app_site") + @service_api_ns.doc(description="Get application site configuration") + @service_api_ns.doc( + responses={ + 200: "Site configuration retrieved successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - site not found or tenant archived", + } + ) @validate_app_token - @marshal_with(fields.site_fields) + @service_api_ns.marshal_with(build_site_model(service_api_ns)) def get(self, app_model: App): - """Retrieve app site info.""" + """Retrieve app site info. + + Returns the site configuration for the application including theme, icons, and text. + """ site = db.session.query(Site).where(Site.app_id == app_model.id).first() if not site: raise Forbidden() + assert app_model.tenant if app_model.tenant.status == TenantStatus.ARCHIVE: raise Forbidden() return site - - -api.add_resource(AppSiteApi, "/site") diff --git a/api/controllers/service_api/app/workflow.py b/api/controllers/service_api/app/workflow.py index cd8a5f03ac..19e2e67d7f 100644 --- a/api/controllers/service_api/app/workflow.py +++ b/api/controllers/service_api/app/workflow.py @@ -2,12 +2,12 @@ import logging from dateutil.parser import isoparse from flask import request -from flask_restful import Resource, fields, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import Api, Namespace, Resource, fields, reqparse +from flask_restx.inputs import int_range from sqlalchemy.orm import Session, sessionmaker from werkzeug.exceptions import BadRequest, InternalServerError, NotFound -from controllers.service_api import api +from controllers.service_api import service_api_ns from 
controllers.service_api.app.error import ( CompletionRequestError, NotWorkflowAppError, @@ -28,7 +28,7 @@ from core.helper.trace_id_helper import get_external_trace_id from core.model_runtime.errors.invoke import InvokeError from core.workflow.entities.workflow_execution import WorkflowExecutionStatus from extensions.ext_database import db -from fields.workflow_app_log_fields import workflow_app_log_pagination_fields +from fields.workflow_app_log_fields import build_workflow_app_log_pagination_model from libs import helper from libs.helper import TimestampField from models.model import App, AppMode, EndUser @@ -40,6 +40,34 @@ from services.workflow_app_service import WorkflowAppService logger = logging.getLogger(__name__) +# Define parsers for workflow APIs +workflow_run_parser = reqparse.RequestParser() +workflow_run_parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json") +workflow_run_parser.add_argument("files", type=list, required=False, location="json") +workflow_run_parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") + +workflow_log_parser = reqparse.RequestParser() +workflow_log_parser.add_argument("keyword", type=str, location="args") +workflow_log_parser.add_argument("status", type=str, choices=["succeeded", "failed", "stopped"], location="args") +workflow_log_parser.add_argument("created_at__before", type=str, location="args") +workflow_log_parser.add_argument("created_at__after", type=str, location="args") +workflow_log_parser.add_argument( + "created_by_end_user_session_id", + type=str, + location="args", + required=False, + default=None, +) +workflow_log_parser.add_argument( + "created_by_account", + type=str, + location="args", + required=False, + default=None, +) +workflow_log_parser.add_argument("page", type=int_range(1, 99999), default=1, location="args") +workflow_log_parser.add_argument("limit", type=int_range(1, 100), default=20, location="args") + 
workflow_run_fields = { "id": fields.String, "workflow_id": fields.String, @@ -55,12 +83,29 @@ workflow_run_fields = { } +def build_workflow_run_model(api_or_ns: Api | Namespace): + """Build the workflow run model for the API or Namespace.""" + return api_or_ns.model("WorkflowRun", workflow_run_fields) + + +@service_api_ns.route("/workflows/run/") class WorkflowRunDetailApi(Resource): + @service_api_ns.doc("get_workflow_run_detail") + @service_api_ns.doc(description="Get workflow run details") + @service_api_ns.doc(params={"workflow_run_id": "Workflow run ID"}) + @service_api_ns.doc( + responses={ + 200: "Workflow run details retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Workflow run not found", + } + ) @validate_app_token - @marshal_with(workflow_run_fields) + @service_api_ns.marshal_with(build_workflow_run_model(service_api_ns)) def get(self, app_model: App, workflow_run_id: str): - """ - Get a workflow task running detail + """Get a workflow task running detail. + + Returns detailed information about a specific workflow run. """ app_mode = AppMode.value_of(app_model.mode) if app_mode not in [AppMode.WORKFLOW, AppMode.ADVANCED_CHAT]: @@ -78,21 +123,33 @@ class WorkflowRunDetailApi(Resource): return workflow_run +@service_api_ns.route("/workflows/run") class WorkflowRunApi(Resource): + @service_api_ns.expect(workflow_run_parser) + @service_api_ns.doc("run_workflow") + @service_api_ns.doc(description="Execute a workflow") + @service_api_ns.doc( + responses={ + 200: "Workflow executed successfully", + 400: "Bad request - invalid parameters or workflow issues", + 401: "Unauthorized - invalid API token", + 404: "Workflow not found", + 429: "Rate limit exceeded", + 500: "Internal server error", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) def post(self, app_model: App, end_user: EndUser): - """ - Run workflow + """Execute a workflow. 
+ + Runs a workflow with the provided inputs and returns the results. + Supports both blocking and streaming response modes. """ app_mode = AppMode.value_of(app_model.mode) if app_mode != AppMode.WORKFLOW: raise NotWorkflowAppError() - parser = reqparse.RequestParser() - parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json") - parser.add_argument("files", type=list, required=False, location="json") - parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") - args = parser.parse_args() + args = workflow_run_parser.parse_args() external_trace_id = get_external_trace_id(request) if external_trace_id: args["external_trace_id"] = external_trace_id @@ -121,21 +178,33 @@ class WorkflowRunApi(Resource): raise InternalServerError() +@service_api_ns.route("/workflows//run") class WorkflowRunByIdApi(Resource): + @service_api_ns.expect(workflow_run_parser) + @service_api_ns.doc("run_workflow_by_id") + @service_api_ns.doc(description="Execute a specific workflow by ID") + @service_api_ns.doc(params={"workflow_id": "Workflow ID to execute"}) + @service_api_ns.doc( + responses={ + 200: "Workflow executed successfully", + 400: "Bad request - invalid parameters or workflow issues", + 401: "Unauthorized - invalid API token", + 404: "Workflow not found", + 429: "Rate limit exceeded", + 500: "Internal server error", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) def post(self, app_model: App, end_user: EndUser, workflow_id: str): - """ - Run specific workflow by ID + """Run specific workflow by ID. + + Executes a specific workflow version identified by its ID. 
""" app_mode = AppMode.value_of(app_model.mode) if app_mode != AppMode.WORKFLOW: raise NotWorkflowAppError() - parser = reqparse.RequestParser() - parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json") - parser.add_argument("files", type=list, required=False, location="json") - parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") - args = parser.parse_args() + args = workflow_run_parser.parse_args() # Add workflow_id to args for AppGenerateService args["workflow_id"] = workflow_id @@ -174,12 +243,21 @@ class WorkflowRunByIdApi(Resource): raise InternalServerError() +@service_api_ns.route("/workflows/tasks//stop") class WorkflowTaskStopApi(Resource): + @service_api_ns.doc("stop_workflow_task") + @service_api_ns.doc(description="Stop a running workflow task") + @service_api_ns.doc(params={"task_id": "Task ID to stop"}) + @service_api_ns.doc( + responses={ + 200: "Task stopped successfully", + 401: "Unauthorized - invalid API token", + 404: "Task not found", + } + ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True)) def post(self, app_model: App, end_user: EndUser, task_id: str): - """ - Stop workflow task - """ + """Stop a running workflow task.""" app_mode = AppMode.value_of(app_model.mode) if app_mode != AppMode.WORKFLOW: raise NotWorkflowAppError() @@ -189,35 +267,25 @@ class WorkflowTaskStopApi(Resource): return {"result": "success"} +@service_api_ns.route("/workflows/logs") class WorkflowAppLogApi(Resource): + @service_api_ns.expect(workflow_log_parser) + @service_api_ns.doc("get_workflow_logs") + @service_api_ns.doc(description="Get workflow execution logs") + @service_api_ns.doc( + responses={ + 200: "Logs retrieved successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_app_token - @marshal_with(workflow_app_log_pagination_fields) + @service_api_ns.marshal_with(build_workflow_app_log_pagination_model(service_api_ns)) 
def get(self, app_model: App): + """Get workflow app logs. + + Returns paginated workflow execution logs with filtering options. """ - Get workflow app logs - """ - parser = reqparse.RequestParser() - parser.add_argument("keyword", type=str, location="args") - parser.add_argument("status", type=str, choices=["succeeded", "failed", "stopped"], location="args") - parser.add_argument("created_at__before", type=str, location="args") - parser.add_argument("created_at__after", type=str, location="args") - parser.add_argument( - "created_by_end_user_session_id", - type=str, - location="args", - required=False, - default=None, - ) - parser.add_argument( - "created_by_account", - type=str, - location="args", - required=False, - default=None, - ) - parser.add_argument("page", type=int_range(1, 99999), default=1, location="args") - parser.add_argument("limit", type=int_range(1, 100), default=20, location="args") - args = parser.parse_args() + args = workflow_log_parser.parse_args() args.status = WorkflowExecutionStatus(args.status) if args.status else None if args.created_at__before: @@ -243,10 +311,3 @@ class WorkflowAppLogApi(Resource): ) return workflow_app_log_pagination - - -api.add_resource(WorkflowRunApi, "/workflows/run") -api.add_resource(WorkflowRunDetailApi, "/workflows/run/") -api.add_resource(WorkflowRunByIdApi, "/workflows//run") -api.add_resource(WorkflowTaskStopApi, "/workflows/tasks//stop") -api.add_resource(WorkflowAppLogApi, "/workflows/logs") diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 29eef41253..c486b0480b 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -1,9 +1,11 @@ +from typing import Literal + from flask import request -from flask_restful import marshal, marshal_with, reqparse +from flask_restx import marshal, reqparse from werkzeug.exceptions import Forbidden, NotFound import services.dataset_service -from 
controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError from controllers.service_api.wraps import ( DatasetApiResource, @@ -14,7 +16,7 @@ from core.model_runtime.entities.model_entities import ModelType from core.plugin.entities.plugin import ModelProviderID from core.provider_manager import ProviderManager from fields.dataset_fields import dataset_detail_fields -from fields.tag_fields import tag_fields +from fields.tag_fields import build_dataset_tag_fields from libs.login import current_user from models.dataset import Dataset, DatasetPermissionEnum from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService @@ -34,12 +36,171 @@ def _validate_description_length(description): return description +# Define parsers for dataset operations +dataset_create_parser = reqparse.RequestParser() +dataset_create_parser.add_argument( + "name", + nullable=False, + required=True, + help="type is required. 
Name must be between 1 to 40 characters.", + type=_validate_name, +) +dataset_create_parser.add_argument( + "description", + type=_validate_description_length, + nullable=True, + required=False, + default="", +) +dataset_create_parser.add_argument( + "indexing_technique", + type=str, + location="json", + choices=Dataset.INDEXING_TECHNIQUE_LIST, + help="Invalid indexing technique.", +) +dataset_create_parser.add_argument( + "permission", + type=str, + location="json", + choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM), + help="Invalid permission.", + required=False, + nullable=False, +) +dataset_create_parser.add_argument( + "external_knowledge_api_id", + type=str, + nullable=True, + required=False, + default="_validate_name", +) +dataset_create_parser.add_argument( + "provider", + type=str, + nullable=True, + required=False, + default="vendor", +) +dataset_create_parser.add_argument( + "external_knowledge_id", + type=str, + nullable=True, + required=False, +) +dataset_create_parser.add_argument("retrieval_model", type=dict, required=False, nullable=True, location="json") +dataset_create_parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json") +dataset_create_parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json") + +dataset_update_parser = reqparse.RequestParser() +dataset_update_parser.add_argument( + "name", + nullable=False, + help="type is required. 
Name must be between 1 to 40 characters.", + type=_validate_name, +) +dataset_update_parser.add_argument( + "description", location="json", store_missing=False, type=_validate_description_length +) +dataset_update_parser.add_argument( + "indexing_technique", + type=str, + location="json", + choices=Dataset.INDEXING_TECHNIQUE_LIST, + nullable=True, + help="Invalid indexing technique.", +) +dataset_update_parser.add_argument( + "permission", + type=str, + location="json", + choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM), + help="Invalid permission.", +) +dataset_update_parser.add_argument("embedding_model", type=str, location="json", help="Invalid embedding model.") +dataset_update_parser.add_argument( + "embedding_model_provider", type=str, location="json", help="Invalid embedding model provider." +) +dataset_update_parser.add_argument("retrieval_model", type=dict, location="json", help="Invalid retrieval model.") +dataset_update_parser.add_argument("partial_member_list", type=list, location="json", help="Invalid parent user list.") +dataset_update_parser.add_argument( + "external_retrieval_model", + type=dict, + required=False, + nullable=True, + location="json", + help="Invalid external retrieval model.", +) +dataset_update_parser.add_argument( + "external_knowledge_id", + type=str, + required=False, + nullable=True, + location="json", + help="Invalid external knowledge id.", +) +dataset_update_parser.add_argument( + "external_knowledge_api_id", + type=str, + required=False, + nullable=True, + location="json", + help="Invalid external knowledge api id.", +) + +tag_create_parser = reqparse.RequestParser() +tag_create_parser.add_argument( + "name", + nullable=False, + required=True, + help="Name must be between 1 to 50 characters.", + type=lambda x: x + if x and 1 <= len(x) <= 50 + else (_ for _ in ()).throw(ValueError("Name must be between 1 to 50 characters.")), +) + +tag_update_parser = 
reqparse.RequestParser() +tag_update_parser.add_argument( + "name", + nullable=False, + required=True, + help="Name must be between 1 to 50 characters.", + type=lambda x: x + if x and 1 <= len(x) <= 50 + else (_ for _ in ()).throw(ValueError("Name must be between 1 to 50 characters.")), +) +tag_update_parser.add_argument("tag_id", nullable=False, required=True, help="Id of a tag.", type=str) + +tag_delete_parser = reqparse.RequestParser() +tag_delete_parser.add_argument("tag_id", nullable=False, required=True, help="Id of a tag.", type=str) + +tag_binding_parser = reqparse.RequestParser() +tag_binding_parser.add_argument( + "tag_ids", type=list, nullable=False, required=True, location="json", help="Tag IDs is required." +) +tag_binding_parser.add_argument( + "target_id", type=str, nullable=False, required=True, location="json", help="Target Dataset ID is required." +) + +tag_unbinding_parser = reqparse.RequestParser() +tag_unbinding_parser.add_argument("tag_id", type=str, nullable=False, required=True, help="Tag ID is required.") +tag_unbinding_parser.add_argument("target_id", type=str, nullable=False, required=True, help="Target ID is required.") + + +@service_api_ns.route("/datasets") class DatasetListApi(DatasetApiResource): """Resource for datasets.""" + @service_api_ns.doc("list_datasets") + @service_api_ns.doc(description="List all datasets") + @service_api_ns.doc( + responses={ + 200: "Datasets retrieved successfully", + 401: "Unauthorized - invalid API token", + } + ) def get(self, tenant_id): """Resource for getting datasets.""" - page = request.args.get("page", default=1, type=int) limit = request.args.get("limit", default=20, type=int) # provider = request.args.get("provider", default="vendor") @@ -74,65 +235,20 @@ class DatasetListApi(DatasetApiResource): response = {"data": data, "has_more": len(datasets) == limit, "limit": limit, "total": total, "page": page} return response, 200 + @service_api_ns.expect(dataset_create_parser) + 
@service_api_ns.doc("create_dataset") + @service_api_ns.doc(description="Create a new dataset") + @service_api_ns.doc( + responses={ + 200: "Dataset created successfully", + 401: "Unauthorized - invalid API token", + 400: "Bad request - invalid parameters", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id): """Resource for creating datasets.""" - parser = reqparse.RequestParser() - parser.add_argument( - "name", - nullable=False, - required=True, - help="type is required. Name must be between 1 to 40 characters.", - type=_validate_name, - ) - parser.add_argument( - "description", - type=_validate_description_length, - nullable=True, - required=False, - default="", - ) - parser.add_argument( - "indexing_technique", - type=str, - location="json", - choices=Dataset.INDEXING_TECHNIQUE_LIST, - help="Invalid indexing technique.", - ) - parser.add_argument( - "permission", - type=str, - location="json", - choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM), - help="Invalid permission.", - required=False, - nullable=False, - ) - parser.add_argument( - "external_knowledge_api_id", - type=str, - nullable=True, - required=False, - default="_validate_name", - ) - parser.add_argument( - "provider", - type=str, - nullable=True, - required=False, - default="vendor", - ) - parser.add_argument( - "external_knowledge_id", - type=str, - nullable=True, - required=False, - ) - parser.add_argument("retrieval_model", type=dict, required=False, nullable=True, location="json") - parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json") - parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json") - - args = parser.parse_args() + args = dataset_create_parser.parse_args() if args.get("embedding_model_provider"): DatasetService.check_embedding_model_setting( @@ -172,9 +288,21 @@ class 
DatasetListApi(DatasetApiResource): return marshal(dataset, dataset_detail_fields), 200 +@service_api_ns.route("/datasets/") class DatasetApi(DatasetApiResource): """Resource for dataset.""" + @service_api_ns.doc("get_dataset") + @service_api_ns.doc(description="Get a specific dataset by ID") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Dataset retrieved successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Dataset not found", + } + ) def get(self, _, dataset_id): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -214,6 +342,18 @@ class DatasetApi(DatasetApiResource): return data, 200 + @service_api_ns.expect(dataset_update_parser) + @service_api_ns.doc("update_dataset") + @service_api_ns.doc(description="Update an existing dataset") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Dataset updated successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Dataset not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def patch(self, _, dataset_id): dataset_id_str = str(dataset_id) @@ -221,63 +361,7 @@ class DatasetApi(DatasetApiResource): if dataset is None: raise NotFound("Dataset not found.") - parser = reqparse.RequestParser() - parser.add_argument( - "name", - nullable=False, - help="type is required. 
Name must be between 1 to 40 characters.", - type=_validate_name, - ) - parser.add_argument("description", location="json", store_missing=False, type=_validate_description_length) - parser.add_argument( - "indexing_technique", - type=str, - location="json", - choices=Dataset.INDEXING_TECHNIQUE_LIST, - nullable=True, - help="Invalid indexing technique.", - ) - parser.add_argument( - "permission", - type=str, - location="json", - choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM), - help="Invalid permission.", - ) - parser.add_argument("embedding_model", type=str, location="json", help="Invalid embedding model.") - parser.add_argument( - "embedding_model_provider", type=str, location="json", help="Invalid embedding model provider." - ) - parser.add_argument("retrieval_model", type=dict, location="json", help="Invalid retrieval model.") - parser.add_argument("partial_member_list", type=list, location="json", help="Invalid parent user list.") - - parser.add_argument( - "external_retrieval_model", - type=dict, - required=False, - nullable=True, - location="json", - help="Invalid external retrieval model.", - ) - - parser.add_argument( - "external_knowledge_id", - type=str, - required=False, - nullable=True, - location="json", - help="Invalid external knowledge id.", - ) - - parser.add_argument( - "external_knowledge_api_id", - type=str, - required=False, - nullable=True, - location="json", - help="Invalid external knowledge api id.", - ) - args = parser.parse_args() + args = dataset_update_parser.parse_args() data = request.get_json() # check embedding model setting @@ -325,6 +409,17 @@ class DatasetApi(DatasetApiResource): return result_data, 200 + @service_api_ns.doc("delete_dataset") + @service_api_ns.doc(description="Delete a dataset") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 204: "Dataset deleted successfully", + 401: "Unauthorized - invalid API token", 
+ 404: "Dataset not found", + 409: "Conflict - dataset is in use", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def delete(self, _, dataset_id): """ @@ -355,17 +450,35 @@ class DatasetApi(DatasetApiResource): raise DatasetInUseError() +@service_api_ns.route("/datasets//documents/status/") class DocumentStatusApi(DatasetApiResource): """Resource for batch document status operations.""" - def patch(self, tenant_id, dataset_id, action): + @service_api_ns.doc("update_document_status") + @service_api_ns.doc(description="Batch update document status") + @service_api_ns.doc( + params={ + "dataset_id": "Dataset ID", + "action": "Action to perform: 'enable', 'disable', 'archive', or 'un_archive'", + } + ) + @service_api_ns.doc( + responses={ + 200: "Document status updated successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Dataset not found", + 400: "Bad request - invalid action", + } + ) + def patch(self, tenant_id, dataset_id, action: Literal["enable", "disable", "archive", "un_archive"]): """ Batch update document status. 
Args: tenant_id: tenant id dataset_id: dataset id - action: action to perform (enable, disable, archive, un_archive) + action: action to perform (Literal["enable", "disable", "archive", "un_archive"]) Returns: dict: A dictionary with a key 'result' and a value 'success' @@ -405,53 +518,65 @@ class DocumentStatusApi(DatasetApiResource): return {"result": "success"}, 200 +@service_api_ns.route("/datasets/tags") class DatasetTagsApi(DatasetApiResource): + @service_api_ns.doc("list_dataset_tags") + @service_api_ns.doc(description="Get all knowledge type tags") + @service_api_ns.doc( + responses={ + 200: "Tags retrieved successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_dataset_token - @marshal_with(tag_fields) + @service_api_ns.marshal_with(build_dataset_tag_fields(service_api_ns)) def get(self, _, dataset_id): """Get all knowledge type tags.""" tags = TagService.get_tags("knowledge", current_user.current_tenant_id) return tags, 200 + @service_api_ns.expect(tag_create_parser) + @service_api_ns.doc("create_dataset_tag") + @service_api_ns.doc(description="Add a knowledge type tag") + @service_api_ns.doc( + responses={ + 200: "Tag created successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + } + ) + @service_api_ns.marshal_with(build_dataset_tag_fields(service_api_ns)) @validate_dataset_token def post(self, _, dataset_id): """Add a knowledge type tag.""" if not (current_user.is_editor or current_user.is_dataset_editor): raise Forbidden() - parser = reqparse.RequestParser() - parser.add_argument( - "name", - nullable=False, - required=True, - help="Name must be between 1 to 50 characters.", - type=DatasetTagsApi._validate_tag_name, - ) - - args = parser.parse_args() + args = tag_create_parser.parse_args() args["type"] = "knowledge" tag = TagService.save_tags(args) response = {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0} - return response, 200 + 
@service_api_ns.expect(tag_update_parser) + @service_api_ns.doc("update_dataset_tag") + @service_api_ns.doc(description="Update a knowledge type tag") + @service_api_ns.doc( + responses={ + 200: "Tag updated successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + } + ) + @service_api_ns.marshal_with(build_dataset_tag_fields(service_api_ns)) @validate_dataset_token def patch(self, _, dataset_id): if not (current_user.is_editor or current_user.is_dataset_editor): raise Forbidden() - parser = reqparse.RequestParser() - parser.add_argument( - "name", - nullable=False, - required=True, - help="Name must be between 1 to 50 characters.", - type=DatasetTagsApi._validate_tag_name, - ) - parser.add_argument("tag_id", nullable=False, required=True, help="Id of a tag.", type=str) - args = parser.parse_args() + args = tag_update_parser.parse_args() args["type"] = "knowledge" tag = TagService.update_tags(args, args.get("tag_id")) @@ -461,66 +586,88 @@ class DatasetTagsApi(DatasetApiResource): return response, 200 + @service_api_ns.expect(tag_delete_parser) + @service_api_ns.doc("delete_dataset_tag") + @service_api_ns.doc(description="Delete a knowledge type tag") + @service_api_ns.doc( + responses={ + 204: "Tag deleted successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + } + ) @validate_dataset_token def delete(self, _, dataset_id): """Delete a knowledge type tag.""" if not current_user.is_editor: raise Forbidden() - parser = reqparse.RequestParser() - parser.add_argument("tag_id", nullable=False, required=True, help="Id of a tag.", type=str) - args = parser.parse_args() + args = tag_delete_parser.parse_args() TagService.delete_tag(args.get("tag_id")) return 204 - @staticmethod - def _validate_tag_name(name): - if not name or len(name) < 1 or len(name) > 50: - raise ValueError("Name must be between 1 to 50 characters.") - return name - 
+@service_api_ns.route("/datasets/tags/binding") class DatasetTagBindingApi(DatasetApiResource): + @service_api_ns.expect(tag_binding_parser) + @service_api_ns.doc("bind_dataset_tags") + @service_api_ns.doc(description="Bind tags to a dataset") + @service_api_ns.doc( + responses={ + 204: "Tags bound successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + } + ) @validate_dataset_token def post(self, _, dataset_id): # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator if not (current_user.is_editor or current_user.is_dataset_editor): raise Forbidden() - parser = reqparse.RequestParser() - parser.add_argument( - "tag_ids", type=list, nullable=False, required=True, location="json", help="Tag IDs is required." - ) - parser.add_argument( - "target_id", type=str, nullable=False, required=True, location="json", help="Target Dataset ID is required." - ) - - args = parser.parse_args() + args = tag_binding_parser.parse_args() args["type"] = "knowledge" TagService.save_tag_binding(args) return 204 +@service_api_ns.route("/datasets/tags/unbinding") class DatasetTagUnbindingApi(DatasetApiResource): + @service_api_ns.expect(tag_unbinding_parser) + @service_api_ns.doc("unbind_dataset_tag") + @service_api_ns.doc(description="Unbind a tag from a dataset") + @service_api_ns.doc( + responses={ + 204: "Tag unbound successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + } + ) @validate_dataset_token def post(self, _, dataset_id): # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator if not (current_user.is_editor or current_user.is_dataset_editor): raise Forbidden() - parser = reqparse.RequestParser() - parser.add_argument("tag_id", type=str, nullable=False, required=True, help="Tag ID is required.") - parser.add_argument("target_id", type=str, nullable=False, required=True, help="Target ID is 
required.") - - args = parser.parse_args() + args = tag_unbinding_parser.parse_args() args["type"] = "knowledge" TagService.delete_tag_binding(args) return 204 +@service_api_ns.route("/datasets//tags") class DatasetTagsBindingStatusApi(DatasetApiResource): + @service_api_ns.doc("get_dataset_tags_binding_status") + @service_api_ns.doc(description="Get tags bound to a specific dataset") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Tags retrieved successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_dataset_token def get(self, _, *args, **kwargs): """Get all knowledge type tags.""" @@ -529,12 +676,3 @@ class DatasetTagsBindingStatusApi(DatasetApiResource): tags_list = [{"id": tag.id, "name": tag.name} for tag in tags] response = {"data": tags_list, "total": len(tags)} return response, 200 - - -api.add_resource(DatasetListApi, "/datasets") -api.add_resource(DatasetApi, "/datasets/") -api.add_resource(DocumentStatusApi, "/datasets//documents/status/") -api.add_resource(DatasetTagsApi, "/datasets/tags") -api.add_resource(DatasetTagBindingApi, "/datasets/tags/binding") -api.add_resource(DatasetTagUnbindingApi, "/datasets/tags/unbinding") -api.add_resource(DatasetTagsBindingStatusApi, "/datasets//tags") diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index d0354f7851..43232229c8 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -1,7 +1,7 @@ import json from flask import request -from flask_restful import marshal, reqparse +from flask_restx import marshal, reqparse from sqlalchemy import desc, select from werkzeug.exceptions import Forbidden, NotFound @@ -13,7 +13,7 @@ from controllers.common.errors import ( TooManyFilesError, UnsupportedFileTypeError, ) -from controllers.service_api import api +from controllers.service_api import service_api_ns from 
controllers.service_api.app.error import ProviderNotInitializeError from controllers.service_api.dataset.error import ( ArchivedDocumentImmutableError, @@ -34,32 +34,64 @@ from services.dataset_service import DatasetService, DocumentService from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig from services.file_service import FileService +# Define parsers for document operations +document_text_create_parser = reqparse.RequestParser() +document_text_create_parser.add_argument("name", type=str, required=True, nullable=False, location="json") +document_text_create_parser.add_argument("text", type=str, required=True, nullable=False, location="json") +document_text_create_parser.add_argument("process_rule", type=dict, required=False, nullable=True, location="json") +document_text_create_parser.add_argument("original_document_id", type=str, required=False, location="json") +document_text_create_parser.add_argument( + "doc_form", type=str, default="text_model", required=False, nullable=False, location="json" +) +document_text_create_parser.add_argument( + "doc_language", type=str, default="English", required=False, nullable=False, location="json" +) +document_text_create_parser.add_argument( + "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json" +) +document_text_create_parser.add_argument("retrieval_model", type=dict, required=False, nullable=True, location="json") +document_text_create_parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json") +document_text_create_parser.add_argument( + "embedding_model_provider", type=str, required=False, nullable=True, location="json" +) +document_text_update_parser = reqparse.RequestParser() +document_text_update_parser.add_argument("name", type=str, required=False, nullable=True, location="json") +document_text_update_parser.add_argument("text", type=str, required=False, nullable=True, location="json") 
+document_text_update_parser.add_argument("process_rule", type=dict, required=False, nullable=True, location="json") +document_text_update_parser.add_argument( + "doc_form", type=str, default="text_model", required=False, nullable=False, location="json" +) +document_text_update_parser.add_argument( + "doc_language", type=str, default="English", required=False, nullable=False, location="json" +) +document_text_update_parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json") + + +@service_api_ns.route( + "/datasets//document/create_by_text", + "/datasets//document/create-by-text", +) class DocumentAddByTextApi(DatasetApiResource): """Resource for documents.""" + @service_api_ns.expect(document_text_create_parser) + @service_api_ns.doc("create_document_by_text") + @service_api_ns.doc(description="Create a new document by providing text content") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Document created successfully", + 401: "Unauthorized - invalid API token", + 400: "Bad request - invalid parameters", + } + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_resource_check("documents", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id): """Create document by text.""" - parser = reqparse.RequestParser() - parser.add_argument("name", type=str, required=True, nullable=False, location="json") - parser.add_argument("text", type=str, required=True, nullable=False, location="json") - parser.add_argument("process_rule", type=dict, required=False, nullable=True, location="json") - parser.add_argument("original_document_id", type=str, required=False, location="json") - parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json") - parser.add_argument( - "doc_language", type=str, default="English", required=False, 
nullable=False, location="json" - ) - parser.add_argument( - "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json" - ) - parser.add_argument("retrieval_model", type=dict, required=False, nullable=True, location="json") - parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json") - parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json") - - args = parser.parse_args() + args = document_text_create_parser.parse_args() dataset_id = str(dataset_id) tenant_id = str(tenant_id) @@ -117,23 +149,29 @@ class DocumentAddByTextApi(DatasetApiResource): return documents_and_batch_fields, 200 +@service_api_ns.route( + "/datasets//documents//update_by_text", + "/datasets//documents//update-by-text", +) class DocumentUpdateByTextApi(DatasetApiResource): """Resource for update documents.""" + @service_api_ns.expect(document_text_update_parser) + @service_api_ns.doc("update_document_by_text") + @service_api_ns.doc(description="Update an existing document by providing text content") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 200: "Document updated successfully", + 401: "Unauthorized - invalid API token", + 404: "Document not found", + } + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id, document_id): """Update document by text.""" - parser = reqparse.RequestParser() - parser.add_argument("name", type=str, required=False, nullable=True, location="json") - parser.add_argument("text", type=str, required=False, nullable=True, location="json") - parser.add_argument("process_rule", type=dict, required=False, nullable=True, location="json") - parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json") - 
parser.add_argument( - "doc_language", type=str, default="English", required=False, nullable=False, location="json" - ) - parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json") - args = parser.parse_args() + args = document_text_update_parser.parse_args() dataset_id = str(dataset_id) tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() @@ -187,9 +225,23 @@ class DocumentUpdateByTextApi(DatasetApiResource): return documents_and_batch_fields, 200 +@service_api_ns.route( + "/datasets//document/create_by_file", + "/datasets//document/create-by-file", +) class DocumentAddByFileApi(DatasetApiResource): """Resource for documents.""" + @service_api_ns.doc("create_document_by_file") + @service_api_ns.doc(description="Create a new document by uploading a file") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Document created successfully", + 401: "Unauthorized - invalid API token", + 400: "Bad request - invalid file or parameters", + } + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_resource_check("documents", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") @@ -281,9 +333,23 @@ class DocumentAddByFileApi(DatasetApiResource): return documents_and_batch_fields, 200 +@service_api_ns.route( + "/datasets//documents//update_by_file", + "/datasets//documents//update-by-file", +) class DocumentUpdateByFileApi(DatasetApiResource): """Resource for update documents.""" + @service_api_ns.doc("update_document_by_file") + @service_api_ns.doc(description="Update an existing document by uploading a file") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 200: "Document updated successfully", + 401: "Unauthorized - invalid API token", + 404: "Document not found", + } 
+ ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id, document_id): @@ -358,7 +424,18 @@ class DocumentUpdateByFileApi(DatasetApiResource): return documents_and_batch_fields, 200 +@service_api_ns.route("/datasets//documents") class DocumentListApi(DatasetApiResource): + @service_api_ns.doc("list_documents") + @service_api_ns.doc(description="List all documents in a dataset") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Documents retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset not found", + } + ) def get(self, tenant_id, dataset_id): dataset_id = str(dataset_id) tenant_id = str(tenant_id) @@ -391,7 +468,18 @@ class DocumentListApi(DatasetApiResource): return response +@service_api_ns.route("/datasets//documents//indexing-status") class DocumentIndexingStatusApi(DatasetApiResource): + @service_api_ns.doc("get_document_indexing_status") + @service_api_ns.doc(description="Get indexing status for documents in a batch") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "batch": "Batch ID"}) + @service_api_ns.doc( + responses={ + 200: "Indexing status retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset or documents not found", + } + ) def get(self, tenant_id, dataset_id, batch): dataset_id = str(dataset_id) batch = str(batch) @@ -440,9 +528,21 @@ class DocumentIndexingStatusApi(DatasetApiResource): return data +@service_api_ns.route("/datasets//documents/") class DocumentApi(DatasetApiResource): METADATA_CHOICES = {"all", "only", "without"} + @service_api_ns.doc("get_document") + @service_api_ns.doc(description="Get a specific document by ID") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 200: "Document retrieved successfully", + 401: 
"Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Document not found", + } + ) def get(self, tenant_id, dataset_id, document_id): dataset_id = str(dataset_id) document_id = str(document_id) @@ -534,6 +634,17 @@ class DocumentApi(DatasetApiResource): return response + @service_api_ns.doc("delete_document") + @service_api_ns.doc(description="Delete a document") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 204: "Document deleted successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - document is archived", + 404: "Document not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def delete(self, tenant_id, dataset_id, document_id): """Delete document.""" @@ -564,28 +675,3 @@ class DocumentApi(DatasetApiResource): raise DocumentIndexingError("Cannot delete document during indexing.") return 204 - - -api.add_resource( - DocumentAddByTextApi, - "/datasets//document/create_by_text", - "/datasets//document/create-by-text", -) -api.add_resource( - DocumentAddByFileApi, - "/datasets//document/create_by_file", - "/datasets//document/create-by-file", -) -api.add_resource( - DocumentUpdateByTextApi, - "/datasets//documents//update_by_text", - "/datasets//documents//update-by-text", -) -api.add_resource( - DocumentUpdateByFileApi, - "/datasets//documents//update_by_file", - "/datasets//documents//update-by-file", -) -api.add_resource(DocumentApi, "/datasets//documents/") -api.add_resource(DocumentListApi, "/datasets//documents") -api.add_resource(DocumentIndexingStatusApi, "/datasets//documents//indexing-status") diff --git a/api/controllers/service_api/dataset/hit_testing.py b/api/controllers/service_api/dataset/hit_testing.py index 52e9bca5da..d81287d56f 100644 --- a/api/controllers/service_api/dataset/hit_testing.py +++ b/api/controllers/service_api/dataset/hit_testing.py @@ -1,11 +1,26 @@ from 
controllers.console.datasets.hit_testing_base import DatasetsHitTestingBase -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_rate_limit_check +@service_api_ns.route("/datasets//hit-testing", "/datasets//retrieve") class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase): + @service_api_ns.doc("dataset_hit_testing") + @service_api_ns.doc(description="Perform hit testing on a dataset") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Hit testing results", + 401: "Unauthorized - invalid API token", + 404: "Dataset not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id): + """Perform hit testing on a dataset. + + Tests retrieval performance for the specified dataset. + """ dataset_id_str = str(dataset_id) dataset = self.get_and_validate_dataset(dataset_id_str) @@ -13,6 +28,3 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase): self.hit_testing_args_check(args) return self.perform_hit_testing(dataset, args) - - -api.add_resource(HitTestingApi, "/datasets//hit-testing", "/datasets//retrieve") diff --git a/api/controllers/service_api/dataset/metadata.py b/api/controllers/service_api/dataset/metadata.py index 6ba818c5fc..9defe6af03 100644 --- a/api/controllers/service_api/dataset/metadata.py +++ b/api/controllers/service_api/dataset/metadata.py @@ -1,8 +1,10 @@ +from typing import Literal + from flask_login import current_user # type: ignore -from flask_restful import marshal, reqparse +from flask_restx import marshal, reqparse from werkzeug.exceptions import NotFound -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_rate_limit_check from fields.dataset_fields import 
dataset_metadata_fields from services.dataset_service import DatasetService @@ -12,14 +14,43 @@ from services.entities.knowledge_entities.knowledge_entities import ( ) from services.metadata_service import MetadataService +# Define parsers for metadata APIs +metadata_create_parser = reqparse.RequestParser() +metadata_create_parser.add_argument( + "type", type=str, required=True, nullable=False, location="json", help="Metadata type" +) +metadata_create_parser.add_argument( + "name", type=str, required=True, nullable=False, location="json", help="Metadata name" +) +metadata_update_parser = reqparse.RequestParser() +metadata_update_parser.add_argument( + "name", type=str, required=True, nullable=False, location="json", help="New metadata name" +) + +document_metadata_parser = reqparse.RequestParser() +document_metadata_parser.add_argument( + "operation_data", type=list, required=True, nullable=False, location="json", help="Metadata operation data" +) + + +@service_api_ns.route("/datasets//metadata") class DatasetMetadataCreateServiceApi(DatasetApiResource): + @service_api_ns.expect(metadata_create_parser) + @service_api_ns.doc("create_dataset_metadata") + @service_api_ns.doc(description="Create metadata for a dataset") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 201: "Metadata created successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id): - parser = reqparse.RequestParser() - parser.add_argument("type", type=str, required=True, nullable=False, location="json") - parser.add_argument("name", type=str, required=True, nullable=False, location="json") - args = parser.parse_args() + """Create metadata for a dataset.""" + args = metadata_create_parser.parse_args() metadata_args = MetadataArgs(**args) dataset_id_str = str(dataset_id) @@ -31,7 +62,18 @@ class 
DatasetMetadataCreateServiceApi(DatasetApiResource): metadata = MetadataService.create_metadata(dataset_id_str, metadata_args) return marshal(metadata, dataset_metadata_fields), 201 + @service_api_ns.doc("get_dataset_metadata") + @service_api_ns.doc(description="Get all metadata for a dataset") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Metadata retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset not found", + } + ) def get(self, tenant_id, dataset_id): + """Get all metadata for a dataset.""" dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: @@ -39,12 +81,23 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource): return MetadataService.get_dataset_metadatas(dataset), 200 +@service_api_ns.route("/datasets//metadata/") class DatasetMetadataServiceApi(DatasetApiResource): + @service_api_ns.expect(metadata_update_parser) + @service_api_ns.doc("update_dataset_metadata") + @service_api_ns.doc(description="Update metadata name") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"}) + @service_api_ns.doc( + responses={ + 200: "Metadata updated successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset or metadata not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def patch(self, tenant_id, dataset_id, metadata_id): - parser = reqparse.RequestParser() - parser.add_argument("name", type=str, required=True, nullable=False, location="json") - args = parser.parse_args() + """Update metadata name.""" + args = metadata_update_parser.parse_args() dataset_id_str = str(dataset_id) metadata_id_str = str(metadata_id) @@ -56,8 +109,19 @@ class DatasetMetadataServiceApi(DatasetApiResource): metadata = MetadataService.update_metadata_name(dataset_id_str, metadata_id_str, args.get("name")) return marshal(metadata, dataset_metadata_fields), 200 + 
@service_api_ns.doc("delete_dataset_metadata") + @service_api_ns.doc(description="Delete metadata") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"}) + @service_api_ns.doc( + responses={ + 204: "Metadata deleted successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset or metadata not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def delete(self, tenant_id, dataset_id, metadata_id): + """Delete metadata.""" dataset_id_str = str(dataset_id) metadata_id_str = str(metadata_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -69,15 +133,37 @@ class DatasetMetadataServiceApi(DatasetApiResource): return 204 +@service_api_ns.route("/datasets/metadata/built-in") class DatasetMetadataBuiltInFieldServiceApi(DatasetApiResource): + @service_api_ns.doc("get_built_in_fields") + @service_api_ns.doc(description="Get all built-in metadata fields") + @service_api_ns.doc( + responses={ + 200: "Built-in fields retrieved successfully", + 401: "Unauthorized - invalid API token", + } + ) def get(self, tenant_id): + """Get all built-in metadata fields.""" built_in_fields = MetadataService.get_built_in_fields() return {"fields": built_in_fields}, 200 +@service_api_ns.route("/datasets//metadata/built-in/") class DatasetMetadataBuiltInFieldActionServiceApi(DatasetApiResource): + @service_api_ns.doc("toggle_built_in_field") + @service_api_ns.doc(description="Enable or disable built-in metadata field") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "action": "Action to perform: 'enable' or 'disable'"}) + @service_api_ns.doc( + responses={ + 200: "Action completed successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def post(self, tenant_id, dataset_id, action): + def post(self, tenant_id, dataset_id, action: Literal["enable", "disable"]): + """Enable or disable built-in metadata 
field.""" dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: @@ -91,29 +177,31 @@ class DatasetMetadataBuiltInFieldActionServiceApi(DatasetApiResource): return 200 +@service_api_ns.route("/datasets//documents/metadata") class DocumentMetadataEditServiceApi(DatasetApiResource): + @service_api_ns.expect(document_metadata_parser) + @service_api_ns.doc("update_documents_metadata") + @service_api_ns.doc(description="Update metadata for multiple documents") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "Documents metadata updated successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id): + """Update metadata for multiple documents.""" dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: raise NotFound("Dataset not found.") DatasetService.check_dataset_permission(dataset, current_user) - parser = reqparse.RequestParser() - parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json") - args = parser.parse_args() + args = document_metadata_parser.parse_args() metadata_args = MetadataOperationData(**args) MetadataService.update_documents_metadata(dataset, metadata_args) return 200 - - -api.add_resource(DatasetMetadataCreateServiceApi, "/datasets//metadata") -api.add_resource(DatasetMetadataServiceApi, "/datasets//metadata/") -api.add_resource(DatasetMetadataBuiltInFieldServiceApi, "/datasets/metadata/built-in") -api.add_resource( - DatasetMetadataBuiltInFieldActionServiceApi, "/datasets//metadata/built-in/" -) -api.add_resource(DocumentMetadataEditServiceApi, "/datasets//documents/metadata") diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index 31f862dc8f..f5e2010ca4 100644 --- 
a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -1,9 +1,9 @@ from flask import request from flask_login import current_user -from flask_restful import marshal, reqparse +from flask_restx import marshal, reqparse from werkzeug.exceptions import NotFound -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.app.error import ProviderNotInitializeError from controllers.service_api.wraps import ( DatasetApiResource, @@ -19,34 +19,59 @@ from fields.segment_fields import child_chunk_fields, segment_fields from models.dataset import Dataset from services.dataset_service import DatasetService, DocumentService, SegmentService from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateArgs -from services.errors.chunk import ( - ChildChunkDeleteIndexError, - ChildChunkIndexingError, -) -from services.errors.chunk import ( - ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError, -) -from services.errors.chunk import ( - ChildChunkIndexingError as ChildChunkIndexingServiceError, -) +from services.errors.chunk import ChildChunkDeleteIndexError, ChildChunkIndexingError +from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError +from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingServiceError + +# Define parsers for segment operations +segment_create_parser = reqparse.RequestParser() +segment_create_parser.add_argument("segments", type=list, required=False, nullable=True, location="json") + +segment_list_parser = reqparse.RequestParser() +segment_list_parser.add_argument("status", type=str, action="append", default=[], location="args") +segment_list_parser.add_argument("keyword", type=str, default=None, location="args") + +segment_update_parser = reqparse.RequestParser() +segment_update_parser.add_argument("segment", type=dict, required=False, nullable=True, 
location="json") + +child_chunk_create_parser = reqparse.RequestParser() +child_chunk_create_parser.add_argument("content", type=str, required=True, nullable=False, location="json") + +child_chunk_list_parser = reqparse.RequestParser() +child_chunk_list_parser.add_argument("limit", type=int, default=20, location="args") +child_chunk_list_parser.add_argument("keyword", type=str, default=None, location="args") +child_chunk_list_parser.add_argument("page", type=int, default=1, location="args") + +child_chunk_update_parser = reqparse.RequestParser() +child_chunk_update_parser.add_argument("content", type=str, required=True, nullable=False, location="json") +@service_api_ns.route("/datasets//documents//segments") class SegmentApi(DatasetApiResource): """Resource for segments.""" + @service_api_ns.expect(segment_create_parser) + @service_api_ns.doc("create_segments") + @service_api_ns.doc(description="Create segments in a document") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 200: "Segments created successfully", + 400: "Bad request - segments data is missing", + 401: "Unauthorized - invalid API token", + 404: "Dataset or document not found", + } + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def post(self, tenant_id, dataset_id, document_id): + def post(self, tenant_id: str, dataset_id: str, document_id: str): """Create single segment.""" # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) document = DocumentService.get_document(dataset.id, document_id) if not document: raise NotFound("Document not 
found.") @@ -71,9 +96,7 @@ class SegmentApi(DatasetApiResource): except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) # validate args - parser = reqparse.RequestParser() - parser.add_argument("segments", type=list, required=False, nullable=True, location="json") - args = parser.parse_args() + args = segment_create_parser.parse_args() if args["segments"] is not None: for args_item in args["segments"]: SegmentService.segment_create_args_validate(args_item, document) @@ -82,18 +105,26 @@ class SegmentApi(DatasetApiResource): else: return {"error": "Segments is required"}, 400 - def get(self, tenant_id, dataset_id, document_id): + @service_api_ns.expect(segment_list_parser) + @service_api_ns.doc("list_segments") + @service_api_ns.doc(description="List segments in a document") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 200: "Segments retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset or document not found", + } + ) + def get(self, tenant_id: str, dataset_id: str, document_id: str): """Get segments.""" # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) page = request.args.get("page", default=1, type=int) limit = request.args.get("limit", default=20, type=int) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) document = DocumentService.get_document(dataset.id, document_id) if not document: raise NotFound("Document not found.") @@ -114,10 +145,7 @@ class SegmentApi(DatasetApiResource): except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) - parser = reqparse.RequestParser() - parser.add_argument("status", type=str, action="append", default=[], location="args") - parser.add_argument("keyword", type=str, 
default=None, location="args") - args = parser.parse_args() + args = segment_list_parser.parse_args() segments, total = SegmentService.get_segments( document_id=document_id, @@ -140,43 +168,62 @@ class SegmentApi(DatasetApiResource): return response, 200 +@service_api_ns.route("/datasets//documents//segments/") class DatasetSegmentApi(DatasetApiResource): + @service_api_ns.doc("delete_segment") + @service_api_ns.doc(description="Delete a specific segment") + @service_api_ns.doc( + params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Segment ID to delete"} + ) + @service_api_ns.doc( + responses={ + 204: "Segment deleted successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, or segment not found", + } + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def delete(self, tenant_id, dataset_id, document_id, segment_id): + def delete(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str): # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) document = DocumentService.get_document(dataset_id, document_id) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) if not segment: raise NotFound("Segment not found.") SegmentService.delete_segment(segment, document, dataset) return 204 + @service_api_ns.expect(segment_update_parser) + @service_api_ns.doc("update_segment") + @service_api_ns.doc(description="Update a specific segment") + @service_api_ns.doc( + params={"dataset_id": "Dataset ID", "document_id": "Document ID", 
"segment_id": "Segment ID to update"} + ) + @service_api_ns.doc( + responses={ + 200: "Segment updated successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, or segment not found", + } + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def post(self, tenant_id, dataset_id, document_id, segment_id): + def post(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str): # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) document = DocumentService.get_document(dataset_id, document_id) if not document: raise NotFound("Document not found.") @@ -197,37 +244,39 @@ class DatasetSegmentApi(DatasetApiResource): except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) # check segment - segment_id = str(segment_id) segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) if not segment: raise NotFound("Segment not found.") # validate args - parser = reqparse.RequestParser() - parser.add_argument("segment", type=dict, required=False, nullable=True, location="json") - args = parser.parse_args() + args = segment_update_parser.parse_args() updated_segment = SegmentService.update_segment( SegmentUpdateArgs(**args["segment"]), segment, document, dataset ) return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200 - def get(self, tenant_id, dataset_id, document_id, segment_id): + @service_api_ns.doc("get_segment") + @service_api_ns.doc(description="Get a specific segment by ID") + @service_api_ns.doc( + responses={ + 200: "Segment 
retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, or segment not found", + } + ) + def get(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str): # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) document = DocumentService.get_document(dataset_id, document_id) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) if not segment: raise NotFound("Segment not found.") @@ -235,29 +284,41 @@ class DatasetSegmentApi(DatasetApiResource): return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 +@service_api_ns.route( + "/datasets//documents//segments//child_chunks" +) class ChildChunkApi(DatasetApiResource): """Resource for child chunks.""" + @service_api_ns.expect(child_chunk_create_parser) + @service_api_ns.doc("create_child_chunk") + @service_api_ns.doc(description="Create a new child chunk for a segment") + @service_api_ns.doc( + params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Parent segment ID"} + ) + @service_api_ns.doc( + responses={ + 200: "Child chunk created successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, or segment not found", + } + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def post(self, tenant_id, dataset_id, document_id, segment_id): + def post(self, tenant_id: str, 
dataset_id: str, document_id: str, segment_id: str): """Create child chunk.""" # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) document = DocumentService.get_document(dataset.id, document_id) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) if not segment: raise NotFound("Segment not found.") @@ -280,43 +341,46 @@ class ChildChunkApi(DatasetApiResource): raise ProviderNotInitializeError(ex.description) # validate args - parser = reqparse.RequestParser() - parser.add_argument("content", type=str, required=True, nullable=False, location="json") - args = parser.parse_args() + args = child_chunk_create_parser.parse_args() try: - child_chunk = SegmentService.create_child_chunk(args.get("content"), segment, document, dataset) + child_chunk = SegmentService.create_child_chunk(args["content"], segment, document, dataset) except ChildChunkIndexingServiceError as e: raise ChildChunkIndexingError(str(e)) return {"data": marshal(child_chunk, child_chunk_fields)}, 200 - def get(self, tenant_id, dataset_id, document_id, segment_id): + @service_api_ns.expect(child_chunk_list_parser) + @service_api_ns.doc("list_child_chunks") + @service_api_ns.doc(description="List child chunks for a segment") + @service_api_ns.doc( + params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Parent segment ID"} + ) + @service_api_ns.doc( + responses={ + 200: "Child chunks retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, or segment not found", + } + ) + def get(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str): 
"""Get child chunks.""" # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) document = DocumentService.get_document(dataset.id, document_id) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) if not segment: raise NotFound("Segment not found.") - parser = reqparse.RequestParser() - parser.add_argument("limit", type=int, default=20, location="args") - parser.add_argument("keyword", type=str, default=None, location="args") - parser.add_argument("page", type=int, default=1, location="args") - args = parser.parse_args() + args = child_chunk_list_parser.parse_args() page = args["page"] limit = min(args["limit"], 100) @@ -333,40 +397,63 @@ class ChildChunkApi(DatasetApiResource): }, 200 +@service_api_ns.route( + "/datasets//documents//segments//child_chunks/" +) class DatasetChildChunkApi(DatasetApiResource): """Resource for updating child chunks.""" + @service_api_ns.doc("delete_child_chunk") + @service_api_ns.doc(description="Delete a specific child chunk") + @service_api_ns.doc( + params={ + "dataset_id": "Dataset ID", + "document_id": "Document ID", + "segment_id": "Parent segment ID", + "child_chunk_id": "Child chunk ID to delete", + } + ) + @service_api_ns.doc( + responses={ + 204: "Child chunk deleted successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, segment, or child chunk not found", + } + ) @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def delete(self, tenant_id, dataset_id, document_id, segment_id, child_chunk_id): + def delete(self, tenant_id: str, 
dataset_id: str, document_id: str, segment_id: str, child_chunk_id: str): """Delete child chunk.""" # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) document = DocumentService.get_document(dataset.id, document_id) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) if not segment: raise NotFound("Segment not found.") + # validate segment belongs to the specified document + if segment.document_id != document_id: + raise NotFound("Document not found.") + # check child chunk - child_chunk_id = str(child_chunk_id) child_chunk = SegmentService.get_child_chunk_by_id( child_chunk_id=child_chunk_id, tenant_id=current_user.current_tenant_id ) if not child_chunk: raise NotFound("Child chunk not found.") + # validate child chunk belongs to the specified segment + if child_chunk.segment_id != segment.id: + raise NotFound("Child chunk not found.") + try: SegmentService.delete_child_chunk(child_chunk, dataset) except ChildChunkDeleteIndexServiceError as e: @@ -374,14 +461,30 @@ class DatasetChildChunkApi(DatasetApiResource): return 204 + @service_api_ns.expect(child_chunk_update_parser) + @service_api_ns.doc("update_child_chunk") + @service_api_ns.doc(description="Update a specific child chunk") + @service_api_ns.doc( + params={ + "dataset_id": "Dataset ID", + "document_id": "Document ID", + "segment_id": "Parent segment ID", + "child_chunk_id": "Child chunk ID to update", + } + ) + @service_api_ns.doc( + responses={ + 200: "Child chunk updated successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, segment, or child chunk not found", + } + ) 
@cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def patch(self, tenant_id, dataset_id, document_id, segment_id, child_chunk_id): + def patch(self, tenant_id: str, dataset_id: str, document_id: str, segment_id: str, child_chunk_id: str): """Update child chunk.""" # check dataset - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") @@ -396,6 +499,10 @@ class DatasetChildChunkApi(DatasetApiResource): if not segment: raise NotFound("Segment not found.") + # validate segment belongs to the specified document + if segment.document_id != document_id: + raise NotFound("Segment not found.") + # get child chunk child_chunk = SegmentService.get_child_chunk_by_id( child_chunk_id=child_chunk_id, tenant_id=current_user.current_tenant_id @@ -403,29 +510,16 @@ class DatasetChildChunkApi(DatasetApiResource): if not child_chunk: raise NotFound("Child chunk not found.") + # validate child chunk belongs to the specified segment + if child_chunk.segment_id != segment.id: + raise NotFound("Child chunk not found.") + # validate args - parser = reqparse.RequestParser() - parser.add_argument("content", type=str, required=True, nullable=False, location="json") - args = parser.parse_args() + args = child_chunk_update_parser.parse_args() try: - child_chunk = SegmentService.update_child_chunk( - args.get("content"), child_chunk, segment, document, dataset - ) + child_chunk = SegmentService.update_child_chunk(args["content"], child_chunk, segment, document, dataset) except ChildChunkIndexingServiceError as e: raise ChildChunkIndexingError(str(e)) return {"data": marshal(child_chunk, child_chunk_fields)}, 200 - - -api.add_resource(SegmentApi, "/datasets//documents//segments") 
-api.add_resource( - DatasetSegmentApi, "/datasets//documents//segments/" -) -api.add_resource( - ChildChunkApi, "/datasets//documents//segments//child_chunks" -) -api.add_resource( - DatasetChildChunkApi, - "/datasets//documents//segments//child_chunks/", -) diff --git a/api/controllers/service_api/dataset/upload_file.py b/api/controllers/service_api/dataset/upload_file.py index 3b4721b5b0..27b36a6402 100644 --- a/api/controllers/service_api/dataset/upload_file.py +++ b/api/controllers/service_api/dataset/upload_file.py @@ -1,6 +1,6 @@ from werkzeug.exceptions import NotFound -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.wraps import ( DatasetApiResource, ) @@ -11,9 +11,23 @@ from models.model import UploadFile from services.dataset_service import DocumentService +@service_api_ns.route("/datasets//documents//upload-file") class UploadFileApi(DatasetApiResource): + @service_api_ns.doc("get_upload_file") + @service_api_ns.doc(description="Get upload file information and download URL") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 200: "Upload file information retrieved successfully", + 401: "Unauthorized - invalid API token", + 404: "Dataset, document, or upload file not found", + } + ) def get(self, tenant_id, dataset_id, document_id): - """Get upload file.""" + """Get upload file information and download URL. + + Returns information about an uploaded file including its download URL. 
+ """ # check dataset dataset_id = str(dataset_id) tenant_id = str(tenant_id) @@ -49,6 +63,3 @@ class UploadFileApi(DatasetApiResource): "created_by": upload_file.created_by, "created_at": upload_file.created_at.timestamp(), }, 200 - - -api.add_resource(UploadFileApi, "/datasets//documents//upload-file") diff --git a/api/controllers/service_api/index.py b/api/controllers/service_api/index.py index 9bb5df4c4e..a9d2d6fadc 100644 --- a/api/controllers/service_api/index.py +++ b/api/controllers/service_api/index.py @@ -1,9 +1,10 @@ -from flask_restful import Resource +from flask_restx import Resource from configs import dify_config -from controllers.service_api import api +from controllers.service_api import service_api_ns +@service_api_ns.route("/") class IndexApi(Resource): def get(self): return { @@ -11,6 +12,3 @@ class IndexApi(Resource): "api_version": "v1", "server_version": dify_config.project.version, } - - -api.add_resource(IndexApi, "/") diff --git a/api/controllers/service_api/workspace/models.py b/api/controllers/service_api/workspace/models.py index 3f18474674..536cf81a2f 100644 --- a/api/controllers/service_api/workspace/models.py +++ b/api/controllers/service_api/workspace/models.py @@ -1,21 +1,32 @@ from flask_login import current_user -from flask_restful import Resource +from flask_restx import Resource -from controllers.service_api import api +from controllers.service_api import service_api_ns from controllers.service_api.wraps import validate_dataset_token from core.model_runtime.utils.encoders import jsonable_encoder from services.model_provider_service import ModelProviderService +@service_api_ns.route("/workspaces/current/models/model-types/") class ModelProviderAvailableModelApi(Resource): + @service_api_ns.doc("get_available_models") + @service_api_ns.doc(description="Get available models by model type") + @service_api_ns.doc(params={"model_type": "Type of model to retrieve"}) + @service_api_ns.doc( + responses={ + 200: "Models retrieved 
successfully", + 401: "Unauthorized - invalid API token", + } + ) @validate_dataset_token def get(self, _, model_type): + """Get available models by model type. + + Returns a list of available models for the specified model type. + """ tenant_id = current_user.current_tenant_id model_provider_service = ModelProviderService() models = model_provider_service.get_models_by_model_type(tenant_id=tenant_id, model_type=model_type) return jsonable_encoder({"data": models}) - - -api.add_resource(ModelProviderAvailableModelApi, "/workspaces/current/models/model-types/") diff --git a/api/controllers/service_api/wraps.py b/api/controllers/service_api/wraps.py index da81cc8bc3..8aac3de4c3 100644 --- a/api/controllers/service_api/wraps.py +++ b/api/controllers/service_api/wraps.py @@ -7,7 +7,7 @@ from typing import Optional from flask import current_app, request from flask_login import user_logged_in # type: ignore -from flask_restful import Resource +from flask_restx import Resource from pydantic import BaseModel from sqlalchemy import select, update from sqlalchemy.orm import Session diff --git a/api/controllers/web/app.py b/api/controllers/web/app.py index 197859e8f3..0680903635 100644 --- a/api/controllers/web/app.py +++ b/api/controllers/web/app.py @@ -1,5 +1,7 @@ +import logging + from flask import request -from flask_restful import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with, reqparse from werkzeug.exceptions import Unauthorized from controllers.common import fields @@ -87,8 +89,11 @@ class AppWebAuthPermission(Resource): decoded = PassportService().verify(tk) user_id = decoded.get("user_id", "visitor") - except Exception as e: - pass + except Unauthorized: + raise + except Exception: + logging.exception("Unexpected error during auth verification") + raise features = FeatureService.get_system_features() if not features.webapp_auth.enabled: diff --git a/api/controllers/web/audio.py b/api/controllers/web/audio.py index 
2919ca9af4..241d0874db 100644 --- a/api/controllers/web/audio.py +++ b/api/controllers/web/audio.py @@ -65,7 +65,7 @@ class AudioApi(WebApiResource): class TextApi(WebApiResource): def post(self, app_model: App, end_user): - from flask_restful import reqparse + from flask_restx import reqparse try: parser = reqparse.RequestParser() diff --git a/api/controllers/web/completion.py b/api/controllers/web/completion.py index fd3b9aa804..c19afee9b7 100644 --- a/api/controllers/web/completion.py +++ b/api/controllers/web/completion.py @@ -1,6 +1,6 @@ import logging -from flask_restful import reqparse +from flask_restx import reqparse from werkzeug.exceptions import InternalServerError, NotFound import services diff --git a/api/controllers/web/conversation.py b/api/controllers/web/conversation.py index 98cea3974f..cea8e442f3 100644 --- a/api/controllers/web/conversation.py +++ b/api/controllers/web/conversation.py @@ -1,5 +1,5 @@ -from flask_restful import marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import marshal_with, reqparse +from flask_restx.inputs import int_range from sqlalchemy.orm import Session from werkzeug.exceptions import NotFound diff --git a/api/controllers/web/feature.py b/api/controllers/web/feature.py index 0563ed2238..478b3d2e31 100644 --- a/api/controllers/web/feature.py +++ b/api/controllers/web/feature.py @@ -1,4 +1,4 @@ -from flask_restful import Resource +from flask_restx import Resource from controllers.web import api from services.feature_service import FeatureService diff --git a/api/controllers/web/files.py b/api/controllers/web/files.py index 0c30435825..b05e2a2e65 100644 --- a/api/controllers/web/files.py +++ b/api/controllers/web/files.py @@ -1,5 +1,5 @@ from flask import request -from flask_restful import marshal_with +from flask_restx import marshal_with import services from controllers.common.errors import ( diff --git a/api/controllers/web/forgot_password.py b/api/controllers/web/forgot_password.py 
index 0da8d65efc..d436657f06 100644 --- a/api/controllers/web/forgot_password.py +++ b/api/controllers/web/forgot_password.py @@ -2,7 +2,7 @@ import base64 import secrets from flask import request -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from sqlalchemy import select from sqlalchemy.orm import Session diff --git a/api/controllers/web/login.py b/api/controllers/web/login.py index 01c4f4a262..d4eafd532b 100644 --- a/api/controllers/web/login.py +++ b/api/controllers/web/login.py @@ -1,4 +1,4 @@ -from flask_restful import Resource, reqparse +from flask_restx import Resource, reqparse from jwt import InvalidTokenError # type: ignore import services diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index 7bb81cd0d3..f348221d80 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -1,7 +1,7 @@ import logging -from flask_restful import fields, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import fields, marshal_with, reqparse +from flask_restx.inputs import int_range from werkzeug.exceptions import InternalServerError, NotFound from controllers.web import api diff --git a/api/controllers/web/passport.py b/api/controllers/web/passport.py index acd3a8b539..1ac20e6531 100644 --- a/api/controllers/web/passport.py +++ b/api/controllers/web/passport.py @@ -2,7 +2,7 @@ import uuid from datetime import UTC, datetime, timedelta from flask import request -from flask_restful import Resource +from flask_restx import Resource from sqlalchemy import func, select from werkzeug.exceptions import NotFound, Unauthorized diff --git a/api/controllers/web/remote_files.py b/api/controllers/web/remote_files.py index 4e19716c3d..930b9d96e9 100644 --- a/api/controllers/web/remote_files.py +++ b/api/controllers/web/remote_files.py @@ -1,7 +1,7 @@ import urllib.parse import httpx -from flask_restful import marshal_with, reqparse +from flask_restx import 
marshal_with, reqparse import services from controllers.common import helpers diff --git a/api/controllers/web/saved_message.py b/api/controllers/web/saved_message.py index d7188ef0b3..a0912499ff 100644 --- a/api/controllers/web/saved_message.py +++ b/api/controllers/web/saved_message.py @@ -1,5 +1,5 @@ -from flask_restful import fields, marshal_with, reqparse -from flask_restful.inputs import int_range +from flask_restx import fields, marshal_with, reqparse +from flask_restx.inputs import int_range from werkzeug.exceptions import NotFound from controllers.web import api diff --git a/api/controllers/web/site.py b/api/controllers/web/site.py index 3c133499b7..b2a887a0de 100644 --- a/api/controllers/web/site.py +++ b/api/controllers/web/site.py @@ -1,4 +1,4 @@ -from flask_restful import fields, marshal_with +from flask_restx import fields, marshal_with from werkzeug.exceptions import Forbidden from configs import dify_config diff --git a/api/controllers/web/workflow.py b/api/controllers/web/workflow.py index 590fd3f2c7..331587cc28 100644 --- a/api/controllers/web/workflow.py +++ b/api/controllers/web/workflow.py @@ -1,6 +1,6 @@ import logging -from flask_restful import reqparse +from flask_restx import reqparse from werkzeug.exceptions import InternalServerError from controllers.web import api diff --git a/api/controllers/web/wraps.py b/api/controllers/web/wraps.py index ae6f14a689..94fa5d5626 100644 --- a/api/controllers/web/wraps.py +++ b/api/controllers/web/wraps.py @@ -2,7 +2,7 @@ from datetime import UTC, datetime from functools import wraps from flask import request -from flask_restful import Resource +from flask_restx import Resource from sqlalchemy import select from werkzeug.exceptions import BadRequest, NotFound, Unauthorized diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index ad9b625350..f7c83f927f 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -512,7 +512,6 @@ class 
BaseAgentRunner(AppRunner): if not file_objs: return UserPromptMessage(content=message.query) prompt_message_contents: list[PromptMessageContentUnionTypes] = [] - prompt_message_contents.append(TextPromptMessageContent(data=message.query)) for file in file_objs: prompt_message_contents.append( file_manager.to_prompt_message_content( @@ -520,4 +519,6 @@ class BaseAgentRunner(AppRunner): image_detail_config=image_detail_config, ) ) + prompt_message_contents.append(TextPromptMessageContent(data=message.query)) + return UserPromptMessage(content=prompt_message_contents) diff --git a/api/core/agent/cot_agent_runner.py b/api/core/agent/cot_agent_runner.py index 565fb42478..6cb1077126 100644 --- a/api/core/agent/cot_agent_runner.py +++ b/api/core/agent/cot_agent_runner.py @@ -197,7 +197,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): final_answer = scratchpad.action.action_input else: final_answer = f"{scratchpad.action.action_input}" - except json.JSONDecodeError: + except TypeError: final_answer = f"{scratchpad.action.action_input}" else: function_call_state = True diff --git a/api/core/agent/cot_chat_agent_runner.py b/api/core/agent/cot_chat_agent_runner.py index 5ff89bdacb..4d1d94eadc 100644 --- a/api/core/agent/cot_chat_agent_runner.py +++ b/api/core/agent/cot_chat_agent_runner.py @@ -39,9 +39,6 @@ class CotChatAgentRunner(CotAgentRunner): Organize user query """ if self.files: - prompt_message_contents: list[PromptMessageContentUnionTypes] = [] - prompt_message_contents.append(TextPromptMessageContent(data=query)) - # get image detail config image_detail_config = ( self.application_generate_entity.file_upload_config.image_config.detail @@ -52,6 +49,8 @@ class CotChatAgentRunner(CotAgentRunner): else None ) image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW + + prompt_message_contents: list[PromptMessageContentUnionTypes] = [] for file in self.files: prompt_message_contents.append( file_manager.to_prompt_message_content( @@ -59,6 +58,7 
@@ class CotChatAgentRunner(CotAgentRunner): image_detail_config=image_detail_config, ) ) + prompt_message_contents.append(TextPromptMessageContent(data=query)) prompt_messages.append(UserPromptMessage(content=prompt_message_contents)) else: diff --git a/api/core/agent/fc_agent_runner.py b/api/core/agent/fc_agent_runner.py index 4df71ce9de..9eb853aa74 100644 --- a/api/core/agent/fc_agent_runner.py +++ b/api/core/agent/fc_agent_runner.py @@ -126,8 +126,8 @@ class FunctionCallAgentRunner(BaseAgentRunner): tool_call_inputs = json.dumps( {tool_call[1]: tool_call[2] for tool_call in tool_calls}, ensure_ascii=False ) - except json.JSONDecodeError: - # ensure ascii to avoid encoding error + except TypeError: + # fallback: force ASCII to handle non-serializable objects tool_call_inputs = json.dumps({tool_call[1]: tool_call[2] for tool_call in tool_calls}) if chunk.delta.message and chunk.delta.message.content: @@ -153,8 +153,8 @@ class FunctionCallAgentRunner(BaseAgentRunner): tool_call_inputs = json.dumps( {tool_call[1]: tool_call[2] for tool_call in tool_calls}, ensure_ascii=False ) - except json.JSONDecodeError: - # ensure ascii to avoid encoding error + except TypeError: + # fallback: force ASCII to handle non-serializable objects tool_call_inputs = json.dumps({tool_call[1]: tool_call[2] for tool_call in tool_calls}) if result.usage: @@ -395,9 +395,6 @@ class FunctionCallAgentRunner(BaseAgentRunner): Organize user query """ if self.files: - prompt_message_contents: list[PromptMessageContentUnionTypes] = [] - prompt_message_contents.append(TextPromptMessageContent(data=query)) - # get image detail config image_detail_config = ( self.application_generate_entity.file_upload_config.image_config.detail @@ -408,6 +405,8 @@ class FunctionCallAgentRunner(BaseAgentRunner): else None ) image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW + + prompt_message_contents: list[PromptMessageContentUnionTypes] = [] for file in self.files: 
prompt_message_contents.append( file_manager.to_prompt_message_content( @@ -415,6 +414,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): image_detail_config=image_detail_config, ) ) + prompt_message_contents.append(TextPromptMessageContent(data=query)) prompt_messages.append(UserPromptMessage(content=prompt_message_contents)) else: diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py index 0df0aa59b2..0db1d52779 100644 --- a/api/core/app/app_config/entities.py +++ b/api/core/app/app_config/entities.py @@ -167,7 +167,7 @@ class ModelConfig(BaseModel): provider: str name: str mode: LLMMode - completion_params: dict[str, Any] = {} + completion_params: dict[str, Any] = Field(default_factory=dict) class Condition(BaseModel): diff --git a/api/core/app/apps/common/workflow_response_converter.py b/api/core/app/apps/common/workflow_response_converter.py index 34a1da2227..1a89237333 100644 --- a/api/core/app/apps/common/workflow_response_converter.py +++ b/api/core/app/apps/common/workflow_response_converter.py @@ -50,6 +50,7 @@ from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution from core.workflow.nodes import NodeType from core.workflow.nodes.tool.entities import ToolNodeData from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter +from libs.datetime_utils import naive_utc_now from models import ( Account, CreatorUserRole, @@ -399,7 +400,7 @@ class WorkflowResponseConverter: if event.error is None else WorkflowNodeExecutionStatus.FAILED, error=None, - elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), + elapsed_time=(naive_utc_now() - event.start_at).total_seconds(), total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, execution_metadata=event.metadata, finished_at=int(time.time()), @@ -478,7 +479,7 @@ class WorkflowResponseConverter: if event.error is None else WorkflowNodeExecutionStatus.FAILED, error=None, - 
elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), + elapsed_time=(naive_utc_now() - event.start_at).total_seconds(), total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, execution_metadata=event.metadata, finished_at=int(time.time()), diff --git a/api/core/app/entities/queue_entities.py b/api/core/app/entities/queue_entities.py index 42e6a1519c..d663dbb175 100644 --- a/api/core/app/entities/queue_entities.py +++ b/api/core/app/entities/queue_entities.py @@ -610,7 +610,7 @@ class QueueErrorEvent(AppQueueEvent): """ event: QueueEvent = QueueEvent.ERROR - error: Any = None + error: Optional[Any] = None class QueuePingEvent(AppQueueEvent): diff --git a/api/core/app/entities/task_entities.py b/api/core/app/entities/task_entities.py index 25c889e922..a1c0368354 100644 --- a/api/core/app/entities/task_entities.py +++ b/api/core/app/entities/task_entities.py @@ -142,7 +142,7 @@ class MessageEndStreamResponse(StreamResponse): event: StreamEvent = StreamEvent.MESSAGE_END id: str - metadata: dict = {} + metadata: dict = Field(default_factory=dict) files: Optional[Sequence[Mapping[str, Any]]] = None @@ -261,7 +261,7 @@ class NodeStartStreamResponse(StreamResponse): predecessor_node_id: Optional[str] = None inputs: Optional[Mapping[str, Any]] = None created_at: int - extras: dict = {} + extras: dict = Field(default_factory=dict) parallel_id: Optional[str] = None parallel_start_node_id: Optional[str] = None parent_parallel_id: Optional[str] = None @@ -503,7 +503,7 @@ class IterationNodeStartStreamResponse(StreamResponse): node_type: str title: str created_at: int - extras: dict = {} + extras: dict = Field(default_factory=dict) metadata: Mapping = {} inputs: Mapping = {} parallel_id: Optional[str] = None @@ -531,7 +531,7 @@ class IterationNodeNextStreamResponse(StreamResponse): index: int created_at: int pre_iteration_output: Optional[Any] = None - extras: dict = {} + extras: dict = Field(default_factory=dict) 
parallel_id: Optional[str] = None parallel_start_node_id: Optional[str] = None parallel_mode_run_id: Optional[str] = None @@ -590,7 +590,7 @@ class LoopNodeStartStreamResponse(StreamResponse): node_type: str title: str created_at: int - extras: dict = {} + extras: dict = Field(default_factory=dict) metadata: Mapping = {} inputs: Mapping = {} parallel_id: Optional[str] = None @@ -618,7 +618,7 @@ class LoopNodeNextStreamResponse(StreamResponse): index: int created_at: int pre_loop_output: Optional[Any] = None - extras: dict = {} + extras: dict = Field(default_factory=dict) parallel_id: Optional[str] = None parallel_start_node_id: Optional[str] = None parallel_mode_run_id: Optional[str] = None @@ -764,7 +764,7 @@ class ChatbotAppBlockingResponse(AppBlockingResponse): conversation_id: str message_id: str answer: str - metadata: dict = {} + metadata: dict = Field(default_factory=dict) created_at: int data: Data @@ -784,7 +784,7 @@ class CompletionAppBlockingResponse(AppBlockingResponse): mode: str message_id: str answer: str - metadata: dict = {} + metadata: dict = Field(default_factory=dict) created_at: int data: Data diff --git a/api/core/app/task_pipeline/based_generate_task_pipeline.py b/api/core/app/task_pipeline/based_generate_task_pipeline.py index 014c7fd4f5..8c0a442158 100644 --- a/api/core/app/task_pipeline/based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/based_generate_task_pipeline.py @@ -52,7 +52,8 @@ class BasedGenerateTaskPipeline: elif isinstance(e, InvokeError | ValueError): err = e else: - err = Exception(e.description if getattr(e, "description", None) is not None else str(e)) + description = getattr(e, "description", None) + err = Exception(description if description is not None else str(e)) if not message_id or not session: return err diff --git a/api/core/app/task_pipeline/message_cycle_manager.py b/api/core/app/task_pipeline/message_cycle_manager.py index f3b9dbf758..0d786ba051 100644 --- 
a/api/core/app/task_pipeline/message_cycle_manager.py +++ b/api/core/app/task_pipeline/message_cycle_manager.py @@ -181,7 +181,7 @@ class MessageCycleManager: :param message_id: message id :return: """ - message_file = db.session.query(MessageFile).filter(MessageFile.id == message_id).first() + message_file = db.session.query(MessageFile).where(MessageFile.id == message_id).first() event_type = StreamEvent.MESSAGE_FILE if message_file else StreamEvent.MESSAGE return MessageStreamResponse( diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index 8bfbd82e1f..646e0e21e9 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -1,4 +1,3 @@ -import datetime import json import logging from collections import defaultdict @@ -29,6 +28,7 @@ from core.model_runtime.model_providers.__base.ai_model import AIModel from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory from core.plugin.entities.plugin import ModelProviderID from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now from models.provider import ( LoadBalancingModelConfig, Provider, @@ -261,7 +261,7 @@ class ProviderConfiguration(BaseModel): if provider_record: provider_record.encrypted_config = json.dumps(credentials) provider_record.is_valid = True - provider_record.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + provider_record.updated_at = naive_utc_now() db.session.commit() else: provider_record = Provider() @@ -426,7 +426,7 @@ class ProviderConfiguration(BaseModel): if provider_model_record: provider_model_record.encrypted_config = json.dumps(credentials) provider_model_record.is_valid = True - provider_model_record.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + provider_model_record.updated_at = naive_utc_now() db.session.commit() else: provider_model_record = ProviderModel() @@ -501,7 +501,7 @@ 
class ProviderConfiguration(BaseModel): if model_setting: model_setting.enabled = True - model_setting.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + model_setting.updated_at = naive_utc_now() db.session.commit() else: model_setting = ProviderModelSetting() @@ -526,7 +526,7 @@ class ProviderConfiguration(BaseModel): if model_setting: model_setting.enabled = False - model_setting.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + model_setting.updated_at = naive_utc_now() db.session.commit() else: model_setting = ProviderModelSetting() @@ -599,7 +599,7 @@ class ProviderConfiguration(BaseModel): if model_setting: model_setting.load_balancing_enabled = True - model_setting.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + model_setting.updated_at = naive_utc_now() db.session.commit() else: model_setting = ProviderModelSetting() @@ -638,7 +638,7 @@ class ProviderConfiguration(BaseModel): if model_setting: model_setting.load_balancing_enabled = False - model_setting.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + model_setting.updated_at = naive_utc_now() db.session.commit() else: model_setting = ProviderModelSetting() diff --git a/api/core/extension/extensible.py b/api/core/extension/extensible.py index 557f7eb1ed..ae4671a381 100644 --- a/api/core/extension/extensible.py +++ b/api/core/extension/extensible.py @@ -17,7 +17,7 @@ class ExtensionModule(enum.Enum): class ModuleExtension(BaseModel): - extension_class: Any = None + extension_class: Optional[Any] = None name: str label: Optional[dict] = None form_schema: Optional[list] = None diff --git a/api/core/extension/extension.py b/api/core/extension/extension.py index 9eb9e0306b..50c3f9b5f4 100644 --- a/api/core/extension/extension.py +++ b/api/core/extension/extension.py @@ -38,6 +38,7 @@ class Extension: def extension_class(self, module: ExtensionModule, extension_name: str) -> type: module_extension = 
self.module_extension(module, extension_name) + assert module_extension.extension_class is not None t: type = module_extension.extension_class return t diff --git a/api/core/helper/trace_id_helper.py b/api/core/helper/trace_id_helper.py index df42837796..5cd0ea5c66 100644 --- a/api/core/helper/trace_id_helper.py +++ b/api/core/helper/trace_id_helper.py @@ -1,3 +1,4 @@ +import contextlib import re from collections.abc import Mapping from typing import Any, Optional @@ -97,10 +98,8 @@ def parse_traceparent_header(traceparent: str) -> Optional[str]: Reference: W3C Trace Context Specification: https://www.w3.org/TR/trace-context/ """ - try: + with contextlib.suppress(Exception): parts = traceparent.split("-") if len(parts) == 4 and len(parts[1]) == 32: return parts[1] - except Exception: - pass return None diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 2387658bb6..9876194608 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -1,5 +1,4 @@ import concurrent.futures -import datetime import json import logging import re @@ -9,7 +8,6 @@ import uuid from typing import Any, Optional, cast from flask import current_app -from flask_login import current_user from sqlalchemy.orm.exc import ObjectDeletedError from configs import dify_config @@ -30,11 +28,12 @@ from core.rag.splitter.fixed_text_splitter import ( FixedRecursiveCharacterTextSplitter, ) from core.rag.splitter.text_splitter import TextSplitter -from core.tools.utils.rag_web_reader import get_image_upload_file_ids +from core.tools.utils.web_reader_tool import get_image_upload_file_ids from extensions.ext_database import db from extensions.ext_redis import redis_client from extensions.ext_storage import storage from libs import helper +from libs.datetime_utils import naive_utc_now from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment from models.dataset import Document as DatasetDocument from models.model import UploadFile @@ -88,7 
+87,7 @@ class IndexingRunner: except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) - dataset_document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + dataset_document.stopped_at = naive_utc_now() db.session.commit() except ObjectDeletedError: logging.warning("Document deleted, document id: %s", dataset_document.id) @@ -96,7 +95,7 @@ class IndexingRunner: logging.exception("consume document failed") dataset_document.indexing_status = "error" dataset_document.error = str(e) - dataset_document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + dataset_document.stopped_at = naive_utc_now() db.session.commit() def run_in_splitting_status(self, dataset_document: DatasetDocument): @@ -151,13 +150,13 @@ class IndexingRunner: except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) - dataset_document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + dataset_document.stopped_at = naive_utc_now() db.session.commit() except Exception as e: logging.exception("consume document failed") dataset_document.indexing_status = "error" dataset_document.error = str(e) - dataset_document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + dataset_document.stopped_at = naive_utc_now() db.session.commit() def run_in_indexing_status(self, dataset_document: DatasetDocument): @@ -226,13 +225,13 @@ class IndexingRunner: except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) - dataset_document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + dataset_document.stopped_at = naive_utc_now() db.session.commit() except Exception as e: logging.exception("consume document failed") dataset_document.indexing_status = "error" dataset_document.error = str(e) - dataset_document.stopped_at = 
datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + dataset_document.stopped_at = naive_utc_now() db.session.commit() def indexing_estimate( @@ -295,7 +294,7 @@ class IndexingRunner: text_docs, embedding_model_instance=embedding_model_instance, process_rule=processing_rule.to_dict(), - tenant_id=current_user.current_tenant_id, + tenant_id=tenant_id, doc_language=doc_language, preview=True, ) @@ -401,7 +400,7 @@ class IndexingRunner: after_indexing_status="splitting", extra_update_params={ DatasetDocument.word_count: sum(len(text_doc.page_content) for text_doc in text_docs), - DatasetDocument.parsing_completed_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DatasetDocument.parsing_completed_at: naive_utc_now(), }, ) @@ -584,7 +583,7 @@ class IndexingRunner: after_indexing_status="completed", extra_update_params={ DatasetDocument.tokens: tokens, - DatasetDocument.completed_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DatasetDocument.completed_at: naive_utc_now(), DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at, DatasetDocument.error: None, }, @@ -609,7 +608,7 @@ class IndexingRunner: { DocumentSegment.status: "completed", DocumentSegment.enabled: True, - DocumentSegment.completed_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.completed_at: naive_utc_now(), } ) @@ -640,7 +639,7 @@ class IndexingRunner: { DocumentSegment.status: "completed", DocumentSegment.enabled: True, - DocumentSegment.completed_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.completed_at: naive_utc_now(), } ) @@ -728,7 +727,7 @@ class IndexingRunner: doc_store.add_documents(docs=documents, save_child=dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX) # update document status to indexing - cur_time = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + cur_time = naive_utc_now() self._update_document_index_status( document_id=dataset_document.id, 
after_indexing_status="indexing", @@ -743,7 +742,7 @@ class IndexingRunner: dataset_document_id=dataset_document.id, update_params={ DocumentSegment.status: "indexing", - DocumentSegment.indexing_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.indexing_at: naive_utc_now(), }, ) pass diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index 736d4fade8..b7c5b0993a 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -401,9 +401,9 @@ class LLMGenerator: def instruction_modify_legacy( tenant_id: str, flow_id: str, current: str, instruction: str, model_config: dict, ideal_output: str | None ) -> dict: - app: App | None = db.session.query(App).filter(App.id == flow_id).first() + app: App | None = db.session.query(App).where(App.id == flow_id).first() last_run: Message | None = ( - db.session.query(Message).filter(Message.app_id == flow_id).order_by(Message.created_at.desc()).first() + db.session.query(Message).where(Message.app_id == flow_id).order_by(Message.created_at.desc()).first() ) if not last_run: return LLMGenerator.__instruction_modify_common( @@ -444,7 +444,7 @@ class LLMGenerator: ) -> dict: from services.workflow_service import WorkflowService - app: App | None = db.session.query(App).filter(App.id == flow_id).first() + app: App | None = db.session.query(App).where(App.id == flow_id).first() if not app: raise ValueError("App not found.") workflow = WorkflowService().get_draft_workflow(app_model=app) @@ -534,7 +534,7 @@ class LLMGenerator: model=model_config.get("name", ""), ) match node_type: - case "llm", "agent": + case "llm" | "agent": system_prompt = LLM_MODIFY_PROMPT_SYSTEM case "code": system_prompt = LLM_MODIFY_CODE_SYSTEM diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py index 4eec0ef0f5..004ff73f97 100644 --- a/api/core/llm_generator/prompts.py +++ b/api/core/llm_generator/prompts.py @@ -414,7 
+414,7 @@ When you are modifying the code, you should remember: - Get inputs from the parameters of the function and have explicit type annotations. - Write proper imports at the top of the code. - Use return statement to return the result. -- You should return a `dict`. +- You should return a `dict`. If you need to return a `result: str`, you should `return {"result": result}`. Your output must strictly follow the schema format, do not output any content outside of the JSON body. """ # noqa: E501 diff --git a/api/core/mcp/auth/auth_flow.py b/api/core/mcp/auth/auth_flow.py index bcb31a816f..eb783297c3 100644 --- a/api/core/mcp/auth/auth_flow.py +++ b/api/core/mcp/auth/auth_flow.py @@ -5,9 +5,9 @@ import os import secrets import urllib.parse from typing import Optional -from urllib.parse import urljoin +from urllib.parse import urljoin, urlparse -import requests +import httpx from pydantic import BaseModel, ValidationError from core.mcp.auth.auth_provider import OAuthClientProvider @@ -99,24 +99,52 @@ def handle_callback(state_key: str, authorization_code: str) -> OAuthCallbackSta return full_state_data +def check_support_resource_discovery(server_url: str) -> tuple[bool, str]: + """Check if the server supports OAuth 2.0 Resource Discovery.""" + b_scheme, b_netloc, b_path, b_params, b_query, b_fragment = urlparse(server_url, "", True) + url_for_resource_discovery = f"{b_scheme}://{b_netloc}/.well-known/oauth-protected-resource{b_path}" + if b_query: + url_for_resource_discovery += f"?{b_query}" + if b_fragment: + url_for_resource_discovery += f"#{b_fragment}" + try: + headers = {"MCP-Protocol-Version": LATEST_PROTOCOL_VERSION, "User-Agent": "Dify"} + response = httpx.get(url_for_resource_discovery, headers=headers) + if 200 <= response.status_code < 300: + body = response.json() + if "authorization_server_url" in body: + return True, body["authorization_server_url"][0] + else: + return False, "" + return False, "" + except httpx.RequestError as e: + # Not support 
resource discovery, fall back to well-known OAuth metadata + return False, "" + + def discover_oauth_metadata(server_url: str, protocol_version: Optional[str] = None) -> Optional[OAuthMetadata]: """Looks up RFC 8414 OAuth 2.0 Authorization Server Metadata.""" - url = urljoin(server_url, "/.well-known/oauth-authorization-server") + # First check if the server supports OAuth 2.0 Resource Discovery + support_resource_discovery, oauth_discovery_url = check_support_resource_discovery(server_url) + if support_resource_discovery: + url = oauth_discovery_url + else: + url = urljoin(server_url, "/.well-known/oauth-authorization-server") try: headers = {"MCP-Protocol-Version": protocol_version or LATEST_PROTOCOL_VERSION} - response = requests.get(url, headers=headers) + response = httpx.get(url, headers=headers) if response.status_code == 404: return None - if not response.ok: + if not response.is_success: raise ValueError(f"HTTP {response.status_code} trying to load well-known OAuth metadata") return OAuthMetadata.model_validate(response.json()) - except requests.RequestException as e: - if isinstance(e, requests.ConnectionError): - response = requests.get(url) + except httpx.RequestError as e: + if isinstance(e, httpx.ConnectError): + response = httpx.get(url) if response.status_code == 404: return None - if not response.ok: + if not response.is_success: raise ValueError(f"HTTP {response.status_code} trying to load well-known OAuth metadata") return OAuthMetadata.model_validate(response.json()) raise @@ -206,8 +234,8 @@ def exchange_authorization( if client_information.client_secret: params["client_secret"] = client_information.client_secret - response = requests.post(token_url, data=params) - if not response.ok: + response = httpx.post(token_url, data=params) + if not response.is_success: raise ValueError(f"Token exchange failed: HTTP {response.status_code}") return OAuthTokens.model_validate(response.json()) @@ -237,8 +265,8 @@ def refresh_authorization( if 
client_information.client_secret: params["client_secret"] = client_information.client_secret - response = requests.post(token_url, data=params) - if not response.ok: + response = httpx.post(token_url, data=params) + if not response.is_success: raise ValueError(f"Token refresh failed: HTTP {response.status_code}") return OAuthTokens.model_validate(response.json()) @@ -256,12 +284,12 @@ def register_client( else: registration_url = urljoin(server_url, "/register") - response = requests.post( + response = httpx.post( registration_url, json=client_metadata.model_dump(), headers={"Content-Type": "application/json"}, ) - if not response.ok: + if not response.is_success: response.raise_for_status() return OAuthClientInformationFull.model_validate(response.json()) @@ -283,7 +311,7 @@ def auth( raise ValueError("Existing OAuth client information is required when exchanging an authorization code") try: full_information = register_client(server_url, metadata, provider.client_metadata) - except requests.RequestException as e: + except httpx.RequestError as e: raise ValueError(f"Could not register OAuth client: {e}") provider.save_client_information(full_information) client_information = full_information diff --git a/api/core/mcp/session/base_session.py b/api/core/mcp/session/base_session.py index 3f98aa94ae..031f01f411 100644 --- a/api/core/mcp/session/base_session.py +++ b/api/core/mcp/session/base_session.py @@ -4,7 +4,7 @@ from collections.abc import Callable from concurrent.futures import Future, ThreadPoolExecutor, TimeoutError from datetime import timedelta from types import TracebackType -from typing import Any, Generic, Self, TypeVar +from typing import Any, Generic, Optional, Self, TypeVar from httpx import HTTPStatusError from pydantic import BaseModel @@ -209,7 +209,7 @@ class BaseSession( request: SendRequestT, result_type: type[ReceiveResultT], request_read_timeout_seconds: timedelta | None = None, - metadata: MessageMetadata = None, + metadata: 
Optional[MessageMetadata] = None, ) -> ReceiveResultT: """ Sends a request and wait for a response. Raises an McpError if the diff --git a/api/core/mcp/types.py b/api/core/mcp/types.py index 99d985a781..49aa8e4498 100644 --- a/api/core/mcp/types.py +++ b/api/core/mcp/types.py @@ -1173,7 +1173,7 @@ class SessionMessage: """A message with specific metadata for transport-specific features.""" message: JSONRPCMessage - metadata: MessageMetadata = None + metadata: Optional[MessageMetadata] = None class OAuthClientMetadata(BaseModel): diff --git a/api/core/model_runtime/README.md b/api/core/model_runtime/README.md index b5de7ad412..3abb3f63ac 100644 --- a/api/core/model_runtime/README.md +++ b/api/core/model_runtime/README.md @@ -30,7 +30,7 @@ This module provides the interface for invoking and authenticating various model In addition, this list also returns configurable parameter information and rules for LLM, as shown below: - ![image-20231210144814617](./docs/en_US/images/index/image-20231210144814617.png) + ![image-20231210144814617](./docs/en_US/images/index/image-20231210144814617.png) These parameters are all defined in the backend, allowing different settings for various parameters supported by different models, as detailed in: [Schema](./docs/en_US/schema.md#ParameterRule). @@ -60,8 +60,6 @@ Model Runtime is divided into three layers: It offers direct invocation of various model types, predefined model configuration information, getting predefined/remote model lists, model credential authentication methods. Different models provide additional special methods, like LLM's pre-computed tokens method, cost information obtaining method, etc., **allowing horizontal expansion** for different models under the same provider (within supported model types). 
- - ## Next Steps - Add new provider configuration: [Link](./docs/en_US/provider_scale_out.md) diff --git a/api/core/model_runtime/README_CN.md b/api/core/model_runtime/README_CN.md index 2fc2a60461..19846481e0 100644 --- a/api/core/model_runtime/README_CN.md +++ b/api/core/model_runtime/README_CN.md @@ -20,19 +20,19 @@ ![image-20231210143654461](./docs/zh_Hans/images/index/image-20231210143654461.png) -​ 展示所有已支持的供应商列表,除了返回供应商名称、图标之外,还提供了支持的模型类型列表,预定义模型列表、配置方式以及配置凭据的表单规则等等,规则设计详见:[Schema](./docs/zh_Hans/schema.md)。 +​ 展示所有已支持的供应商列表,除了返回供应商名称、图标之外,还提供了支持的模型类型列表,预定义模型列表、配置方式以及配置凭据的表单规则等等,规则设计详见:[Schema](./docs/zh_Hans/schema.md)。 - 可选择的模型列表展示 ![image-20231210144229650](./docs/zh_Hans/images/index/image-20231210144229650.png) -​ 配置供应商/模型凭据后,可在此下拉(应用编排界面/默认模型)查看可用的 LLM 列表,其中灰色的为未配置凭据供应商的预定义模型列表,方便用户查看已支持的模型。 +​ 配置供应商/模型凭据后,可在此下拉(应用编排界面/默认模型)查看可用的 LLM 列表,其中灰色的为未配置凭据供应商的预定义模型列表,方便用户查看已支持的模型。 -​ 除此之外,该列表还返回了 LLM 可配置的参数信息和规则,如下图: +​ 除此之外,该列表还返回了 LLM 可配置的参数信息和规则,如下图: -​ ![image-20231210144814617](./docs/zh_Hans/images/index/image-20231210144814617.png) +​ ![image-20231210144814617](./docs/zh_Hans/images/index/image-20231210144814617.png) -​ 这里的参数均为后端定义,相比之前只有 5 种固定参数,这里可为不同模型设置所支持的各种参数,详见:[Schema](./docs/zh_Hans/schema.md#ParameterRule)。 +​ 这里的参数均为后端定义,相比之前只有 5 种固定参数,这里可为不同模型设置所支持的各种参数,详见:[Schema](./docs/zh_Hans/schema.md#ParameterRule)。 - 供应商/模型凭据鉴权 @@ -40,7 +40,7 @@ ![image-20231210151628992](./docs/zh_Hans/images/index/image-20231210151628992.png) -​ 供应商列表返回了凭据表单的配置信息,可通过 Runtime 提供的接口对凭据进行鉴权,上图 1 为供应商凭据 DEMO,上图 2 为模型凭据 DEMO。 +​ 供应商列表返回了凭据表单的配置信息,可通过 Runtime 提供的接口对凭据进行鉴权,上图 1 为供应商凭据 DEMO,上图 2 为模型凭据 DEMO。 ## 结构 @@ -57,9 +57,10 @@ Model Runtime 分三层: 提供获取当前供应商模型列表、获取模型实例、供应商凭据鉴权、供应商配置规则信息,**可横向扩展**以支持不同的供应商。 对于供应商/模型凭据,有两种情况 + - 如 OpenAI 这类中心化供应商,需要定义如**api_key**这类的鉴权凭据 - 如[**Xinference**](https://github.com/xorbitsai/inference)这类本地部署的供应商,需要定义如**server_url**这类的地址凭据,有时候还需要定义**model_uid**之类的模型类型凭据,就像下面这样,当在供应商层定义了这些凭据后,就可以在前端页面上直接展示,无需修改前端逻辑。 - ![Alt 
text](docs/zh_Hans/images/index/image.png) + ![Alt text](docs/zh_Hans/images/index/image.png) 当配置好凭据后,就可以通过 DifyRuntime 的外部接口直接获取到对应供应商所需要的**Schema**(凭据表单规则),从而在可以在不修改前端逻辑的情况下,提供新的供应商/模型的支持。 @@ -76,14 +77,17 @@ Model Runtime 分三层: ## 下一步 ### [增加新的供应商配置 👈🏻](./docs/zh_Hans/provider_scale_out.md) + 当添加后,这里将会出现一个新的供应商 ![Alt text](docs/zh_Hans/images/index/image-1.png) -### [为已存在的供应商新增模型 👈🏻](./docs/zh_Hans/provider_scale_out.md#增加模型) +### [为已存在的供应商新增模型 👈🏻](./docs/zh_Hans/provider_scale_out.md#%E5%A2%9E%E5%8A%A0%E6%A8%A1%E5%9E%8B) + 当添加后,对应供应商的模型列表中将会出现一个新的预定义模型供用户选择,如 GPT-3.5 GPT-4 ChatGLM3-6b 等,而对于支持自定义模型的供应商,则不需要新增模型。 ![Alt text](docs/zh_Hans/images/index/image-2.png) ### [接口的具体实现 👈🏻](./docs/zh_Hans/interfaces.md) + 你可以在这里找到你想要查看的接口的具体实现,以及接口的参数和返回值的具体含义。 diff --git a/api/core/model_runtime/docs/en_US/customizable_model_scale_out.md b/api/core/model_runtime/docs/en_US/customizable_model_scale_out.md index d845c4bd09..245aa4699c 100644 --- a/api/core/model_runtime/docs/en_US/customizable_model_scale_out.md +++ b/api/core/model_runtime/docs/en_US/customizable_model_scale_out.md @@ -56,7 +56,6 @@ provider_credential_schema: credential_form_schemas: ``` - Then, we need to determine what credentials are required to define a model in Xinference. - Since it supports three different types of models, we need to specify the model_type to denote the model type. Here is how we can define it: @@ -191,7 +190,6 @@ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[Pr """ ``` - Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens and ensure environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`, This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate. 
- Model Credentials Validation diff --git a/api/core/model_runtime/docs/en_US/interfaces.md b/api/core/model_runtime/docs/en_US/interfaces.md index 158d4b306b..9a8c2ec942 100644 --- a/api/core/model_runtime/docs/en_US/interfaces.md +++ b/api/core/model_runtime/docs/en_US/interfaces.md @@ -35,12 +35,11 @@ All models need to uniformly implement the following 2 methods: Similar to provider credential verification, this step involves verification for an individual model. - ```python def validate_credentials(self, model: str, credentials: dict) -> None: """ Validate model credentials - + :param model: model name :param credentials: model credentials :return: @@ -77,12 +76,12 @@ All models need to uniformly implement the following 2 methods: The key is the error type thrown to the caller The value is the error type thrown by the model, which needs to be converted into a unified error type for the caller. - + :return: Invoke error mapping """ ``` -​ You can refer to OpenAI's `_invoke_error_mapping` for an example. +​ You can refer to OpenAI's `_invoke_error_mapping` for an example. ### LLM @@ -92,7 +91,6 @@ Inherit the `__base.large_language_model.LargeLanguageModel` base class and impl Implement the core method for LLM invocation, which can support both streaming and synchronous returns. - ```python def _invoke(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, @@ -101,7 +99,7 @@ Inherit the `__base.large_language_model.LargeLanguageModel` base class and impl -> Union[LLMResult, Generator]: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param prompt_messages: prompt messages @@ -122,7 +120,7 @@ Inherit the `__base.large_language_model.LargeLanguageModel` base class and impl The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. 
Inputs such as `api_key` are included. - - `prompt_messages` (array[[PromptMessage](#PromptMessage)]) List of prompts + - `prompt_messages` (array\[[PromptMessage](#PromptMessage)\]) List of prompts If the model is of the `Completion` type, the list only needs to include one [UserPromptMessage](#UserPromptMessage) element; @@ -132,7 +130,7 @@ Inherit the `__base.large_language_model.LargeLanguageModel` base class and impl The model parameters are defined by the `parameter_rules` in the model's YAML configuration. - - `tools` (array[[PromptMessageTool](#PromptMessageTool)]) [optional] List of tools, equivalent to the `function` in `function calling`. + - `tools` (array\[[PromptMessageTool](#PromptMessageTool)\]) [optional] List of tools, equivalent to the `function` in `function calling`. That is, the tool list for tool calling. @@ -142,7 +140,7 @@ Inherit the `__base.large_language_model.LargeLanguageModel` base class and impl - `stream` (bool) Whether to output in a streaming manner, default is True - Streaming output returns Generator[[LLMResultChunk](#LLMResultChunk)], non-streaming output returns [LLMResult](#LLMResult). + Streaming output returns Generator\[[LLMResultChunk](#LLMResultChunk)\], non-streaming output returns [LLMResult](#LLMResult). - `user` (string) [optional] Unique identifier of the user @@ -150,7 +148,7 @@ Inherit the `__base.large_language_model.LargeLanguageModel` base class and impl - Returns - Streaming output returns Generator[[LLMResultChunk](#LLMResultChunk)], non-streaming output returns [LLMResult](#LLMResult). + Streaming output returns Generator\[[LLMResultChunk](#LLMResultChunk)\], non-streaming output returns [LLMResult](#LLMResult). - Pre-calculating Input Tokens @@ -187,7 +185,6 @@ Inherit the `__base.large_language_model.LargeLanguageModel` base class and impl When the provider supports adding custom LLMs, this method can be implemented to allow custom models to fetch model schema. The default return null. 
- ### TextEmbedding Inherit the `__base.text_embedding_model.TextEmbeddingModel` base class and implement the following interfaces: @@ -200,7 +197,7 @@ Inherit the `__base.text_embedding_model.TextEmbeddingModel` base class and impl -> TextEmbeddingResult: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param texts: texts to embed @@ -256,7 +253,7 @@ Inherit the `__base.rerank_model.RerankModel` base class and implement the follo -> RerankResult: """ Invoke rerank model - + :param model: model name :param credentials: model credentials :param query: search query @@ -302,7 +299,7 @@ Inherit the `__base.speech2text_model.Speech2TextModel` base class and implement def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param file: audio file @@ -339,7 +336,7 @@ Inherit the `__base.text2speech_model.Text2SpeechModel` base class and implement def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None): """ Invoke large language model - + :param model: model name :param credentials: model credentials :param content_text: text content to be translated @@ -381,7 +378,7 @@ Inherit the `__base.moderation_model.ModerationModel` base class and implement t -> bool: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param text: text to moderate @@ -408,11 +405,9 @@ Inherit the `__base.moderation_model.ModerationModel` base class and implement t False indicates that the input text is safe, True indicates otherwise. 
- - ## Entities -### PromptMessageRole +### PromptMessageRole Message role @@ -583,7 +578,7 @@ class PromptMessageTool(BaseModel): parameters: dict ``` ---- +______________________________________________________________________ ### LLMResult @@ -650,7 +645,7 @@ class LLMUsage(ModelUsage): latency: float # Request latency (s) ``` ---- +______________________________________________________________________ ### TextEmbeddingResult @@ -680,7 +675,7 @@ class EmbeddingUsage(ModelUsage): latency: float # Request latency (s) ``` ---- +______________________________________________________________________ ### RerankResult diff --git a/api/core/model_runtime/docs/en_US/predefined_model_scale_out.md b/api/core/model_runtime/docs/en_US/predefined_model_scale_out.md index a770ed157b..97968e9988 100644 --- a/api/core/model_runtime/docs/en_US/predefined_model_scale_out.md +++ b/api/core/model_runtime/docs/en_US/predefined_model_scale_out.md @@ -153,8 +153,11 @@ Runtime Errors: - `InvokeConnectionError` Connection error - `InvokeServerUnavailableError` Service provider unavailable + - `InvokeRateLimitError` Rate limit reached + - `InvokeAuthorizationError` Authorization failed + - `InvokeBadRequestError` Parameter error ```python diff --git a/api/core/model_runtime/docs/en_US/provider_scale_out.md b/api/core/model_runtime/docs/en_US/provider_scale_out.md index 07be5811d3..c38c7c0f0c 100644 --- a/api/core/model_runtime/docs/en_US/provider_scale_out.md +++ b/api/core/model_runtime/docs/en_US/provider_scale_out.md @@ -63,6 +63,7 @@ You can also refer to the YAML configuration information under other provider di ### Implementing Provider Code Providers need to inherit the `__base.model_provider.ModelProvider` base class and implement the `validate_provider_credentials` method for unified provider credential verification. For reference, see [AnthropicProvider](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/anthropic.py). 
+ > If the provider is the type of `customizable-model`, there is no need to implement the `validate_provider_credentials` method. ```python @@ -80,7 +81,7 @@ def validate_provider_credentials(self, credentials: dict) -> None: Of course, you can also preliminarily reserve the implementation of `validate_provider_credentials` and directly reuse it after the model credential verification method is implemented. ---- +______________________________________________________________________ ### Adding Models @@ -166,7 +167,7 @@ In `llm.py`, create an Anthropic LLM class, which we name `AnthropicLargeLanguag -> Union[LLMResult, Generator]: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param prompt_messages: prompt messages @@ -205,7 +206,7 @@ In `llm.py`, create an Anthropic LLM class, which we name `AnthropicLargeLanguag def validate_credentials(self, model: str, credentials: dict) -> None: """ Validate model credentials - + :param model: model name :param credentials: model credentials :return: @@ -232,7 +233,7 @@ In `llm.py`, create an Anthropic LLM class, which we name `AnthropicLargeLanguag The key is the error type thrown to the caller The value is the error type thrown by the model, which needs to be converted into a unified error type for the caller. 
- + :return: Invoke error mapping """ ``` diff --git a/api/core/model_runtime/docs/en_US/schema.md b/api/core/model_runtime/docs/en_US/schema.md index f819a4dbdc..1cea4127f4 100644 --- a/api/core/model_runtime/docs/en_US/schema.md +++ b/api/core/model_runtime/docs/en_US/schema.md @@ -28,8 +28,8 @@ - `url` (object) help link, i18n - `zh_Hans` (string) [optional] Chinese link - `en_US` (string) English link -- `supported_model_types` (array[[ModelType](#ModelType)]) Supported model types -- `configurate_methods` (array[[ConfigurateMethod](#ConfigurateMethod)]) Configuration methods +- `supported_model_types` (array\[[ModelType](#ModelType)\]) Supported model types +- `configurate_methods` (array\[[ConfigurateMethod](#ConfigurateMethod)\]) Configuration methods - `provider_credential_schema` ([ProviderCredentialSchema](#ProviderCredentialSchema)) Provider credential specification - `model_credential_schema` ([ModelCredentialSchema](#ModelCredentialSchema)) Model credential specification @@ -40,23 +40,23 @@ - `zh_Hans` (string) [optional] Chinese label name - `en_US` (string) English label name - `model_type` ([ModelType](#ModelType)) Model type -- `features` (array[[ModelFeature](#ModelFeature)]) [optional] Supported feature list +- `features` (array\[[ModelFeature](#ModelFeature)\]) [optional] Supported feature list - `model_properties` (object) Model properties - `mode` ([LLMMode](#LLMMode)) Mode (available for model type `llm`) - `context_size` (int) Context size (available for model types `llm`, `text-embedding`) - `max_chunks` (int) Maximum number of chunks (available for model types `text-embedding`, `moderation`) - `file_upload_limit` (int) Maximum file upload limit, in MB (available for model type `speech2text`) - `supported_file_extensions` (string) Supported file extension formats, e.g., mp3, mp4 (available for model type `speech2text`) - - `default_voice` (string) default voice, e.g.:alloy,echo,fable,onyx,nova,shimmer(available for model type `tts`) - - 
`voices` (list) List of available voice.(available for model type `tts`) - - `mode` (string) voice model.(available for model type `tts`) - - `name` (string) voice model display name.(available for model type `tts`) - - `language` (string) the voice model supports languages.(available for model type `tts`) - - `word_limit` (int) Single conversion word limit, paragraph-wise by default(available for model type `tts`) - - `audio_type` (string) Support audio file extension format, e.g.:mp3,wav(available for model type `tts`) - - `max_workers` (int) Number of concurrent workers supporting text and audio conversion(available for model type`tts`) + - `default_voice` (string) default voice, e.g.:alloy,echo,fable,onyx,nova,shimmer(available for model type `tts`) + - `voices` (list) List of available voice.(available for model type `tts`) + - `mode` (string) voice model.(available for model type `tts`) + - `name` (string) voice model display name.(available for model type `tts`) + - `language` (string) the voice model supports languages.(available for model type `tts`) + - `word_limit` (int) Single conversion word limit, paragraph-wise by default(available for model type `tts`) + - `audio_type` (string) Support audio file extension format, e.g.:mp3,wav(available for model type `tts`) + - `max_workers` (int) Number of concurrent workers supporting text and audio conversion(available for model type`tts`) - `max_characters_per_chunk` (int) Maximum characters per chunk (available for model type `moderation`) -- `parameter_rules` (array[[ParameterRule](#ParameterRule)]) [optional] Model invocation parameter rules +- `parameter_rules` (array\[[ParameterRule](#ParameterRule)\]) [optional] Model invocation parameter rules - `pricing` ([PriceConfig](#PriceConfig)) [optional] Pricing information - `deprecated` (bool) Whether deprecated. If deprecated, the model will no longer be displayed in the list, but those already configured can continue to be used. Default False. 
@@ -74,6 +74,7 @@ - `predefined-model` Predefined model Indicates that users can use the predefined models under the provider by configuring the unified provider credentials. + - `customizable-model` Customizable model Users need to add credential configuration for each model. @@ -103,6 +104,7 @@ ### ParameterRule - `name` (string) Actual model invocation parameter name + - `use_template` (string) [optional] Using template By default, 5 variable content configuration templates are preset: @@ -112,7 +114,7 @@ - `frequency_penalty` - `presence_penalty` - `max_tokens` - + In use_template, you can directly set the template variable name, which will use the default configuration in entities.defaults.PARAMETER_RULE_TEMPLATE No need to set any parameters other than `name` and `use_template`. If additional configuration parameters are set, they will override the default configuration. Refer to `openai/llm/gpt-3.5-turbo.yaml`. @@ -155,7 +157,7 @@ ### ProviderCredentialSchema -- `credential_form_schemas` (array[[CredentialFormSchema](#CredentialFormSchema)]) Credential form standard +- `credential_form_schemas` (array\[[CredentialFormSchema](#CredentialFormSchema)\]) Credential form standard ### ModelCredentialSchema @@ -166,7 +168,7 @@ - `placeholder` (object) Model prompt content - `en_US`(string) English - `zh_Hans`(string) [optional] Chinese -- `credential_form_schemas` (array[[CredentialFormSchema](#CredentialFormSchema)]) Credential form standard +- `credential_form_schemas` (array\[[CredentialFormSchema](#CredentialFormSchema)\]) Credential form standard ### CredentialFormSchema @@ -177,12 +179,12 @@ - `type` ([FormType](#FormType)) Form item type - `required` (bool) Whether required - `default`(string) Default value -- `options` (array[[FormOption](#FormOption)]) Specific property of form items of type `select` or `radio`, defining dropdown content +- `options` (array\[[FormOption](#FormOption)\]) Specific property of form items of type `select` or `radio`, defining 
dropdown content - `placeholder`(object) Specific property of form items of type `text-input`, placeholder content - `en_US`(string) English - `zh_Hans` (string) [optional] Chinese - `max_length` (int) Specific property of form items of type `text-input`, defining maximum input length, 0 for no limit. -- `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) Displayed when other form item values meet certain conditions, displayed always if empty. +- `show_on` (array\[[FormShowOnObject](#FormShowOnObject)\]) Displayed when other form item values meet certain conditions, displayed always if empty. ### FormType @@ -198,7 +200,7 @@ - `en_US`(string) English - `zh_Hans`(string) [optional] Chinese - `value` (string) Dropdown option value -- `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) Displayed when other form item values meet certain conditions, displayed always if empty. +- `show_on` (array\[[FormShowOnObject](#FormShowOnObject)\]) Displayed when other form item values meet certain conditions, displayed always if empty. 
### FormShowOnObject diff --git a/api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md b/api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md index 7d30655469..825f9349d7 100644 --- a/api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md +++ b/api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md @@ -10,7 +10,6 @@ ![Alt text](images/index/image-3.png) - 在前文中,我们已经知道了供应商无需实现`validate_provider_credential`,Runtime 会自行根据用户在此选择的模型类型和模型名称调用对应的模型层的`validate_credentials`来进行验证。 ### 编写供应商 yaml @@ -55,6 +54,7 @@ provider_credential_schema: 随后,我们需要思考在 Xinference 中定义一个模型需要哪些凭据 - 它支持三种不同的模型,因此,我们需要有`model_type`来指定这个模型的类型,它有三种类型,所以我们这么编写 + ```yaml provider_credential_schema: credential_form_schemas: @@ -76,7 +76,9 @@ provider_credential_schema: label: en_US: Rerank ``` + - 每一个模型都有自己的名称`model_name`,因此需要在这里定义 + ```yaml - variable: model_name type: text-input @@ -88,7 +90,9 @@ provider_credential_schema: zh_Hans: 填写模型名称 en_US: Input model name ``` + - 填写 Xinference 本地部署的地址 + ```yaml - variable: server_url label: @@ -100,7 +104,9 @@ provider_credential_schema: zh_Hans: 在此输入 Xinference 的服务器地址,如 https://example.com/xxx en_US: Enter the url of your Xinference, for example https://example.com/xxx ``` + - 每个模型都有唯一的 model_uid,因此需要在这里定义 + ```yaml - variable: model_uid label: @@ -112,6 +118,7 @@ provider_credential_schema: zh_Hans: 在此输入您的 Model UID en_US: Enter the model uid ``` + 现在,我们就完成了供应商的基础定义。 ### 编写模型代码 @@ -132,7 +139,7 @@ provider_credential_schema: -> Union[LLMResult, Generator]: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param prompt_messages: prompt messages @@ -189,7 +196,7 @@ provider_credential_schema: def validate_credentials(self, model: str, credentials: dict) -> None: """ Validate model credentials - + :param model: model name :param credentials: model credentials :return: @@ -197,78 +204,78 @@ provider_credential_schema: ``` - 模型参数 Schema - + 与自定义类型不同,由于没有在 
yaml 文件中定义一个模型支持哪些参数,因此,我们需要动态时间模型参数的 Schema。 - + 如 Xinference 支持`max_tokens` `temperature` `top_p` 这三个模型参数。 - + 但是有的供应商根据不同的模型支持不同的参数,如供应商`OpenLLM`支持`top_k`,但是并不是这个供应商提供的所有模型都支持`top_k`,我们这里举例 A 模型支持`top_k`,B 模型不支持`top_k`,那么我们需要在这里动态生成模型参数的 Schema,如下所示: - - ```python - def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]: - """ - used to define customizable model schema - """ - rules = [ - ParameterRule( - name='temperature', type=ParameterType.FLOAT, - use_template='temperature', - label=I18nObject( - zh_Hans='温度', en_US='Temperature' - ) - ), - ParameterRule( - name='top_p', type=ParameterType.FLOAT, - use_template='top_p', - label=I18nObject( - zh_Hans='Top P', en_US='Top P' - ) - ), - ParameterRule( - name='max_tokens', type=ParameterType.INT, - use_template='max_tokens', - min=1, - default=512, - label=I18nObject( - zh_Hans='最大生成长度', en_US='Max Tokens' - ) - ) - ] - # if model is A, add top_k to rules - if model == 'A': - rules.append( - ParameterRule( - name='top_k', type=ParameterType.INT, - use_template='top_k', - min=1, - default=50, - label=I18nObject( - zh_Hans='Top K', en_US='Top K' - ) - ) - ) + ```python + def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]: + """ + used to define customizable model schema + """ + rules = [ + ParameterRule( + name='temperature', type=ParameterType.FLOAT, + use_template='temperature', + label=I18nObject( + zh_Hans='温度', en_US='Temperature' + ) + ), + ParameterRule( + name='top_p', type=ParameterType.FLOAT, + use_template='top_p', + label=I18nObject( + zh_Hans='Top P', en_US='Top P' + ) + ), + ParameterRule( + name='max_tokens', type=ParameterType.INT, + use_template='max_tokens', + min=1, + default=512, + label=I18nObject( + zh_Hans='最大生成长度', en_US='Max Tokens' + ) + ) + ] - """ - some NOT IMPORTANT code here - """ + # if model is A, add top_k to rules + if model == 'A': + rules.append( + ParameterRule( + name='top_k', 
type=ParameterType.INT, + use_template='top_k', + min=1, + default=50, + label=I18nObject( + zh_Hans='Top K', en_US='Top K' + ) + ) + ) - entity = AIModelEntity( - model=model, - label=I18nObject( - en_US=model - ), - fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, - model_type=model_type, - model_properties={ - ModelPropertyKey.MODE: ModelType.LLM, - }, - parameter_rules=rules - ) + """ + some NOT IMPORTANT code here + """ + + entity = AIModelEntity( + model=model, + label=I18nObject( + en_US=model + ), + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_type=model_type, + model_properties={ + ModelPropertyKey.MODE: ModelType.LLM, + }, + parameter_rules=rules + ) + + return entity + ``` - return entity - ``` - - 调用异常错误映射表 当模型调用异常时需要映射到 Runtime 指定的 `InvokeError` 类型,方便 Dify 针对不同错误做不同后续处理。 @@ -278,7 +285,7 @@ provider_credential_schema: - `InvokeConnectionError` 调用连接错误 - `InvokeServerUnavailableError ` 调用服务方不可用 - `InvokeRateLimitError ` 调用达到限额 - - `InvokeAuthorizationError` 调用鉴权失败 + - `InvokeAuthorizationError` 调用鉴权失败 - `InvokeBadRequestError ` 调用传参有误 ```python @@ -289,7 +296,7 @@ provider_credential_schema: The key is the error type thrown to the caller The value is the error type thrown by the model, which needs to be converted into a unified error type for the caller. 
- + :return: Invoke error mapping """ ``` diff --git a/api/core/model_runtime/docs/zh_Hans/interfaces.md b/api/core/model_runtime/docs/zh_Hans/interfaces.md index 93a48cafb8..8eeeee9ff9 100644 --- a/api/core/model_runtime/docs/zh_Hans/interfaces.md +++ b/api/core/model_runtime/docs/zh_Hans/interfaces.md @@ -49,7 +49,7 @@ class XinferenceProvider(Provider): def validate_credentials(self, model: str, credentials: dict) -> None: """ Validate model credentials - + :param model: model name :param credentials: model credentials :return: @@ -75,7 +75,7 @@ class XinferenceProvider(Provider): - `InvokeConnectionError` 调用连接错误 - `InvokeServerUnavailableError ` 调用服务方不可用 - `InvokeRateLimitError ` 调用达到限额 - - `InvokeAuthorizationError` 调用鉴权失败 + - `InvokeAuthorizationError` 调用鉴权失败 - `InvokeBadRequestError ` 调用传参有误 ```python @@ -86,36 +86,36 @@ class XinferenceProvider(Provider): The key is the error type thrown to the caller The value is the error type thrown by the model, which needs to be converted into a unified error type for the caller. 
- + :return: Invoke error mapping """ ``` 也可以直接抛出对应 Errors,并做如下定义,这样在之后的调用中可以直接抛出`InvokeConnectionError`等异常。 - - ```python - @property - def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: - return { - InvokeConnectionError: [ - InvokeConnectionError - ], - InvokeServerUnavailableError: [ - InvokeServerUnavailableError - ], - InvokeRateLimitError: [ - InvokeRateLimitError - ], - InvokeAuthorizationError: [ - InvokeAuthorizationError - ], - InvokeBadRequestError: [ - InvokeBadRequestError - ], - } - ``` -​ 可参考 OpenAI `_invoke_error_mapping`。 + ```python + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + return { + InvokeConnectionError: [ + InvokeConnectionError + ], + InvokeServerUnavailableError: [ + InvokeServerUnavailableError + ], + InvokeRateLimitError: [ + InvokeRateLimitError + ], + InvokeAuthorizationError: [ + InvokeAuthorizationError + ], + InvokeBadRequestError: [ + InvokeBadRequestError + ], + } + ``` + +​ 可参考 OpenAI `_invoke_error_mapping`。 ### LLM @@ -133,7 +133,7 @@ class XinferenceProvider(Provider): -> Union[LLMResult, Generator]: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param prompt_messages: prompt messages @@ -151,38 +151,38 @@ class XinferenceProvider(Provider): - `model` (string) 模型名称 - `credentials` (object) 凭据信息 - + 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。 - - `prompt_messages` (array[[PromptMessage](#PromptMessage)]) Prompt 列表 - + - `prompt_messages` (array\[[PromptMessage](#PromptMessage)\]) Prompt 列表 + 若模型为 `Completion` 类型,则列表只需要传入一个 [UserPromptMessage](#UserPromptMessage) 元素即可; - + 若模型为 `Chat` 类型,需要根据消息不同传入 [SystemPromptMessage](#SystemPromptMessage), [UserPromptMessage](#UserPromptMessage), [AssistantPromptMessage](#AssistantPromptMessage), [ToolPromptMessage](#ToolPromptMessage) 元素列表 - `model_parameters` (object) 模型参数 - + 模型参数由模型 YAML 配置的 
`parameter_rules` 定义。 - - `tools` (array[[PromptMessageTool](#PromptMessageTool)]) [optional] 工具列表,等同于 `function calling` 中的 `function`。 - + - `tools` (array\[[PromptMessageTool](#PromptMessageTool)\]) [optional] 工具列表,等同于 `function calling` 中的 `function`。 + 即传入 tool calling 的工具列表。 - `stop` (array[string]) [optional] 停止序列 - + 模型返回将在停止序列定义的字符串之前停止输出。 - `stream` (bool) 是否流式输出,默认 True - - 流式输出返回 Generator[[LLMResultChunk](#LLMResultChunk)],非流式输出返回 [LLMResult](#LLMResult)。 + + 流式输出返回 Generator\[[LLMResultChunk](#LLMResultChunk)\],非流式输出返回 [LLMResult](#LLMResult)。 - `user` (string) [optional] 用户的唯一标识符 - + 可以帮助供应商监控和检测滥用行为。 - 返回 - 流式输出返回 Generator[[LLMResultChunk](#LLMResultChunk)],非流式输出返回 [LLMResult](#LLMResult)。 + 流式输出返回 Generator\[[LLMResultChunk](#LLMResultChunk)\],非流式输出返回 [LLMResult](#LLMResult)。 - 预计算输入 tokens @@ -236,7 +236,7 @@ class XinferenceProvider(Provider): -> TextEmbeddingResult: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param texts: texts to embed @@ -294,7 +294,7 @@ class XinferenceProvider(Provider): -> RerankResult: """ Invoke rerank model - + :param model: model name :param credentials: model credentials :param query: search query @@ -342,7 +342,7 @@ class XinferenceProvider(Provider): -> str: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param file: audio file @@ -379,7 +379,7 @@ class XinferenceProvider(Provider): def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None): """ Invoke large language model - + :param model: model name :param credentials: model credentials :param content_text: text content to be translated @@ -421,7 +421,7 @@ class XinferenceProvider(Provider): -> bool: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param text: text to moderate @@ -448,11 +448,9 @@ class XinferenceProvider(Provider): False 代表传入的文本安全,True 
则反之。 - - ## 实体 -### PromptMessageRole +### PromptMessageRole 消息角色 @@ -623,7 +621,7 @@ class PromptMessageTool(BaseModel): parameters: dict # 工具参数 dict ``` ---- +______________________________________________________________________ ### LLMResult @@ -690,7 +688,7 @@ class LLMUsage(ModelUsage): latency: float # 请求耗时 (s) ``` ---- +______________________________________________________________________ ### TextEmbeddingResult @@ -720,7 +718,7 @@ class EmbeddingUsage(ModelUsage): latency: float # 请求耗时 (s) ``` ---- +______________________________________________________________________ ### RerankResult diff --git a/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md b/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md index 80e7982e9f..cd4de51ef7 100644 --- a/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md +++ b/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md @@ -62,7 +62,7 @@ pricing: # 价格信息 建议将所有模型配置都准备完毕后再开始模型代码的实现。 -同样,也可以参考 `model_providers` 目录下其他供应商对应模型类型目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#aimodelentity)。 +同样,也可以参考 `model_providers` 目录下其他供应商对应模型类型目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#aimodelentity)。 ### 实现模型调用代码 @@ -82,7 +82,7 @@ pricing: # 价格信息 -> Union[LLMResult, Generator]: """ Invoke large language model - + :param model: model name :param credentials: model credentials :param prompt_messages: prompt messages @@ -137,7 +137,7 @@ pricing: # 价格信息 def validate_credentials(self, model: str, credentials: dict) -> None: """ Validate model credentials - + :param model: model name :param credentials: model credentials :return: @@ -153,7 +153,7 @@ pricing: # 价格信息 - `InvokeConnectionError` 调用连接错误 - `InvokeServerUnavailableError ` 调用服务方不可用 - `InvokeRateLimitError ` 调用达到限额 - - `InvokeAuthorizationError` 调用鉴权失败 + - `InvokeAuthorizationError` 调用鉴权失败 - `InvokeBadRequestError ` 调用传参有误 ```python @@ -164,7 +164,7 @@ pricing: # 价格信息 The key is the error type thrown to the caller The value is the 
error type thrown by the model, which needs to be converted into a unified error type for the caller. - + :return: Invoke error mapping """ ``` diff --git a/api/core/model_runtime/docs/zh_Hans/provider_scale_out.md b/api/core/model_runtime/docs/zh_Hans/provider_scale_out.md index 2048b506ac..de48b0d11a 100644 --- a/api/core/model_runtime/docs/zh_Hans/provider_scale_out.md +++ b/api/core/model_runtime/docs/zh_Hans/provider_scale_out.md @@ -5,7 +5,7 @@ - `predefined-model ` 预定义模型 表示用户只需要配置统一的供应商凭据即可使用供应商下的预定义模型。 - + - `customizable-model` 自定义模型 用户需要新增每个模型的凭据配置,如 Xinference,它同时支持 LLM 和 Text Embedding,但是每个模型都有唯一的**model_uid**,如果想要将两者同时接入,就需要为每个模型配置一个**model_uid**。 @@ -23,9 +23,11 @@ ### 介绍 #### 名词解释 - - `module`: 一个`module`即为一个 Python Package,或者通俗一点,称为一个文件夹,里面包含了一个`__init__.py`文件,以及其他的`.py`文件。 + +- `module`: 一个`module`即为一个 Python Package,或者通俗一点,称为一个文件夹,里面包含了一个`__init__.py`文件,以及其他的`.py`文件。 #### 步骤 + 新增一个供应商主要分为几步,这里简单列出,帮助大家有一个大概的认识,具体的步骤会在下面详细介绍。 - 创建供应商 yaml 文件,根据[ProviderSchema](./schema.md#provider)编写 @@ -117,7 +119,7 @@ model_credential_schema: en_US: Enter your API Base ``` -也可以参考 `model_providers` 目录下其他供应商目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#provider)。 +也可以参考 `model_providers` 目录下其他供应商目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#provider)。 #### 实现供应商代码 @@ -155,12 +157,14 @@ def validate_provider_credentials(self, credentials: dict) -> None: #### 增加模型 #### [增加预定义模型 👈🏻](./predefined_model_scale_out.md) + 对于预定义模型,我们可以通过简单定义一个 yaml,并通过实现调用代码来接入。 #### [增加自定义模型 👈🏻](./customizable_model_scale_out.md) + 对于自定义模型,我们只需要实现调用代码即可接入,但是它需要处理的参数可能会更加复杂。 ---- +______________________________________________________________________ ### 测试 diff --git a/api/core/model_runtime/docs/zh_Hans/schema.md b/api/core/model_runtime/docs/zh_Hans/schema.md index 681f49c435..e68cb500e1 100644 --- a/api/core/model_runtime/docs/zh_Hans/schema.md +++ b/api/core/model_runtime/docs/zh_Hans/schema.md @@ -16,9 +16,9 @@ - `zh_Hans` (string) [optional] 中文描述 - `en_US` (string) 英文描述 - 
`icon_small` (string) [optional] 供应商小 ICON,存储在对应供应商实现目录下的 `_assets` 目录,中英文策略同 `label` - - `zh_Hans` (string) [optional] 中文 ICON + - `zh_Hans` (string) [optional] 中文 ICON - `en_US` (string) 英文 ICON -- `icon_large` (string) [optional] 供应商大 ICON,存储在对应供应商实现目录下的 _assets 目录,中英文策略同 label +- `icon_large` (string) [optional] 供应商大 ICON,存储在对应供应商实现目录下的 \_assets 目录,中英文策略同 label - `zh_Hans `(string) [optional] 中文 ICON - `en_US` (string) 英文 ICON - `background` (string) [optional] 背景颜色色值,例:#FFFFFF,为空则展示前端默认色值。 @@ -29,8 +29,8 @@ - `url` (object) 帮助链接,i18n - `zh_Hans` (string) [optional] 中文链接 - `en_US` (string) 英文链接 -- `supported_model_types` (array[[ModelType](#ModelType)]) 支持的模型类型 -- `configurate_methods` (array[[ConfigurateMethod](#ConfigurateMethod)]) 配置方式 +- `supported_model_types` (array\[[ModelType](#ModelType)\]) 支持的模型类型 +- `configurate_methods` (array\[[ConfigurateMethod](#ConfigurateMethod)\]) 配置方式 - `provider_credential_schema` ([ProviderCredentialSchema](#ProviderCredentialSchema)) 供应商凭据规格 - `model_credential_schema` ([ModelCredentialSchema](#ModelCredentialSchema)) 模型凭据规格 @@ -41,23 +41,23 @@ - `zh_Hans `(string) [optional] 中文标签名 - `en_US` (string) 英文标签名 - `model_type` ([ModelType](#ModelType)) 模型类型 -- `features` (array[[ModelFeature](#ModelFeature)]) [optional] 支持功能列表 +- `features` (array\[[ModelFeature](#ModelFeature)\]) [optional] 支持功能列表 - `model_properties` (object) 模型属性 - `mode` ([LLMMode](#LLMMode)) 模式 (模型类型 `llm` 可用) - `context_size` (int) 上下文大小 (模型类型 `llm` `text-embedding` 可用) - `max_chunks` (int) 最大分块数量 (模型类型 `text-embedding ` `moderation` 可用) - `file_upload_limit` (int) 文件最大上传限制,单位:MB。(模型类型 `speech2text` 可用) - - `supported_file_extensions` (string) 支持文件扩展格式,如:mp3,mp4(模型类型 `speech2text` 可用) - - `default_voice` (string) 缺省音色,必选:alloy,echo,fable,onyx,nova,shimmer(模型类型 `tts` 可用) - - `voices` (list) 可选音色列表。 - - `mode` (string) 音色模型。(模型类型 `tts` 可用) - - `name` (string) 音色模型显示名称。(模型类型 `tts` 可用) - - `language` (string) 音色模型支持语言。(模型类型 `tts` 可用) - - `word_limit` (int) 
单次转换字数限制,默认按段落分段(模型类型 `tts` 可用) - - `audio_type` (string) 支持音频文件扩展格式,如:mp3,wav(模型类型 `tts` 可用) - - `max_workers` (int) 支持文字音频转换并发任务数(模型类型 `tts` 可用) - - `max_characters_per_chunk` (int) 每块最大字符数 (模型类型 `moderation` 可用) -- `parameter_rules` (array[[ParameterRule](#ParameterRule)]) [optional] 模型调用参数规则 + - `supported_file_extensions` (string) 支持文件扩展格式,如:mp3,mp4(模型类型 `speech2text` 可用) + - `default_voice` (string) 缺省音色,必选:alloy,echo,fable,onyx,nova,shimmer(模型类型 `tts` 可用) + - `voices` (list) 可选音色列表。 + - `mode` (string) 音色模型。(模型类型 `tts` 可用) + - `name` (string) 音色模型显示名称。(模型类型 `tts` 可用) + - `language` (string) 音色模型支持语言。(模型类型 `tts` 可用) + - `word_limit` (int) 单次转换字数限制,默认按段落分段(模型类型 `tts` 可用) + - `audio_type` (string) 支持音频文件扩展格式,如:mp3,wav(模型类型 `tts` 可用) + - `max_workers` (int) 支持文字音频转换并发任务数(模型类型 `tts` 可用) + - `max_characters_per_chunk` (int) 每块最大字符数 (模型类型 `moderation` 可用) +- `parameter_rules` (array\[[ParameterRule](#ParameterRule)\]) [optional] 模型调用参数规则 - `pricing` ([PriceConfig](#PriceConfig)) [optional] 价格信息 - `deprecated` (bool) 是否废弃。若废弃,模型列表将不再展示,但已经配置的可以继续使用,默认 False。 @@ -75,6 +75,7 @@ - `predefined-model ` 预定义模型 表示用户只需要配置统一的供应商凭据即可使用供应商下的预定义模型。 + - `customizable-model` 自定义模型 用户需要新增每个模型的凭据配置。 @@ -106,7 +107,7 @@ - `name` (string) 调用模型实际参数名 - `use_template` (string) [optional] 使用模板 - + 默认预置了 5 种变量内容配置模板: - `temperature` @@ -114,7 +115,7 @@ - `frequency_penalty` - `presence_penalty` - `max_tokens` - + 可在 use_template 中直接设置模板变量名,将会使用 entities.defaults.PARAMETER_RULE_TEMPLATE 中的默认配置 不用设置除 `name` 和 `use_template` 之外的所有参数,若设置了额外的配置参数,将覆盖默认配置。 可参考 `openai/llm/gpt-3.5-turbo.yaml`。 @@ -157,7 +158,7 @@ ### ProviderCredentialSchema -- `credential_form_schemas` (array[[CredentialFormSchema](#CredentialFormSchema)]) 凭据表单规范 +- `credential_form_schemas` (array\[[CredentialFormSchema](#CredentialFormSchema)\]) 凭据表单规范 ### ModelCredentialSchema @@ -168,7 +169,7 @@ - `placeholder` (object) 模型提示内容 - `en_US`(string) 英文 - `zh_Hans`(string) [optional] 中文 -- `credential_form_schemas` 
(array[[CredentialFormSchema](#CredentialFormSchema)]) 凭据表单规范 +- `credential_form_schemas` (array\[[CredentialFormSchema](#CredentialFormSchema)\]) 凭据表单规范 ### CredentialFormSchema @@ -179,12 +180,12 @@ - `type` ([FormType](#FormType)) 表单项类型 - `required` (bool) 是否必填 - `default`(string) 默认值 -- `options` (array[[FormOption](#FormOption)]) 表单项为 `select` 或 `radio` 专有属性,定义下拉内容 +- `options` (array\[[FormOption](#FormOption)\]) 表单项为 `select` 或 `radio` 专有属性,定义下拉内容 - `placeholder`(object) 表单项为 `text-input `专有属性,表单项 PlaceHolder - `en_US`(string) 英文 - `zh_Hans` (string) [optional] 中文 - `max_length` (int) 表单项为`text-input`专有属性,定义输入最大长度,0 为不限制。 -- `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) 当其他表单项值符合条件时显示,为空则始终显示。 +- `show_on` (array\[[FormShowOnObject](#FormShowOnObject)\]) 当其他表单项值符合条件时显示,为空则始终显示。 ### FormType @@ -200,7 +201,7 @@ - `en_US`(string) 英文 - `zh_Hans`(string) [optional] 中文 - `value` (string) 下拉选项值 -- `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) 当其他表单项值符合条件时显示,为空则始终显示。 +- `show_on` (array\[[FormShowOnObject](#FormShowOnObject)\]) 当其他表单项值符合条件时显示,为空则始终显示。 ### FormShowOnObject diff --git a/api/core/model_runtime/entities/llm_entities.py b/api/core/model_runtime/entities/llm_entities.py index ace2c1f770..dc6032e405 100644 --- a/api/core/model_runtime/entities/llm_entities.py +++ b/api/core/model_runtime/entities/llm_entities.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from collections.abc import Mapping, Sequence from decimal import Decimal from enum import StrEnum -from typing import Any, Optional +from typing import Any, Optional, TypedDict, Union from pydantic import BaseModel, Field @@ -18,6 +20,26 @@ class LLMMode(StrEnum): CHAT = "chat" +class LLMUsageMetadata(TypedDict, total=False): + """ + TypedDict for LLM usage metadata. + All fields are optional. 
+ """ + + prompt_tokens: int + completion_tokens: int + total_tokens: int + prompt_unit_price: Union[float, str] + completion_unit_price: Union[float, str] + total_price: Union[float, str] + currency: str + prompt_price_unit: Union[float, str] + completion_price_unit: Union[float, str] + prompt_price: Union[float, str] + completion_price: Union[float, str] + latency: float + + class LLMUsage(ModelUsage): """ Model class for llm usage. @@ -54,23 +76,27 @@ class LLMUsage(ModelUsage): ) @classmethod - def from_metadata(cls, metadata: dict) -> "LLMUsage": + def from_metadata(cls, metadata: LLMUsageMetadata) -> LLMUsage: """ Create LLMUsage instance from metadata dictionary with default values. Args: - metadata: Dictionary containing usage metadata + metadata: TypedDict containing usage metadata Returns: LLMUsage instance with values from metadata or defaults """ - total_tokens = metadata.get("total_tokens", 0) + prompt_tokens = metadata.get("prompt_tokens", 0) completion_tokens = metadata.get("completion_tokens", 0) - if total_tokens > 0 and completion_tokens == 0: - completion_tokens = total_tokens + total_tokens = metadata.get("total_tokens", 0) + + # If total_tokens is not provided but prompt and completion tokens are, + # calculate total_tokens + if total_tokens == 0 and (prompt_tokens > 0 or completion_tokens > 0): + total_tokens = prompt_tokens + completion_tokens return cls( - prompt_tokens=metadata.get("prompt_tokens", 0), + prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens, prompt_unit_price=Decimal(str(metadata.get("prompt_unit_price", 0))), @@ -84,7 +110,7 @@ class LLMUsage(ModelUsage): latency=metadata.get("latency", 0.0), ) - def plus(self, other: "LLMUsage") -> "LLMUsage": + def plus(self, other: LLMUsage) -> LLMUsage: """ Add two LLMUsage instances together. 
@@ -109,7 +135,7 @@ class LLMUsage(ModelUsage): latency=self.latency + other.latency, ) - def __add__(self, other: "LLMUsage") -> "LLMUsage": + def __add__(self, other: LLMUsage) -> LLMUsage: """ Overload the + operator to add two LLMUsage instances. diff --git a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenizer.py b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenizer.py index b7db0b78bc..68d30112d9 100644 --- a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenizer.py +++ b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenizer.py @@ -1,10 +1,10 @@ import logging from threading import Lock -from typing import Any +from typing import Any, Optional logger = logging.getLogger(__name__) -_tokenizer: Any = None +_tokenizer: Optional[Any] = None _lock = Lock() diff --git a/api/core/moderation/api/api.py b/api/core/moderation/api/api.py index 332381555b..af51b72cd5 100644 --- a/api/core/moderation/api/api.py +++ b/api/core/moderation/api/api.py @@ -1,6 +1,6 @@ from typing import Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field from core.extension.api_based_extension_requestor import APIBasedExtensionPoint, APIBasedExtensionRequestor from core.helper.encrypter import decrypt_token @@ -11,7 +11,7 @@ from models.api_based_extension import APIBasedExtension class ModerationInputParams(BaseModel): app_id: str = "" - inputs: dict = {} + inputs: dict = Field(default_factory=dict) query: str = "" diff --git a/api/core/moderation/base.py b/api/core/moderation/base.py index d8c392d097..99bd0049c0 100644 --- a/api/core/moderation/base.py +++ b/api/core/moderation/base.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from enum import Enum from typing import Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field from core.extension.extensible import Extensible, ExtensionModule @@ -16,7 +16,7 @@ class ModerationInputsResult(BaseModel): flagged: bool 
= False action: ModerationAction preset_response: str = "" - inputs: dict = {} + inputs: dict = Field(default_factory=dict) query: str = "" diff --git a/api/core/prompt/advanced_prompt_transform.py b/api/core/prompt/advanced_prompt_transform.py index 0f0fe65f27..16c145f936 100644 --- a/api/core/prompt/advanced_prompt_transform.py +++ b/api/core/prompt/advanced_prompt_transform.py @@ -125,11 +125,11 @@ class AdvancedPromptTransform(PromptTransform): if files: prompt_message_contents: list[PromptMessageContentUnionTypes] = [] - prompt_message_contents.append(TextPromptMessageContent(data=prompt)) for file in files: prompt_message_contents.append( file_manager.to_prompt_message_content(file, image_detail_config=image_detail_config) ) + prompt_message_contents.append(TextPromptMessageContent(data=prompt)) prompt_messages.append(UserPromptMessage(content=prompt_message_contents)) else: @@ -196,16 +196,17 @@ class AdvancedPromptTransform(PromptTransform): query = parser.format(prompt_inputs) + prompt_message_contents: list[PromptMessageContentUnionTypes] = [] if memory and memory_config: prompt_messages = self._append_chat_histories(memory, memory_config, prompt_messages, model_config) if files and query is not None: - prompt_message_contents: list[PromptMessageContentUnionTypes] = [] - prompt_message_contents.append(TextPromptMessageContent(data=query)) for file in files: prompt_message_contents.append( file_manager.to_prompt_message_content(file, image_detail_config=image_detail_config) ) + prompt_message_contents.append(TextPromptMessageContent(data=query)) + prompt_messages.append(UserPromptMessage(content=prompt_message_contents)) else: prompt_messages.append(UserPromptMessage(content=query)) @@ -215,27 +216,27 @@ class AdvancedPromptTransform(PromptTransform): last_message = prompt_messages[-1] if prompt_messages else None if last_message and last_message.role == PromptMessageRole.USER: # get last user message content and add files - prompt_message_contents = 
[TextPromptMessageContent(data=cast(str, last_message.content))] for file in files: prompt_message_contents.append( file_manager.to_prompt_message_content(file, image_detail_config=image_detail_config) ) + prompt_message_contents.append(TextPromptMessageContent(data=cast(str, last_message.content))) last_message.content = prompt_message_contents else: - prompt_message_contents = [TextPromptMessageContent(data="")] # not for query for file in files: prompt_message_contents.append( file_manager.to_prompt_message_content(file, image_detail_config=image_detail_config) ) + prompt_message_contents.append(TextPromptMessageContent(data="")) prompt_messages.append(UserPromptMessage(content=prompt_message_contents)) else: - prompt_message_contents = [TextPromptMessageContent(data=query)] for file in files: prompt_message_contents.append( file_manager.to_prompt_message_content(file, image_detail_config=image_detail_config) ) + prompt_message_contents.append(TextPromptMessageContent(data=query)) prompt_messages.append(UserPromptMessage(content=prompt_message_contents)) elif query: diff --git a/api/core/prompt/simple_prompt_transform.py b/api/core/prompt/simple_prompt_transform.py index e19c6419ca..13f4163d80 100644 --- a/api/core/prompt/simple_prompt_transform.py +++ b/api/core/prompt/simple_prompt_transform.py @@ -265,11 +265,11 @@ class SimplePromptTransform(PromptTransform): ) -> UserPromptMessage: if files: prompt_message_contents: list[PromptMessageContentUnionTypes] = [] - prompt_message_contents.append(TextPromptMessageContent(data=prompt)) for file in files: prompt_message_contents.append( file_manager.to_prompt_message_content(file, image_detail_config=image_detail_config) ) + prompt_message_contents.append(TextPromptMessageContent(data=prompt)) prompt_message = UserPromptMessage(content=prompt_message_contents) else: diff --git a/api/core/provider_manager.py b/api/core/provider_manager.py index 9250497d29..39fec951bb 100644 --- a/api/core/provider_manager.py +++ 
b/api/core/provider_manager.py @@ -1,3 +1,4 @@ +import contextlib import json from collections import defaultdict from json import JSONDecodeError @@ -624,14 +625,12 @@ class ProviderManager: for variable in provider_credential_secret_variables: if variable in provider_credentials: - try: + with contextlib.suppress(ValueError): provider_credentials[variable] = encrypter.decrypt_token_with_decoding( provider_credentials.get(variable) or "", # type: ignore self.decoding_rsa_key, self.decoding_cipher_rsa, ) - except ValueError: - pass # cache provider credentials provider_credentials_cache.set(credentials=provider_credentials) @@ -672,14 +671,12 @@ class ProviderManager: for variable in model_credential_secret_variables: if variable in provider_model_credentials: - try: + with contextlib.suppress(ValueError): provider_model_credentials[variable] = encrypter.decrypt_token_with_decoding( provider_model_credentials.get(variable), self.decoding_rsa_key, self.decoding_cipher_rsa, ) - except ValueError: - pass # cache provider model credentials provider_model_credentials_cache.set(credentials=provider_model_credentials) diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py index 14481b1f10..bb61b71bb1 100644 --- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py +++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py @@ -98,18 +98,26 @@ class AnalyticdbVectorBySql: try: cur.execute(f"CREATE DATABASE {self.databaseName}") except Exception as e: - if "already exists" in str(e): - return - raise e + if "already exists" not in str(e): + raise e finally: cur.close() conn.close() self.pool = self._create_connection_pool() with self._get_cursor() as cur: + conn = cur.connection + try: + cur.execute("CREATE EXTENSION IF NOT EXISTS zhparser;") + except Exception as e: + conn.rollback() + raise RuntimeError( + "Failed to create zhparser extension. 
Please ensure it is available in your AnalyticDB." + ) from e try: cur.execute("CREATE TEXT SEARCH CONFIGURATION zh_cn (PARSER = zhparser)") cur.execute("ALTER TEXT SEARCH CONFIGURATION zh_cn ADD MAPPING FOR n,v,a,i,e,l,x WITH simple") except Exception as e: + conn.rollback() if "already exists" not in str(e): raise e cur.execute( diff --git a/api/core/rag/datasource/vdb/clickzetta/README.md b/api/core/rag/datasource/vdb/clickzetta/README.md index 2ee3e657d3..969d4e40a0 100644 --- a/api/core/rag/datasource/vdb/clickzetta/README.md +++ b/api/core/rag/datasource/vdb/clickzetta/README.md @@ -92,17 +92,21 @@ Clickzetta supports advanced full-text search with multiple analyzers: ### Analyzer Types 1. **keyword**: No tokenization, treats the entire string as a single token + - Best for: Exact matching, IDs, codes -2. **english**: Designed for English text +1. **english**: Designed for English text + - Features: Recognizes ASCII letters and numbers, converts to lowercase - Best for: English content -3. **chinese**: Chinese text tokenizer +1. **chinese**: Chinese text tokenizer + - Features: Recognizes Chinese and English characters, removes punctuation - Best for: Chinese or mixed Chinese-English content -4. **unicode**: Multi-language tokenizer based on Unicode +1. **unicode**: Multi-language tokenizer based on Unicode + - Features: Recognizes text boundaries in multiple languages - Best for: Multi-language content @@ -124,21 +128,25 @@ Clickzetta supports advanced full-text search with multiple analyzers: ### Vector Search 1. **Adjust exploration factor** for accuracy vs speed trade-off: + ```sql SET cz.vector.index.search.ef=64; ``` -2. **Use appropriate distance functions**: +1. **Use appropriate distance functions**: + - `cosine_distance`: Best for normalized embeddings (e.g., from language models) - `l2_distance`: Best for raw feature vectors ### Full-Text Search 1. 
**Choose the right analyzer**: + - Use `keyword` for exact matching - Use language-specific analyzers for better tokenization -2. **Combine with vector search**: +1. **Combine with vector search**: + - Pre-filter with full-text search for better performance - Use hybrid search for improved relevance @@ -147,27 +155,30 @@ Clickzetta supports advanced full-text search with multiple analyzers: ### Connection Issues 1. Verify all 7 required configuration parameters are set -2. Check network connectivity to Clickzetta service -3. Ensure the user has proper permissions on the schema +1. Check network connectivity to Clickzetta service +1. Ensure the user has proper permissions on the schema ### Search Performance 1. Verify vector index exists: + ```sql SHOW INDEX FROM .; ``` -2. Check if vector index is being used: +1. Check if vector index is being used: + ```sql EXPLAIN SELECT ... WHERE l2_distance(...) < threshold; ``` + Look for `vector_index_search_type` in the execution plan. ### Full-Text Search Not Working 1. Verify inverted index is created -2. Check analyzer configuration matches your content language -3. Use `TOKENIZE()` function to test tokenization: +1. Check analyzer configuration matches your content language +1. Use `TOKENIZE()` function to test tokenization: ```sql SELECT TOKENIZE('your text', map('analyzer', 'chinese', 'mode', 'smart')); ``` @@ -175,13 +186,13 @@ Clickzetta supports advanced full-text search with multiple analyzers: ## Limitations 1. Vector operations don't support `ORDER BY` or `GROUP BY` directly on vector columns -2. Full-text search relevance scores are not provided by Clickzetta -3. Inverted index creation may fail for very large existing tables (continue without error) -4. Index naming constraints: +1. Full-text search relevance scores are not provided by Clickzetta +1. Inverted index creation may fail for very large existing tables (continue without error) +1. 
Index naming constraints: - Index names must be unique within a schema - Only one vector index can be created per column - The implementation uses timestamps to ensure unique index names -5. A column can only have one vector index at a time +1. A column can only have one vector index at a time ## References diff --git a/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py b/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py index 1059b855a2..6e8077ffd9 100644 --- a/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py +++ b/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py @@ -1,3 +1,4 @@ +import contextlib import json import logging import queue @@ -214,10 +215,8 @@ class ClickzettaConnectionPool: return connection else: # Connection expired or invalid, close it - try: + with contextlib.suppress(Exception): connection.close() - except Exception: - pass # No valid connection found, create new one return self._create_connection(config) @@ -228,10 +227,8 @@ class ClickzettaConnectionPool: if config_key not in self._pool_locks: # Pool was cleaned up, just close the connection - try: + with contextlib.suppress(Exception): connection.close() - except Exception: - pass return with self._pool_locks[config_key]: @@ -243,10 +240,8 @@ class ClickzettaConnectionPool: logger.debug("Returned ClickZetta connection to pool") else: # Pool full or connection invalid, close it - try: + with contextlib.suppress(Exception): connection.close() - except Exception: - pass def _cleanup_expired_connections(self) -> None: """Clean up expired connections from all pools.""" @@ -265,10 +260,8 @@ class ClickzettaConnectionPool: if current_time - last_used < self._connection_timeout: valid_connections.append((connection, last_used)) else: - try: + with contextlib.suppress(Exception): connection.close() - except Exception: - pass self._pools[config_key] = valid_connections @@ -299,10 +292,8 @@ class ClickzettaConnectionPool: with self._pool_locks[config_key]: pool 
= self._pools[config_key] for connection, _ in pool: - try: + with contextlib.suppress(Exception): connection.close() - except Exception: - pass pool.clear() diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index d64f366e0e..112f07844c 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -101,7 +101,7 @@ class MilvusVector(BaseVector): if "Zilliz Cloud" in milvus_version: return True # For standard Milvus installations, check version number - return version.parse(milvus_version).base_version >= version.parse("2.5.0").base_version + return version.parse(milvus_version) >= version.parse("2.5.0") except Exception as e: logger.warning("Failed to check Milvus version: %s. Disabling hybrid search.", str(e)) return False diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index d6dfe967d7..556d03940e 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -4,8 +4,8 @@ import math from typing import Any from pydantic import BaseModel, model_validator -from pyobvector import VECTOR, ObVecClient # type: ignore -from sqlalchemy import JSON, Column, String, func +from pyobvector import VECTOR, FtsIndexParam, FtsParser, ObVecClient, l2_distance # type: ignore +from sqlalchemy import JSON, Column, String from sqlalchemy.dialects.mysql import LONGTEXT from configs import dify_config @@ -119,14 +119,21 @@ class OceanBaseVector(BaseVector): ) try: if self._hybrid_search_enabled: - self._client.perform_raw_text_sql(f"""ALTER TABLE {self._collection_name} - ADD FULLTEXT INDEX fulltext_index_for_col_text (text) WITH PARSER ik""") + self._client.create_fts_idx_with_fts_index_param( + table_name=self._collection_name, + fts_idx_param=FtsIndexParam( + 
index_name="fulltext_index_for_col_text", + field_names=["text"], + parser_type=FtsParser.IK, + ), + ) except Exception as e: raise Exception( "Failed to add fulltext index to the target table, your OceanBase version must be 4.3.5.1 or above " + "to support fulltext index and vector index in the same table", e, ) + self._client.refresh_metadata([self._collection_name]) redis_client.set(collection_exist_cache_key, 1, ex=3600) def _check_hybrid_search_support(self) -> bool: @@ -145,7 +152,7 @@ class OceanBaseVector(BaseVector): ob_full_version = result.fetchone()[0] ob_version = ob_full_version.split()[1] logger.debug("Current OceanBase version is %s", ob_version) - return version.parse(ob_version).base_version >= version.parse("4.3.5.1").base_version + return version.parse(ob_version) >= version.parse("4.3.5.1") except Exception as e: logger.warning("Failed to check OceanBase version: %s. Disabling hybrid search.", str(e)) return False @@ -252,7 +259,7 @@ class OceanBaseVector(BaseVector): vec_column_name="vector", vec_data=query_vector, topk=topk, - distance_func=func.l2_distance, + distance_func=l2_distance, output_column_names=["text", "metadata"], with_dist=True, where_clause=_where_clause, diff --git a/api/core/rag/extractor/excel_extractor.py b/api/core/rag/extractor/excel_extractor.py index a3b35458df..7cc554c74d 100644 --- a/api/core/rag/extractor/excel_extractor.py +++ b/api/core/rag/extractor/excel_extractor.py @@ -34,9 +34,8 @@ class ExcelExtractor(BaseExtractor): for sheet_name in wb.sheetnames: sheet = wb[sheet_name] data = sheet.values - try: - cols = next(data) - except StopIteration: + cols = next(data, None) + if cols is None: continue df = pd.DataFrame(data, columns=cols) diff --git a/api/core/rag/extractor/pdf_extractor.py b/api/core/rag/extractor/pdf_extractor.py index 04033dec3f..7dfe2e357c 100644 --- a/api/core/rag/extractor/pdf_extractor.py +++ b/api/core/rag/extractor/pdf_extractor.py @@ -1,5 +1,6 @@ """Abstract interface for document loader 
implementations.""" +import contextlib from collections.abc import Iterator from typing import Optional, cast @@ -25,12 +26,10 @@ class PdfExtractor(BaseExtractor): def extract(self) -> list[Document]: plaintext_file_exists = False if self._file_cache_key: - try: + with contextlib.suppress(FileNotFoundError): text = cast(bytes, storage.load(self._file_cache_key)).decode("utf-8") plaintext_file_exists = True return [Document(page_content=text)] - except FileNotFoundError: - pass documents = list(self.load()) text_list = [] for document in documents: diff --git a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py index f1fa5dde5c..856a9bce18 100644 --- a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py @@ -1,4 +1,5 @@ import base64 +import contextlib import logging from typing import Optional @@ -33,7 +34,7 @@ class UnstructuredEmailExtractor(BaseExtractor): elements = partition_email(filename=self._file_path) # noinspection PyBroadException - try: + with contextlib.suppress(Exception): for element in elements: element_text = element.text.strip() @@ -43,8 +44,6 @@ class UnstructuredEmailExtractor(BaseExtractor): element_decode = base64.b64decode(element_text) soup = BeautifulSoup(element_decode.decode("utf-8"), "html.parser") element.text = soup.get_text() - except Exception: - pass from unstructured.chunking.title import chunk_by_title diff --git a/api/core/rag/extractor/watercrawl/provider.py b/api/core/rag/extractor/watercrawl/provider.py index 21fbb2100f..da03fc67a6 100644 --- a/api/core/rag/extractor/watercrawl/provider.py +++ b/api/core/rag/extractor/watercrawl/provider.py @@ -1,6 +1,6 @@ from collections.abc import Generator from datetime import datetime -from typing import Any +from typing import Any, Optional from core.rag.extractor.watercrawl.client import WaterCrawlAPIClient @@ -9,7 +9,7 
@@ class WaterCrawlProvider: def __init__(self, api_key, base_url: str | None = None): self.client = WaterCrawlAPIClient(api_key, base_url) - def crawl_url(self, url, options: dict | Any = None) -> dict: + def crawl_url(self, url, options: Optional[dict | Any] = None) -> dict: options = options or {} spider_options = { "max_depth": 1, diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index 0eff7c186a..f3b162e3d3 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -1,6 +1,5 @@ """Abstract interface for document loader implementations.""" -import datetime import logging import mimetypes import os @@ -19,6 +18,7 @@ from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document from extensions.ext_database import db from extensions.ext_storage import storage +from libs.datetime_utils import naive_utc_now from models.enums import CreatorUserRole from models.model import UploadFile @@ -117,10 +117,10 @@ class WordExtractor(BaseExtractor): mime_type=mime_type or "", created_by=self.user_id, created_by_role=CreatorUserRole.ACCOUNT, - created_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + created_at=naive_utc_now(), used=True, used_by=self.user_id, - used_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + used_at=naive_utc_now(), ) db.session.add(upload_file) diff --git a/api/core/rag/models/document.py b/api/core/rag/models/document.py index 04a3428ad8..ff63a6780e 100644 --- a/api/core/rag/models/document.py +++ b/api/core/rag/models/document.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from collections.abc import Sequence from typing import Any, Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field class ChildDocument(BaseModel): @@ -15,7 +15,7 @@ class ChildDocument(BaseModel): """Arbitrary metadata about the page content (e.g., source, relationships to other 
documents, etc.). """ - metadata: dict = {} + metadata: dict = Field(default_factory=dict) class Document(BaseModel): @@ -28,7 +28,7 @@ class Document(BaseModel): """Arbitrary metadata about the page content (e.g., source, relationships to other documents, etc.). """ - metadata: dict = {} + metadata: dict = Field(default_factory=dict) provider: Optional[str] = "dify" diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index a25bc65646..cd4af72832 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -1012,7 +1012,7 @@ class DatasetRetrieval: def _process_metadata_filter_func( self, sequence: int, condition: str, metadata_name: str, value: Optional[Any], filters: list ): - if value is None: + if value is None and condition not in ("empty", "not empty"): return key = f"{metadata_name}_{sequence}" diff --git a/api/core/repositories/factory.py b/api/core/repositories/factory.py index 09c775f3a6..854c122331 100644 --- a/api/core/repositories/factory.py +++ b/api/core/repositories/factory.py @@ -5,10 +5,7 @@ This module provides a Django-like settings system for repository implementation allowing users to configure different repository backends through string paths. 
""" -import importlib -import inspect -import logging -from typing import Protocol, Union +from typing import Union from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker @@ -16,12 +13,11 @@ from sqlalchemy.orm import sessionmaker from configs import dify_config from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from libs.module_loading import import_string from models import Account, EndUser from models.enums import WorkflowRunTriggeredFrom from models.workflow import WorkflowNodeExecutionTriggeredFrom -logger = logging.getLogger(__name__) - class RepositoryImportError(Exception): """Raised when a repository implementation cannot be imported or instantiated.""" @@ -37,96 +33,6 @@ class DifyCoreRepositoryFactory: are specified as module paths (e.g., 'module.submodule.ClassName'). """ - @staticmethod - def _import_class(class_path: str) -> type: - """ - Import a class from a module path string. - - Args: - class_path: Full module path to the class (e.g., 'module.submodule.ClassName') - - Returns: - The imported class - - Raises: - RepositoryImportError: If the class cannot be imported - """ - try: - module_path, class_name = class_path.rsplit(".", 1) - module = importlib.import_module(module_path) - repo_class = getattr(module, class_name) - assert isinstance(repo_class, type) - return repo_class - except (ValueError, ImportError, AttributeError) as e: - raise RepositoryImportError(f"Cannot import repository class '{class_path}': {e}") from e - - @staticmethod - def _validate_repository_interface(repository_class: type, expected_interface: type[Protocol]) -> None: # type: ignore - """ - Validate that a class implements the expected repository interface. 
- - Args: - repository_class: The class to validate - expected_interface: The expected interface/protocol - - Raises: - RepositoryImportError: If the class doesn't implement the interface - """ - # Check if the class has all required methods from the protocol - required_methods = [ - method - for method in dir(expected_interface) - if not method.startswith("_") and callable(getattr(expected_interface, method, None)) - ] - - missing_methods = [] - for method_name in required_methods: - if not hasattr(repository_class, method_name): - missing_methods.append(method_name) - - if missing_methods: - raise RepositoryImportError( - f"Repository class '{repository_class.__name__}' does not implement required methods " - f"{missing_methods} from interface '{expected_interface.__name__}'" - ) - - @staticmethod - def _validate_constructor_signature(repository_class: type, required_params: list[str]) -> None: - """ - Validate that a repository class constructor accepts required parameters. - Args: - repository_class: The class to validate - required_params: List of required parameter names - Raises: - RepositoryImportError: If the constructor doesn't accept required parameters - """ - - try: - # MyPy may flag the line below with the following error: - # - # > Accessing "__init__" on an instance is unsound, since - # > instance.__init__ could be from an incompatible subclass. - # - # Despite this, we need to ensure that the constructor of `repository_class` - # has a compatible signature. - signature = inspect.signature(repository_class.__init__) # type: ignore[misc] - param_names = list(signature.parameters.keys()) - - # Remove 'self' parameter - if "self" in param_names: - param_names.remove("self") - - missing_params = [param for param in required_params if param not in param_names] - if missing_params: - raise RepositoryImportError( - f"Repository class '{repository_class.__name__}' constructor does not accept required parameters: " - f"{missing_params}. 
Expected parameters: {required_params}" - ) - except Exception as e: - raise RepositoryImportError( - f"Failed to validate constructor signature for '{repository_class.__name__}': {e}" - ) from e - @classmethod def create_workflow_execution_repository( cls, @@ -151,24 +57,16 @@ class DifyCoreRepositoryFactory: RepositoryImportError: If the configured repository cannot be created """ class_path = dify_config.CORE_WORKFLOW_EXECUTION_REPOSITORY - logger.debug("Creating WorkflowExecutionRepository from: %s", class_path) try: - repository_class = cls._import_class(class_path) - cls._validate_repository_interface(repository_class, WorkflowExecutionRepository) - - # All repository types now use the same constructor parameters + repository_class = import_string(class_path) return repository_class( # type: ignore[no-any-return] session_factory=session_factory, user=user, app_id=app_id, triggered_from=triggered_from, ) - except RepositoryImportError: - # Re-raise our custom errors as-is - raise - except Exception as e: - logger.exception("Failed to create WorkflowExecutionRepository") + except (ImportError, Exception) as e: raise RepositoryImportError(f"Failed to create WorkflowExecutionRepository from '{class_path}': {e}") from e @classmethod @@ -195,24 +93,16 @@ class DifyCoreRepositoryFactory: RepositoryImportError: If the configured repository cannot be created """ class_path = dify_config.CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY - logger.debug("Creating WorkflowNodeExecutionRepository from: %s", class_path) try: - repository_class = cls._import_class(class_path) - cls._validate_repository_interface(repository_class, WorkflowNodeExecutionRepository) - - # All repository types now use the same constructor parameters + repository_class = import_string(class_path) return repository_class( # type: ignore[no-any-return] session_factory=session_factory, user=user, app_id=app_id, triggered_from=triggered_from, ) - except RepositoryImportError: - # Re-raise our custom errors 
as-is - raise - except Exception as e: - logger.exception("Failed to create WorkflowNodeExecutionRepository") + except (ImportError, Exception) as e: raise RepositoryImportError( f"Failed to create WorkflowNodeExecutionRepository from '{class_path}': {e}" ) from e diff --git a/api/core/tools/custom_tool/tool.py b/api/core/tools/custom_tool/tool.py index e112de9578..3c0bfa5240 100644 --- a/api/core/tools/custom_tool/tool.py +++ b/api/core/tools/custom_tool/tool.py @@ -81,14 +81,11 @@ class ApiTool(Tool): return ToolProviderType.API def assembling_request(self, parameters: dict[str, Any]) -> dict[str, Any]: + headers = {} if self.runtime is None: raise ToolProviderCredentialValidationError("runtime not initialized") - headers = {} - if self.runtime is None: - raise ValueError("runtime is required") credentials = self.runtime.credentials or {} - if "auth_type" not in credentials: raise ToolProviderCredentialValidationError("Missing auth_type") diff --git a/api/core/tools/entities/api_entities.py b/api/core/tools/entities/api_entities.py index 27ce96b90e..48015c04ee 100644 --- a/api/core/tools/entities/api_entities.py +++ b/api/core/tools/entities/api_entities.py @@ -62,7 +62,7 @@ class ToolProviderApiEntity(BaseModel): parameter.pop("input_schema", None) # ------------- optional_fields = self.optional_field("server_url", self.server_url) - if self.type == ToolProviderType.MCP.value: + if self.type == ToolProviderType.MCP: optional_fields.update(self.optional_field("updated_at", self.updated_at)) optional_fields.update(self.optional_field("server_identifier", self.server_identifier)) return { diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py index 5ffba07b44..df599a09a3 100644 --- a/api/core/tools/entities/tool_entities.py +++ b/api/core/tools/entities/tool_entities.py @@ -1,4 +1,5 @@ import base64 +import contextlib import enum from collections.abc import Mapping from enum import Enum @@ -227,10 +228,8 @@ class 
ToolInvokeMessage(BaseModel): @classmethod def decode_blob_message(cls, v): if isinstance(v, dict) and "blob" in v: - try: + with contextlib.suppress(Exception): v["blob"] = base64.b64decode(v["blob"]) - except Exception: - pass return v @field_serializer("message") diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py index 83444c02d8..10db4d9503 100644 --- a/api/core/tools/tool_engine.py +++ b/api/core/tools/tool_engine.py @@ -1,3 +1,4 @@ +import contextlib import json from collections.abc import Generator, Iterable from copy import deepcopy @@ -69,10 +70,8 @@ class ToolEngine: if parameters and len(parameters) == 1: tool_parameters = {parameters[0].name: tool_parameters} else: - try: + with contextlib.suppress(Exception): tool_parameters = json.loads(tool_parameters) - except Exception: - pass if not isinstance(tool_parameters, dict): raise ValueError(f"tool_parameters should be a dict, but got a string: {tool_parameters}") @@ -270,14 +269,12 @@ class ToolEngine: if response.meta.get("mime_type"): mimetype = response.meta.get("mime_type") else: - try: + with contextlib.suppress(Exception): url = URL(cast(ToolInvokeMessage.TextMessage, response.message).text) extension = url.suffix guess_type_result, _ = guess_type(f"a{extension}") if guess_type_result: mimetype = guess_type_result - except Exception: - pass if not mimetype: mimetype = "image/jpeg" diff --git a/api/core/tools/tool_manager.py b/api/core/tools/tool_manager.py index 7472f4f605..2089313b08 100644 --- a/api/core/tools/tool_manager.py +++ b/api/core/tools/tool_manager.py @@ -959,7 +959,7 @@ class ToolManager: elif provider_type == ToolProviderType.WORKFLOW: return cls.generate_workflow_tool_icon_url(tenant_id, provider_id) elif provider_type == ToolProviderType.PLUGIN: - provider = ToolManager.get_builtin_provider(provider_id, tenant_id) + provider = ToolManager.get_plugin_provider(provider_id, tenant_id) if isinstance(provider, PluginToolProviderController): try: return 
cls.generate_plugin_tool_icon_url(tenant_id, provider.entity.identity.icon) diff --git a/api/core/tools/utils/configuration.py b/api/core/tools/utils/configuration.py index aceba6e69f..3a9391dbb1 100644 --- a/api/core/tools/utils/configuration.py +++ b/api/core/tools/utils/configuration.py @@ -1,3 +1,4 @@ +import contextlib from copy import deepcopy from typing import Any @@ -137,11 +138,9 @@ class ToolParameterConfigurationManager: and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT ): if parameter.name in parameters: - try: - has_secret_input = True + has_secret_input = True + with contextlib.suppress(Exception): parameters[parameter.name] = encrypter.decrypt_token(self.tenant_id, parameters[parameter.name]) - except Exception: - pass if has_secret_input: cache.set(parameters) diff --git a/api/core/tools/utils/encryption.py b/api/core/tools/utils/encryption.py index 5fdfd3b9d1..d771293e11 100644 --- a/api/core/tools/utils/encryption.py +++ b/api/core/tools/utils/encryption.py @@ -1,3 +1,4 @@ +import contextlib from copy import deepcopy from typing import Any, Optional, Protocol @@ -111,14 +112,12 @@ class ProviderConfigEncrypter: for field_name, field in fields.items(): if field.type == BasicProviderConfig.Type.SECRET_INPUT: if field_name in data: - try: + with contextlib.suppress(Exception): # if the value is None or empty string, skip decrypt if not data[field_name]: continue data[field_name] = encrypter.decrypt_token(self.tenant_id, data[field_name]) - except Exception: - pass self.provider_config_cache.set(data) return data diff --git a/api/core/tools/utils/rag_web_reader.py b/api/core/tools/utils/rag_web_reader.py deleted file mode 100644 index 22c47fa814..0000000000 --- a/api/core/tools/utils/rag_web_reader.py +++ /dev/null @@ -1,17 +0,0 @@ -import re - - -def get_image_upload_file_ids(content): - pattern = r"!\[image\]\((http?://.*?(file-preview|image-preview))\)" - matches = re.findall(pattern, content) - image_upload_file_ids = [] - for 
match in matches: - if match[1] == "file-preview": - content_pattern = r"files/([^/]+)/file-preview" - else: - content_pattern = r"files/([^/]+)/image-preview" - content_match = re.search(content_pattern, match[0]) - if content_match: - image_upload_file_id = content_match.group(1) - image_upload_file_ids.append(image_upload_file_id) - return image_upload_file_ids diff --git a/api/core/tools/utils/web_reader_tool.py b/api/core/tools/utils/web_reader_tool.py index df052c16db..d8403c2e15 100644 --- a/api/core/tools/utils/web_reader_tool.py +++ b/api/core/tools/utils/web_reader_tool.py @@ -87,7 +87,7 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: res = FULL_TEMPLATE.format( title=article.title, - author=article.auther, + author=article.author, text=article.text, ) @@ -97,7 +97,7 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: @dataclass class Article: title: str - auther: str + author: str text: Sequence[dict] @@ -105,7 +105,7 @@ def extract_using_readabilipy(html: str): json_article: dict[str, Any] = simple_json_from_html_string(html, use_readability=True) article = Article( title=json_article.get("title") or "", - auther=json_article.get("byline") or "", + author=json_article.get("byline") or "", text=json_article.get("plain_text") or [], ) @@ -113,7 +113,7 @@ def extract_using_readabilipy(html: str): def get_image_upload_file_ids(content): - pattern = r"!\[image\]\((http?://.*?(file-preview|image-preview))\)" + pattern = r"!\[image\]\((https?://.*?(file-preview|image-preview))\)" matches = re.findall(pattern, content) image_upload_file_ids = [] for match in matches: diff --git a/api/core/tools/workflow_as_tool/provider.py b/api/core/tools/workflow_as_tool/provider.py index 83f5f558d5..18e6993b38 100644 --- a/api/core/tools/workflow_as_tool/provider.py +++ b/api/core/tools/workflow_as_tool/provider.py @@ -203,9 +203,6 @@ class WorkflowToolProviderController(ToolProviderController): raise ValueError("app not found") app = 
db_providers.app - if not app: - raise ValueError("can not read app of workflow") - self.tools = [self._get_db_provider_tool(db_providers, app)] return self.tools diff --git a/api/core/variables/types.py b/api/core/variables/types.py index d28fb11401..6629056042 100644 --- a/api/core/variables/types.py +++ b/api/core/variables/types.py @@ -126,7 +126,7 @@ class SegmentType(StrEnum): """ if self.is_array_type(): return self._validate_array(value, array_validation) - elif self == SegmentType.NUMBER: + elif self in [SegmentType.INTEGER, SegmentType.FLOAT, SegmentType.NUMBER]: return isinstance(value, (int, float)) elif self == SegmentType.STRING: return isinstance(value, str) @@ -166,7 +166,6 @@ _ARRAY_TYPES = frozenset( ] ) - _NUMERICAL_TYPES = frozenset( [ SegmentType.NUMBER, diff --git a/api/core/workflow/entities/workflow_execution.py b/api/core/workflow/entities/workflow_execution.py index 781be4b3c6..f00dc11aa6 100644 --- a/api/core/workflow/entities/workflow_execution.py +++ b/api/core/workflow/entities/workflow_execution.py @@ -6,12 +6,14 @@ implementation details like tenant_id, app_id, etc. """ from collections.abc import Mapping -from datetime import UTC, datetime +from datetime import datetime from enum import StrEnum from typing import Any, Optional from pydantic import BaseModel, Field +from libs.datetime_utils import naive_utc_now + class WorkflowType(StrEnum): """ @@ -60,7 +62,7 @@ class WorkflowExecution(BaseModel): Calculate elapsed time in seconds. If workflow is not finished, use current time. 
""" - end_time = self.finished_at or datetime.now(UTC).replace(tzinfo=None) + end_time = self.finished_at or naive_utc_now() return (end_time - self.started_at).total_seconds() @classmethod diff --git a/api/core/workflow/graph_engine/entities/graph_runtime_state.py b/api/core/workflow/graph_engine/entities/graph_runtime_state.py index a62ffe46c9..e2ec7b17f0 100644 --- a/api/core/workflow/graph_engine/entities/graph_runtime_state.py +++ b/api/core/workflow/graph_engine/entities/graph_runtime_state.py @@ -22,7 +22,7 @@ class GraphRuntimeState(BaseModel): # # Note: Since the type of this field is `dict[str, Any]`, its values may not remain consistent # after a serialization and deserialization round trip. - outputs: dict[str, Any] = {} + outputs: dict[str, Any] = Field(default_factory=dict) node_run_steps: int = 0 """node run steps""" diff --git a/api/core/workflow/graph_engine/entities/runtime_route_state.py b/api/core/workflow/graph_engine/entities/runtime_route_state.py index f2d9c98936..a4ddfafab5 100644 --- a/api/core/workflow/graph_engine/entities/runtime_route_state.py +++ b/api/core/workflow/graph_engine/entities/runtime_route_state.py @@ -1,5 +1,5 @@ import uuid -from datetime import UTC, datetime +from datetime import datetime from enum import Enum from typing import Optional @@ -7,6 +7,7 @@ from pydantic import BaseModel, Field from core.workflow.entities.node_entities import NodeRunResult from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus +from libs.datetime_utils import naive_utc_now class RouteNodeState(BaseModel): @@ -71,7 +72,7 @@ class RouteNodeState(BaseModel): raise Exception(f"Invalid route status {run_result.status}") self.node_run_result = run_result - self.finished_at = datetime.now(UTC).replace(tzinfo=None) + self.finished_at = naive_utc_now() class RuntimeRouteState(BaseModel): @@ -89,7 +90,7 @@ class RuntimeRouteState(BaseModel): :param node_id: node id """ - state = RouteNodeState(node_id=node_id, 
start_at=datetime.now(UTC).replace(tzinfo=None)) + state = RouteNodeState(node_id=node_id, start_at=naive_utc_now()) self.node_state_mapping[state.id] = state return state diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index b9663d32f7..03b920ccbb 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -6,7 +6,6 @@ import uuid from collections.abc import Generator, Mapping from concurrent.futures import ThreadPoolExecutor, wait from copy import copy, deepcopy -from datetime import UTC, datetime from typing import Any, Optional, cast from flask import Flask, current_app @@ -51,6 +50,7 @@ from core.workflow.nodes.base import BaseNode from core.workflow.nodes.end.end_stream_processor import EndStreamProcessor from core.workflow.nodes.enums import ErrorStrategy, FailBranchSourceHandle from core.workflow.nodes.event import RunCompletedEvent, RunRetrieverResourceEvent, RunStreamChunkEvent +from libs.datetime_utils import naive_utc_now from libs.flask_utils import preserve_flask_contexts from models.enums import UserFrom from models.workflow import WorkflowType @@ -640,7 +640,7 @@ class GraphEngine: while should_continue_retry and retries <= max_retries: try: # run node - retry_start_at = datetime.now(UTC).replace(tzinfo=None) + retry_start_at = naive_utc_now() # yield control to other threads time.sleep(0.001) event_stream = node.run() diff --git a/api/core/workflow/nodes/agent/agent_node.py b/api/core/workflow/nodes/agent/agent_node.py index 2b6382a8a6..144f036aa4 100644 --- a/api/core/workflow/nodes/agent/agent_node.py +++ b/api/core/workflow/nodes/agent/agent_node.py @@ -13,8 +13,9 @@ from core.agent.strategy.plugin import PluginAgentStrategy from core.file import File, FileTransferMethod from core.memory.token_buffer_memory import TokenBufferMemory from core.model_manager import ModelInstance, ModelManager -from 
core.model_runtime.entities.llm_entities import LLMUsage +from core.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata from core.model_runtime.entities.model_entities import AIModelEntity, ModelType +from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.request import InvokeCredentials from core.plugin.impl.exc import PluginDaemonClientSideError from core.plugin.impl.plugin import PluginInstaller @@ -558,7 +559,7 @@ class AgentNode(BaseNode): assert isinstance(message.message, ToolInvokeMessage.JsonMessage) if node_type == NodeType.AGENT: msg_metadata: dict[str, Any] = message.message.json_object.pop("execution_metadata", {}) - llm_usage = LLMUsage.from_metadata(msg_metadata) + llm_usage = LLMUsage.from_metadata(cast(LLMUsageMetadata, msg_metadata)) agent_execution_metadata = { WorkflowNodeExecutionMetadataKey(key): value for key, value in msg_metadata.items() @@ -692,7 +693,13 @@ class AgentNode(BaseNode): yield RunCompletedEvent( run_result=NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, - outputs={"text": text, "files": ArrayFileSegment(value=files), "json": json_output, **variables}, + outputs={ + "text": text, + "usage": jsonable_encoder(llm_usage), + "files": ArrayFileSegment(value=files), + "json": json_output, + **variables, + }, metadata={ **agent_execution_metadata, WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info, diff --git a/api/core/workflow/nodes/http_request/executor.py b/api/core/workflow/nodes/http_request/executor.py index c9f7fa1221..a5a578a6ff 100644 --- a/api/core/workflow/nodes/http_request/executor.py +++ b/api/core/workflow/nodes/http_request/executor.py @@ -12,6 +12,7 @@ from json_repair import repair_json from configs import dify_config from core.file import file_manager +from core.file.enums import FileTransferMethod from core.helper import ssrf_proxy from core.variables.segments import ArrayFileSegment, FileSegment from core.workflow.entities.variable_pool import 
VariablePool @@ -228,7 +229,9 @@ class Executor: files: dict[str, list[tuple[str | None, bytes, str]]] = {} for key, files_in_segment in files_list: for file in files_in_segment: - if file.related_id is not None: + if file.related_id is not None or ( + file.transfer_method == FileTransferMethod.REMOTE_URL and file.remote_url is not None + ): file_tuple = ( file.filename, file_manager.download(file), diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index def1e1cfa3..7f591a3ea9 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -4,7 +4,7 @@ import time import uuid from collections.abc import Generator, Mapping, Sequence from concurrent.futures import Future, wait -from datetime import UTC, datetime +from datetime import datetime from queue import Empty, Queue from typing import TYPE_CHECKING, Any, Optional, cast @@ -41,6 +41,7 @@ from core.workflow.nodes.enums import ErrorStrategy, NodeType from core.workflow.nodes.event import NodeEvent, RunCompletedEvent from core.workflow.nodes.iteration.entities import ErrorHandleMode, IterationNodeData from factories.variable_factory import build_segment +from libs.datetime_utils import naive_utc_now from libs.flask_utils import preserve_flask_contexts from .exc import ( @@ -179,7 +180,7 @@ class IterationNode(BaseNode): thread_pool_id=self.thread_pool_id, ) - start_at = datetime.now(UTC).replace(tzinfo=None) + start_at = naive_utc_now() yield IterationRunStartedEvent( iteration_id=self.id, @@ -428,7 +429,7 @@ class IterationNode(BaseNode): """ run single iteration """ - iter_start_at = datetime.now(UTC).replace(tzinfo=None) + iter_start_at = naive_utc_now() try: rst = graph_engine.run() @@ -505,7 +506,7 @@ class IterationNode(BaseNode): variable_pool.add([self.node_id, "index"], next_index) if next_index < len(iterator_list_value): variable_pool.add([self.node_id, "item"], 
iterator_list_value[next_index]) - duration = (datetime.now(UTC).replace(tzinfo=None) - iter_start_at).total_seconds() + duration = (naive_utc_now() - iter_start_at).total_seconds() iter_run_map[iteration_run_id] = duration yield IterationRunNextEvent( iteration_id=self.id, @@ -526,7 +527,7 @@ class IterationNode(BaseNode): if next_index < len(iterator_list_value): variable_pool.add([self.node_id, "item"], iterator_list_value[next_index]) - duration = (datetime.now(UTC).replace(tzinfo=None) - iter_start_at).total_seconds() + duration = (naive_utc_now() - iter_start_at).total_seconds() iter_run_map[iteration_run_id] = duration yield IterationRunNextEvent( iteration_id=self.id, @@ -602,7 +603,7 @@ class IterationNode(BaseNode): if next_index < len(iterator_list_value): variable_pool.add([self.node_id, "item"], iterator_list_value[next_index]) - duration = (datetime.now(UTC).replace(tzinfo=None) - iter_start_at).total_seconds() + duration = (naive_utc_now() - iter_start_at).total_seconds() iter_run_map[iteration_run_id] = duration yield IterationRunNextEvent( iteration_id=self.id, diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 7303b68501..5e5c9f520e 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, Optional, cast from sqlalchemy import Float, and_, func, or_, text from sqlalchemy import cast as sqlalchemy_cast -from sqlalchemy.orm import Session +from sqlalchemy.orm import sessionmaker from core.app.app_config.entities import DatasetRetrieveConfigEntity from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity @@ -175,7 +175,7 @@ class KnowledgeRetrievalNode(BaseNode): redis_client.zremrangebyscore(key, 0, current_time - 60000) request_count = 
redis_client.zcard(key) if request_count > knowledge_rate_limit.limit: - with Session(db.engine) as session: + with sessionmaker(db.engine).begin() as session: # add ratelimit record rate_limit_log = RateLimitLog( tenant_id=self.tenant_id, @@ -183,7 +183,6 @@ class KnowledgeRetrievalNode(BaseNode): operation="knowledge", ) session.add(rate_limit_log) - session.commit() return NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, inputs=variables, @@ -389,6 +388,15 @@ class KnowledgeRetrievalNode(BaseNode): "segment_id": segment.id, "retriever_from": "workflow", "score": record.score or 0.0, + "child_chunks": [ + { + "id": str(getattr(chunk, "id", "")), + "content": str(getattr(chunk, "content", "")), + "position": int(getattr(chunk, "position", 0)), + "score": float(getattr(chunk, "score", 0.0)), + } + for chunk in (record.child_chunks or []) + ], "segment_hit_count": segment.hit_count, "segment_word_count": segment.word_count, "segment_position": segment.position, @@ -572,7 +580,7 @@ class KnowledgeRetrievalNode(BaseNode): def _process_metadata_filter_func( self, sequence: int, condition: str, metadata_name: str, value: Optional[Any], filters: list ): - if value is None: + if value is None and condition not in ("empty", "not empty"): return key = f"{metadata_name}_{sequence}" diff --git a/api/core/workflow/nodes/llm/entities.py b/api/core/workflow/nodes/llm/entities.py index 4bb62d35a2..e6f8abeba0 100644 --- a/api/core/workflow/nodes/llm/entities.py +++ b/api/core/workflow/nodes/llm/entities.py @@ -13,7 +13,7 @@ class ModelConfig(BaseModel): provider: str name: str mode: LLMMode - completion_params: dict[str, Any] = {} + completion_params: dict[str, Any] = Field(default_factory=dict) class ContextConfig(BaseModel): diff --git a/api/core/workflow/nodes/llm/llm_utils.py b/api/core/workflow/nodes/llm/llm_utils.py index 0966c87a1d..2441e30c87 100644 --- a/api/core/workflow/nodes/llm/llm_utils.py +++ b/api/core/workflow/nodes/llm/llm_utils.py @@ -1,5 +1,4 @@ from 
collections.abc import Sequence -from datetime import UTC, datetime from typing import Optional, cast from sqlalchemy import select, update @@ -20,6 +19,7 @@ from core.variables.segments import ArrayAnySegment, ArrayFileSegment, FileSegme from core.workflow.entities.variable_pool import VariablePool from core.workflow.enums import SystemVariableKey from core.workflow.nodes.llm.entities import ModelConfig +from libs.datetime_utils import naive_utc_now from models import db from models.model import Conversation from models.provider import Provider, ProviderType @@ -149,7 +149,7 @@ def deduct_llm_quota(tenant_id: str, model_instance: ModelInstance, usage: LLMUs ) .values( quota_used=Provider.quota_used + used_quota, - last_used=datetime.now(tz=UTC).replace(tzinfo=None), + last_used=naive_utc_now(), ) ) session.execute(stmt) diff --git a/api/core/workflow/nodes/loop/loop_node.py b/api/core/workflow/nodes/loop/loop_node.py index 655de9362f..b2ab943129 100644 --- a/api/core/workflow/nodes/loop/loop_node.py +++ b/api/core/workflow/nodes/loop/loop_node.py @@ -2,7 +2,7 @@ import json import logging import time from collections.abc import Generator, Mapping, Sequence -from datetime import UTC, datetime +from datetime import datetime from typing import TYPE_CHECKING, Any, Literal, Optional, cast from configs import dify_config @@ -36,6 +36,7 @@ from core.workflow.nodes.event import NodeEvent, RunCompletedEvent from core.workflow.nodes.loop.entities import LoopNodeData from core.workflow.utils.condition.processor import ConditionProcessor from factories.variable_factory import TypeMismatchError, build_segment_with_type +from libs.datetime_utils import naive_utc_now if TYPE_CHECKING: from core.workflow.entities.variable_pool import VariablePool @@ -143,7 +144,7 @@ class LoopNode(BaseNode): thread_pool_id=self.thread_pool_id, ) - start_at = datetime.now(UTC).replace(tzinfo=None) + start_at = naive_utc_now() condition_processor = ConditionProcessor() # Start Loop event @@ -171,7 
+172,7 @@ class LoopNode(BaseNode): try: check_break_result = False for i in range(loop_count): - loop_start_time = datetime.now(UTC).replace(tzinfo=None) + loop_start_time = naive_utc_now() # run single loop loop_result = yield from self._run_single_loop( graph_engine=graph_engine, @@ -185,7 +186,7 @@ class LoopNode(BaseNode): start_at=start_at, inputs=inputs, ) - loop_end_time = datetime.now(UTC).replace(tzinfo=None) + loop_end_time = naive_utc_now() single_loop_variable = {} for key, selector in loop_variable_selectors.items(): @@ -313,30 +314,31 @@ class LoopNode(BaseNode): and event.node_type == NodeType.LOOP_END and not isinstance(event, NodeRunStreamChunkEvent) ): - check_break_result = True + # Check if variables in break conditions exist and process conditions + # Allow loop internal variables to be used in break conditions + available_conditions = [] + for condition in break_conditions: + variable = self.graph_runtime_state.variable_pool.get(condition.variable_selector) + if variable: + available_conditions.append(condition) + + # Process conditions if at least one variable is available + if available_conditions: + input_conditions, group_result, check_break_result = condition_processor.process_conditions( + variable_pool=self.graph_runtime_state.variable_pool, + conditions=available_conditions, + operator=logical_operator, + ) + if check_break_result: + break + else: + check_break_result = True yield self._handle_event_metadata(event=event, iter_run_index=current_index) break if isinstance(event, NodeRunSucceededEvent): yield self._handle_event_metadata(event=event, iter_run_index=current_index) - # Check if all variables in break conditions exist - exists_variable = False - for condition in break_conditions: - if not self.graph_runtime_state.variable_pool.get(condition.variable_selector): - exists_variable = False - break - else: - exists_variable = True - if exists_variable: - input_conditions, group_result, check_break_result = 
condition_processor.process_conditions( - variable_pool=self.graph_runtime_state.variable_pool, - conditions=break_conditions, - operator=logical_operator, - ) - if check_break_result: - break - elif isinstance(event, BaseGraphEvent): if isinstance(event, GraphRunFailedEvent): # Loop run failed diff --git a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py index 45c5e0a62c..49c4c142e1 100644 --- a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py @@ -1,3 +1,4 @@ +import contextlib import json import logging import uuid @@ -666,10 +667,8 @@ class ParameterExtractorNode(BaseNode): if result[idx] == "{" or result[idx] == "[": json_str = extract_json(result[idx:]) if json_str: - try: + with contextlib.suppress(Exception): return cast(dict, json.loads(json_str)) - except Exception: - pass logger.info("extra error: %s", result) return None @@ -686,10 +685,9 @@ class ParameterExtractorNode(BaseNode): if result[idx] == "{" or result[idx] == "[": json_str = extract_json(result[idx:]) if json_str: - try: + with contextlib.suppress(Exception): return cast(dict, json.loads(json_str)) - except Exception: - pass + logger.info("extra error: %s", result) return None diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index da147fe895..e21092349e 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -32,7 +32,7 @@ if [[ "${MODE}" == "worker" ]]; then exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION \ --max-tasks-per-child ${MAX_TASK_PRE_CHILD:-50} --loglevel ${LOG_LEVEL:-INFO} \ - -Q ${CELERY_QUEUES:-dataset,mail,ops_trace,app_deletion,plugin,workflow_storage} + -Q ${CELERY_QUEUES:-dataset,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation} elif [[ "${MODE}" == "beat" ]]; then exec celery -A app.celery beat 
--loglevel ${LOG_LEVEL:-INFO} diff --git a/api/events/event_handlers/clean_when_document_deleted.py b/api/events/event_handlers/clean_when_document_deleted.py index 00a66f50ad..bbc913b7cf 100644 --- a/api/events/event_handlers/clean_when_document_deleted.py +++ b/api/events/event_handlers/clean_when_document_deleted.py @@ -8,4 +8,6 @@ def handle(sender, **kwargs): dataset_id = kwargs.get("dataset_id") doc_form = kwargs.get("doc_form") file_id = kwargs.get("file_id") + assert dataset_id is not None + assert doc_form is not None clean_document_task.delay(document_id, dataset_id, doc_form, file_id) diff --git a/api/events/event_handlers/create_document_index.py b/api/events/event_handlers/create_document_index.py index c607161e2a..1b0321f42e 100644 --- a/api/events/event_handlers/create_document_index.py +++ b/api/events/event_handlers/create_document_index.py @@ -1,3 +1,4 @@ +import contextlib import logging import time @@ -38,12 +39,11 @@ def handle(sender, **kwargs): db.session.add(document) db.session.commit() - try: - indexing_runner = IndexingRunner() - indexing_runner.run(documents) - end_at = time.perf_counter() - logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) - except DocumentIsPausedError as ex: - logging.info(click.style(str(ex), fg="yellow")) - except Exception: - pass + with contextlib.suppress(Exception): + try: + indexing_runner = IndexingRunner() + indexing_runner.run(documents) + end_at = time.perf_counter() + logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) + except DocumentIsPausedError as ex: + logging.info(click.style(str(ex), fg="yellow")) diff --git a/api/events/event_handlers/update_provider_when_message_created.py b/api/events/event_handlers/update_provider_when_message_created.py index 2ed42c71ea..f01dd58900 100644 --- a/api/events/event_handlers/update_provider_when_message_created.py +++ 
b/api/events/event_handlers/update_provider_when_message_created.py @@ -188,7 +188,7 @@ def _execute_provider_updates(updates_to_perform: list[_ProviderUpdateOperation] # Use SQLAlchemy's context manager for transaction management # This automatically handles commit/rollback - with Session(db.engine) as session: + with Session(db.engine) as session, session.begin(): # Use a single transaction for all updates for update_operation in updates_to_perform: filters = update_operation.filters diff --git a/api/extensions/ext_blueprints.py b/api/extensions/ext_blueprints.py index a4d013ffc0..1024fd9ce6 100644 --- a/api/extensions/ext_blueprints.py +++ b/api/extensions/ext_blueprints.py @@ -29,7 +29,6 @@ def init_app(app: DifyApp): methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"], expose_headers=["X-Version", "X-Env"], ) - app.register_blueprint(web_bp) CORS( @@ -40,10 +39,13 @@ def init_app(app: DifyApp): methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"], expose_headers=["X-Version", "X-Env"], ) - app.register_blueprint(console_app_bp) - CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"]) + CORS( + files_bp, + allow_headers=["Content-Type"], + methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"], + ) app.register_blueprint(files_bp) app.register_blueprint(inner_api_bp) diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 198f60e554..fb5352ca8f 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -3,8 +3,8 @@ from datetime import timedelta from typing import Any, Optional import pytz -from celery import Celery, Task # type: ignore -from celery.schedules import crontab # type: ignore +from celery import Celery, Task +from celery.schedules import crontab from configs import dify_config from dify_app import DifyApp @@ -66,7 +66,6 @@ def init_app(app: DifyApp) -> Celery: task_cls=FlaskTask, broker=dify_config.CELERY_BROKER_URL, 
backend=dify_config.CELERY_BACKEND, - task_ignore_result=True, ) celery_app.conf.update( @@ -77,6 +76,7 @@ def init_app(app: DifyApp) -> Celery: worker_task_log_format=dify_config.LOG_FORMAT, worker_hijack_root_logger=False, timezone=pytz.timezone(dify_config.LOG_TZ or "UTC"), + task_ignore_result=True, ) # Apply SSL configuration if enabled @@ -145,13 +145,19 @@ def init_app(app: DifyApp) -> Celery: minutes=dify_config.QUEUE_MONITOR_INTERVAL if dify_config.QUEUE_MONITOR_INTERVAL else 30 ), } - if dify_config.ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK: + if dify_config.ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK and dify_config.MARKETPLACE_ENABLED: imports.append("schedule.check_upgradable_plugin_task") beat_schedule["check_upgradable_plugin_task"] = { "task": "schedule.check_upgradable_plugin_task.check_upgradable_plugin_task", "schedule": crontab(minute="*/15"), } - + if dify_config.WORKFLOW_LOG_CLEANUP_ENABLED: + # 2:00 AM every day + imports.append("schedule.clean_workflow_runlogs_precise") + beat_schedule["clean_workflow_runlogs_precise"] = { + "task": "schedule.clean_workflow_runlogs_precise.clean_workflow_runlogs_precise", + "schedule": crontab(minute="0", hour="2"), + } celery_app.conf.update(beat_schedule=beat_schedule, imports=imports) return celery_app diff --git a/api/extensions/ext_login.py b/api/extensions/ext_login.py index 9b18e25eaa..9e5c71fb1d 100644 --- a/api/extensions/ext_login.py +++ b/api/extensions/ext_login.py @@ -20,6 +20,10 @@ login_manager = flask_login.LoginManager() @login_manager.request_loader def load_user_from_request(request_from_flask_login): """Load user based on the request.""" + # Skip authentication for documentation endpoints + if request.path.endswith("/docs") or request.path.endswith("/swagger.json"): + return None + auth_header = request.headers.get("Authorization", "") auth_token: str | None = None if auth_header: diff --git a/api/extensions/ext_otel.py b/api/extensions/ext_otel.py index a8f025a750..544a2dc625 100644 --- 
a/api/extensions/ext_otel.py +++ b/api/extensions/ext_otel.py @@ -1,4 +1,5 @@ import atexit +import contextlib import logging import os import platform @@ -7,7 +8,7 @@ import sys from typing import Union import flask -from celery.signals import worker_init # type: ignore +from celery.signals import worker_init from flask_login import user_loaded_from_request, user_logged_in # type: ignore from configs import dify_config @@ -106,7 +107,7 @@ def init_app(app: DifyApp): """Custom logging handler that creates spans for logging.exception() calls""" def emit(self, record: logging.LogRecord): - try: + with contextlib.suppress(Exception): if record.exc_info: tracer = get_tracer_provider().get_tracer("dify.exception.logging") with tracer.start_as_current_span( @@ -126,9 +127,6 @@ def init_app(app: DifyApp): if record.exc_info[0]: span.set_attribute("exception.type", record.exc_info[0].__name__) - except Exception: - pass - from opentelemetry import trace from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as GRPCMetricExporter from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCSpanExporter diff --git a/api/extensions/ext_redis.py b/api/extensions/ext_redis.py index f5f544679f..1b22886fc1 100644 --- a/api/extensions/ext_redis.py +++ b/api/extensions/ext_redis.py @@ -3,7 +3,7 @@ import logging import ssl from collections.abc import Callable from datetime import timedelta -from typing import TYPE_CHECKING, Any, Union +from typing import TYPE_CHECKING, Any, Optional, Union import redis from redis import RedisError @@ -246,7 +246,7 @@ def init_app(app: DifyApp): app.extensions["redis"] = redis_client -def redis_fallback(default_return: Any = None): +def redis_fallback(default_return: Optional[Any] = None): """ decorator to handle Redis operation exceptions and return a default value when Redis is unavailable. 
diff --git a/api/fields/annotation_fields.py b/api/fields/annotation_fields.py index 379dcc6d16..38835d5ac7 100644 --- a/api/fields/annotation_fields.py +++ b/api/fields/annotation_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from libs.helper import TimestampField @@ -11,6 +11,12 @@ annotation_fields = { # 'account': fields.Nested(simple_account_fields, allow_null=True) } + +def build_annotation_model(api_or_ns: Api | Namespace): + """Build the annotation model for the API or Namespace.""" + return api_or_ns.model("Annotation", annotation_fields) + + annotation_list_fields = { "data": fields.List(fields.Nested(annotation_fields)), } diff --git a/api/fields/api_based_extension_fields.py b/api/fields/api_based_extension_fields.py index a85d4a34db..a2dda1dc15 100644 --- a/api/fields/api_based_extension_fields.py +++ b/api/fields/api_based_extension_fields.py @@ -1,10 +1,10 @@ -from flask_restful import fields +from flask_restx import fields from libs.helper import TimestampField class HiddenAPIKey(fields.Raw): - def output(self, key, obj): + def output(self, key, obj, **kwargs): api_key = obj.api_key # If the length of the api_key is less than 8 characters, show the first and last characters if len(api_key) <= 8: diff --git a/api/fields/app_fields.py b/api/fields/app_fields.py index 1a5fcabf97..1f14d663b8 100644 --- a/api/fields/app_fields.py +++ b/api/fields/app_fields.py @@ -1,6 +1,6 @@ import json -from flask_restful import fields +from flask_restx import fields from fields.workflow_fields import workflow_partial_fields from libs.helper import AppIconUrlField, TimestampField diff --git a/api/fields/conversation_fields.py b/api/fields/conversation_fields.py index 370e8a5a58..ecc267cf38 100644 --- a/api/fields/conversation_fields.py +++ b/api/fields/conversation_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from fields.member_fields import 
simple_account_fields from libs.helper import TimestampField @@ -45,6 +45,12 @@ message_file_fields = { "upload_file_id": fields.String(default=None), } + +def build_message_file_model(api_or_ns: Api | Namespace): + """Build the message file fields for the API or Namespace.""" + return api_or_ns.model("MessageFile", message_file_fields) + + agent_thought_fields = { "id": fields.String, "chain_id": fields.String, @@ -209,3 +215,22 @@ conversation_infinite_scroll_pagination_fields = { "has_more": fields.Boolean, "data": fields.List(fields.Nested(simple_conversation_fields)), } + + +def build_conversation_infinite_scroll_pagination_model(api_or_ns: Api | Namespace): + """Build the conversation infinite scroll pagination model for the API or Namespace.""" + simple_conversation_model = build_simple_conversation_model(api_or_ns) + + copied_fields = conversation_infinite_scroll_pagination_fields.copy() + copied_fields["data"] = fields.List(fields.Nested(simple_conversation_model)) + return api_or_ns.model("ConversationInfiniteScrollPagination", copied_fields) + + +def build_conversation_delete_model(api_or_ns: Api | Namespace): + """Build the conversation delete model for the API or Namespace.""" + return api_or_ns.model("ConversationDelete", conversation_delete_fields) + + +def build_simple_conversation_model(api_or_ns: Api | Namespace): + """Build the simple conversation model for the API or Namespace.""" + return api_or_ns.model("SimpleConversation", simple_conversation_fields) diff --git a/api/fields/conversation_variable_fields.py b/api/fields/conversation_variable_fields.py index c5a0c9a49d..7d5e311591 100644 --- a/api/fields/conversation_variable_fields.py +++ b/api/fields/conversation_variable_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from libs.helper import TimestampField @@ -27,3 +27,19 @@ conversation_variable_infinite_scroll_pagination_fields = { "has_more": fields.Boolean, "data": 
fields.List(fields.Nested(conversation_variable_fields)), } + + +def build_conversation_variable_model(api_or_ns: Api | Namespace): + """Build the conversation variable model for the API or Namespace.""" + return api_or_ns.model("ConversationVariable", conversation_variable_fields) + + +def build_conversation_variable_infinite_scroll_pagination_model(api_or_ns: Api | Namespace): + """Build the conversation variable infinite scroll pagination model for the API or Namespace.""" + # Build the nested variable model first + conversation_variable_model = build_conversation_variable_model(api_or_ns) + + copied_fields = conversation_variable_infinite_scroll_pagination_fields.copy() + copied_fields["data"] = fields.List(fields.Nested(conversation_variable_model)) + + return api_or_ns.model("ConversationVariableInfiniteScrollPagination", copied_fields) diff --git a/api/fields/data_source_fields.py b/api/fields/data_source_fields.py index 071071376f..93f6e447dc 100644 --- a/api/fields/data_source_fields.py +++ b/api/fields/data_source_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from libs.helper import TimestampField diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index 32a88cc5db..5a3082516e 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from libs.helper import TimestampField diff --git a/api/fields/document_fields.py b/api/fields/document_fields.py index 7fd43e8dbe..9be59f7454 100644 --- a/api/fields/document_fields.py +++ b/api/fields/document_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from fields.dataset_fields import dataset_fields from libs.helper import TimestampField diff --git a/api/fields/end_user_fields.py b/api/fields/end_user_fields.py index 99e529f9d1..ea43e3b5fd 100644 --- a/api/fields/end_user_fields.py +++ 
b/api/fields/end_user_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields simple_end_user_fields = { "id": fields.String, @@ -6,3 +6,7 @@ simple_end_user_fields = { "is_anonymous": fields.Boolean, "session_id": fields.String, } + + +def build_simple_end_user_model(api_or_ns: Api | Namespace): + return api_or_ns.model("SimpleEndUser", simple_end_user_fields) diff --git a/api/fields/file_fields.py b/api/fields/file_fields.py index 8b4839ef97..dd359e2f5f 100644 --- a/api/fields/file_fields.py +++ b/api/fields/file_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from libs.helper import TimestampField @@ -11,6 +11,19 @@ upload_config_fields = { "workflow_file_upload_limit": fields.Integer, } + +def build_upload_config_model(api_or_ns: Api | Namespace): + """Build the upload config model for the API or Namespace. + + Args: + api_or_ns: Flask-RestX Api or Namespace instance + + Returns: + The registered model + """ + return api_or_ns.model("UploadConfig", upload_config_fields) + + file_fields = { "id": fields.String, "name": fields.String, @@ -22,12 +35,37 @@ file_fields = { "preview_url": fields.String, } + +def build_file_model(api_or_ns: Api | Namespace): + """Build the file model for the API or Namespace. + + Args: + api_or_ns: Flask-RestX Api or Namespace instance + + Returns: + The registered model + """ + return api_or_ns.model("File", file_fields) + + remote_file_info_fields = { "file_type": fields.String(attribute="file_type"), "file_length": fields.Integer(attribute="file_length"), } +def build_remote_file_info_model(api_or_ns: Api | Namespace): + """Build the remote file info model for the API or Namespace. 
+ + Args: + api_or_ns: Flask-RestX Api or Namespace instance + + Returns: + The registered model + """ + return api_or_ns.model("RemoteFileInfo", remote_file_info_fields) + + file_fields_with_signed_url = { "id": fields.String, "name": fields.String, @@ -38,3 +76,15 @@ file_fields_with_signed_url = { "created_by": fields.String, "created_at": TimestampField, } + + +def build_file_with_signed_url_model(api_or_ns: Api | Namespace): + """Build the file with signed URL model for the API or Namespace. + + Args: + api_or_ns: Flask-RestX Api or Namespace instance + + Returns: + The registered model + """ + return api_or_ns.model("FileWithSignedUrl", file_fields_with_signed_url) diff --git a/api/fields/hit_testing_fields.py b/api/fields/hit_testing_fields.py index 9d67999ea4..75bdff1803 100644 --- a/api/fields/hit_testing_fields.py +++ b/api/fields/hit_testing_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from libs.helper import TimestampField diff --git a/api/fields/installed_app_fields.py b/api/fields/installed_app_fields.py index e0b3e340f6..16dd26a10e 100644 --- a/api/fields/installed_app_fields.py +++ b/api/fields/installed_app_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from libs.helper import AppIconUrlField, TimestampField diff --git a/api/fields/member_fields.py b/api/fields/member_fields.py index 8007b7e052..08e38a6931 100644 --- a/api/fields/member_fields.py +++ b/api/fields/member_fields.py @@ -1,8 +1,17 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from libs.helper import AvatarUrlField, TimestampField -simple_account_fields = {"id": fields.String, "name": fields.String, "email": fields.String} +simple_account_fields = { + "id": fields.String, + "name": fields.String, + "email": fields.String, +} + + +def build_simple_account_model(api_or_ns: Api | Namespace): + return api_or_ns.model("SimpleAccount", simple_account_fields) + 
account_fields = { "id": fields.String, diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py index e6aebd810f..a419da2e18 100644 --- a/api/fields/message_fields.py +++ b/api/fields/message_fields.py @@ -1,11 +1,19 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from fields.conversation_fields import message_file_fields from libs.helper import TimestampField from .raws import FilesContainedField -feedback_fields = {"rating": fields.String} +feedback_fields = { + "rating": fields.String, +} + + +def build_feedback_model(api_or_ns: Api | Namespace): + """Build the feedback model for the API or Namespace.""" + return api_or_ns.model("Feedback", feedback_fields) + agent_thought_fields = { "id": fields.String, @@ -21,6 +29,12 @@ agent_thought_fields = { "files": fields.List(fields.String), } + +def build_agent_thought_model(api_or_ns: Api | Namespace): + """Build the agent thought model for the API or Namespace.""" + return api_or_ns.model("AgentThought", agent_thought_fields) + + retriever_resource_fields = { "id": fields.String, "message_id": fields.String, diff --git a/api/fields/raws.py b/api/fields/raws.py index 15ec16ab13..9bc6a12c78 100644 --- a/api/fields/raws.py +++ b/api/fields/raws.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from core.file import File diff --git a/api/fields/segment_fields.py b/api/fields/segment_fields.py index 4126c24598..2ff917d6bc 100644 --- a/api/fields/segment_fields.py +++ b/api/fields/segment_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from libs.helper import TimestampField diff --git a/api/fields/tag_fields.py b/api/fields/tag_fields.py index 9af4fc57dd..d5b7c86a04 100644 --- a/api/fields/tag_fields.py +++ b/api/fields/tag_fields.py @@ -1,3 +1,12 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields -tag_fields = {"id": fields.String, "name": fields.String, 
"type": fields.String, "binding_count": fields.String} +dataset_tag_fields = { + "id": fields.String, + "name": fields.String, + "type": fields.String, + "binding_count": fields.String, +} + + +def build_dataset_tag_fields(api_or_ns: Api | Namespace): + return api_or_ns.model("DataSetTag", dataset_tag_fields) diff --git a/api/fields/workflow_app_log_fields.py b/api/fields/workflow_app_log_fields.py index 823c99ec6b..243efd817c 100644 --- a/api/fields/workflow_app_log_fields.py +++ b/api/fields/workflow_app_log_fields.py @@ -1,8 +1,8 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields -from fields.end_user_fields import simple_end_user_fields -from fields.member_fields import simple_account_fields -from fields.workflow_run_fields import workflow_run_for_log_fields +from fields.end_user_fields import build_simple_end_user_model, simple_end_user_fields +from fields.member_fields import build_simple_account_model, simple_account_fields +from fields.workflow_run_fields import build_workflow_run_for_log_model, workflow_run_for_log_fields from libs.helper import TimestampField workflow_app_log_partial_fields = { @@ -15,6 +15,24 @@ workflow_app_log_partial_fields = { "created_at": TimestampField, } + +def build_workflow_app_log_partial_model(api_or_ns: Api | Namespace): + """Build the workflow app log partial model for the API or Namespace.""" + workflow_run_model = build_workflow_run_for_log_model(api_or_ns) + simple_account_model = build_simple_account_model(api_or_ns) + simple_end_user_model = build_simple_end_user_model(api_or_ns) + + copied_fields = workflow_app_log_partial_fields.copy() + copied_fields["workflow_run"] = fields.Nested(workflow_run_model, attribute="workflow_run", allow_null=True) + copied_fields["created_by_account"] = fields.Nested( + simple_account_model, attribute="created_by_account", allow_null=True + ) + copied_fields["created_by_end_user"] = fields.Nested( + simple_end_user_model, 
attribute="created_by_end_user", allow_null=True + ) + return api_or_ns.model("WorkflowAppLogPartial", copied_fields) + + workflow_app_log_pagination_fields = { "page": fields.Integer, "limit": fields.Integer, @@ -22,3 +40,13 @@ workflow_app_log_pagination_fields = { "has_more": fields.Boolean, "data": fields.List(fields.Nested(workflow_app_log_partial_fields)), } + + +def build_workflow_app_log_pagination_model(api_or_ns: Api | Namespace): + """Build the workflow app log pagination model for the API or Namespace.""" + # Build the nested partial model first + workflow_app_log_partial_model = build_workflow_app_log_partial_model(api_or_ns) + + copied_fields = workflow_app_log_pagination_fields.copy() + copied_fields["data"] = fields.List(fields.Nested(workflow_app_log_partial_model)) + return api_or_ns.model("WorkflowAppLogPagination", copied_fields) diff --git a/api/fields/workflow_fields.py b/api/fields/workflow_fields.py index 930e59cc1c..f048d0f3b6 100644 --- a/api/fields/workflow_fields.py +++ b/api/fields/workflow_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import fields from core.helper import encrypter from core.variables import SecretVariable, SegmentType, Variable diff --git a/api/fields/workflow_run_fields.py b/api/fields/workflow_run_fields.py index a106728e9c..6462d8ce5a 100644 --- a/api/fields/workflow_run_fields.py +++ b/api/fields/workflow_run_fields.py @@ -1,4 +1,4 @@ -from flask_restful import fields +from flask_restx import Api, Namespace, fields from fields.end_user_fields import simple_end_user_fields from fields.member_fields import simple_account_fields @@ -17,6 +17,11 @@ workflow_run_for_log_fields = { "exceptions_count": fields.Integer, } + +def build_workflow_run_for_log_model(api_or_ns: Api | Namespace): + return api_or_ns.model("WorkflowRunForLog", workflow_run_for_log_fields) + + workflow_run_for_list_fields = { "id": fields.String, "version": fields.String, diff --git a/api/libs/external_api.py 
b/api/libs/external_api.py index 2070df3e55..95d13cd0e6 100644 --- a/api/libs/external_api.py +++ b/api/libs/external_api.py @@ -1,119 +1,111 @@ import re import sys +from collections.abc import Mapping from typing import Any from flask import current_app, got_request_exception -from flask_restful import Api, http_status_message -from werkzeug.datastructures import Headers +from flask_restx import Api from werkzeug.exceptions import HTTPException +from werkzeug.http import HTTP_STATUS_CODES from core.errors.error import AppInvokeQuotaExceededError -class ExternalApi(Api): - def handle_error(self, e): - """Error handler for the API transforms a raised exception into a Flask - response, with the appropriate HTTP status code and body. +def http_status_message(code): + return HTTP_STATUS_CODES.get(code, "") - :param e: the raised Exception object - :type e: Exception - """ +def register_external_error_handlers(api: Api) -> None: + @api.errorhandler(HTTPException) + def handle_http_exception(e: HTTPException): got_request_exception.send(current_app, exception=e) - headers = Headers() - if isinstance(e, HTTPException): - if e.response is not None: - resp = e.get_response() - return resp + # If Werkzeug already prepared a Response, just use it. + if getattr(e, "response", None) is not None: + return e.response - status_code = e.code - default_data = { - "code": re.sub(r"(?= 500: - exc_info: Any = sys.exc_info() - if exc_info[1] is None: - exc_info = None - current_app.log_exception(exc_info) - - if status_code == 406 and self.default_mediatype is None: - # if we are handling NotAcceptable (406), make sure that - # make_response uses a representation we support as the - # default mediatype (so that make_response doesn't throw - # another NotAcceptable error). 
- supported_mediatypes = list(self.representations.keys()) # only supported application/json - fallback_mediatype = supported_mediatypes[0] if supported_mediatypes else "text/plain" - data = {"code": "not_acceptable", "message": data.get("message")} - resp = self.make_response(data, status_code, headers, fallback_mediatype=fallback_mediatype) + # Payload per status + if status_code == 406 and api.default_mediatype is None: + data = {"code": "not_acceptable", "message": default_data["message"], "status": status_code} + return data, status_code, headers elif status_code == 400: - if isinstance(data.get("message"), dict): - param_key, param_value = list(data.get("message", {}).items())[0] - data = {"code": "invalid_param", "message": param_value, "params": param_key} + msg = default_data["message"] + if isinstance(msg, Mapping) and msg: + # Convert param errors like {"field": "reason"} into a friendly shape + param_key, param_value = next(iter(msg.items())) + data = { + "code": "invalid_param", + "message": str(param_value), + "params": param_key, + "status": status_code, + } else: - if "code" not in data: - data["code"] = "unknown" - - resp = self.make_response(data, status_code, headers) + data = {**default_data} + data.setdefault("code", "unknown") + return data, status_code, headers else: - if "code" not in data: - data["code"] = "unknown" + data = {**default_data} + data.setdefault("code", "unknown") + # If you need WWW-Authenticate for 401, add it to headers + if status_code == 401: + headers["WWW-Authenticate"] = 'Bearer realm="api"' + return data, status_code, headers - resp = self.make_response(data, status_code, headers) + @api.errorhandler(ValueError) + def handle_value_error(e: ValueError): + got_request_exception.send(current_app, exception=e) + status_code = 400 + data = {"code": "invalid_param", "message": str(e), "status": status_code} + return data, status_code - if status_code == 401: - resp = self.unauthorized(resp) - return resp + 
@api.errorhandler(AppInvokeQuotaExceededError) + def handle_quota_exceeded(e: AppInvokeQuotaExceededError): + got_request_exception.send(current_app, exception=e) + status_code = 429 + data = {"code": "too_many_requests", "message": str(e), "status": status_code} + return data, status_code + + @api.errorhandler(Exception) + def handle_general_exception(e: Exception): + got_request_exception.send(current_app, exception=e) + + status_code = 500 + data: dict[str, Any] = getattr(e, "data", {"message": http_status_message(status_code)}) + + # 🔒 Normalize non-mapping data (e.g., if someone set e.data = Response) + if not isinstance(data, Mapping): + data = {"message": str(e)} + + data.setdefault("code", "unknown") + data.setdefault("status", status_code) + + # Log stack + exc_info: Any = sys.exc_info() + if exc_info[1] is None: + exc_info = None + current_app.log_exception(exc_info) + + return data, status_code + + +class ExternalApi(Api): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + register_external_error_handlers(self) diff --git a/api/libs/helper.py b/api/libs/helper.py index b36f972e19..70986fedd3 100644 --- a/api/libs/helper.py +++ b/api/libs/helper.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast from zoneinfo import available_timezones from flask import Response, stream_with_context -from flask_restful import fields +from flask_restx import fields from pydantic import BaseModel from configs import dify_config @@ -57,7 +57,7 @@ def run(script): class AppIconUrlField(fields.Raw): - def output(self, key, obj): + def output(self, key, obj, **kwargs): if obj is None: return None @@ -72,7 +72,7 @@ class AppIconUrlField(fields.Raw): class AvatarUrlField(fields.Raw): - def output(self, key, obj): + def output(self, key, obj, **kwargs): if obj is None: return None diff --git a/api/libs/module_loading.py b/api/libs/module_loading.py new file mode 100644 index 0000000000..616d072a1b --- /dev/null +++ 
b/api/libs/module_loading.py @@ -0,0 +1,55 @@ +""" +Module loading utilities similar to Django's module_loading. + +Reference implementation from Django: +https://github.com/django/django/blob/main/django/utils/module_loading.py +""" + +import sys +from importlib import import_module +from typing import Any + + +def cached_import(module_path: str, class_name: str) -> Any: + """ + Import a module and return the named attribute/class from it, with caching. + + Args: + module_path: The module path to import from + class_name: The attribute/class name to retrieve + + Returns: + The imported attribute/class + """ + if not ( + (module := sys.modules.get(module_path)) + and (spec := getattr(module, "__spec__", None)) + and getattr(spec, "_initializing", False) is False + ): + module = import_module(module_path) + return getattr(module, class_name) + + +def import_string(dotted_path: str) -> Any: + """ + Import a dotted module path and return the attribute/class designated by + the last name in the path. Raise ImportError if the import failed. 
+ + Args: + dotted_path: Full module path to the class (e.g., 'module.submodule.ClassName') + + Returns: + The imported class or attribute + + Raises: + ImportError: If the module or attribute cannot be imported + """ + try: + module_path, class_name = dotted_path.rsplit(".", 1) + except ValueError as err: + raise ImportError(f"{dotted_path} doesn't look like a module path") from err + + try: + return cached_import(module_path, class_name) + except AttributeError as err: + raise ImportError(f'Module "{module_path}" does not define a "{class_name}" attribute/class') from err diff --git a/api/models/task.py b/api/models/task.py index ab700c553c..9a52fcfb41 100644 --- a/api/models/task.py +++ b/api/models/task.py @@ -2,7 +2,7 @@ from datetime import datetime from typing import Optional import sqlalchemy as sa -from celery import states # type: ignore +from celery import states from sqlalchemy import DateTime, String from sqlalchemy.orm import Mapped, mapped_column diff --git a/api/models/workflow.py b/api/models/workflow.py index 66c64a0fd5..1dd2b391b9 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, Any, Optional, Union from uuid import uuid4 import sqlalchemy as sa -from flask_login import current_user from sqlalchemy import DateTime, orm from core.file.constants import maybe_file_object @@ -24,7 +23,6 @@ from core.workflow.constants import ( from core.workflow.nodes.enums import NodeType from factories.variable_factory import TypeMismatchError, build_segment_with_type from libs.datetime_utils import naive_utc_now -from libs.helper import extract_tenant_id from ._workflow_exc import NodeNotFoundError, WorkflowDataError @@ -357,8 +355,8 @@ class Workflow(Base): if self._environment_variables is None: self._environment_variables = "{}" - # Get tenant_id from current_user (Account or EndUser) - tenant_id = extract_tenant_id(current_user) + # Use workflow.tenant_id to avoid relying on request user in 
background threads + tenant_id = self.tenant_id if not tenant_id: return [] @@ -388,8 +386,8 @@ class Workflow(Base): self._environment_variables = "{}" return - # Get tenant_id from current_user (Account or EndUser) - tenant_id = extract_tenant_id(current_user) + # Use workflow.tenant_id to avoid relying on request user in background threads + tenant_id = self.tenant_id if not tenant_id: self._environment_variables = "{}" diff --git a/api/mypy.ini b/api/mypy.ini index 3a6a54afe1..44a01068e9 100644 --- a/api/mypy.ini +++ b/api/mypy.ini @@ -12,8 +12,11 @@ exclude = (?x)( [mypy-flask_login] ignore_missing_imports=True -[mypy-flask_restful] +[mypy-flask_restx] ignore_missing_imports=True -[mypy-flask_restful.inputs] +[mypy-flask_restx.api] +ignore_missing_imports=True + +[mypy-flask_restx.inputs] ignore_missing_imports=True diff --git a/api/pyproject.toml b/api/pyproject.toml index 61a725a830..6aa4746d2f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -13,13 +13,12 @@ dependencies = [ "cachetools~=5.3.0", "celery~=5.5.2", "chardet~=5.1.0", - "flask~=3.1.0", + "flask~=3.1.2", "flask-compress~=1.17", "flask-cors~=6.0.0", "flask-login~=0.6.3", "flask-migrate~=4.0.7", "flask-orjson~=2.0.0", - "flask-restful~=0.3.10", "flask-sqlalchemy~=3.1.1", "gevent~=24.11.1", "gmpy2~=2.2.1", @@ -88,6 +87,7 @@ dependencies = [ "sseclient-py>=1.8.0", "httpx-sse>=0.4.0", "sendgrid~=6.12.3", + "flask-restx>=1.3.0", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. 
@@ -110,7 +110,7 @@ dev = [ "dotenv-linter~=0.5.0", "faker~=32.1.0", "lxml-stubs~=0.5.1", - "mypy~=1.16.0", + "mypy~=1.17.1", "ruff~=0.12.3", "pytest~=8.3.2", "pytest-benchmark~=4.0.0", @@ -164,6 +164,7 @@ dev = [ "scipy-stubs>=1.15.3.0", "types-python-http-client>=3.3.7.20240910", "types-redis>=4.6.0.20241004", + "celery-types>=0.23.0", ] ############################################################ @@ -205,7 +206,7 @@ vdb = [ "pgvector==0.2.5", "pymilvus~=2.5.0", "pymochow==1.3.1", - "pyobvector~=0.1.6", + "pyobvector~=0.2.15", "qdrant-client==1.9.0", "tablestore==6.2.0", "tcvectordb~=1.6.4", diff --git a/api/repositories/factory.py b/api/repositories/factory.py index 1f0320054c..0be9c8908c 100644 --- a/api/repositories/factory.py +++ b/api/repositories/factory.py @@ -5,17 +5,14 @@ This factory is specifically designed for DifyAPI repositories that handle service-layer operations with dependency injection patterns. """ -import logging - from sqlalchemy.orm import sessionmaker from configs import dify_config from core.repositories import DifyCoreRepositoryFactory, RepositoryImportError +from libs.module_loading import import_string from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository from repositories.api_workflow_run_repository import APIWorkflowRunRepository -logger = logging.getLogger(__name__) - class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory): """ @@ -50,17 +47,9 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory): class_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY try: - repository_class = cls._import_class(class_path) - cls._validate_repository_interface(repository_class, DifyAPIWorkflowNodeExecutionRepository) - # Service repository requires session_maker parameter - cls._validate_constructor_signature(repository_class, ["session_maker"]) - + repository_class = import_string(class_path) return repository_class(session_maker=session_maker) # type: ignore[no-any-return] - except 
RepositoryImportError: - # Re-raise our custom errors as-is - raise - except Exception as e: - logger.exception("Failed to create DifyAPIWorkflowNodeExecutionRepository") + except (ImportError, Exception) as e: raise RepositoryImportError( f"Failed to create DifyAPIWorkflowNodeExecutionRepository from '{class_path}': {e}" ) from e @@ -87,15 +76,7 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory): class_path = dify_config.API_WORKFLOW_RUN_REPOSITORY try: - repository_class = cls._import_class(class_path) - cls._validate_repository_interface(repository_class, APIWorkflowRunRepository) - # Service repository requires session_maker parameter - cls._validate_constructor_signature(repository_class, ["session_maker"]) - + repository_class = import_string(class_path) return repository_class(session_maker=session_maker) # type: ignore[no-any-return] - except RepositoryImportError: - # Re-raise our custom errors as-is - raise - except Exception as e: - logger.exception("Failed to create APIWorkflowRunRepository") + except (ImportError, Exception) as e: raise RepositoryImportError(f"Failed to create APIWorkflowRunRepository from '{class_path}': {e}") from e diff --git a/api/schedule/clean_workflow_runlogs_precise.py b/api/schedule/clean_workflow_runlogs_precise.py new file mode 100644 index 0000000000..8c21be01dc --- /dev/null +++ b/api/schedule/clean_workflow_runlogs_precise.py @@ -0,0 +1,155 @@ +import datetime +import logging +import time + +import click + +import app +from configs import dify_config +from extensions.ext_database import db +from models.model import ( + AppAnnotationHitHistory, + Conversation, + Message, + MessageAgentThought, + MessageAnnotation, + MessageChain, + MessageFeedback, + MessageFile, +) +from models.workflow import ConversationVariable, WorkflowAppLog, WorkflowNodeExecutionModel, WorkflowRun + +_logger = logging.getLogger(__name__) + + +MAX_RETRIES = 3 +BATCH_SIZE = dify_config.WORKFLOW_LOG_CLEANUP_BATCH_SIZE + + 
+@app.celery.task(queue="dataset") +def clean_workflow_runlogs_precise(): + """Clean expired workflow run logs with retry mechanism and complete message cascade""" + + click.echo(click.style("Start clean workflow run logs (precise mode with complete cascade).", fg="green")) + start_at = time.perf_counter() + + retention_days = dify_config.WORKFLOW_LOG_RETENTION_DAYS + cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days) + + try: + total_workflow_runs = db.session.query(WorkflowRun).where(WorkflowRun.created_at < cutoff_date).count() + if total_workflow_runs == 0: + _logger.info("No expired workflow run logs found") + return + _logger.info("Found %s expired workflow run logs to clean", total_workflow_runs) + + total_deleted = 0 + failed_batches = 0 + batch_count = 0 + + while True: + workflow_runs = ( + db.session.query(WorkflowRun.id).where(WorkflowRun.created_at < cutoff_date).limit(BATCH_SIZE).all() + ) + + if not workflow_runs: + break + + workflow_run_ids = [run.id for run in workflow_runs] + batch_count += 1 + + success = _delete_batch_with_retry(workflow_run_ids, failed_batches) + + if success: + total_deleted += len(workflow_run_ids) + failed_batches = 0 + else: + failed_batches += 1 + if failed_batches >= MAX_RETRIES: + _logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES) + break + else: + # Calculate incremental delay times: 5, 10, 15 minutes + retry_delay_minutes = failed_batches * 5 + _logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes) + time.sleep(retry_delay_minutes * 60) + continue + + _logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted) + + except Exception as e: + db.session.rollback() + _logger.exception("Unexpected error in workflow log cleanup") + raise + + end_at = time.perf_counter() + execution_time = end_at - start_at + click.echo(click.style(f"Cleaned workflow run logs from db success latency: 
{execution_time:.2f}s", fg="green")) + + +def _delete_batch_with_retry(workflow_run_ids: list[str], attempt_count: int) -> bool: + """Delete a single batch with a retry mechanism and complete cascading deletion""" + try: + with db.session.begin_nested(): + message_data = ( + db.session.query(Message.id, Message.conversation_id) + .filter(Message.workflow_run_id.in_(workflow_run_ids)) + .all() + ) + message_id_list = [msg.id for msg in message_data] + conversation_id_list = list({msg.conversation_id for msg in message_data if msg.conversation_id}) + if message_id_list: + db.session.query(AppAnnotationHitHistory).where( + AppAnnotationHitHistory.message_id.in_(message_id_list) + ).delete(synchronize_session=False) + + db.session.query(MessageAgentThought).where(MessageAgentThought.message_id.in_(message_id_list)).delete( + synchronize_session=False + ) + + db.session.query(MessageChain).where(MessageChain.message_id.in_(message_id_list)).delete( + synchronize_session=False + ) + + db.session.query(MessageFile).where(MessageFile.message_id.in_(message_id_list)).delete( + synchronize_session=False + ) + + db.session.query(MessageAnnotation).where(MessageAnnotation.message_id.in_(message_id_list)).delete( + synchronize_session=False + ) + + db.session.query(MessageFeedback).where(MessageFeedback.message_id.in_(message_id_list)).delete( + synchronize_session=False + ) + + db.session.query(Message).where(Message.workflow_run_id.in_(workflow_run_ids)).delete( + synchronize_session=False + ) + + db.session.query(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(workflow_run_ids)).delete( + synchronize_session=False + ) + + db.session.query(WorkflowNodeExecutionModel).where( + WorkflowNodeExecutionModel.workflow_run_id.in_(workflow_run_ids) + ).delete(synchronize_session=False) + + if conversation_id_list: + db.session.query(ConversationVariable).where( + ConversationVariable.conversation_id.in_(conversation_id_list) + ).delete(synchronize_session=False) + + 
db.session.query(Conversation).where(Conversation.id.in_(conversation_id_list)).delete( + synchronize_session=False + ) + + db.session.query(WorkflowRun).where(WorkflowRun.id.in_(workflow_run_ids)).delete(synchronize_session=False) + + db.session.commit() + return True + + except Exception as e: + db.session.rollback() + _logger.exception("Batch deletion failed (attempt %s)", attempt_count + 1) + return False diff --git a/api/services/account_service.py b/api/services/account_service.py index 1cce8e67a4..0bb903fbbc 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -425,7 +425,7 @@ class AccountService: cls, account: Optional[Account] = None, email: Optional[str] = None, - language: Optional[str] = "en-US", + language: str = "en-US", ): account_email = account.email if account else email if account_email is None: @@ -452,12 +452,14 @@ class AccountService: account: Optional[Account] = None, email: Optional[str] = None, old_email: Optional[str] = None, - language: Optional[str] = "en-US", + language: str = "en-US", phase: Optional[str] = None, ): account_email = account.email if account else email if account_email is None: raise ValueError("Email must be provided.") + if not phase: + raise ValueError("phase must be provided.") if cls.change_email_rate_limiter.is_rate_limited(account_email): from controllers.console.auth.error import EmailChangeRateLimitExceededError @@ -480,7 +482,7 @@ class AccountService: cls, account: Optional[Account] = None, email: Optional[str] = None, - language: Optional[str] = "en-US", + language: str = "en-US", ): account_email = account.email if account else email if account_email is None: @@ -496,7 +498,7 @@ class AccountService: cls, account: Optional[Account] = None, email: Optional[str] = None, - language: Optional[str] = "en-US", + language: str = "en-US", workspace_name: Optional[str] = "", ): account_email = account.email if account else email @@ -509,6 +511,7 @@ class AccountService: raise 
OwnerTransferRateLimitExceededError() code, token = cls.generate_owner_transfer_token(account_email, account) + workspace_name = workspace_name or "" send_owner_transfer_confirm_task.delay( language=language, @@ -524,13 +527,14 @@ class AccountService: cls, account: Optional[Account] = None, email: Optional[str] = None, - language: Optional[str] = "en-US", + language: str = "en-US", workspace_name: Optional[str] = "", - new_owner_email: Optional[str] = "", + new_owner_email: str = "", ): account_email = account.email if account else email if account_email is None: raise ValueError("Email must be provided.") + workspace_name = workspace_name or "" send_old_owner_transfer_notify_email_task.delay( language=language, @@ -544,12 +548,13 @@ class AccountService: cls, account: Optional[Account] = None, email: Optional[str] = None, - language: Optional[str] = "en-US", + language: str = "en-US", workspace_name: Optional[str] = "", ): account_email = account.email if account else email if account_email is None: raise ValueError("Email must be provided.") + workspace_name = workspace_name or "" send_new_owner_transfer_notify_email_task.delay( language=language, @@ -633,7 +638,10 @@ class AccountService: @classmethod def send_email_code_login_email( - cls, account: Optional[Account] = None, email: Optional[str] = None, language: Optional[str] = "en-US" + cls, + account: Optional[Account] = None, + email: Optional[str] = None, + language: str = "en-US", ): email = account.email if account else email if email is None: @@ -1260,10 +1268,11 @@ class RegisterService: raise AccountAlreadyInTenantError("Account already in tenant.") token = cls.generate_invite_token(tenant, account) + language = account.interface_language or "en-US" # send email send_invite_member_mail_task.delay( - language=account.interface_language, + language=language, to=email, token=token, inviter_name=inviter.name if inviter else "Dify", diff --git a/api/services/annotation_service.py 
b/api/services/annotation_service.py index b7a047914e..45b246af1e 100644 --- a/api/services/annotation_service.py +++ b/api/services/annotation_service.py @@ -1,4 +1,3 @@ -import datetime import uuid from typing import cast @@ -10,6 +9,7 @@ from werkzeug.exceptions import NotFound from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation from services.feature_service import FeatureService from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task @@ -293,7 +293,7 @@ class AppAnnotationService: annotation_ids_to_delete = [annotation.id for annotation, _ in annotations_to_delete] # Step 2: Bulk delete hit histories in a single query - db.session.query(AppAnnotationHitHistory).filter( + db.session.query(AppAnnotationHitHistory).where( AppAnnotationHitHistory.annotation_id.in_(annotation_ids_to_delete) ).delete(synchronize_session=False) @@ -307,7 +307,7 @@ class AppAnnotationService: # Step 4: Bulk delete annotations in a single query deleted_count = ( db.session.query(MessageAnnotation) - .filter(MessageAnnotation.id.in_(annotation_ids_to_delete)) + .where(MessageAnnotation.id.in_(annotation_ids_to_delete)) .delete(synchronize_session=False) ) @@ -473,7 +473,7 @@ class AppAnnotationService: raise NotFound("App annotation not found") annotation_setting.score_threshold = args["score_threshold"] annotation_setting.updated_user_id = current_user.id - annotation_setting.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + annotation_setting.updated_at = naive_utc_now() db.session.add(annotation_setting) db.session.commit() @@ -505,9 +505,9 @@ class AppAnnotationService: db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first() ) - annotations_query = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id 
== app_id) + annotations_query = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id) for annotation in annotations_query.yield_per(100): - annotation_hit_histories_query = db.session.query(AppAnnotationHitHistory).filter( + annotation_hit_histories_query = db.session.query(AppAnnotationHitHistory).where( AppAnnotationHitHistory.annotation_id == annotation.id ) for annotation_hit_history in annotation_hit_histories_query.yield_per(100): diff --git a/api/services/billing_service.py b/api/services/billing_service.py index 476fce0057..40d45af376 100644 --- a/api/services/billing_service.py +++ b/api/services/billing_service.py @@ -123,7 +123,7 @@ class BillingService: return BillingService._send_request("GET", "/education/verify", params=params) @classmethod - def is_active(cls, account_id: str): + def status(cls, account_id: str): params = {"account_id": account_id} return BillingService._send_request("GET", "/education/status", params=params) diff --git a/api/services/conversation_service.py b/api/services/conversation_service.py index 4f3dd3c762..ac603d3cc9 100644 --- a/api/services/conversation_service.py +++ b/api/services/conversation_service.py @@ -1,3 +1,5 @@ +import contextlib +import logging from collections.abc import Callable, Sequence from typing import Any, Optional, Union @@ -22,6 +24,9 @@ from services.errors.conversation import ( LastConversationNotExistsError, ) from services.errors.message import MessageNotExistsError +from tasks.delete_conversation_task import delete_conversation_related_data + +logger = logging.getLogger(__name__) class ConversationService: @@ -142,13 +147,11 @@ class ConversationService: raise MessageNotExistsError() # generate conversation name - try: + with contextlib.suppress(Exception): name = LLMGenerator.generate_conversation_name( app_model.tenant_id, message.query, conversation.id, app_model.id ) conversation.name = name - except Exception: - pass db.session.commit() @@ -176,11 +179,21 @@ class 
ConversationService: @classmethod def delete(cls, app_model: App, conversation_id: str, user: Optional[Union[Account, EndUser]]): - conversation = cls.get_conversation(app_model, conversation_id, user) + try: + logger.info( + "Initiating conversation deletion for app_name %s, conversation_id: %s", + app_model.name, + conversation_id, + ) - conversation.is_deleted = True - conversation.updated_at = naive_utc_now() - db.session.commit() + db.session.query(Conversation).where(Conversation.id == conversation_id).delete(synchronize_session=False) + db.session.commit() + + delete_conversation_related_data.delay(conversation_id) + + except Exception as e: + db.session.rollback() + raise e @classmethod def get_conversational_variable( diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 8934608da1..fc2cbba78b 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -6,7 +6,7 @@ import secrets import time import uuid from collections import Counter -from typing import Any, Optional +from typing import Any, Literal, Optional from flask_login import current_user from sqlalchemy import func, select @@ -51,7 +51,7 @@ from services.entities.knowledge_entities.knowledge_entities import ( RetrievalModel, SegmentUpdateArgs, ) -from services.errors.account import InvalidActionError, NoPermissionError +from services.errors.account import NoPermissionError from services.errors.chunk import ChildChunkDeleteIndexError, ChildChunkIndexingError from services.errors.dataset import DatasetNameDuplicateError from services.errors.document import DocumentIndexingError @@ -250,6 +250,11 @@ class DatasetService: dataset: Optional[Dataset] = db.session.query(Dataset).filter_by(id=dataset_id).first() return dataset + @staticmethod + def check_doc_form(dataset: Dataset, doc_form: str): + if dataset.doc_form and doc_form != dataset.doc_form: + raise ValueError("doc_form is different from the dataset doc_form.") + @staticmethod def 
check_dataset_model_setting(dataset): if dataset.indexing_technique == "high_quality": @@ -1085,6 +1090,8 @@ class DocumentService: dataset_process_rule: Optional[DatasetProcessRule] = None, created_from: str = "web", ): + # check doc_form + DatasetService.check_doc_form(dataset, knowledge_config.doc_form) # check document limit features = FeatureService.get_features(current_user.current_tenant_id) @@ -1227,7 +1234,7 @@ class DocumentService: ) if document: document.dataset_process_rule_id = dataset_process_rule.id # type: ignore - document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.updated_at = naive_utc_now() document.created_from = created_from document.doc_form = knowledge_config.doc_form document.doc_language = knowledge_config.doc_language @@ -1545,7 +1552,7 @@ class DocumentService: document.parsing_completed_at = None document.cleaning_completed_at = None document.splitting_completed_at = None - document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.updated_at = naive_utc_now() document.created_from = created_from document.doc_form = document_data.doc_form db.session.add(document) @@ -1800,14 +1807,16 @@ class DocumentService: raise ValueError("Process rule segmentation max_tokens is invalid") @staticmethod - def batch_update_document_status(dataset: Dataset, document_ids: list[str], action: str, user): + def batch_update_document_status( + dataset: Dataset, document_ids: list[str], action: Literal["enable", "disable", "archive", "un_archive"], user + ): """ Batch update document status. 
Args: dataset (Dataset): The dataset object document_ids (list[str]): List of document IDs to update - action (str): Action to perform (enable, disable, archive, un_archive) + action (Literal["enable", "disable", "archive", "un_archive"]): Action to perform user: Current user performing the action Raises: @@ -1890,9 +1899,10 @@ class DocumentService: raise propagation_error @staticmethod - def _prepare_document_status_update(document, action: str, user): - """ - Prepare document status update information. + def _prepare_document_status_update( + document: Document, action: Literal["enable", "disable", "archive", "un_archive"], user + ): + """Prepare document status update information. Args: document: Document object to update @@ -1902,7 +1912,7 @@ class DocumentService: Returns: dict: Update information or None if no update needed """ - now = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + now = naive_utc_now() if action == "enable": return DocumentService._prepare_enable_update(document, now) @@ -2030,8 +2040,8 @@ class SegmentService: word_count=len(content), tokens=tokens, status="completed", - indexing_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), - completed_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + indexing_at=naive_utc_now(), + completed_at=naive_utc_now(), created_by=current_user.id, ) if document.doc_form == "qa_model": @@ -2051,7 +2061,7 @@ class SegmentService: except Exception as e: logging.exception("create segment index failed") segment_document.enabled = False - segment_document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment_document.disabled_at = naive_utc_now() segment_document.status = "error" segment_document.error = str(e) db.session.commit() @@ -2107,8 +2117,8 @@ class SegmentService: tokens=tokens, keywords=segment_item.get("keywords", []), status="completed", - indexing_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), - 
completed_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + indexing_at=naive_utc_now(), + completed_at=naive_utc_now(), created_by=current_user.id, ) if document.doc_form == "qa_model": @@ -2135,7 +2145,7 @@ class SegmentService: logging.exception("create segment index failed") for segment_document in segment_data_list: segment_document.enabled = False - segment_document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment_document.disabled_at = naive_utc_now() segment_document.status = "error" segment_document.error = str(e) db.session.commit() @@ -2152,7 +2162,7 @@ class SegmentService: if segment.enabled != action: if not action: segment.enabled = action - segment.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment.disabled_at = naive_utc_now() segment.disabled_by = current_user.id db.session.add(segment) db.session.commit() @@ -2250,10 +2260,10 @@ class SegmentService: segment.word_count = len(content) segment.tokens = tokens segment.status = "completed" - segment.indexing_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - segment.completed_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment.indexing_at = naive_utc_now() + segment.completed_at = naive_utc_now() segment.updated_by = current_user.id - segment.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment.updated_at = naive_utc_now() segment.enabled = True segment.disabled_at = None segment.disabled_by = None @@ -2306,7 +2316,7 @@ class SegmentService: except Exception as e: logging.exception("update segment index failed") segment.enabled = False - segment.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment.disabled_at = naive_utc_now() segment.status = "error" segment.error = str(e) db.session.commit() @@ -2334,13 +2344,9 @@ class SegmentService: @classmethod def delete_segments(cls, segment_ids: list, document: Document, dataset: Dataset): - # 
Check if segment_ids is not empty to avoid WHERE false condition - if not segment_ids or len(segment_ids) == 0: - return - index_node_ids = ( - db.session.query(DocumentSegment) - .with_entities(DocumentSegment.index_node_id) - .where( + segments = ( + db.session.query(DocumentSegment.index_node_id, DocumentSegment.word_count) + .filter( DocumentSegment.id.in_(segment_ids), DocumentSegment.dataset_id == dataset.id, DocumentSegment.document_id == document.id, @@ -2348,14 +2354,24 @@ class SegmentService: ) .all() ) - index_node_ids = [index_node_id[0] for index_node_id in index_node_ids] + + if not segments: + return + + index_node_ids = [seg.index_node_id for seg in segments] + total_words = sum(seg.word_count for seg in segments) + + document.word_count -= total_words + db.session.add(document) delete_segment_from_index_task.delay(index_node_ids, dataset.id, document.id) db.session.query(DocumentSegment).where(DocumentSegment.id.in_(segment_ids)).delete() db.session.commit() @classmethod - def update_segments_status(cls, segment_ids: list, action: str, dataset: Dataset, document: Document): + def update_segments_status( + cls, segment_ids: list, action: Literal["enable", "disable"], dataset: Dataset, document: Document + ): # Check if segment_ids is not empty to avoid WHERE false condition if not segment_ids or len(segment_ids) == 0: return @@ -2406,15 +2422,13 @@ class SegmentService: if cache_result is not None: continue segment.enabled = False - segment.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment.disabled_at = naive_utc_now() segment.disabled_by = current_user.id db.session.add(segment) real_deal_segment_ids.append(segment.id) db.session.commit() disable_segments_from_index_task.delay(real_deal_segment_ids, dataset.id, document.id) - else: - raise InvalidActionError() @classmethod def create_child_chunk( @@ -2498,7 +2512,7 @@ class SegmentService: child_chunk.content = child_chunk_update_args.content child_chunk.word_count = 
len(child_chunk.content) child_chunk.updated_by = current_user.id - child_chunk.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + child_chunk.updated_at = naive_utc_now() child_chunk.type = "customized" update_child_chunks.append(child_chunk) else: @@ -2555,7 +2569,7 @@ class SegmentService: child_chunk.content = content child_chunk.word_count = len(content) child_chunk.updated_by = current_user.id - child_chunk.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + child_chunk.updated_at = naive_utc_now() child_chunk.type = "customized" db.session.add(child_chunk) VectorService.update_child_chunk_vector([], [child_chunk], [], dataset) diff --git a/api/services/enterprise/mail_service.py b/api/services/enterprise/mail_service.py deleted file mode 100644 index 630e7679ac..0000000000 --- a/api/services/enterprise/mail_service.py +++ /dev/null @@ -1,18 +0,0 @@ -from pydantic import BaseModel - -from tasks.mail_enterprise_task import send_enterprise_email_task - - -class DifyMail(BaseModel): - to: list[str] - subject: str - body: str - substitutions: dict[str, str] = {} - - -class EnterpriseMailService: - @classmethod - def send_mail(cls, mail: DifyMail): - send_enterprise_email_task.delay( - to=mail.to, subject=mail.subject, body=mail.body, substitutions=mail.substitutions - ) diff --git a/api/services/file_service.py b/api/services/file_service.py index e234c2f325..4c0a0f451c 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -1,4 +1,3 @@ -import datetime import hashlib import os import uuid @@ -18,6 +17,7 @@ from core.file import helpers as file_helpers from core.rag.extractor.extract_processor import ExtractProcessor from extensions.ext_database import db from extensions.ext_storage import storage +from libs.datetime_utils import naive_utc_now from libs.helper import extract_tenant_id from models.account import Account from models.enums import CreatorUserRole @@ -80,7 +80,7 @@ class FileService: 
mime_type=mimetype, created_by_role=(CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER), created_by=user.id, - created_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + created_at=naive_utc_now(), used=False, hash=hashlib.sha3_256(content).hexdigest(), source_url=source_url, @@ -131,10 +131,10 @@ class FileService: mime_type="text/plain", created_by=current_user.id, created_by_role=CreatorUserRole.ACCOUNT, - created_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + created_at=naive_utc_now(), used=True, used_by=current_user.id, - used_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + used_at=naive_utc_now(), ) db.session.add(upload_file) diff --git a/api/services/metadata_service.py b/api/services/metadata_service.py index 2a83588f41..fd222f59d3 100644 --- a/api/services/metadata_service.py +++ b/api/services/metadata_service.py @@ -1,5 +1,4 @@ import copy -import datetime import logging from typing import Optional @@ -8,6 +7,7 @@ from flask_login import current_user from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding from services.dataset_service import DocumentService from services.entities.knowledge_entities.knowledge_entities import ( @@ -69,7 +69,7 @@ class MetadataService: old_name = metadata.name metadata.name = name metadata.updated_by = current_user.id - metadata.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + metadata.updated_at = naive_utc_now() # update related documents dataset_metadata_bindings = ( diff --git a/api/services/model_load_balancing_service.py b/api/services/model_load_balancing_service.py index fe28aa006e..f8dd70c790 100644 --- a/api/services/model_load_balancing_service.py +++ 
b/api/services/model_load_balancing_service.py @@ -1,4 +1,3 @@ -import datetime import json import logging from json import JSONDecodeError @@ -17,6 +16,7 @@ from core.model_runtime.entities.provider_entities import ( from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory from core.provider_manager import ProviderManager from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now from models.provider import LoadBalancingModelConfig logger = logging.getLogger(__name__) @@ -371,7 +371,7 @@ class ModelLoadBalancingService: load_balancing_config.name = name load_balancing_config.enabled = enabled - load_balancing_config.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + load_balancing_config.updated_at = naive_utc_now() db.session.commit() self._clear_credentials_cache(tenant_id, config_id) diff --git a/api/services/tools/tools_manage_service.py b/api/services/tools/tools_manage_service.py index 59d5b50e23..f245dd7527 100644 --- a/api/services/tools/tools_manage_service.py +++ b/api/services/tools/tools_manage_service.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from core.tools.entities.api_entities import ToolProviderTypeApiLiteral from core.tools.tool_manager import ToolManager @@ -9,7 +10,7 @@ logger = logging.getLogger(__name__) class ToolCommonService: @staticmethod - def list_tool_providers(user_id: str, tenant_id: str, typ: ToolProviderTypeApiLiteral = None): + def list_tool_providers(user_id: str, tenant_id: str, typ: Optional[ToolProviderTypeApiLiteral] = None): """ list tool providers diff --git a/api/services/webapp_auth_service.py b/api/services/webapp_auth_service.py index a9df8d0d73..8d21335c86 100644 --- a/api/services/webapp_auth_service.py +++ b/api/services/webapp_auth_service.py @@ -63,7 +63,7 @@ class WebAppAuthService: @classmethod def send_email_code_login_email( - cls, account: Optional[Account] = None, email: Optional[str] = None, language: 
Optional[str] = "en-US" + cls, account: Optional[Account] = None, email: Optional[str] = None, language: str = "en-US" ): email = account.email if account else email if email is None: diff --git a/api/services/workflow/workflow_converter.py b/api/services/workflow/workflow_converter.py index afcf1f7621..00b02f8091 100644 --- a/api/services/workflow/workflow_converter.py +++ b/api/services/workflow/workflow_converter.py @@ -402,7 +402,7 @@ class WorkflowConverter: ) role_prefix = None - prompts: Any = None + prompts: Optional[Any] = None # Chat Model if model_config.mode == LLMMode.CHAT.value: diff --git a/api/services/workflow_draft_variable_service.py b/api/services/workflow_draft_variable_service.py index b52f4924ba..9f01bcb668 100644 --- a/api/services/workflow_draft_variable_service.py +++ b/api/services/workflow_draft_variable_service.py @@ -1,5 +1,4 @@ import dataclasses -import datetime import logging from collections.abc import Mapping, Sequence from enum import StrEnum @@ -23,6 +22,7 @@ from core.workflow.nodes.variable_assigner.common.helpers import get_updated_var from core.workflow.variable_loader import VariableLoader from factories.file_factory import StorageKeyLoader from factories.variable_factory import build_segment, segment_to_variable +from libs.datetime_utils import naive_utc_now from models import App, Conversation from models.enums import DraftVariableType from models.workflow import Workflow, WorkflowDraftVariable, is_system_variable_editable @@ -231,7 +231,7 @@ class WorkflowDraftVariableService: variable.set_name(name) if value is not None: variable.set_value(value) - variable.last_edited_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + variable.last_edited_at = naive_utc_now() self._session.flush() return variable diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index c5ee4ce3f9..8834229e16 100644 --- a/api/tasks/add_document_to_index_task.py +++ 
b/api/tasks/add_document_to_index_task.py @@ -1,15 +1,15 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import DatasetAutoDisableLog, DocumentSegment from models.dataset import Document as DatasetDocument @@ -95,7 +95,7 @@ def add_document_to_index_task(dataset_document_id: str): DocumentSegment.enabled: True, DocumentSegment.disabled_at: None, DocumentSegment.disabled_by: None, - DocumentSegment.updated_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.updated_at: naive_utc_now(), } ) db.session.commit() @@ -107,7 +107,7 @@ def add_document_to_index_task(dataset_document_id: str): except Exception as e: logging.exception("add document to index failed") dataset_document.enabled = False - dataset_document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + dataset_document.disabled_at = naive_utc_now() dataset_document.indexing_status = "error" dataset_document.error = str(e) db.session.commit() diff --git a/api/tasks/annotation/add_annotation_to_index_task.py b/api/tasks/annotation/add_annotation_to_index_task.py index e436f00133..5bf8e7c33e 100644 --- a/api/tasks/annotation/add_annotation_to_index_task.py +++ b/api/tasks/annotation/add_annotation_to_index_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.datasource.vdb.vector_factory import Vector from core.rag.models.document import Document diff --git a/api/tasks/annotation/batch_import_annotations_task.py 
b/api/tasks/annotation/batch_import_annotations_task.py index 47dc3ee90e..fd33feea16 100644 --- a/api/tasks/annotation/batch_import_annotations_task.py +++ b/api/tasks/annotation/batch_import_annotations_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from werkzeug.exceptions import NotFound from core.rag.datasource.vdb.vector_factory import Vector diff --git a/api/tasks/annotation/delete_annotation_index_task.py b/api/tasks/annotation/delete_annotation_index_task.py index f016400e16..1894031a80 100644 --- a/api/tasks/annotation/delete_annotation_index_task.py +++ b/api/tasks/annotation/delete_annotation_index_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.datasource.vdb.vector_factory import Vector from extensions.ext_database import db diff --git a/api/tasks/annotation/disable_annotation_reply_task.py b/api/tasks/annotation/disable_annotation_reply_task.py index 0076113ce8..a8375dfa26 100644 --- a/api/tasks/annotation/disable_annotation_reply_task.py +++ b/api/tasks/annotation/disable_annotation_reply_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.datasource.vdb.vector_factory import Vector from extensions.ext_database import db diff --git a/api/tasks/annotation/enable_annotation_reply_task.py b/api/tasks/annotation/enable_annotation_reply_task.py index 44c65c0783..9ffaf81af6 100644 --- a/api/tasks/annotation/enable_annotation_reply_task.py +++ b/api/tasks/annotation/enable_annotation_reply_task.py @@ -1,14 +1,14 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.datasource.vdb.vector_factory import Vector from core.rag.models.document import Document 
from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset from models.model import App, AppAnnotationSetting, MessageAnnotation from services.dataset_service import DatasetCollectionBindingService @@ -72,7 +72,7 @@ def enable_annotation_reply_task( annotation_setting.score_threshold = score_threshold annotation_setting.collection_binding_id = dataset_collection_binding.id annotation_setting.updated_user_id = user_id - annotation_setting.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + annotation_setting.updated_at = naive_utc_now() db.session.add(annotation_setting) else: new_app_annotation_setting = AppAnnotationSetting( diff --git a/api/tasks/annotation/update_annotation_to_index_task.py b/api/tasks/annotation/update_annotation_to_index_task.py index 5f11d5aa00..337434b768 100644 --- a/api/tasks/annotation/update_annotation_to_index_task.py +++ b/api/tasks/annotation/update_annotation_to_index_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.datasource.vdb.vector_factory import Vector from core.rag.models.document import Document diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index e64a799146..ed47b62e1b 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.tools.utils.web_reader_tool import get_image_upload_file_ids diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index dee43cd854..50293f38a7 100644 --- a/api/tasks/batch_create_segment_to_index_task.py 
+++ b/api/tasks/batch_create_segment_to_index_task.py @@ -1,4 +1,3 @@ -import datetime import logging import tempfile import time @@ -7,7 +6,7 @@ from pathlib import Path import click import pandas as pd -from celery import shared_task # type: ignore +from celery import shared_task from sqlalchemy import func from sqlalchemy.orm import Session @@ -17,6 +16,7 @@ from extensions.ext_database import db from extensions.ext_redis import redis_client from extensions.ext_storage import storage from libs import helper +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment from models.model import UploadFile from services.vector_service import VectorService @@ -123,9 +123,9 @@ def batch_create_segment_to_index_task( word_count=len(content), tokens=tokens, created_by=user_id, - indexing_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + indexing_at=naive_utc_now(), status="completed", - completed_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + completed_at=naive_utc_now(), ) if dataset_document.doc_form == "qa_model": segment_document.answer = segment["answer"] diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py index 9a45115b05..3d3fadbd0a 100644 --- a/api/tasks/clean_dataset_task.py +++ b/api/tasks/clean_dataset_task.py @@ -2,10 +2,10 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory -from core.tools.utils.rag_web_reader import get_image_upload_file_ids +from core.tools.utils.web_reader_tool import get_image_upload_file_ids from extensions.ext_database import db from extensions.ext_storage import storage from models.dataset import ( diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py index d690106d17..c18329a9c2 100644 --- a/api/tasks/clean_document_task.py +++ 
b/api/tasks/clean_document_task.py @@ -3,10 +3,10 @@ import time from typing import Optional import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory -from core.tools.utils.rag_web_reader import get_image_upload_file_ids +from core.tools.utils.web_reader_tool import get_image_upload_file_ids from extensions.ext_database import db from extensions.ext_storage import storage from models.dataset import Dataset, DatasetMetadataBinding, DocumentSegment diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index bf1a92f038..3ad6257cda 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db diff --git a/api/tasks/create_segment_to_index_task.py b/api/tasks/create_segment_to_index_task.py index 543a512851..db2f69596d 100644 --- a/api/tasks/create_segment_to_index_task.py +++ b/api/tasks/create_segment_to_index_task.py @@ -1,15 +1,15 @@ -import datetime import logging import time from typing import Optional import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import Document from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import DocumentSegment @@ -41,7 +41,7 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] db.session.query(DocumentSegment).filter_by(id=segment.id).update( { DocumentSegment.status: "indexing", - DocumentSegment.indexing_at: 
datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.indexing_at: naive_utc_now(), } ) db.session.commit() @@ -79,7 +79,7 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] db.session.query(DocumentSegment).filter_by(id=segment.id).update( { DocumentSegment.status: "completed", - DocumentSegment.completed_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.completed_at: naive_utc_now(), } ) db.session.commit() @@ -89,7 +89,7 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] except Exception as e: logging.exception("create segment to index failed") segment.enabled = False - segment.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment.disabled_at = naive_utc_now() segment.status = "error" segment.error = str(e) db.session.commit() diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py index 8c4c1876ad..512ea1048a 100644 --- a/api/tasks/deal_dataset_vector_index_task.py +++ b/api/tasks/deal_dataset_vector_index_task.py @@ -1,8 +1,9 @@ import logging import time +from typing import Literal import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_factory import IndexProcessorFactory @@ -13,7 +14,7 @@ from models.dataset import Document as DatasetDocument @shared_task(queue="dataset") -def deal_dataset_vector_index_task(dataset_id: str, action: str): +def deal_dataset_vector_index_task(dataset_id: str, action: Literal["remove", "add", "update"]): """ Async deal dataset from index :param dataset_id: dataset_id diff --git a/api/tasks/delete_account_task.py b/api/tasks/delete_account_task.py index ef50adf8d5..29f5a2450d 100644 --- a/api/tasks/delete_account_task.py +++ b/api/tasks/delete_account_task.py @@ -1,6 +1,6 @@ import logging -from 
celery import shared_task # type: ignore +from celery import shared_task from extensions.ext_database import db from models.account import Account diff --git a/api/tasks/delete_conversation_task.py b/api/tasks/delete_conversation_task.py new file mode 100644 index 0000000000..4279dd2c17 --- /dev/null +++ b/api/tasks/delete_conversation_task.py @@ -0,0 +1,68 @@ +import logging +import time + +import click +from celery import shared_task # type: ignore + +from extensions.ext_database import db +from models import ConversationVariable +from models.model import Message, MessageAnnotation, MessageFeedback +from models.tools import ToolConversationVariables, ToolFile +from models.web import PinnedConversation + + +@shared_task(queue="conversation") +def delete_conversation_related_data(conversation_id: str) -> None: + """ + Delete related data conversation in correct order from datatbase to respect foreign key constraints + + Args: + conversation_id: conversation Id + """ + + logging.info( + click.style(f"Starting to delete conversation data from db for conversation_id {conversation_id}", fg="green") + ) + start_at = time.perf_counter() + + try: + db.session.query(MessageAnnotation).where(MessageAnnotation.conversation_id == conversation_id).delete( + synchronize_session=False + ) + + db.session.query(MessageFeedback).where(MessageFeedback.conversation_id == conversation_id).delete( + synchronize_session=False + ) + + db.session.query(ToolConversationVariables).where( + ToolConversationVariables.conversation_id == conversation_id + ).delete(synchronize_session=False) + + db.session.query(ToolFile).where(ToolFile.conversation_id == conversation_id).delete(synchronize_session=False) + + db.session.query(ConversationVariable).where(ConversationVariable.conversation_id == conversation_id).delete( + synchronize_session=False + ) + + db.session.query(Message).where(Message.conversation_id == conversation_id).delete(synchronize_session=False) + + 
db.session.query(PinnedConversation).where(PinnedConversation.conversation_id == conversation_id).delete( + synchronize_session=False + ) + + db.session.commit() + + end_at = time.perf_counter() + logging.info( + click.style( + f"Succeeded cleaning data from db for conversation_id {conversation_id} latency: {end_at - start_at}", + fg="green", + ) + ) + + except Exception as e: + logging.exception("Failed to delete data from db for conversation_id: %s failed", conversation_id) + db.session.rollback() + raise e + finally: + db.session.close() diff --git a/api/tasks/delete_segment_from_index_task.py b/api/tasks/delete_segment_from_index_task.py index da12355d23..f091085fb8 100644 --- a/api/tasks/delete_segment_from_index_task.py +++ b/api/tasks/delete_segment_from_index_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py index fa4ec15f8a..c813a9dca6 100644 --- a/api/tasks/disable_segment_from_index_task.py +++ b/api/tasks/disable_segment_from_index_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index f033f05084..252321ba83 100644 --- a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import 
IndexProcessorFactory from extensions.ext_database import db diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index 993b2ac404..4afd13eb13 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -1,14 +1,14 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.indexing_runner import DocumentIsPausedError, IndexingRunner from core.rag.extractor.notion_extractor import NotionExtractor from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment from models.source import DataSourceOauthBinding @@ -72,7 +72,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): # check the page is updated if last_edited_time != page_edited_time: document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.processing_started_at = naive_utc_now() db.session.commit() # delete all document segment and index diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py index 728db2e2dc..c414b01d0e 100644 --- a/api/tasks/document_indexing_task.py +++ b/api/tasks/document_indexing_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from configs import dify_config from core.indexing_runner import DocumentIsPausedError, IndexingRunner diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py index 053c0c5f41..31bbc8b570 100644 --- a/api/tasks/document_indexing_update_task.py +++ b/api/tasks/document_indexing_update_task.py @@ -1,13 +1,13 @@ -import datetime import logging import time 
import click -from celery import shared_task # type: ignore +from celery import shared_task from core.indexing_runner import DocumentIsPausedError, IndexingRunner from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment @@ -31,7 +31,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str): return document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.processing_started_at = naive_utc_now() db.session.commit() # delete all document segment and index diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index faa7e2b8d0..f3850b7e3b 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -1,14 +1,14 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from configs import dify_config from core.indexing_runner import DocumentIsPausedError, IndexingRunner from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment from services.feature_service import FeatureService @@ -55,7 +55,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): if document: document.indexing_status = "error" document.error = str(e) - document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.stopped_at = naive_utc_now() db.session.add(document) db.session.commit() return @@ -86,7 +86,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): db.session.commit() document.indexing_status = 
"parsing" - document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.processing_started_at = naive_utc_now() documents.append(document) db.session.add(document) db.session.commit() diff --git a/api/tasks/enable_segment_to_index_task.py b/api/tasks/enable_segment_to_index_task.py index f801c9d9ee..a4bcc043e3 100644 --- a/api/tasks/enable_segment_to_index_task.py +++ b/api/tasks/enable_segment_to_index_task.py @@ -1,15 +1,15 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import DocumentSegment @@ -89,7 +89,7 @@ def enable_segment_to_index_task(segment_id: str): except Exception as e: logging.exception("enable segment to index failed") segment.enabled = False - segment.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + segment.disabled_at = naive_utc_now() segment.status = "error" segment.error = str(e) db.session.commit() diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py index 777380631f..1db984f0d3 100644 --- a/api/tasks/enable_segments_to_index_task.py +++ b/api/tasks/enable_segments_to_index_task.py @@ -1,15 +1,15 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from 
extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, DocumentSegment from models.dataset import Document as DatasetDocument @@ -103,7 +103,7 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i { "error": str(e), "status": "error", - "disabled_at": datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + "disabled_at": naive_utc_now(), "enabled": False, } ) diff --git a/api/tasks/mail_account_deletion_task.py b/api/tasks/mail_account_deletion_task.py index 38b5ca1800..43ddbfc03b 100644 --- a/api/tasks/mail_account_deletion_task.py +++ b/api/tasks/mail_account_deletion_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from extensions.ext_mail import mail from libs.email_i18n import EmailType, get_email_i18n_service diff --git a/api/tasks/mail_change_mail_task.py b/api/tasks/mail_change_mail_task.py index 054053558d..a56109705a 100644 --- a/api/tasks/mail_change_mail_task.py +++ b/api/tasks/mail_change_mail_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from extensions.ext_mail import mail from libs.email_i18n import EmailType, get_email_i18n_service diff --git a/api/tasks/mail_email_code_login.py b/api/tasks/mail_email_code_login.py index a82ab55384..53ea3709cd 100644 --- a/api/tasks/mail_email_code_login.py +++ b/api/tasks/mail_email_code_login.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from extensions.ext_mail import mail from libs.email_i18n import EmailType, get_email_i18n_service diff --git a/api/tasks/mail_enterprise_task.py b/api/tasks/mail_inner_task.py similarity index 85% rename from api/tasks/mail_enterprise_task.py rename to api/tasks/mail_inner_task.py index 
9c80da06e5..cad4657bc8 100644 --- a/api/tasks/mail_enterprise_task.py +++ b/api/tasks/mail_inner_task.py @@ -3,7 +3,7 @@ import time from collections.abc import Mapping import click -from celery import shared_task # type: ignore +from celery import shared_task from flask import render_template_string from extensions.ext_mail import mail @@ -11,7 +11,7 @@ from libs.email_i18n import get_email_i18n_service @shared_task(queue="mail") -def send_enterprise_email_task(to: list[str], subject: str, body: str, substitutions: Mapping[str, str]): +def send_inner_email_task(to: list[str], subject: str, body: str, substitutions: Mapping[str, str]): if not mail.is_inited(): return diff --git a/api/tasks/mail_invite_member_task.py b/api/tasks/mail_invite_member_task.py index ff351f08af..f4f7f58416 100644 --- a/api/tasks/mail_invite_member_task.py +++ b/api/tasks/mail_invite_member_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from configs import dify_config from extensions.ext_mail import mail diff --git a/api/tasks/mail_owner_transfer_task.py b/api/tasks/mail_owner_transfer_task.py index 3856bf294a..db7158e786 100644 --- a/api/tasks/mail_owner_transfer_task.py +++ b/api/tasks/mail_owner_transfer_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from extensions.ext_mail import mail from libs.email_i18n import EmailType, get_email_i18n_service diff --git a/api/tasks/mail_reset_password_task.py b/api/tasks/mail_reset_password_task.py index b01af7827b..066d648530 100644 --- a/api/tasks/mail_reset_password_task.py +++ b/api/tasks/mail_reset_password_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from extensions.ext_mail import mail from libs.email_i18n import EmailType, get_email_i18n_service diff --git 
a/api/tasks/ops_trace_task.py b/api/tasks/ops_trace_task.py index c7e0047664..a4ef60b13c 100644 --- a/api/tasks/ops_trace_task.py +++ b/api/tasks/ops_trace_task.py @@ -1,7 +1,7 @@ import json import logging -from celery import shared_task # type: ignore +from celery import shared_task from flask import current_app from core.ops.entities.config_entity import OPS_FILE_PATH, OPS_TRACE_FAILED_KEY diff --git a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py index 9ea6aa6214..ec0b534546 100644 --- a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py +++ b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py @@ -2,7 +2,7 @@ import traceback import typing import click -from celery import shared_task # type: ignore +from celery import shared_task from core.helper import marketplace from core.helper.marketplace import MarketplacePluginDeclaration diff --git a/api/tasks/recover_document_indexing_task.py b/api/tasks/recover_document_indexing_task.py index ff489340cd..998fc6b32d 100644 --- a/api/tasks/recover_document_indexing_task.py +++ b/api/tasks/recover_document_indexing_task.py @@ -2,7 +2,7 @@ import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.indexing_runner import DocumentIsPausedError, IndexingRunner from extensions.ext_database import db diff --git a/api/tasks/remove_app_and_related_data_task.py b/api/tasks/remove_app_and_related_data_task.py index 828c52044f..3d623c09d1 100644 --- a/api/tasks/remove_app_and_related_data_task.py +++ b/api/tasks/remove_app_and_related_data_task.py @@ -4,7 +4,7 @@ from collections.abc import Callable import click import sqlalchemy as sa -from celery import shared_task # type: ignore +from celery import shared_task from sqlalchemy import delete from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import sessionmaker @@ -370,8 +370,8 @@ def 
delete_draft_variables_batch(app_id: str, batch_size: int = 1000) -> int: with db.engine.begin() as conn: # Get a batch of draft variable IDs query_sql = """ - SELECT id FROM workflow_draft_variables - WHERE app_id = :app_id + SELECT id FROM workflow_draft_variables + WHERE app_id = :app_id LIMIT :batch_size """ result = conn.execute(sa.text(query_sql), {"app_id": app_id, "batch_size": batch_size}) @@ -382,7 +382,7 @@ def delete_draft_variables_batch(app_id: str, batch_size: int = 1000) -> int: # Delete the batch delete_sql = """ - DELETE FROM workflow_draft_variables + DELETE FROM workflow_draft_variables WHERE id IN :ids """ deleted_result = conn.execute(sa.text(delete_sql), {"ids": tuple(draft_var_ids)}) diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index 524130a297..6356b1c46c 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -1,13 +1,13 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import Document, DocumentSegment @@ -54,9 +54,9 @@ def remove_document_from_index_task(document_id: str): db.session.query(DocumentSegment).where(DocumentSegment.document_id == document.id).update( { DocumentSegment.enabled: False, - DocumentSegment.disabled_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.disabled_at: naive_utc_now(), DocumentSegment.disabled_by: document.disabled_by, - DocumentSegment.updated_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), + DocumentSegment.updated_at: naive_utc_now(), } ) db.session.commit() diff --git a/api/tasks/retry_document_indexing_task.py 
b/api/tasks/retry_document_indexing_task.py index 26b41aff2e..67af857f40 100644 --- a/api/tasks/retry_document_indexing_task.py +++ b/api/tasks/retry_document_indexing_task.py @@ -1,14 +1,14 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.indexing_runner import IndexingRunner from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment from services.feature_service import FeatureService @@ -51,7 +51,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): if document: document.indexing_status = "error" document.error = str(e) - document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.stopped_at = naive_utc_now() db.session.add(document) db.session.commit() redis_client.delete(retry_indexing_cache_key) @@ -79,7 +79,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): db.session.commit() document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.processing_started_at = naive_utc_now() db.session.add(document) db.session.commit() @@ -89,7 +89,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): except Exception as ex: document.indexing_status = "error" document.error = str(ex) - document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.stopped_at = naive_utc_now() db.session.add(document) db.session.commit() logging.info(click.style(str(ex), fg="yellow")) diff --git a/api/tasks/sync_website_document_indexing_task.py b/api/tasks/sync_website_document_indexing_task.py index f112a97d2f..ad782f9b88 100644 --- 
a/api/tasks/sync_website_document_indexing_task.py +++ b/api/tasks/sync_website_document_indexing_task.py @@ -1,14 +1,14 @@ -import datetime import logging import time import click -from celery import shared_task # type: ignore +from celery import shared_task from core.indexing_runner import IndexingRunner from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment from services.feature_service import FeatureService @@ -46,7 +46,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): if document: document.indexing_status = "error" document.error = str(e) - document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.stopped_at = naive_utc_now() db.session.add(document) db.session.commit() redis_client.delete(sync_indexing_cache_key) @@ -72,7 +72,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): db.session.commit() document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.processing_started_at = naive_utc_now() db.session.add(document) db.session.commit() @@ -82,7 +82,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): except Exception as ex: document.indexing_status = "error" document.error = str(ex) - document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + document.stopped_at = naive_utc_now() db.session.add(document) db.session.commit() logging.info(click.style(str(ex), fg="yellow")) diff --git a/api/tasks/workflow_execution_tasks.py b/api/tasks/workflow_execution_tasks.py index 2f9fb628ca..77ddf83023 100644 --- a/api/tasks/workflow_execution_tasks.py +++ b/api/tasks/workflow_execution_tasks.py @@ -8,7 +8,7 @@ improving performance 
by offloading storage operations to background workers. import json import logging -from celery import shared_task # type: ignore[import-untyped] +from celery import shared_task from sqlalchemy import select from sqlalchemy.orm import sessionmaker diff --git a/api/tasks/workflow_node_execution_tasks.py b/api/tasks/workflow_node_execution_tasks.py index dfc8a33564..16356086cf 100644 --- a/api/tasks/workflow_node_execution_tasks.py +++ b/api/tasks/workflow_node_execution_tasks.py @@ -8,7 +8,7 @@ improving performance by offloading storage operations to background workers. import json import logging -from celery import shared_task # type: ignore[import-untyped] +from celery import shared_task from sqlalchemy import select from sqlalchemy.orm import sessionmaker diff --git a/api/tests/integration_tests/tools/__mock_server/openapi_todo.py b/api/tests/integration_tests/tools/__mock_server/openapi_todo.py index 83f4d70ce9..2f0f38e0b8 100644 --- a/api/tests/integration_tests/tools/__mock_server/openapi_todo.py +++ b/api/tests/integration_tests/tools/__mock_server/openapi_todo.py @@ -1,5 +1,5 @@ from flask import Flask, request -from flask_restful import Api, Resource +from flask_restx import Api, Resource app = Flask(__name__) api = Api(app) diff --git a/api/tests/integration_tests/vdb/__mock/baiduvectordb.py b/api/tests/integration_tests/vdb/__mock/baiduvectordb.py index 4af35a8bef..be5b4de5a2 100644 --- a/api/tests/integration_tests/vdb/__mock/baiduvectordb.py +++ b/api/tests/integration_tests/vdb/__mock/baiduvectordb.py @@ -1,5 +1,6 @@ import os from collections import UserDict +from typing import Optional from unittest.mock import MagicMock import pytest @@ -21,7 +22,7 @@ class MockBaiduVectorDBClass: def mock_vector_db_client( self, config=None, - adapter: HTTPAdapter = None, + adapter: Optional[HTTPAdapter] = None, ): self.conn = MagicMock() self._config = MagicMock() diff --git a/api/tests/integration_tests/vdb/__mock/tcvectordb.py 
b/api/tests/integration_tests/vdb/__mock/tcvectordb.py index ae5f9761b4..02f658aad6 100644 --- a/api/tests/integration_tests/vdb/__mock/tcvectordb.py +++ b/api/tests/integration_tests/vdb/__mock/tcvectordb.py @@ -23,7 +23,7 @@ class MockTcvectordbClass: key="", read_consistency: ReadConsistency = ReadConsistency.EVENTUAL_CONSISTENCY, timeout=10, - adapter: HTTPAdapter = None, + adapter: Optional[HTTPAdapter] = None, pool_size: int = 2, proxies: Optional[dict] = None, password: Optional[str] = None, @@ -72,11 +72,11 @@ class MockTcvectordbClass: shard: int, replicas: int, description: Optional[str] = None, - index: Index = None, - embedding: Embedding = None, + index: Optional[Index] = None, + embedding: Optional[Embedding] = None, timeout: Optional[float] = None, ttl_config: Optional[dict] = None, - filter_index_config: FilterIndexConfig = None, + filter_index_config: Optional[FilterIndexConfig] = None, indexes: Optional[list[IndexField]] = None, ) -> RPCCollection: return RPCCollection( @@ -113,7 +113,7 @@ class MockTcvectordbClass: database_name: str, collection_name: str, vectors: list[list[float]], - filter: Filter = None, + filter: Optional[Filter] = None, params=None, retrieve_vector: bool = False, limit: int = 10, @@ -128,7 +128,7 @@ class MockTcvectordbClass: collection_name: str, ann: Optional[Union[list[AnnSearch], AnnSearch]] = None, match: Optional[Union[list[KeywordSearch], KeywordSearch]] = None, - filter: Union[Filter, str] = None, + filter: Optional[Union[Filter, str]] = None, rerank: Optional[Rerank] = None, retrieve_vector: Optional[bool] = None, output_fields: Optional[list[str]] = None, @@ -158,7 +158,7 @@ class MockTcvectordbClass: database_name: str, collection_name: str, document_ids: Optional[list[str]] = None, - filter: Filter = None, + filter: Optional[Filter] = None, timeout: Optional[float] = None, ): return {"code": 0, "msg": "operation success"} diff --git a/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py 
b/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py index 8b57132772..21de8be6e3 100644 --- a/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py +++ b/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py @@ -1,3 +1,4 @@ +import contextlib import os import pytest @@ -44,10 +45,8 @@ class TestClickzettaVector(AbstractVectorTest): yield vector # Cleanup: delete the test collection - try: + with contextlib.suppress(Exception): vector.delete() - except Exception: - pass def test_clickzetta_vector_basic_operations(self, vector_store): """Test basic CRUD operations on Clickzetta vector store.""" diff --git a/api/tests/test_containers_integration_tests/services/test_account_service.py b/api/tests/test_containers_integration_tests/services/test_account_service.py index 3d7be0df7d..415e65ce51 100644 --- a/api/tests/test_containers_integration_tests/services/test_account_service.py +++ b/api/tests/test_containers_integration_tests/services/test_account_service.py @@ -1639,7 +1639,7 @@ class TestTenantService: email = fake.email() name = fake.name() password = fake.password(length=12) - invalid_action = fake.word() + invalid_action = "invalid_action_that_doesnt_exist" # Setup mocks mock_external_service_dependencies[ "feature_service" diff --git a/api/tests/test_containers_integration_tests/services/test_annotation_service.py b/api/tests/test_containers_integration_tests/services/test_annotation_service.py index 0ab5f398e3..92d93d601e 100644 --- a/api/tests/test_containers_integration_tests/services/test_annotation_service.py +++ b/api/tests/test_containers_integration_tests/services/test_annotation_service.py @@ -410,18 +410,18 @@ class TestAnnotationService: app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) # Create annotations with specific keywords - unique_keyword = fake.word() + unique_keyword = f"unique_{fake.uuid4()[:8]}" annotation_args = { "question": f"Question with 
{unique_keyword} keyword", "answer": f"Answer with {unique_keyword} keyword", } AppAnnotationService.insert_app_annotation_directly(annotation_args, app.id) - # Create another annotation without the keyword other_args = { - "question": "Question without keyword", - "answer": "Answer without keyword", + "question": "Different question without special term", + "answer": "Different answer without special content", } + AppAnnotationService.insert_app_annotation_directly(other_args, app.id) # Search with keyword @@ -471,7 +471,7 @@ class TestAnnotationService: # Verify annotation was deleted from extensions.ext_database import db - deleted_annotation = db.session.query(MessageAnnotation).filter(MessageAnnotation.id == annotation_id).first() + deleted_annotation = db.session.query(MessageAnnotation).where(MessageAnnotation.id == annotation_id).first() assert deleted_annotation is None # Verify delete_annotation_index_task was called (when annotation setting exists) @@ -1175,7 +1175,7 @@ class TestAnnotationService: AppAnnotationService.delete_app_annotation(app.id, annotation_id) # Verify annotation was deleted - deleted_annotation = db.session.query(MessageAnnotation).filter(MessageAnnotation.id == annotation_id).first() + deleted_annotation = db.session.query(MessageAnnotation).where(MessageAnnotation.id == annotation_id).first() assert deleted_annotation is None # Verify delete_annotation_index_task was called diff --git a/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py b/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py index 38f532fd64..6cd8337ff9 100644 --- a/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py +++ b/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py @@ -234,7 +234,7 @@ class TestAPIBasedExtensionService: # Verify extension was deleted from extensions.ext_database import db - deleted_extension = 
db.session.query(APIBasedExtension).filter(APIBasedExtension.id == extension_id).first() + deleted_extension = db.session.query(APIBasedExtension).where(APIBasedExtension.id == extension_id).first() assert deleted_extension is None def test_save_extension_duplicate_name(self, db_session_with_containers, mock_external_service_dependencies): diff --git a/api/tests/test_containers_integration_tests/services/test_feature_service.py b/api/tests/test_containers_integration_tests/services/test_feature_service.py new file mode 100644 index 0000000000..8bd5440411 --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_feature_service.py @@ -0,0 +1,1785 @@ +from unittest.mock import patch + +import pytest +from faker import Faker + +from services.feature_service import FeatureModel, FeatureService, KnowledgeRateLimitModel, SystemFeatureModel + + +class TestFeatureService: + """Integration tests for FeatureService using testcontainers.""" + + @pytest.fixture + def mock_external_service_dependencies(self): + """Mock setup for external service dependencies.""" + with ( + patch("services.feature_service.BillingService") as mock_billing_service, + patch("services.feature_service.EnterpriseService") as mock_enterprise_service, + ): + # Setup default mock returns for BillingService + mock_billing_service.get_info.return_value = { + "enabled": True, + "subscription": {"plan": "pro", "interval": "monthly", "education": True}, + "members": {"size": 5, "limit": 10}, + "apps": {"size": 3, "limit": 20}, + "vector_space": {"size": 2, "limit": 10}, + "documents_upload_quota": {"size": 15, "limit": 100}, + "annotation_quota_limit": {"size": 8, "limit": 50}, + "docs_processing": "enhanced", + "can_replace_logo": True, + "model_load_balancing_enabled": True, + "knowledge_rate_limit": {"limit": 100}, + } + + mock_billing_service.get_knowledge_rate_limit.return_value = {"limit": 100, "subscription_plan": "pro"} + + # Setup default mock returns for EnterpriseService + 
mock_enterprise_service.get_workspace_info.return_value = { + "WorkspaceMembers": {"used": 5, "limit": 10, "enabled": True} + } + + mock_enterprise_service.get_info.return_value = { + "SSOEnforcedForSignin": True, + "SSOEnforcedForSigninProtocol": "saml", + "EnableEmailCodeLogin": True, + "EnableEmailPasswordLogin": False, + "IsAllowRegister": False, + "IsAllowCreateWorkspace": False, + "Branding": { + "applicationTitle": "Test Enterprise", + "loginPageLogo": "https://example.com/logo.png", + "workspaceLogo": "https://example.com/workspace.png", + "favicon": "https://example.com/favicon.ico", + }, + "WebAppAuth": {"allowSso": True, "allowEmailCodeLogin": True, "allowEmailPasswordLogin": False}, + "SSOEnforcedForWebProtocol": "oidc", + "License": { + "status": "active", + "expiredAt": "2025-12-31", + "workspaces": {"enabled": True, "limit": 5, "used": 2}, + }, + "PluginInstallationPermission": { + "pluginInstallationScope": "official_only", + "restrictToMarketplaceOnly": True, + }, + } + + yield { + "billing_service": mock_billing_service, + "enterprise_service": mock_enterprise_service, + } + + def _create_test_tenant_id(self): + """Helper method to create a test tenant ID.""" + fake = Faker() + return fake.uuid4() + + def test_get_features_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful feature retrieval with billing and enterprise enabled. 
+ + This test verifies: + - Proper feature model creation with all required fields + - Correct integration with billing service + - Proper enterprise workspace information handling + - Return value correctness and structure + """ + # Arrange: Setup test data with proper config mocking + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = True + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = True + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = True + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify billing features + assert result.billing.enabled is True + assert result.billing.subscription.plan == "pro" + assert result.billing.subscription.interval == "monthly" + assert result.education.activated is True + + # Verify member limitations + assert result.members.size == 5 + assert result.members.limit == 10 + + # Verify app limitations + assert result.apps.size == 3 + assert result.apps.limit == 20 + + # Verify vector space limitations + assert result.vector_space.size == 2 + assert result.vector_space.limit == 10 + + # Verify document upload quota + assert result.documents_upload_quota.size == 15 + assert result.documents_upload_quota.limit == 100 + + # Verify annotation quota + assert result.annotation_quota_limit.size == 8 + assert result.annotation_quota_limit.limit == 50 + + # Verify other features + assert result.docs_processing == "enhanced" + assert result.can_replace_logo is True + assert result.model_load_balancing_enabled is True + assert result.knowledge_rate_limit == 100 + + # Verify enterprise features + assert result.workspace_members.enabled is True + assert result.workspace_members.size == 5 + assert 
result.workspace_members.limit == 10 + + # Verify webapp copyright is enabled for non-sandbox plans + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + mock_external_service_dependencies["enterprise_service"].get_workspace_info.assert_called_once_with( + tenant_id + ) + + def test_get_features_sandbox_plan(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval for sandbox plan with specific limitations. + + This test verifies: + - Proper handling of sandbox plan limitations + - Correct webapp copyright settings for sandbox + - Transfer workspace restrictions for sandbox plans + - Proper billing service integration + """ + # Arrange: Setup sandbox plan mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = False + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = False + mock_config.EDUCATION_ENABLED = False + + # Set mock return value inside the patch context + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "sandbox", "interval": "monthly", "education": False}, + "members": {"size": 1, "limit": 3}, + "apps": {"size": 1, "limit": 5}, + "vector_space": {"size": 1, "limit": 2}, + "documents_upload_quota": {"size": 5, "limit": 20}, + "annotation_quota_limit": {"size": 2, "limit": 10}, + "docs_processing": "standard", + "can_replace_logo": False, + "model_load_balancing_enabled": False, + "knowledge_rate_limit": {"limit": 10}, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify 
sandbox-specific limitations + assert result.billing.subscription.plan == "sandbox" + assert result.education.activated is False + + # Verify sandbox limitations + assert result.members.size == 1 + assert result.members.limit == 3 + assert result.apps.size == 1 + assert result.apps.limit == 5 + assert result.vector_space.size == 1 + assert result.vector_space.limit == 2 + assert result.documents_upload_quota.size == 5 + assert result.documents_upload_quota.limit == 20 + assert result.annotation_quota_limit.size == 2 + assert result.annotation_quota_limit.limit == 10 + + # Verify sandbox-specific restrictions + assert result.webapp_copyright_enabled is False + assert result.is_allow_transfer_workspace is False + assert result.can_replace_logo is False + assert result.model_load_balancing_enabled is False + assert result.docs_processing == "standard" + assert result.knowledge_rate_limit == 10 + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_get_knowledge_rate_limit_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful knowledge rate limit retrieval with billing enabled. 
+ + This test verifies: + - Proper knowledge rate limit model creation + - Correct integration with billing service + - Proper rate limit configuration + - Return value correctness and structure + """ + # Arrange: Setup test data with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + + # Act: Execute the method under test + result = FeatureService.get_knowledge_rate_limit(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, KnowledgeRateLimitModel) + + # Verify rate limit configuration + assert result.enabled is True + assert result.limit == 100 + assert result.subscription_plan == "pro" + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_knowledge_rate_limit.assert_called_once_with( + tenant_id + ) + + def test_get_system_features_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful system features retrieval with enterprise and marketplace enabled. 
+ + This test verifies: + - Proper system feature model creation + - Correct integration with enterprise service + - Proper marketplace configuration + - Return value correctness and structure + """ + # Arrange: Setup test data with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = True + mock_config.ENABLE_EMAIL_CODE_LOGIN = True + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify enterprise features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert result.enable_change_email is False + + # Verify SSO configuration + assert result.sso_enforced_for_signin is True + assert result.sso_enforced_for_signin_protocol == "saml" + + # Verify authentication settings + assert result.enable_email_code_login is True + assert result.enable_email_password_login is False + assert result.is_allow_register is False + assert result.is_allow_create_workspace is False + + # Verify branding configuration + assert result.branding.application_title == "Test Enterprise" + assert result.branding.login_page_logo == "https://example.com/logo.png" + assert result.branding.workspace_logo == "https://example.com/workspace.png" + assert result.branding.favicon == "https://example.com/favicon.ico" + + # Verify webapp auth configuration + assert result.webapp_auth.allow_sso is True + assert result.webapp_auth.allow_email_code_login is True + assert 
result.webapp_auth.allow_email_password_login is False + assert result.webapp_auth.sso_config.protocol == "oidc" + + # Verify license configuration + assert result.license.status.value == "active" + assert result.license.expired_at == "2025-12-31" + assert result.license.workspaces.enabled is True + assert result.license.workspaces.limit == 5 + assert result.license.workspaces.size == 2 + + # Verify plugin installation permission + assert result.plugin_installation_permission.plugin_installation_scope == "official_only" + assert result.plugin_installation_permission.restrict_to_marketplace_only is True + + # Verify marketplace configuration + assert result.enable_marketplace is True + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_system_features_basic_config(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test system features retrieval with basic configuration (no enterprise). 
+ + This test verifies: + - Proper system feature model creation without enterprise + - Correct environment variable handling + - Default configuration values + - Return value correctness and structure + """ + # Arrange: Setup basic config mock (no enterprise) + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = False + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = True + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = True + mock_config.ALLOW_CREATE_WORKSPACE = True + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify basic configuration + assert result.branding.enabled is False + assert result.webapp_auth.enabled is False + assert result.enable_change_email is True + + # Verify authentication settings from config + assert result.enable_email_code_login is True + assert result.enable_email_password_login is True + assert result.enable_social_oauth_login is False + assert result.is_allow_register is True + assert result.is_allow_create_workspace is True + assert result.is_email_setup is True + + # Verify marketplace configuration + assert result.enable_marketplace is False + + # Verify plugin package size (uses default value from dify_config) + assert result.max_plugin_package_size == 15728640 + + def test_get_features_billing_disabled(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval when billing is disabled. 
+ + This test verifies: + - Proper feature model creation without billing + - Correct environment variable handling + - Default configuration values + - Return value correctness and structure + """ + # Arrange: Setup billing disabled mock + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = False + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = True + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = True + + tenant_id = self._create_test_tenant_id() + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify billing is disabled + assert result.billing.enabled is False + + # Verify environment-based features + assert result.can_replace_logo is True + assert result.model_load_balancing_enabled is True + assert result.dataset_operator_enabled is True + assert result.education.enabled is True + + # Verify default limitations + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 50 + assert result.annotation_quota_limit.size == 0 + assert result.annotation_quota_limit.limit == 10 + assert result.knowledge_rate_limit == 10 + assert result.docs_processing == "standard" + + # Verify no enterprise features + assert result.workspace_members.enabled is False + assert result.webapp_copyright_enabled is False + + def test_get_knowledge_rate_limit_billing_disabled( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test knowledge rate limit retrieval when billing is disabled. 
+ + This test verifies: + - Proper knowledge rate limit model creation without billing + - Default rate limit configuration + - Return value correctness and structure + """ + # Arrange: Setup billing disabled mock + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = False + + tenant_id = self._create_test_tenant_id() + + # Act: Execute the method under test + result = FeatureService.get_knowledge_rate_limit(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, KnowledgeRateLimitModel) + + # Verify default configuration + assert result.enabled is False + assert result.limit == 10 + assert result.subscription_plan == "" # Empty string when billing is disabled + + # Verify no billing service calls + mock_external_service_dependencies["billing_service"].get_knowledge_rate_limit.assert_not_called() + + def test_get_features_enterprise_only(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval with enterprise enabled but billing disabled. 
+ + This test verifies: + - Proper feature model creation with enterprise only + - Correct enterprise service integration + - Proper workspace member handling + - Return value correctness and structure + """ + # Arrange: Setup enterprise only mock + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = False + mock_config.ENTERPRISE_ENABLED = True + mock_config.CAN_REPLACE_LOGO = False + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = False + mock_config.EDUCATION_ENABLED = False + + tenant_id = self._create_test_tenant_id() + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify billing is disabled + assert result.billing.enabled is False + + # Verify enterprise features + assert result.webapp_copyright_enabled is True + + # Verify workspace members from enterprise + assert result.workspace_members.enabled is True + assert result.workspace_members.size == 5 + assert result.workspace_members.limit == 10 + + # Verify environment-based features + assert result.can_replace_logo is False + assert result.model_load_balancing_enabled is False + assert result.dataset_operator_enabled is False + assert result.education.enabled is False + + # Verify default limitations + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_workspace_info.assert_called_once_with( + tenant_id + ) + mock_external_service_dependencies["billing_service"].get_info.assert_not_called() + + def test_get_system_features_enterprise_disabled( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + 
Test system features retrieval when enterprise is disabled. + + This test verifies: + - Proper system feature model creation without enterprise + - Correct environment variable handling + - Default configuration values + - Return value correctness and structure + """ + # Arrange: Setup enterprise disabled mock + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = False + mock_config.MARKETPLACE_ENABLED = True + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = True + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = None + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 50 + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify enterprise features are disabled + assert result.branding.enabled is False + assert result.webapp_auth.enabled is False + assert result.enable_change_email is True + + # Verify authentication settings from config + assert result.enable_email_code_login is False + assert result.enable_email_password_login is True + assert result.enable_social_oauth_login is True + assert result.is_allow_register is False + assert result.is_allow_create_workspace is False + assert result.is_email_setup is False + + # Verify marketplace configuration + assert result.enable_marketplace is True + + # Verify plugin package size (uses default value from dify_config) + assert result.max_plugin_package_size == 15728640 + + # Verify default license status + assert result.license.status.value == "none" + assert result.license.expired_at == "" + assert result.license.workspaces.enabled is False + + # Verify no enterprise service calls + mock_external_service_dependencies["enterprise_service"].get_info.assert_not_called() + + def 
test_get_features_no_tenant_id(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval without tenant ID (billing disabled). + + This test verifies: + - Proper feature model creation without tenant ID + - Correct handling when billing is disabled + - Default configuration values + - Return value correctness and structure + """ + # Arrange: Setup no tenant ID scenario + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + # Act: Execute the method under test + result = FeatureService.get_features("") + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify billing is disabled due to no tenant ID + assert result.billing.enabled is False + + # Verify environment-based features + assert result.can_replace_logo is True + assert result.model_load_balancing_enabled is False + assert result.dataset_operator_enabled is True + assert result.education.enabled is False + + # Verify default limitations + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + + # Verify no billing service calls + mock_external_service_dependencies["billing_service"].get_info.assert_not_called() + + def test_get_features_partial_billing_info(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval with partial billing information. 
+ + This test verifies: + - Proper handling of partial billing data + - Correct fallback to default values + - Proper billing service integration + - Return value correctness and structure + """ + # Arrange: Setup partial billing info mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "basic", "interval": "yearly"}, + # Missing members, apps, vector_space, etc. + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify billing features + assert result.billing.enabled is True + assert result.billing.subscription.plan == "basic" + assert result.billing.subscription.interval == "yearly" + + # Verify default values for missing billing info + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 50 + assert result.annotation_quota_limit.size == 0 + assert result.annotation_quota_limit.limit == 10 + assert result.knowledge_rate_limit == 10 + assert result.docs_processing == "standard" + + # Verify basic plan restrictions (non-sandbox plans have webapp copyright enabled) + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify mock interactions + 
mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_get_features_edge_case_vector_space(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval with edge case vector space configuration. + + This test verifies: + - Proper handling of vector space quota limits + - Correct integration with billing service + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case vector space mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "pro", "interval": "monthly"}, + "vector_space": {"size": 0, "limit": 0}, + "apps": {"size": 5, "limit": 10}, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify vector space configuration + assert result.vector_space.size == 0 + assert result.vector_space.limit == 0 + + # Verify apps configuration + assert result.apps.size == 5 + assert result.apps.limit == 10 + + # Verify pro plan features + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify default values for missing billing info + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 50 + assert result.annotation_quota_limit.size == 0 + assert 
result.annotation_quota_limit.limit == 10 + assert result.knowledge_rate_limit == 10 + assert result.docs_processing == "standard" + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_get_system_features_edge_case_webapp_auth( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test system features retrieval with edge case webapp auth configuration. + + This test verifies: + - Proper handling of webapp auth configuration + - Correct enterprise service integration + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case webapp auth mock with proper config + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "WebAppAuth": {"allowSso": False, "allowEmailCodeLogin": True, "allowEmailPasswordLogin": False} + } + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify webapp auth configuration + assert result.webapp_auth.allow_sso is False + assert result.webapp_auth.allow_email_code_login is True + assert result.webapp_auth.allow_email_password_login is False + assert result.webapp_auth.sso_config.protocol == "" + + # Verify enterprise features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert 
result.enable_change_email is False + + # Verify default values for missing enterprise info + assert result.sso_enforced_for_signin is False + assert result.sso_enforced_for_signin_protocol == "" + assert result.enable_email_code_login is False + assert result.enable_email_password_login is True + assert result.is_allow_register is False + assert result.is_allow_create_workspace is False + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_features_edge_case_members_quota(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval with edge case members quota configuration. + + This test verifies: + - Proper handling of members quota limits + - Correct integration with billing service + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case members quota mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "basic", "interval": "yearly"}, + "members": {"size": 10, "limit": 10}, + "vector_space": {"size": 3, "limit": 5}, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify members configuration + assert result.members.size == 10 + assert result.members.limit == 10 + + # Verify vector space configuration + assert result.vector_space.size == 3 + assert 
result.vector_space.limit == 5 + + # Verify basic plan features (non-sandbox plans have webapp copyright enabled) + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify default values for missing billing info + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 50 + assert result.annotation_quota_limit.size == 0 + assert result.annotation_quota_limit.limit == 10 + assert result.knowledge_rate_limit == 10 + assert result.docs_processing == "standard" + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_plugin_installation_permission_scopes( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test system features retrieval with different plugin installation permission scopes. + + This test verifies: + - Proper handling of different plugin installation scopes + - Correct enterprise service integration + - Proper permission configuration + - Return value correctness and structure + """ + + # Test case 1: Official only scope + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "PluginInstallationPermission": { + "pluginInstallationScope": "official_only", + "restrictToMarketplaceOnly": True, + } + } + + result = FeatureService.get_system_features() + assert 
result.plugin_installation_permission.plugin_installation_scope == "official_only" + assert result.plugin_installation_permission.restrict_to_marketplace_only is True + + # Test case 2: All plugins scope + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "PluginInstallationPermission": {"pluginInstallationScope": "all", "restrictToMarketplaceOnly": False} + } + + result = FeatureService.get_system_features() + assert result.plugin_installation_permission.plugin_installation_scope == "all" + assert result.plugin_installation_permission.restrict_to_marketplace_only is False + + # Test case 3: Specific partners scope + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "PluginInstallationPermission": { + "pluginInstallationScope": "official_and_specific_partners", + "restrictToMarketplaceOnly": False, + } + } + + result = FeatureService.get_system_features() + assert result.plugin_installation_permission.plugin_installation_scope == "official_and_specific_partners" + assert 
result.plugin_installation_permission.restrict_to_marketplace_only is False + + # Test case 4: None scope + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "PluginInstallationPermission": {"pluginInstallationScope": "none", "restrictToMarketplaceOnly": True} + } + + result = FeatureService.get_system_features() + assert result.plugin_installation_permission.plugin_installation_scope == "none" + assert result.plugin_installation_permission.restrict_to_marketplace_only is True + + def test_get_features_workspace_members_missing( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test feature retrieval when workspace members info is missing from enterprise. 
+ + This test verifies: + - Proper handling of missing workspace members data + - Correct enterprise service integration + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup missing workspace members mock + tenant_id = self._create_test_tenant_id() + mock_external_service_dependencies["enterprise_service"].get_workspace_info.return_value = { + # Missing WorkspaceMembers key + } + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = False + mock_config.ENTERPRISE_ENABLED = True + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify workspace members use default values + assert result.workspace_members.enabled is False + assert result.workspace_members.size == 0 + assert result.workspace_members.limit == 0 + + # Verify enterprise features + assert result.webapp_copyright_enabled is True + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_workspace_info.assert_called_once_with( + tenant_id + ) + + def test_get_system_features_license_inactive(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test system features retrieval with inactive license. 
+ + This test verifies: + - Proper handling of inactive license status + - Correct enterprise service integration + - Proper license status handling + - Return value correctness and structure + """ + # Arrange: Setup inactive license mock with proper config + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "License": { + "status": "inactive", + "expiredAt": "", + "workspaces": {"enabled": False, "limit": 0, "used": 0}, + } + } + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify license status + assert result.license.status == "inactive" + assert result.license.expired_at == "" + assert result.license.workspaces.enabled is False + assert result.license.workspaces.size == 0 + assert result.license.workspaces.limit == 0 + + # Verify enterprise features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert result.enable_change_email is False + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_system_features_partial_enterprise_info( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test system features retrieval with partial enterprise information. 
+ + This test verifies: + - Proper handling of partial enterprise data + - Correct fallback to default values + - Proper enterprise service integration + - Return value correctness and structure + """ + # Arrange: Setup partial enterprise info mock with proper config + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "SSOEnforcedForSignin": True, + "Branding": {"applicationTitle": "Partial Enterprise"}, + # Missing WebAppAuth, License, PluginInstallationPermission, etc. + } + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify enterprise features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert result.enable_change_email is False + + # Verify SSO configuration + assert result.sso_enforced_for_signin is True + assert result.sso_enforced_for_signin_protocol == "" + + # Verify branding configuration (partial) + assert result.branding.application_title == "Partial Enterprise" + assert result.branding.login_page_logo == "" + assert result.branding.workspace_logo == "" + assert result.branding.favicon == "" + + # Verify default values for missing enterprise info + assert result.webapp_auth.allow_sso is False + assert result.webapp_auth.allow_email_code_login is False + assert result.webapp_auth.allow_email_password_login is False + assert result.webapp_auth.sso_config.protocol == 
"" + + # Verify default license status + assert result.license.status == "none" + assert result.license.expired_at == "" + assert result.license.workspaces.enabled is False + + # Verify default plugin installation permission + assert result.plugin_installation_permission.plugin_installation_scope == "all" + assert result.plugin_installation_permission.restrict_to_marketplace_only is False + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_features_edge_case_limits(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval with edge case limit values. + + This test verifies: + - Proper handling of zero and negative limits + - Correct handling of very large limits + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case limits mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "enterprise", "interval": "yearly"}, + "members": {"size": 0, "limit": 0}, + "apps": {"size": 0, "limit": -1}, + "vector_space": {"size": 0, "limit": 999999}, + "documents_upload_quota": {"size": 0, "limit": 0}, + "annotation_quota_limit": {"size": 0, "limit": 1}, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify edge case limits + assert result.members.size == 0 + assert result.members.limit 
== 0 + assert result.apps.size == 0 + assert result.apps.limit == -1 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 999999 + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 0 + assert result.annotation_quota_limit.size == 0 + assert result.annotation_quota_limit.limit == 1 + + # Verify enterprise plan features + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_get_system_features_edge_case_protocols( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test system features retrieval with edge case protocol values. + + This test verifies: + - Proper handling of empty protocol strings + - Correct handling of special protocol values + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case protocols mock with proper config + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "SSOEnforcedForSigninProtocol": "", + "SSOEnforcedForWebProtocol": " ", + "WebAppAuth": {"allowSso": True, "allowEmailCodeLogin": False, "allowEmailPasswordLogin": True}, + } + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, 
SystemFeatureModel) + + # Verify edge case protocols + assert result.sso_enforced_for_signin_protocol == "" + assert result.webapp_auth.sso_config.protocol == " " + + # Verify webapp auth configuration + assert result.webapp_auth.allow_sso is True + assert result.webapp_auth.allow_email_code_login is False + assert result.webapp_auth.allow_email_password_login is True + + # Verify enterprise features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert result.enable_change_email is False + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_features_edge_case_education(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test feature retrieval with edge case education configuration. + + This test verifies: + - Proper handling of education feature flags + - Correct integration with billing service + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case education mock + tenant_id = self._create_test_tenant_id() + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "education", "interval": "semester", "education": True}, + "members": {"size": 100, "limit": 200}, + "apps": {"size": 50, "limit": 100}, + "vector_space": {"size": 20, "limit": 50}, + "documents_upload_quota": {"size": 500, "limit": 1000}, + "annotation_quota_limit": {"size": 200, "limit": 500}, + } + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.EDUCATION_ENABLED = True + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify education features + assert result.education.enabled is True + assert result.education.activated is True + + # 
Verify education plan limits + assert result.members.size == 100 + assert result.members.limit == 200 + assert result.apps.size == 50 + assert result.apps.limit == 100 + assert result.vector_space.size == 20 + assert result.vector_space.limit == 50 + assert result.documents_upload_quota.size == 500 + assert result.documents_upload_quota.limit == 1000 + assert result.annotation_quota_limit.size == 200 + assert result.annotation_quota_limit.limit == 500 + + # Verify education plan features + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_license_limitation_model_is_available( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test LicenseLimitationModel.is_available method with various scenarios. + + This test verifies: + - Proper quota availability calculation + - Correct handling of unlimited limits + - Proper handling of disabled limits + - Return value correctness for different scenarios + """ + from services.feature_service import LicenseLimitationModel + + # Test case 1: Limit disabled + disabled_limit = LicenseLimitationModel(enabled=False, size=5, limit=10) + assert disabled_limit.is_available(3) is True + assert disabled_limit.is_available(10) is True + + # Test case 2: Unlimited limit + unlimited_limit = LicenseLimitationModel(enabled=True, size=5, limit=0) + assert unlimited_limit.is_available(3) is True + assert unlimited_limit.is_available(100) is True + + # Test case 3: Available quota + available_limit = LicenseLimitationModel(enabled=True, size=5, limit=10) + assert available_limit.is_available(3) is True + assert available_limit.is_available(5) is True + assert available_limit.is_available(1) is True + + # Test case 4: Insufficient quota + insufficient_limit = LicenseLimitationModel(enabled=True, size=8, limit=10) + assert 
insufficient_limit.is_available(3) is False + assert insufficient_limit.is_available(2) is True + assert insufficient_limit.is_available(1) is True + + # Test case 5: Exact quota usage + exact_limit = LicenseLimitationModel(enabled=True, size=7, limit=10) + assert exact_limit.is_available(3) is True + assert exact_limit.is_available(3) is True + + def test_get_features_workspace_members_disabled( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test feature retrieval when workspace members are disabled in enterprise. + + This test verifies: + - Proper handling of disabled workspace members + - Correct enterprise service integration + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup workspace members disabled mock + tenant_id = self._create_test_tenant_id() + mock_external_service_dependencies["enterprise_service"].get_workspace_info.return_value = { + "WorkspaceMembers": {"used": 0, "limit": 0, "enabled": False} + } + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = False + mock_config.ENTERPRISE_ENABLED = True + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify workspace members are disabled + assert result.workspace_members.enabled is False + assert result.workspace_members.size == 0 + assert result.workspace_members.limit == 0 + + # Verify enterprise features + assert result.webapp_copyright_enabled is True + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_workspace_info.assert_called_once_with(tenant_id) + + def test_get_system_features_license_expired(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test system features retrieval with expired license. 
+ + This test verifies: + - Proper handling of expired license status + - Correct enterprise service integration + - Proper license status handling + - Return value correctness and structure + """ + # Arrange: Setup expired license mock with proper config + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "License": { + "status": "expired", + "expiredAt": "2023-12-31", + "workspaces": {"enabled": False, "limit": 0, "used": 0}, + } + } + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify license status + assert result.license.status == "expired" + assert result.license.expired_at == "2023-12-31" + assert result.license.workspaces.enabled is False + assert result.license.workspaces.size == 0 + assert result.license.workspaces.limit == 0 + + # Verify enterprise features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert result.enable_change_email is False + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_features_edge_case_docs_processing( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test feature retrieval with edge case document processing configuration. 
+ + This test verifies: + - Proper handling of different document processing modes + - Correct integration with billing service + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case docs processing mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = True + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "premium", "interval": "monthly"}, + "docs_processing": "advanced", + "can_replace_logo": True, + "model_load_balancing_enabled": True, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify docs processing configuration + assert result.docs_processing == "advanced" + assert result.can_replace_logo is True + assert result.model_load_balancing_enabled is True + + # Verify premium plan features + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify default limitations (no specific billing info) + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_get_system_features_edge_case_branding( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test system features 
retrieval with edge case branding configuration. + + This test verifies: + - Proper handling of partial branding information + - Correct enterprise service integration + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case branding mock with proper config + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "Branding": { + "applicationTitle": "Edge Case App", + "loginPageLogo": None, + "workspaceLogo": "", + "favicon": "https://example.com/favicon.ico", + } + } + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify branding configuration (edge cases) + assert result.branding.application_title == "Edge Case App" + assert result.branding.login_page_logo is None # None value from mock + assert result.branding.workspace_logo == "" + assert result.branding.favicon == "https://example.com/favicon.ico" + + # Verify enterprise features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert result.enable_change_email is False + + # Verify default values for missing enterprise info + assert result.sso_enforced_for_signin is False + assert result.sso_enforced_for_signin_protocol == "" + assert result.enable_email_code_login is False + assert result.enable_email_password_login is True + assert result.is_allow_register is False + assert 
result.is_allow_create_workspace is False + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_features_edge_case_annotation_quota( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test feature retrieval with edge case annotation quota configuration. + + This test verifies: + - Proper handling of annotation quota limits + - Correct integration with billing service + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case annotation quota mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "enterprise", "interval": "yearly"}, + "annotation_quota_limit": {"size": 999, "limit": 1000}, + "knowledge_rate_limit": {"limit": 500}, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify annotation quota configuration + assert result.annotation_quota_limit.size == 999 + assert result.annotation_quota_limit.limit == 1000 + + # Verify knowledge rate limit + assert result.knowledge_rate_limit == 500 + + # Verify enterprise plan features + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify default values for missing billing info + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + 
assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 50 + assert result.docs_processing == "standard" + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_get_features_edge_case_documents_upload( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test feature retrieval with edge case documents upload settings. + + This test verifies: + - Proper handling of edge case documents upload configuration + - Correct integration with billing service + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup edge case documents upload mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": {"plan": "pro", "interval": "monthly"}, + "documents_upload_quota": { + "size": 0, # Edge case: zero current size + "limit": 0, # Edge case: zero limit + }, + "knowledge_rate_limit": {"limit": 100}, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify documents upload quota configuration (edge cases) + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 0 + + # Verify knowledge rate limit + assert result.knowledge_rate_limit == 
100 + + # Verify pro plan features + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify default values for missing billing info + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + assert result.annotation_quota_limit.size == 0 + assert result.annotation_quota_limit.limit == 10 # Default value when not provided + assert result.docs_processing == "standard" + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) + + def test_get_system_features_edge_case_license_lost( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test system features with lost license status. + + This test verifies: + - Proper handling of lost license status + - Correct enterprise service integration + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup lost license mock with proper config + with patch("services.feature_service.dify_config") as mock_config: + mock_config.ENTERPRISE_ENABLED = True + mock_config.MARKETPLACE_ENABLED = False + mock_config.ENABLE_EMAIL_CODE_LOGIN = False + mock_config.ENABLE_EMAIL_PASSWORD_LOGIN = True + mock_config.ENABLE_SOCIAL_OAUTH_LOGIN = False + mock_config.ALLOW_REGISTER = False + mock_config.ALLOW_CREATE_WORKSPACE = False + mock_config.MAIL_TYPE = "smtp" + mock_config.PLUGIN_MAX_PACKAGE_SIZE = 100 + + mock_external_service_dependencies["enterprise_service"].get_info.return_value = { + "license": {"status": "lost", "expired_at": None, "plan": None} + } + + # Act: Execute the method under test + result = FeatureService.get_system_features() + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, SystemFeatureModel) + + # Verify enterprise 
features + assert result.branding.enabled is True + assert result.webapp_auth.enabled is True + assert result.enable_change_email is False + + # Verify default values for missing enterprise info + assert result.sso_enforced_for_signin is False + assert result.sso_enforced_for_signin_protocol == "" + assert result.enable_email_code_login is False + assert result.enable_email_password_login is True + assert result.is_allow_register is False + assert result.is_allow_create_workspace is False + + # Verify mock interactions + mock_external_service_dependencies["enterprise_service"].get_info.assert_called_once() + + def test_get_features_edge_case_education_disabled( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test feature retrieval with education feature disabled. + + This test verifies: + - Proper handling of disabled education features + - Correct integration with billing service + - Proper fallback to default values + - Return value correctness and structure + """ + # Arrange: Setup education disabled mock with proper config + tenant_id = self._create_test_tenant_id() + + with patch("services.feature_service.dify_config") as mock_config: + mock_config.BILLING_ENABLED = True + mock_config.ENTERPRISE_ENABLED = False + mock_config.CAN_REPLACE_LOGO = True + mock_config.MODEL_LB_ENABLED = False + mock_config.DATASET_OPERATOR_ENABLED = True + mock_config.EDUCATION_ENABLED = False + + mock_external_service_dependencies["billing_service"].get_info.return_value = { + "enabled": True, + "subscription": { + "plan": "pro", + "interval": "monthly", + "education": False, # Education explicitly disabled + }, + "knowledge_rate_limit": {"limit": 100}, + } + + # Act: Execute the method under test + result = FeatureService.get_features(tenant_id) + + # Assert: Verify the expected outcomes + assert result is not None + assert isinstance(result, FeatureModel) + + # Verify education configuration + assert result.education.activated is False + + # 
Verify knowledge rate limit + assert result.knowledge_rate_limit == 100 + + # Verify pro plan features + assert result.webapp_copyright_enabled is True + assert result.is_allow_transfer_workspace is True + + # Verify default values for missing billing info + assert result.members.size == 0 + assert result.members.limit == 1 + assert result.apps.size == 0 + assert result.apps.limit == 10 + assert result.vector_space.size == 0 + assert result.vector_space.limit == 5 + assert result.documents_upload_quota.size == 0 + assert result.documents_upload_quota.limit == 50 + assert result.annotation_quota_limit.size == 0 + assert result.annotation_quota_limit.limit == 10 # Default value when not provided + assert result.docs_processing == "standard" + + # Verify mock interactions + mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(tenant_id) diff --git a/api/tests/test_containers_integration_tests/services/test_message_service.py b/api/tests/test_containers_integration_tests/services/test_message_service.py index 25ba0d03ef..ece6de6cdf 100644 --- a/api/tests/test_containers_integration_tests/services/test_message_service.py +++ b/api/tests/test_containers_integration_tests/services/test_message_service.py @@ -484,7 +484,7 @@ class TestMessageService: # Verify feedback was deleted from extensions.ext_database import db - deleted_feedback = db.session.query(MessageFeedback).filter(MessageFeedback.id == feedback.id).first() + deleted_feedback = db.session.query(MessageFeedback).where(MessageFeedback.id == feedback.id).first() assert deleted_feedback is None def test_create_feedback_no_rating_when_not_exists( diff --git a/api/tests/test_containers_integration_tests/services/test_model_load_balancing_service.py b/api/tests/test_containers_integration_tests/services/test_model_load_balancing_service.py index a8a36b2565..cb20238f0c 100644 --- a/api/tests/test_containers_integration_tests/services/test_model_load_balancing_service.py +++ 
b/api/tests/test_containers_integration_tests/services/test_model_load_balancing_service.py @@ -469,6 +469,6 @@ class TestModelLoadBalancingService: # Verify inherit config was created in database inherit_configs = ( - db.session.query(LoadBalancingModelConfig).filter(LoadBalancingModelConfig.name == "__inherit__").all() + db.session.query(LoadBalancingModelConfig).where(LoadBalancingModelConfig.name == "__inherit__").all() ) assert len(inherit_configs) == 1 diff --git a/api/tests/test_containers_integration_tests/services/test_model_provider_service.py b/api/tests/test_containers_integration_tests/services/test_model_provider_service.py new file mode 100644 index 0000000000..8b7d44c1e4 --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_model_provider_service.py @@ -0,0 +1,1172 @@ +from unittest.mock import MagicMock, patch + +import pytest +from faker import Faker + +from core.entities.model_entities import ModelStatus +from core.model_runtime.entities.model_entities import FetchFrom, ModelType +from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models.provider import Provider, ProviderModel, ProviderModelSetting, ProviderType +from services.model_provider_service import ModelProviderService + + +class TestModelProviderService: + """Integration tests for ModelProviderService using testcontainers.""" + + @pytest.fixture + def mock_external_service_dependencies(self): + """Mock setup for external service dependencies.""" + with ( + patch("services.model_provider_service.ProviderManager") as mock_provider_manager, + patch("services.model_provider_service.ModelProviderFactory") as mock_model_provider_factory, + ): + # Setup default mock returns + mock_provider_manager.return_value.get_configurations.return_value = MagicMock() + mock_model_provider_factory.return_value.get_provider_icon.return_value = (None, None) + + yield { + "provider_manager": mock_provider_manager, + "model_provider_factory": 
mock_model_provider_factory, + } + + def _create_test_account_and_tenant(self, db_session_with_containers, mock_external_service_dependencies): + """ + Helper method to create a test account and tenant for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + + Returns: + tuple: (account, tenant) - Created account and tenant instances + """ + fake = Faker() + + # Create account + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Create tenant for the account + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db.session.add(tenant) + db.session.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER.value, + current=True, + ) + db.session.add(join) + db.session.commit() + + # Set current tenant for account + account.current_tenant = tenant + + return account, tenant + + def _create_test_provider( + self, + db_session_with_containers, + mock_external_service_dependencies, + tenant_id: str, + provider_name: str = "openai", + ): + """ + Helper method to create a test provider for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tenant_id: Tenant ID for the provider + provider_name: Name of the provider + + Returns: + Provider: Created provider instance + """ + fake = Faker() + + provider = Provider( + tenant_id=tenant_id, + provider_name=provider_name, + provider_type="custom", + is_valid=True, + quota_type="free", + quota_limit=1000, + quota_used=0, + ) + + from extensions.ext_database import db + + db.session.add(provider) + db.session.commit() + + return provider + + def _create_test_provider_model( + self, + db_session_with_containers, + mock_external_service_dependencies, + tenant_id: str, + provider_name: str, + model_name: str = "gpt-3.5-turbo", + model_type: str = "llm", + ): + """ + Helper method to create a test provider model for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tenant_id: Tenant ID for the provider model + provider_name: Name of the provider + model_name: Name of the model + model_type: Type of the model + + Returns: + ProviderModel: Created provider model instance + """ + fake = Faker() + + provider_model = ProviderModel( + tenant_id=tenant_id, + provider_name=provider_name, + model_name=model_name, + model_type=model_type, + is_valid=True, + ) + + from extensions.ext_database import db + + db.session.add(provider_model) + db.session.commit() + + return provider_model + + def _create_test_provider_model_setting( + self, + db_session_with_containers, + mock_external_service_dependencies, + tenant_id: str, + provider_name: str, + model_name: str = "gpt-3.5-turbo", + model_type: str = "llm", + ): + """ + Helper method to create a test provider model setting for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tenant_id: Tenant ID for the provider model setting + provider_name: Name of the provider + model_name: Name of the model + model_type: Type of the model + + Returns: + ProviderModelSetting: Created provider model setting instance + """ + fake = Faker() + + provider_model_setting = ProviderModelSetting( + tenant_id=tenant_id, + provider_name=provider_name, + model_name=model_name, + model_type=model_type, + enabled=True, + load_balancing_enabled=False, + ) + + from extensions.ext_database import db + + db.session.add(provider_model_setting) + db.session.commit() + + return provider_model_setting + + def test_get_provider_list_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful provider list retrieval. + + This test verifies: + - Proper provider list retrieval with all required fields + - Correct filtering by model type + - Proper response structure and data mapping + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration + mock_provider_entity = MagicMock() + mock_provider_entity.provider = "openai" + mock_provider_entity.label = {"en_US": "OpenAI", "zh_Hans": "OpenAI"} + mock_provider_entity.description = {"en_US": "OpenAI provider", "zh_Hans": "OpenAI 提供商"} + mock_provider_entity.icon_small = {"en_US": "icon_small.png", "zh_Hans": "icon_small.png"} + 
mock_provider_entity.icon_large = {"en_US": "icon_large.png", "zh_Hans": "icon_large.png"} + mock_provider_entity.background = "#FF6B6B" + mock_provider_entity.help = None + mock_provider_entity.supported_model_types = [ModelType.LLM, ModelType.TEXT_EMBEDDING] + mock_provider_entity.configurate_methods = [] + mock_provider_entity.provider_credential_schema = None + mock_provider_entity.model_credential_schema = None + + mock_provider_config = MagicMock() + mock_provider_config.provider = mock_provider_entity + mock_provider_config.preferred_provider_type = ProviderType.CUSTOM + mock_provider_config.is_custom_configuration_available.return_value = True + mock_provider_config.system_configuration.enabled = True + mock_provider_config.system_configuration.current_quota_type = "free" + mock_provider_config.system_configuration.quota_configurations = [] + + mock_configurations = MagicMock() + mock_configurations.values.return_value = [mock_provider_config] + mock_provider_manager.get_configurations.return_value = mock_configurations + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_provider_list(tenant.id) + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 1 + + provider_response = result[0] + assert provider_response.tenant_id == tenant.id + assert provider_response.provider == "openai" + assert provider_response.background == "#FF6B6B" + assert len(provider_response.supported_model_types) == 2 + assert ModelType.LLM in provider_response.supported_model_types + assert ModelType.TEXT_EMBEDDING in provider_response.supported_model_types + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_config.is_custom_configuration_available.assert_called_once() + + def test_get_provider_list_with_model_type_filter( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test provider list 
retrieval with model type filtering. + + This test verifies: + - Proper filtering by model type + - Only providers supporting the specified model type are returned + - Correct handling of unsupported model types + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Mock ProviderManager to return multiple provider configurations + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configurations with different supported model types + mock_provider_entity_llm = MagicMock() + mock_provider_entity_llm.provider = "openai" + mock_provider_entity_llm.label = {"en_US": "OpenAI", "zh_Hans": "OpenAI"} + mock_provider_entity_llm.description = {"en_US": "OpenAI provider", "zh_Hans": "OpenAI 提供商"} + mock_provider_entity_llm.icon_small = {"en_US": "icon_small.png", "zh_Hans": "icon_small.png"} + mock_provider_entity_llm.icon_large = {"en_US": "icon_large.png", "zh_Hans": "icon_large.png"} + mock_provider_entity_llm.background = "#FF6B6B" + mock_provider_entity_llm.help = None + mock_provider_entity_llm.supported_model_types = [ModelType.LLM] + mock_provider_entity_llm.configurate_methods = [] + mock_provider_entity_llm.provider_credential_schema = None + mock_provider_entity_llm.model_credential_schema = None + + mock_provider_entity_embedding = MagicMock() + mock_provider_entity_embedding.provider = "cohere" + mock_provider_entity_embedding.label = {"en_US": "Cohere", "zh_Hans": "Cohere"} + mock_provider_entity_embedding.description = {"en_US": "Cohere provider", "zh_Hans": "Cohere 提供商"} + mock_provider_entity_embedding.icon_small = {"en_US": "icon_small.png", "zh_Hans": "icon_small.png"} + mock_provider_entity_embedding.icon_large = {"en_US": "icon_large.png", "zh_Hans": "icon_large.png"} + mock_provider_entity_embedding.background = "#4ECDC4" + mock_provider_entity_embedding.help 
= None + mock_provider_entity_embedding.supported_model_types = [ModelType.TEXT_EMBEDDING] + mock_provider_entity_embedding.configurate_methods = [] + mock_provider_entity_embedding.provider_credential_schema = None + mock_provider_entity_embedding.model_credential_schema = None + + mock_provider_config_llm = MagicMock() + mock_provider_config_llm.provider = mock_provider_entity_llm + mock_provider_config_llm.preferred_provider_type = ProviderType.CUSTOM + mock_provider_config_llm.is_custom_configuration_available.return_value = True + mock_provider_config_llm.system_configuration.enabled = True + mock_provider_config_llm.system_configuration.current_quota_type = "free" + mock_provider_config_llm.system_configuration.quota_configurations = [] + + mock_provider_config_embedding = MagicMock() + mock_provider_config_embedding.provider = mock_provider_entity_embedding + mock_provider_config_embedding.preferred_provider_type = ProviderType.CUSTOM + mock_provider_config_embedding.is_custom_configuration_available.return_value = True + mock_provider_config_embedding.system_configuration.enabled = True + mock_provider_config_embedding.system_configuration.current_quota_type = "free" + mock_provider_config_embedding.system_configuration.quota_configurations = [] + + mock_configurations = MagicMock() + mock_configurations.values.return_value = [mock_provider_config_llm, mock_provider_config_embedding] + mock_provider_manager.get_configurations.return_value = mock_configurations + + # Act: Execute the method under test with LLM filter + service = ModelProviderService() + result = service.get_provider_list(tenant.id, model_type="llm") + + # Assert: Verify only LLM providers are returned + assert result is not None + assert len(result) == 1 + assert result[0].provider == "openai" + assert ModelType.LLM in result[0].supported_model_types + + # Act: Execute the method under test with TEXT_EMBEDDING filter + result = service.get_provider_list(tenant.id, 
model_type="text-embedding") + + # Assert: Verify only TEXT_EMBEDDING providers are returned + assert result is not None + assert len(result) == 1 + assert result[0].provider == "cohere" + assert ModelType.TEXT_EMBEDDING in result[0].supported_model_types + + def test_get_models_by_provider_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of models by provider. + + This test verifies: + - Proper model retrieval for a specific provider + - Correct response structure with tenant_id and model data + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider and models + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + provider_model_1 = self._create_test_provider_model( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai", "gpt-3.5-turbo", "llm" + ) + + provider_model_2 = self._create_test_provider_model( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai", "gpt-4", "llm" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock models + from core.entities.model_entities import ModelWithProviderEntity, SimpleModelProviderEntity + from core.model_runtime.entities.common_entities import I18nObject + from core.model_runtime.entities.provider_entities import ProviderEntity + + # Create real model objects instead of mocks + provider_entity_1 = SimpleModelProviderEntity( + ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), + icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"), + 
icon_large=I18nObject(en_US="icon_large.png", zh_Hans="icon_large.png"), + supported_model_types=[ModelType.LLM], + configurate_methods=[], + models=[], + ) + ) + + provider_entity_2 = SimpleModelProviderEntity( + ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), + icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"), + icon_large=I18nObject(en_US="icon_large.png", zh_Hans="icon_large.png"), + supported_model_types=[ModelType.LLM], + configurate_methods=[], + models=[], + ) + ) + + mock_model_1 = ModelWithProviderEntity( + model="gpt-3.5-turbo", + label=I18nObject(en_US="GPT-3.5 Turbo", zh_Hans="GPT-3.5 Turbo"), + model_type=ModelType.LLM, + features=[], + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={}, + deprecated=False, + provider=provider_entity_1, + status="active", + load_balancing_enabled=False, + ) + + mock_model_2 = ModelWithProviderEntity( + model="gpt-4", + label=I18nObject(en_US="GPT-4", zh_Hans="GPT-4"), + model_type=ModelType.LLM, + features=[], + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={}, + deprecated=False, + provider=provider_entity_2, + status="active", + load_balancing_enabled=False, + ) + + mock_configurations = MagicMock() + mock_configurations.get_models.return_value = [mock_model_1, mock_model_2] + mock_provider_manager.get_configurations.return_value = mock_configurations + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_models_by_provider(tenant.id, "openai") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 2 + + # Verify first model + assert result[0].provider.tenant_id == tenant.id + assert result[0].model == "gpt-3.5-turbo" + assert result[0].provider.provider == "openai" + + # Verify second model + assert result[1].provider.tenant_id == tenant.id + assert result[1].model == "gpt-4" + assert result[1].provider.provider == "openai" + + # Verify mock 
interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_configurations.get_models.assert_called_once_with(provider="openai") + + def test_get_provider_credentials_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of provider credentials. + + This test verifies: + - Proper credential retrieval for existing provider + - Correct handling of obfuscated credentials + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with credentials + mock_provider_configuration = MagicMock() + mock_provider_configuration.get_custom_credentials.return_value = { + "api_key": "sk-***123", + "base_url": "https://api.openai.com", + } + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_provider_credentials(tenant.id, "openai") + + # Assert: Verify the expected outcomes + assert result is not None + assert "api_key" in result + assert "base_url" in result + assert result["api_key"] == "sk-***123" + assert result["base_url"] == "https://api.openai.com" + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.get_custom_credentials.assert_called_once_with(obfuscated=True) + + def test_provider_credentials_validate_success( + self, 
db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful validation of provider credentials. + + This test verifies: + - Proper credential validation for existing provider + - Correct handling of valid credentials + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with validation method + mock_provider_configuration = MagicMock() + mock_provider_configuration.custom_credentials_validate.return_value = True + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Test credentials + test_credentials = {"api_key": "sk-test123", "base_url": "https://api.openai.com"} + + # Act: Execute the method under test + service = ModelProviderService() + # This should not raise an exception + service.provider_credentials_validate(tenant.id, "openai", test_credentials) + + # Assert: Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.custom_credentials_validate.assert_called_once_with(test_credentials) + + def test_provider_credentials_validate_invalid_provider( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test validation failure for non-existent provider. 
+ + This test verifies: + - Proper error handling for non-existent provider + - Correct exception raising + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Mock ProviderManager to return empty configurations + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + mock_provider_manager.get_configurations.return_value = {} + + # Test credentials + test_credentials = {"api_key": "sk-test123", "base_url": "https://api.openai.com"} + + # Act & Assert: Execute the method under test and verify exception + service = ModelProviderService() + with pytest.raises(ValueError, match="Provider nonexistent does not exist."): + service.provider_credentials_validate(tenant.id, "nonexistent", test_credentials) + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + + def test_get_default_model_of_model_type_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful retrieval of default model for a specific model type. 
+ + This test verifies: + - Proper default model retrieval for tenant and model type + - Correct response structure with tenant_id and model data + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic default model + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock default model response + from core.entities.model_entities import DefaultModelEntity, DefaultModelProviderEntity + from core.model_runtime.entities.common_entities import I18nObject + + mock_default_model = DefaultModelEntity( + model="gpt-3.5-turbo", + model_type=ModelType.LLM, + provider=DefaultModelProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), + icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"), + icon_large=I18nObject(en_US="icon_large.png", zh_Hans="icon_large.png"), + supported_model_types=[ModelType.LLM], + ), + ) + + mock_provider_manager.get_default_model.return_value = mock_default_model + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_default_model_of_model_type(tenant.id, "llm") + + # Assert: Verify the expected outcomes + assert result is not None + assert result.model == "gpt-3.5-turbo" + assert result.model_type == ModelType.LLM + assert result.provider.tenant_id == tenant.id + assert result.provider.provider == "openai" + + # Verify mock interactions + mock_provider_manager.get_default_model.assert_called_once_with(tenant_id=tenant.id, model_type=ModelType.LLM) + + def test_update_default_model_of_model_type_success( + self, 
db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful update of default model for a specific model type. + + This test verifies: + - Proper default model update for tenant and model type + - Correct mock interactions with ProviderManager + - Database state management + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Act: Execute the method under test + service = ModelProviderService() + service.update_default_model_of_model_type(tenant.id, "llm", "openai", "gpt-4") + + # Assert: Verify mock interactions + mock_provider_manager.update_default_model_record.assert_called_once_with( + tenant_id=tenant.id, model_type=ModelType.LLM, provider="openai", model="gpt-4" + ) + + def test_get_model_provider_icon_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of model provider icon. 
+ + This test verifies: + - Proper icon retrieval for provider and icon type + - Correct response structure with byte data and mime type + - Mock interactions with ModelProviderFactory + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ModelProviderFactory to return realistic icon data + mock_model_provider_factory = mock_external_service_dependencies["model_provider_factory"].return_value + mock_model_provider_factory.get_provider_icon.return_value = (b"fake_icon_data", "image/png") + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_model_provider_icon(tenant.id, "openai", "icon_small", "en_US") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 2 + assert result[0] == b"fake_icon_data" + assert result[1] == "image/png" + + # Verify mock interactions + mock_model_provider_factory.get_provider_icon.assert_called_once_with("openai", "icon_small", "en_US") + + def test_switch_preferred_provider_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful switching of preferred provider type. 
+ + This test verifies: + - Proper provider type switching for tenant and provider + - Correct mock interactions with ProviderManager + - Provider configuration management + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with switch method + mock_provider_configuration = MagicMock() + mock_provider_configuration.switch_preferred_provider_type.return_value = None + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Act: Execute the method under test + service = ModelProviderService() + service.switch_preferred_provider(tenant.id, "openai", "custom") + + # Assert: Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.switch_preferred_provider_type.assert_called_once() + + def test_enable_model_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful enabling of a model. 
+ + This test verifies: + - Proper model enabling for tenant, provider, and model + - Correct mock interactions with ProviderManager + - Model configuration management + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with enable method + mock_provider_configuration = MagicMock() + mock_provider_configuration.enable_model.return_value = None + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Act: Execute the method under test + service = ModelProviderService() + service.enable_model(tenant.id, "openai", "gpt-4", "llm") + + # Assert: Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.enable_model.assert_called_once_with(model_type=ModelType.LLM, model="gpt-4") + + def test_get_model_credentials_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of model credentials. 
+ + This test verifies: + - Proper credential retrieval for model + - Correct response structure with obfuscated credentials + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with model credentials + mock_provider_configuration = MagicMock() + mock_provider_configuration.get_custom_model_credentials.return_value = { + "api_key": "sk-***123", + "base_url": "https://api.openai.com", + } + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_model_credentials(tenant.id, "openai", "llm", "gpt-4") + + # Assert: Verify the expected outcomes + assert result is not None + assert "api_key" in result + assert "base_url" in result + assert result["api_key"] == "sk-***123" + assert result["base_url"] == "https://api.openai.com" + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.get_custom_model_credentials.assert_called_once_with( + model_type=ModelType.LLM, model="gpt-4", obfuscated=True + ) + + def test_model_credentials_validate_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful validation of model credentials. 
+ + This test verifies: + - Proper credential validation for model + - Correct mock interactions with ProviderManager + - Model credential validation process + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with validation method + mock_provider_configuration = MagicMock() + mock_provider_configuration.custom_model_credentials_validate.return_value = True + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Test credentials + test_credentials = {"api_key": "sk-test123", "base_url": "https://api.openai.com"} + + # Act: Execute the method under test + service = ModelProviderService() + # This should not raise an exception + service.model_credentials_validate(tenant.id, "openai", "llm", "gpt-4", test_credentials) + + # Assert: Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.custom_model_credentials_validate.assert_called_once_with( + model_type=ModelType.LLM, model="gpt-4", credentials=test_credentials + ) + + def test_save_model_credentials_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful saving of model credentials. 
+ + This test verifies: + - Proper credential saving for model + - Correct mock interactions with ProviderManager + - Model credential management + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with save method + mock_provider_configuration = MagicMock() + mock_provider_configuration.add_or_update_custom_model_credentials.return_value = None + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Test credentials + test_credentials = {"api_key": "sk-test123", "base_url": "https://api.openai.com"} + + # Act: Execute the method under test + service = ModelProviderService() + service.save_model_credentials(tenant.id, "openai", "llm", "gpt-4", test_credentials) + + # Assert: Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.add_or_update_custom_model_credentials.assert_called_once_with( + model_type=ModelType.LLM, model="gpt-4", credentials=test_credentials + ) + + def test_remove_model_credentials_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful removal of model credentials. 
+ + This test verifies: + - Proper credential removal for model + - Correct mock interactions with ProviderManager + - Model credential cleanup + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with remove method + mock_provider_configuration = MagicMock() + mock_provider_configuration.delete_custom_model_credentials.return_value = None + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Act: Execute the method under test + service = ModelProviderService() + service.remove_model_credentials(tenant.id, "openai", "llm", "gpt-4") + + # Assert: Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.delete_custom_model_credentials.assert_called_once_with( + model_type=ModelType.LLM, model="gpt-4" + ) + + def test_get_models_by_model_type_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of models by model type. 
+ + This test verifies: + - Proper model retrieval for specific model type + - Correct response structure with provider grouping + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configurations object with get_models method + mock_provider_configurations = MagicMock() + mock_provider_configurations.get_models.return_value = [ + MagicMock( + provider=MagicMock( + provider="openai", + label={"en_US": "OpenAI", "zh_Hans": "OpenAI"}, + icon_small={"en_US": "icon_small.png", "zh_Hans": "icon_small.png"}, + icon_large={"en_US": "icon_large.png", "zh_Hans": "icon_large.png"}, + ), + model="gpt-3.5-turbo", + model_type=ModelType.LLM, + status=ModelStatus.ACTIVE, + deprecated=False, + label={"en_US": "GPT-3.5 Turbo", "zh_Hans": "GPT-3.5 Turbo"}, + features=[], + fetch_from="predefined-model", + model_properties={}, + load_balancing_enabled=False, + ), + MagicMock( + provider=MagicMock( + provider="openai", + label={"en_US": "OpenAI", "zh_Hans": "OpenAI"}, + icon_small={"en_US": "icon_small.png", "zh_Hans": "icon_small.png"}, + icon_large={"en_US": "icon_large.png", "zh_Hans": "icon_large.png"}, + ), + model="gpt-4", + model_type=ModelType.LLM, + status=ModelStatus.ACTIVE, + deprecated=False, + label={"en_US": "GPT-4", "zh_Hans": "GPT-4"}, + features=[], + fetch_from="predefined-model", + model_properties={}, + load_balancing_enabled=False, + ), + ] + mock_provider_manager.get_configurations.return_value = mock_provider_configurations + + # Act: Execute the method under 
test + service = ModelProviderService() + result = service.get_models_by_model_type(tenant.id, "llm") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 1 # One provider group + assert result[0].provider == "openai" + assert len(result[0].models) == 2 # Two models in the provider + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configurations.get_models.assert_called_once_with(model_type=ModelType.LLM) + + def test_get_model_parameter_rules_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of model parameter rules. + + This test verifies: + - Proper parameter rules retrieval for model + - Correct mock interactions with ProviderManager + - Model schema handling + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with parameter rules + mock_provider_configuration = MagicMock() + mock_credentials = {"api_key": "sk-test123"} + mock_model_schema = MagicMock() + + # Create mock parameter rules with proper return values + mock_temperature_rule = MagicMock() + mock_temperature_rule.name = "temperature" + mock_temperature_rule.type = "float" + mock_temperature_rule.min = 0.0 + mock_temperature_rule.max = 2.0 + + mock_max_tokens_rule = MagicMock() + mock_max_tokens_rule.name = "max_tokens" + mock_max_tokens_rule.type = "integer" + mock_max_tokens_rule.min = 1 + mock_max_tokens_rule.max = 4096 + + 
mock_model_schema.parameter_rules = [mock_temperature_rule, mock_max_tokens_rule] + + mock_provider_configuration.get_current_credentials.return_value = mock_credentials + mock_provider_configuration.get_model_schema.return_value = mock_model_schema + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_model_parameter_rules(tenant.id, "openai", "gpt-4") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 2 + assert result[0].name == "temperature" + assert result[1].name == "max_tokens" + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.get_current_credentials.assert_called_once_with( + model_type=ModelType.LLM, model="gpt-4" + ) + mock_provider_configuration.get_model_schema.assert_called_once_with( + model_type=ModelType.LLM, model="gpt-4", credentials=mock_credentials + ) + + def test_get_model_parameter_rules_no_credentials( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test parameter rules retrieval when no credentials are available. 
+ + This test verifies: + - Proper handling of missing credentials + - Empty result when no credentials exist + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create test provider + provider = self._create_test_provider( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "openai" + ) + + # Mock ProviderManager to return realistic configuration + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + + # Create mock provider configuration with no credentials + mock_provider_configuration = MagicMock() + mock_provider_configuration.get_current_credentials.return_value = None + mock_provider_manager.get_configurations.return_value = {"openai": mock_provider_configuration} + + # Act: Execute the method under test + service = ModelProviderService() + result = service.get_model_parameter_rules(tenant.id, "openai", "gpt-4") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 0 + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) + mock_provider_configuration.get_current_credentials.assert_called_once_with( + model_type=ModelType.LLM, model="gpt-4" + ) + + def test_get_model_parameter_rules_provider_not_found( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test parameter rules retrieval when provider does not exist. 
+ + This test verifies: + - Proper error handling for non-existent provider + - ValueError is raised with appropriate message + - Mock interactions with ProviderManager + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Mock ProviderManager to return empty configurations + mock_provider_manager = mock_external_service_dependencies["provider_manager"].return_value + mock_provider_manager.get_configurations.return_value = {} + + # Act & Assert: Execute the method under test and expect ValueError + service = ModelProviderService() + with pytest.raises(ValueError, match="Provider openai does not exist."): + service.get_model_parameter_rules(tenant.id, "openai", "gpt-4") + + # Verify mock interactions + mock_provider_manager.get_configurations.assert_called_once_with(tenant.id) diff --git a/api/tests/test_containers_integration_tests/services/test_saved_message_service.py b/api/tests/test_containers_integration_tests/services/test_saved_message_service.py new file mode 100644 index 0000000000..9e6b9837ae --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_saved_message_service.py @@ -0,0 +1,620 @@ +from unittest.mock import patch + +import pytest +from faker import Faker + +from models.model import EndUser, Message +from models.web import SavedMessage +from services.app_service import AppService +from services.saved_message_service import SavedMessageService + + +class TestSavedMessageService: + """Integration tests for SavedMessageService using testcontainers.""" + + @pytest.fixture + def mock_external_service_dependencies(self): + """Mock setup for external service dependencies.""" + with ( + patch("services.account_service.FeatureService") as mock_account_feature_service, + patch("services.app_service.ModelManager") as mock_model_manager, + patch("services.saved_message_service.MessageService") as 
mock_message_service, + ): + # Setup default mock returns + mock_account_feature_service.get_system_features.return_value.is_allow_register = True + + # Mock ModelManager for app creation + mock_model_instance = mock_model_manager.return_value + mock_model_instance.get_default_model_instance.return_value = None + mock_model_instance.get_default_provider_model_name.return_value = ("openai", "gpt-3.5-turbo") + + # Mock MessageService + mock_message_service.get_message.return_value = None + mock_message_service.pagination_by_last_id.return_value = None + + yield { + "account_feature_service": mock_account_feature_service, + "model_manager": mock_model_manager, + "message_service": mock_message_service, + } + + def _create_test_app_and_account(self, db_session_with_containers, mock_external_service_dependencies): + """ + Helper method to create a test app and account for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + + Returns: + tuple: (app, account) - Created app and account instances + """ + fake = Faker() + + # Setup mocks for account creation + mock_external_service_dependencies[ + "account_feature_service" + ].get_system_features.return_value.is_allow_register = True + + # Create account and tenant first + from services.account_service import AccountService, TenantService + + account = AccountService.create_account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + password=fake.password(length=12), + ) + TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) + tenant = account.current_tenant + + # Create app with realistic data + app_args = { + "name": fake.company(), + "description": fake.text(max_nb_chars=100), + "mode": "chat", + "icon_type": "emoji", + "icon": "🤖", + "icon_background": "#FF6B6B", + "api_rph": 100, + "api_rpm": 10, + } + + app_service = AppService() + app = 
app_service.create_app(tenant.id, app_args, account) + + return app, account + + def _create_test_end_user(self, db_session_with_containers, app): + """ + Helper method to create a test end user for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + app: App instance to associate the end user with + + Returns: + EndUser: Created end user instance + """ + fake = Faker() + + end_user = EndUser( + tenant_id=app.tenant_id, + app_id=app.id, + external_user_id=fake.uuid4(), + name=fake.name(), + type="normal", + session_id=fake.uuid4(), + is_anonymous=False, + ) + + from extensions.ext_database import db + + db.session.add(end_user) + db.session.commit() + + return end_user + + def _create_test_message(self, db_session_with_containers, app, user): + """ + Helper method to create a test message for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + app: App instance to associate the message with + user: User instance (Account or EndUser) to associate the message with + + Returns: + Message: Created message instance + """ + fake = Faker() + + # Create a simple conversation first + from models.model import Conversation + + conversation = Conversation( + app_id=app.id, + from_source="account" if hasattr(user, "current_tenant") else "end_user", + from_end_user_id=user.id if not hasattr(user, "current_tenant") else None, + from_account_id=user.id if hasattr(user, "current_tenant") else None, + name=fake.sentence(nb_words=3), + inputs={}, + status="normal", + mode="chat", + ) + + from extensions.ext_database import db + + db.session.add(conversation) + db.session.commit() + + # Create message + message = Message( + app_id=app.id, + conversation_id=conversation.id, + from_source="account" if hasattr(user, "current_tenant") else "end_user", + from_end_user_id=user.id if not hasattr(user, "current_tenant") else None, + from_account_id=user.id if hasattr(user, "current_tenant") 
else None, + inputs={}, + query=fake.sentence(nb_words=5), + message=fake.text(max_nb_chars=100), + answer=fake.text(max_nb_chars=200), + message_tokens=50, + answer_tokens=100, + message_unit_price=0.001, + answer_unit_price=0.002, + total_price=0.003, + currency="USD", + status="success", + ) + + db.session.add(message) + db.session.commit() + + return message + + def test_pagination_by_last_id_success_with_account_user( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful pagination by last ID with account user. + + This test verifies: + - Proper pagination with account user + - Correct filtering by app_id and user + - Proper role identification for account users + - MessageService integration + """ + # Arrange: Create test data + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create test messages + message1 = self._create_test_message(db_session_with_containers, app, account) + message2 = self._create_test_message(db_session_with_containers, app, account) + + # Create saved messages + saved_message1 = SavedMessage( + app_id=app.id, + message_id=message1.id, + created_by_role="account", + created_by=account.id, + ) + saved_message2 = SavedMessage( + app_id=app.id, + message_id=message2.id, + created_by_role="account", + created_by=account.id, + ) + + from extensions.ext_database import db + + db.session.add_all([saved_message1, saved_message2]) + db.session.commit() + + # Mock MessageService.pagination_by_last_id return value + from libs.infinite_scroll_pagination import InfiniteScrollPagination + + mock_pagination = InfiniteScrollPagination(data=[message1, message2], limit=10, has_more=False) + mock_external_service_dependencies["message_service"].pagination_by_last_id.return_value = mock_pagination + + # Act: Execute the method under test + result = SavedMessageService.pagination_by_last_id(app_model=app, user=account, 
last_id=None, limit=10) + + # Assert: Verify the expected outcomes + assert result is not None + assert result.data == [message1, message2] + assert result.limit == 10 + assert result.has_more is False + + # Verify MessageService was called with correct parameters + # Sort the IDs to handle database query order variations + expected_include_ids = sorted([message1.id, message2.id]) + actual_call = mock_external_service_dependencies["message_service"].pagination_by_last_id.call_args + actual_include_ids = sorted(actual_call.kwargs.get("include_ids", [])) + + assert actual_call.kwargs["app_model"] == app + assert actual_call.kwargs["user"] == account + assert actual_call.kwargs["last_id"] is None + assert actual_call.kwargs["limit"] == 10 + assert actual_include_ids == expected_include_ids + + # Verify database state + db.session.refresh(saved_message1) + db.session.refresh(saved_message2) + assert saved_message1.id is not None + assert saved_message2.id is not None + assert saved_message1.created_by_role == "account" + assert saved_message2.created_by_role == "account" + + def test_pagination_by_last_id_success_with_end_user( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful pagination by last ID with end user. 
+ + This test verifies: + - Proper pagination with end user + - Correct filtering by app_id and user + - Proper role identification for end users + - MessageService integration + """ + # Arrange: Create test data + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + end_user = self._create_test_end_user(db_session_with_containers, app) + + # Create test messages + message1 = self._create_test_message(db_session_with_containers, app, end_user) + message2 = self._create_test_message(db_session_with_containers, app, end_user) + + # Create saved messages + saved_message1 = SavedMessage( + app_id=app.id, + message_id=message1.id, + created_by_role="end_user", + created_by=end_user.id, + ) + saved_message2 = SavedMessage( + app_id=app.id, + message_id=message2.id, + created_by_role="end_user", + created_by=end_user.id, + ) + + from extensions.ext_database import db + + db.session.add_all([saved_message1, saved_message2]) + db.session.commit() + + # Mock MessageService.pagination_by_last_id return value + from libs.infinite_scroll_pagination import InfiniteScrollPagination + + mock_pagination = InfiniteScrollPagination(data=[message1, message2], limit=5, has_more=True) + mock_external_service_dependencies["message_service"].pagination_by_last_id.return_value = mock_pagination + + # Act: Execute the method under test + result = SavedMessageService.pagination_by_last_id( + app_model=app, user=end_user, last_id="test_last_id", limit=5 + ) + + # Assert: Verify the expected outcomes + assert result is not None + assert result.data == [message1, message2] + assert result.limit == 5 + assert result.has_more is True + + # Verify MessageService was called with correct parameters + # Sort the IDs to handle database query order variations + expected_include_ids = sorted([message1.id, message2.id]) + actual_call = mock_external_service_dependencies["message_service"].pagination_by_last_id.call_args + 
actual_include_ids = sorted(actual_call.kwargs.get("include_ids", [])) + + assert actual_call.kwargs["app_model"] == app + assert actual_call.kwargs["user"] == end_user + assert actual_call.kwargs["last_id"] == "test_last_id" + assert actual_call.kwargs["limit"] == 5 + assert actual_include_ids == expected_include_ids + + # Verify database state + db.session.refresh(saved_message1) + db.session.refresh(saved_message2) + assert saved_message1.id is not None + assert saved_message2.id is not None + assert saved_message1.created_by_role == "end_user" + assert saved_message2.created_by_role == "end_user" + + def test_save_success_with_new_message(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful save of a new message. + + This test verifies: + - Proper creation of new saved message + - Correct database state after save + - Proper relationship establishment + - MessageService integration for message retrieval + """ + # Arrange: Create test data + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + message = self._create_test_message(db_session_with_containers, app, account) + + # Mock MessageService.get_message return value + mock_external_service_dependencies["message_service"].get_message.return_value = message + + # Act: Execute the method under test + SavedMessageService.save(app_model=app, user=account, message_id=message.id) + + # Assert: Verify the expected outcomes + # Check if saved message was created in database + from extensions.ext_database import db + + saved_message = ( + db.session.query(SavedMessage) + .where( + SavedMessage.app_id == app.id, + SavedMessage.message_id == message.id, + SavedMessage.created_by_role == "account", + SavedMessage.created_by == account.id, + ) + .first() + ) + + assert saved_message is not None + assert saved_message.app_id == app.id + assert saved_message.message_id == message.id + assert 
saved_message.created_by_role == "account" + assert saved_message.created_by == account.id + assert saved_message.created_at is not None + + # Verify MessageService.get_message was called + mock_external_service_dependencies["message_service"].get_message.assert_called_once_with( + app_model=app, user=account, message_id=message.id + ) + + # Verify database state + db.session.refresh(saved_message) + assert saved_message.id is not None + + def test_pagination_by_last_id_error_no_user(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test error handling when no user is provided. 
+ + This test verifies: + - Proper error handling for missing user + - ValueError is raised when user is None + - No database operations are performed + """ + # Arrange: Create test data + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + SavedMessageService.pagination_by_last_id(app_model=app, user=None, last_id=None, limit=10) + + assert "User is required" in str(exc_info.value) + + # Verify no database operations were performed for this specific test + # Note: We don't check total count as other tests may have created data + # Instead, we verify that the error was properly raised + pass + + def test_save_error_no_user(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test error handling when saving message with no user. + + This test verifies: + - Method returns early when user is None + - No database operations are performed + - No exceptions are raised + """ + # Arrange: Create test data + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + message = self._create_test_message(db_session_with_containers, app, account) + + # Act: Execute the method under test with None user + result = SavedMessageService.save(app_model=app, user=None, message_id=message.id) + + # Assert: Verify the expected outcomes + assert result is None + + # Verify no saved message was created + from extensions.ext_database import db + + saved_message = ( + db.session.query(SavedMessage) + .where( + SavedMessage.app_id == app.id, + SavedMessage.message_id == message.id, + ) + .first() + ) + + assert saved_message is None + + def test_delete_success_existing_message(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful deletion of an existing saved message. 
+ + This test verifies: + - Proper deletion of existing saved message + - Correct database state after deletion + - No errors during deletion process + """ + # Arrange: Create test data + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + message = self._create_test_message(db_session_with_containers, app, account) + + # Create a saved message first + saved_message = SavedMessage( + app_id=app.id, + message_id=message.id, + created_by_role="account", + created_by=account.id, + ) + + from extensions.ext_database import db + + db.session.add(saved_message) + db.session.commit() + + # Verify saved message exists + assert ( + db.session.query(SavedMessage) + .where( + SavedMessage.app_id == app.id, + SavedMessage.message_id == message.id, + SavedMessage.created_by_role == "account", + SavedMessage.created_by == account.id, + ) + .first() + is not None + ) + + # Act: Execute the method under test + SavedMessageService.delete(app_model=app, user=account, message_id=message.id) + + # Assert: Verify the expected outcomes + # Check if saved message was deleted from database + deleted_saved_message = ( + db.session.query(SavedMessage) + .where( + SavedMessage.app_id == app.id, + SavedMessage.message_id == message.id, + SavedMessage.created_by_role == "account", + SavedMessage.created_by == account.id, + ) + .first() + ) + + assert deleted_saved_message is None + + # Verify database state + db.session.commit() + # The message should still exist, only the saved_message should be deleted + assert db.session.query(Message).where(Message.id == message.id).first() is not None diff --git a/api/tests/test_containers_integration_tests/services/test_tag_service.py b/api/tests/test_containers_integration_tests/services/test_tag_service.py new file mode 100644 index 0000000000..2d5cdf426d --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_tag_service.py @@ -0,0 +1,1192 @@ +from 
unittest.mock import patch + +import pytest +from faker import Faker +from werkzeug.exceptions import NotFound + +from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models.dataset import Dataset +from models.model import App, Tag, TagBinding +from services.tag_service import TagService + + +class TestTagService: + """Integration tests for TagService using testcontainers.""" + + @pytest.fixture + def mock_external_service_dependencies(self): + """Mock setup for external service dependencies.""" + with ( + patch("services.tag_service.current_user") as mock_current_user, + ): + # Setup default mock returns + mock_current_user.current_tenant_id = "test-tenant-id" + mock_current_user.id = "test-user-id" + + yield { + "current_user": mock_current_user, + } + + def _create_test_account_and_tenant(self, db_session_with_containers, mock_external_service_dependencies): + """ + Helper method to create a test account and tenant for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + + Returns: + tuple: (account, tenant) - Created account and tenant instances + """ + fake = Faker() + + # Create account + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Create tenant for the account + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db.session.add(tenant) + db.session.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER.value, + current=True, + ) + db.session.add(join) + db.session.commit() + + # Set current tenant for account + account.current_tenant = tenant + + # Update mock to use real tenant ID + 
mock_external_service_dependencies["current_user"].current_tenant_id = tenant.id + mock_external_service_dependencies["current_user"].id = account.id + + return account, tenant + + def _create_test_dataset(self, db_session_with_containers, mock_external_service_dependencies, tenant_id): + """ + Helper method to create a test dataset for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tenant_id: Tenant ID for the dataset + + Returns: + Dataset: Created dataset instance + """ + fake = Faker() + + dataset = Dataset( + name=fake.company(), + description=fake.text(max_nb_chars=100), + provider="vendor", + permission="only_me", + data_source_type="upload", + indexing_technique="high_quality", + tenant_id=tenant_id, + created_by=mock_external_service_dependencies["current_user"].id, + ) + + from extensions.ext_database import db + + db.session.add(dataset) + db.session.commit() + + return dataset + + def _create_test_app(self, db_session_with_containers, mock_external_service_dependencies, tenant_id): + """ + Helper method to create a test app for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tenant_id: Tenant ID for the app + + Returns: + App: Created app instance + """ + fake = Faker() + + app = App( + name=fake.company(), + description=fake.text(max_nb_chars=100), + mode="chat", + icon_type="emoji", + icon="🤖", + icon_background="#FF6B6B", + enable_site=False, + enable_api=False, + tenant_id=tenant_id, + created_by=mock_external_service_dependencies["current_user"].id, + ) + + from extensions.ext_database import db + + db.session.add(app) + db.session.commit() + + return app + + def _create_test_tags( + self, db_session_with_containers, mock_external_service_dependencies, tenant_id, tag_type, count=3 + ): + """ + Helper method to create test tags for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tenant_id: Tenant ID for the tags + tag_type: Type of tags to create + count: Number of tags to create + + Returns: + list: List of created tag instances + """ + fake = Faker() + tags = [] + + for i in range(count): + tag = Tag( + name=f"tag_{tag_type}_{i}_{fake.word()}", + type=tag_type, + tenant_id=tenant_id, + created_by=mock_external_service_dependencies["current_user"].id, + ) + tags.append(tag) + + from extensions.ext_database import db + + for tag in tags: + db.session.add(tag) + db.session.commit() + + return tags + + def _create_test_tag_bindings( + self, db_session_with_containers, mock_external_service_dependencies, tags, target_id, tenant_id + ): + """ + Helper method to create test tag bindings for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tags: List of tags to bind + target_id: Target ID to bind tags to + tenant_id: Tenant ID for the bindings + + Returns: + list: List of created tag binding instances + """ + tag_bindings = [] + + for tag in tags: + tag_binding = TagBinding( + tag_id=tag.id, + target_id=target_id, + tenant_id=tenant_id, + created_by=mock_external_service_dependencies["current_user"].id, + ) + tag_bindings.append(tag_binding) + + from extensions.ext_database import db + + for tag_binding in tag_bindings: + db.session.add(tag_binding) + db.session.commit() + + return tag_bindings + + def test_get_tags_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of tags with binding count. + + This test verifies: + - Proper tag retrieval with binding count + - Correct filtering by tag type and tenant + - Proper ordering by creation date + - Binding count calculation + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tags + tags = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "knowledge", 3 + ) + + # Create dataset and bind tags + dataset = self._create_test_dataset(db_session_with_containers, mock_external_service_dependencies, tenant.id) + self._create_test_tag_bindings( + db_session_with_containers, mock_external_service_dependencies, tags[:2], dataset.id, tenant.id + ) + + # Act: Execute the method under test + result = TagService.get_tags("knowledge", tenant.id) + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 3 + + # Verify tag data structure + for tag_result in result: + assert hasattr(tag_result, "id") + assert hasattr(tag_result, "type") + 
assert hasattr(tag_result, "name") + assert hasattr(tag_result, "binding_count") + assert tag_result.type == "knowledge" + + # Verify binding count + tag_with_bindings = next((t for t in result if t.binding_count > 0), None) + assert tag_with_bindings is not None + assert tag_with_bindings.binding_count >= 1 + + # Verify ordering (newest first) - note: created_at is not in SELECT but used in ORDER BY + # The ordering is handled by the database, we just verify the results are returned + assert len(result) == 3 + + def test_get_tags_with_keyword_filter(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test tag retrieval with keyword filtering. + + This test verifies: + - Proper keyword filtering functionality + - Case-insensitive search + - Partial match functionality + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tags with specific names + tags = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "app", 3 + ) + + # Update tag names to make them searchable + from extensions.ext_database import db + + tags[0].name = "python_development" + tags[1].name = "machine_learning" + tags[2].name = "web_development" + db.session.commit() + + # Act: Execute the method under test with keyword filter + result = TagService.get_tags("app", tenant.id, keyword="development") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 2 # Should find python_development and web_development + + # Verify filtered results contain the keyword + for tag_result in result: + assert "development" in tag_result.name.lower() + + # Verify no results for non-matching keyword + result_no_match = TagService.get_tags("app", tenant.id, keyword="nonexistent") + assert len(result_no_match) == 0 + + def test_get_tags_empty_result(self, 
db_session_with_containers, mock_external_service_dependencies): + """ + Test tag retrieval when no tags exist. + + This test verifies: + - Proper handling of empty tag sets + - Correct return value for no results + """ + # Arrange: Create test data without tags + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute the method under test + result = TagService.get_tags("knowledge", tenant.id) + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 0 + assert isinstance(result, list) + + def test_get_target_ids_by_tag_ids_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of target IDs by tag IDs. + + This test verifies: + - Proper target ID retrieval for valid tag IDs + - Correct filtering by tag type and tenant + - Proper handling of tag bindings + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tags + tags = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "knowledge", 3 + ) + + # Create multiple datasets and bind tags + datasets = [] + for i in range(2): + dataset = self._create_test_dataset( + db_session_with_containers, mock_external_service_dependencies, tenant.id + ) + datasets.append(dataset) + # Bind first two tags to first dataset, last tag to second dataset + tags_to_bind = tags[:2] if i == 0 else tags[2:] + self._create_test_tag_bindings( + db_session_with_containers, mock_external_service_dependencies, tags_to_bind, dataset.id, tenant.id + ) + + # Act: Execute the method under test + tag_ids = [tag.id for tag in tags] + result = TagService.get_target_ids_by_tag_ids("knowledge", tenant.id, tag_ids) + + # Assert: Verify the expected outcomes + assert result 
is not None + assert len(result) == 3 # Should find 3 target IDs (2 from first dataset, 1 from second) + + # Verify all dataset IDs are returned + dataset_ids = [dataset.id for dataset in datasets] + for target_id in result: + assert target_id in dataset_ids + + # Verify the first dataset appears twice (for the first two tags) + first_dataset_count = result.count(datasets[0].id) + assert first_dataset_count == 2 + + # Verify the second dataset appears once (for the last tag) + second_dataset_count = result.count(datasets[1].id) + assert second_dataset_count == 1 + + def test_get_target_ids_by_tag_ids_empty_tag_ids( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test target ID retrieval with empty tag IDs list. + + This test verifies: + - Proper handling of empty tag IDs + - Correct return value for empty input + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute the method under test with empty tag IDs + result = TagService.get_target_ids_by_tag_ids("knowledge", tenant.id, []) + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 0 + assert isinstance(result, list) + + def test_get_target_ids_by_tag_ids_no_matching_tags( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test target ID retrieval when no tags match the criteria. 
+ + This test verifies: + - Proper handling of non-existent tag IDs + - Correct return value for no matches + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create non-existent tag IDs + import uuid + + non_existent_tag_ids = [str(uuid.uuid4()), str(uuid.uuid4())] + + # Act: Execute the method under test + result = TagService.get_target_ids_by_tag_ids("knowledge", tenant.id, non_existent_tag_ids) + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 0 + assert isinstance(result, list) + + def test_get_tag_by_tag_name_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of tags by tag name. + + This test verifies: + - Proper tag retrieval by name + - Correct filtering by tag type and tenant + - Proper return value structure + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tags with specific names + tags = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "app", 2 + ) + + # Update tag names to make them searchable + from extensions.ext_database import db + + tags[0].name = "python_tag" + tags[1].name = "ml_tag" + db.session.commit() + + # Act: Execute the method under test + result = TagService.get_tag_by_tag_name("app", tenant.id, "python_tag") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 1 + assert result[0].name == "python_tag" + assert result[0].type == "app" + assert result[0].tenant_id == tenant.id + + def test_get_tag_by_tag_name_no_matches(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test tag retrieval by name when no matches exist. 
+ + This test verifies: + - Proper handling of non-existent tag names + - Correct return value for no matches + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute the method under test with non-existent tag name + result = TagService.get_tag_by_tag_name("knowledge", tenant.id, "nonexistent_tag") + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 0 + assert isinstance(result, list) + + def test_get_tag_by_tag_name_empty_parameters(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test tag retrieval by name with empty parameters. + + This test verifies: + - Proper handling of empty tag type + - Proper handling of empty tag name + - Correct return value for invalid input + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute the method under test with empty parameters + result_empty_type = TagService.get_tag_by_tag_name("", tenant.id, "test_tag") + result_empty_name = TagService.get_tag_by_tag_name("knowledge", tenant.id, "") + + # Assert: Verify the expected outcomes + assert result_empty_type is not None + assert len(result_empty_type) == 0 + assert result_empty_name is not None + assert len(result_empty_name) == 0 + + def test_get_tags_by_target_id_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of tags by target ID. 
+ + This test verifies: + - Proper tag retrieval for a specific target + - Correct filtering by tag type and tenant + - Proper join with tag bindings + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tags + tags = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "app", 3 + ) + + # Create app and bind tags + app = self._create_test_app(db_session_with_containers, mock_external_service_dependencies, tenant.id) + self._create_test_tag_bindings( + db_session_with_containers, mock_external_service_dependencies, tags, app.id, tenant.id + ) + + # Act: Execute the method under test + result = TagService.get_tags_by_target_id("app", tenant.id, app.id) + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 3 + + # Verify all tags are returned + for tag in result: + assert tag.type == "app" + assert tag.tenant_id == tenant.id + assert tag.id in [t.id for t in tags] + + def test_get_tags_by_target_id_no_bindings(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test tag retrieval by target ID when no tags are bound. 
+ + This test verifies: + - Proper handling of targets with no tag bindings + - Correct return value for no results + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create app without binding any tags + app = self._create_test_app(db_session_with_containers, mock_external_service_dependencies, tenant.id) + + # Act: Execute the method under test + result = TagService.get_tags_by_target_id("app", tenant.id, app.id) + + # Assert: Verify the expected outcomes + assert result is not None + assert len(result) == 0 + assert isinstance(result, list) + + def test_save_tags_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful tag creation. + + This test verifies: + - Proper tag creation with all required fields + - Correct database state after creation + - Proper UUID generation + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + tag_args = {"name": "test_tag_name", "type": "knowledge"} + + # Act: Execute the method under test + result = TagService.save_tags(tag_args) + + # Assert: Verify the expected outcomes + assert result is not None + assert result.name == "test_tag_name" + assert result.type == "knowledge" + assert result.tenant_id == tenant.id + assert result.created_by == account.id + assert result.id is not None + + # Verify database state + from extensions.ext_database import db + + db.session.refresh(result) + assert result.id is not None + + # Verify tag was actually saved to database + saved_tag = db.session.query(Tag).where(Tag.id == result.id).first() + assert saved_tag is not None + assert saved_tag.name == "test_tag_name" + + def test_save_tags_duplicate_name_error(self, db_session_with_containers, mock_external_service_dependencies): + """ + 
Test tag creation with duplicate name. + + This test verifies: + - Proper error handling for duplicate tag names + - Correct exception type and message + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create first tag + tag_args = {"name": "duplicate_tag", "type": "app"} + TagService.save_tags(tag_args) + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + TagService.save_tags(tag_args) + assert "Tag name already exists" in str(exc_info.value) + + def test_update_tags_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful tag update. + + This test verifies: + - Proper tag update with new name + - Correct database state after update + - Proper error handling for non-existent tags + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create a tag to update + tag_args = {"name": "original_name", "type": "knowledge"} + tag = TagService.save_tags(tag_args) + + # Update args + update_args = {"name": "updated_name", "type": "knowledge"} + + # Act: Execute the method under test + result = TagService.update_tags(update_args, tag.id) + + # Assert: Verify the expected outcomes + assert result is not None + assert result.name == "updated_name" + assert result.type == "knowledge" + assert result.id == tag.id + + # Verify database state + from extensions.ext_database import db + + db.session.refresh(result) + assert result.name == "updated_name" + + # Verify tag was actually updated in database + updated_tag = db.session.query(Tag).where(Tag.id == tag.id).first() + assert updated_tag is not None + assert updated_tag.name == "updated_name" + + def test_update_tags_not_found_error(self, db_session_with_containers, 
mock_external_service_dependencies): + """ + Test tag update for non-existent tag. + + This test verifies: + - Proper error handling for non-existent tags + - Correct exception type + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create non-existent tag ID + import uuid + + non_existent_tag_id = str(uuid.uuid4()) + + update_args = {"name": "updated_name", "type": "knowledge"} + + # Act & Assert: Verify proper error handling + with pytest.raises(NotFound) as exc_info: + TagService.update_tags(update_args, non_existent_tag_id) + assert "Tag not found" in str(exc_info.value) + + def test_update_tags_duplicate_name_error(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test tag update with duplicate name. + + This test verifies: + - Proper error handling for duplicate tag names during update + - Correct exception type and message + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create two tags + tag1_args = {"name": "first_tag", "type": "app"} + tag1 = TagService.save_tags(tag1_args) + + tag2_args = {"name": "second_tag", "type": "app"} + tag2 = TagService.save_tags(tag2_args) + + # Try to update second tag with first tag's name + update_args = {"name": "first_tag", "type": "app"} + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + TagService.update_tags(update_args, tag2.id) + assert "Tag name already exists" in str(exc_info.value) + + def test_get_tag_binding_count_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of tag binding count. 
+ + This test verifies: + - Proper binding count calculation + - Correct handling of tags with no bindings + - Proper database query execution + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tags + tags = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "knowledge", 2 + ) + + # Create dataset and bind first tag + dataset = self._create_test_dataset(db_session_with_containers, mock_external_service_dependencies, tenant.id) + self._create_test_tag_bindings( + db_session_with_containers, mock_external_service_dependencies, [tags[0]], dataset.id, tenant.id + ) + + # Act: Execute the method under test + result_tag_with_bindings = TagService.get_tag_binding_count(tags[0].id) + result_tag_without_bindings = TagService.get_tag_binding_count(tags[1].id) + + # Assert: Verify the expected outcomes + assert result_tag_with_bindings == 1 + assert result_tag_without_bindings == 0 + + def test_get_tag_binding_count_non_existent_tag( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test binding count retrieval for non-existent tag. + + This test verifies: + - Proper handling of non-existent tag IDs + - Correct return value for non-existent tags + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create non-existent tag ID + import uuid + + non_existent_tag_id = str(uuid.uuid4()) + + # Act: Execute the method under test + result = TagService.get_tag_binding_count(non_existent_tag_id) + + # Assert: Verify the expected outcomes + assert result == 0 + + def test_delete_tag_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful tag deletion. 
+ + This test verifies: + - Proper tag deletion from database + - Proper cleanup of associated tag bindings + - Correct database state after deletion + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tag with bindings + tag = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "app", 1 + )[0] + + # Create app and bind tag + app = self._create_test_app(db_session_with_containers, mock_external_service_dependencies, tenant.id) + self._create_test_tag_bindings( + db_session_with_containers, mock_external_service_dependencies, [tag], app.id, tenant.id + ) + + # Verify tag and binding exist before deletion + from extensions.ext_database import db + + tag_before = db.session.query(Tag).where(Tag.id == tag.id).first() + assert tag_before is not None + + binding_before = db.session.query(TagBinding).where(TagBinding.tag_id == tag.id).first() + assert binding_before is not None + + # Act: Execute the method under test + TagService.delete_tag(tag.id) + + # Assert: Verify the expected outcomes + # Verify tag was deleted + tag_after = db.session.query(Tag).where(Tag.id == tag.id).first() + assert tag_after is None + + # Verify tag binding was deleted + binding_after = db.session.query(TagBinding).where(TagBinding.tag_id == tag.id).first() + assert binding_after is None + + def test_delete_tag_not_found_error(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test tag deletion for non-existent tag. 
+ + This test verifies: + - Proper error handling for non-existent tags + - Correct exception type + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create non-existent tag ID + import uuid + + non_existent_tag_id = str(uuid.uuid4()) + + # Act & Assert: Verify proper error handling + with pytest.raises(NotFound) as exc_info: + TagService.delete_tag(non_existent_tag_id) + assert "Tag not found" in str(exc_info.value) + + def test_save_tag_binding_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful tag binding creation. + + This test verifies: + - Proper tag binding creation + - Correct handling of duplicate bindings + - Proper database state after creation + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tags + tags = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "knowledge", 2 + ) + + # Create dataset + dataset = self._create_test_dataset(db_session_with_containers, mock_external_service_dependencies, tenant.id) + + # Act: Execute the method under test + binding_args = {"type": "knowledge", "target_id": dataset.id, "tag_ids": [tag.id for tag in tags]} + TagService.save_tag_binding(binding_args) + + # Assert: Verify the expected outcomes + from extensions.ext_database import db + + # Verify tag bindings were created + for tag in tags: + binding = ( + db.session.query(TagBinding) + .where(TagBinding.tag_id == tag.id, TagBinding.target_id == dataset.id) + .first() + ) + assert binding is not None + assert binding.tenant_id == tenant.id + assert binding.created_by == account.id + + def test_save_tag_binding_duplicate_handling(self, db_session_with_containers, 
mock_external_service_dependencies): + """ + Test tag binding creation with duplicate bindings. + + This test verifies: + - Proper handling of duplicate tag bindings + - No errors when trying to create existing bindings + - Correct database state after operation + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tag + tag = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "app", 1 + )[0] + + # Create app + app = self._create_test_app(db_session_with_containers, mock_external_service_dependencies, tenant.id) + + # Create first binding + binding_args = {"type": "app", "target_id": app.id, "tag_ids": [tag.id]} + TagService.save_tag_binding(binding_args) + + # Act: Try to create duplicate binding + TagService.save_tag_binding(binding_args) + + # Assert: Verify the expected outcomes + from extensions.ext_database import db + + # Verify only one binding exists + bindings = db.session.query(TagBinding).where(TagBinding.tag_id == tag.id, TagBinding.target_id == app.id).all() + assert len(bindings) == 1 + + def test_save_tag_binding_invalid_target_type(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test tag binding creation with invalid target type. 
+ + This test verifies: + - Proper error handling for invalid target types + - Correct exception type + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tag + tag = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "knowledge", 1 + )[0] + + # Create non-existent target ID + import uuid + + non_existent_target_id = str(uuid.uuid4()) + + # Act & Assert: Verify proper error handling + binding_args = {"type": "invalid_type", "target_id": non_existent_target_id, "tag_ids": [tag.id]} + + with pytest.raises(NotFound) as exc_info: + TagService.save_tag_binding(binding_args) + assert "Invalid binding type" in str(exc_info.value) + + def test_delete_tag_binding_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful tag binding deletion. + + This test verifies: + - Proper tag binding deletion from database + - Correct database state after deletion + - Proper error handling for non-existent bindings + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tag + tag = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "knowledge", 1 + )[0] + + # Create dataset and bind tag + dataset = self._create_test_dataset(db_session_with_containers, mock_external_service_dependencies, tenant.id) + self._create_test_tag_bindings( + db_session_with_containers, mock_external_service_dependencies, [tag], dataset.id, tenant.id + ) + + # Verify binding exists before deletion + from extensions.ext_database import db + + binding_before = ( + db.session.query(TagBinding).where(TagBinding.tag_id == tag.id, TagBinding.target_id == dataset.id).first() + ) + assert binding_before is 
not None + + # Act: Execute the method under test + delete_args = {"type": "knowledge", "target_id": dataset.id, "tag_id": tag.id} + TagService.delete_tag_binding(delete_args) + + # Assert: Verify the expected outcomes + # Verify tag binding was deleted + binding_after = ( + db.session.query(TagBinding).where(TagBinding.tag_id == tag.id, TagBinding.target_id == dataset.id).first() + ) + assert binding_after is None + + def test_delete_tag_binding_non_existent_binding( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test tag binding deletion for non-existent binding. + + This test verifies: + - Proper handling of non-existent tag bindings + - No errors when trying to delete non-existent bindings + - Correct database state after operation + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create tag and dataset without binding + tag = self._create_test_tags( + db_session_with_containers, mock_external_service_dependencies, tenant.id, "app", 1 + )[0] + app = self._create_test_app(db_session_with_containers, mock_external_service_dependencies, tenant.id) + + # Act: Try to delete non-existent binding + delete_args = {"type": "app", "target_id": app.id, "tag_id": tag.id} + TagService.delete_tag_binding(delete_args) + + # Assert: Verify the expected outcomes + # No error should be raised, and database state should remain unchanged + from extensions.ext_database import db + + bindings = db.session.query(TagBinding).where(TagBinding.tag_id == tag.id, TagBinding.target_id == app.id).all() + assert len(bindings) == 0 + + def test_check_target_exists_knowledge_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful target existence check for knowledge type. 
+ + This test verifies: + - Proper validation of knowledge dataset existence + - Correct error handling for non-existent datasets + - Proper tenant filtering + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create dataset + dataset = self._create_test_dataset(db_session_with_containers, mock_external_service_dependencies, tenant.id) + + # Act: Execute the method under test + TagService.check_target_exists("knowledge", dataset.id) + + # Assert: Verify the expected outcomes + # No exception should be raised for existing dataset + + def test_check_target_exists_knowledge_not_found( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test target existence check for non-existent knowledge dataset. + + This test verifies: + - Proper error handling for non-existent knowledge datasets + - Correct exception type and message + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create non-existent dataset ID + import uuid + + non_existent_dataset_id = str(uuid.uuid4()) + + # Act & Assert: Verify proper error handling + with pytest.raises(NotFound) as exc_info: + TagService.check_target_exists("knowledge", non_existent_dataset_id) + assert "Dataset not found" in str(exc_info.value) + + def test_check_target_exists_app_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful target existence check for app type. 
+ + This test verifies: + - Proper validation of app existence + - Correct error handling for non-existent apps + - Proper tenant filtering + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create app + app = self._create_test_app(db_session_with_containers, mock_external_service_dependencies, tenant.id) + + # Act: Execute the method under test + TagService.check_target_exists("app", app.id) + + # Assert: Verify the expected outcomes + # No exception should be raised for existing app + + def test_check_target_exists_app_not_found(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test target existence check for non-existent app. + + This test verifies: + - Proper error handling for non-existent apps + - Correct exception type and message + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create non-existent app ID + import uuid + + non_existent_app_id = str(uuid.uuid4()) + + # Act & Assert: Verify proper error handling + with pytest.raises(NotFound) as exc_info: + TagService.check_target_exists("app", non_existent_app_id) + assert "App not found" in str(exc_info.value) + + def test_check_target_exists_invalid_type(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test target existence check for invalid type. 
+ + This test verifies: + - Proper error handling for invalid target types + - Correct exception type and message + """ + # Arrange: Create test data + fake = Faker() + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Create non-existent target ID + import uuid + + non_existent_target_id = str(uuid.uuid4()) + + # Act & Assert: Verify proper error handling + with pytest.raises(NotFound) as exc_info: + TagService.check_target_exists("invalid_type", non_existent_target_id) + assert "Invalid binding type" in str(exc_info.value) diff --git a/api/tests/test_containers_integration_tests/services/test_web_conversation_service.py b/api/tests/test_containers_integration_tests/services/test_web_conversation_service.py new file mode 100644 index 0000000000..6d6f1dab72 --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_web_conversation_service.py @@ -0,0 +1,574 @@ +from unittest.mock import patch + +import pytest +from faker import Faker + +from core.app.entities.app_invoke_entities import InvokeFrom +from models.account import Account +from models.model import Conversation, EndUser +from models.web import PinnedConversation +from services.account_service import AccountService, TenantService +from services.app_service import AppService +from services.web_conversation_service import WebConversationService + + +class TestWebConversationService: + """Integration tests for WebConversationService using testcontainers.""" + + @pytest.fixture + def mock_external_service_dependencies(self): + """Mock setup for external service dependencies.""" + with ( + patch("services.app_service.FeatureService") as mock_feature_service, + patch("services.app_service.EnterpriseService") as mock_enterprise_service, + patch("services.app_service.ModelManager") as mock_model_manager, + patch("services.account_service.FeatureService") as mock_account_feature_service, + ): + # Setup default mock 
returns for app service + mock_feature_service.get_system_features.return_value.webapp_auth.enabled = False + mock_enterprise_service.WebAppAuth.update_app_access_mode.return_value = None + mock_enterprise_service.WebAppAuth.cleanup_webapp.return_value = None + + # Setup default mock returns for account service + mock_account_feature_service.get_system_features.return_value.is_allow_register = True + + # Mock ModelManager for model configuration + mock_model_instance = mock_model_manager.return_value + mock_model_instance.get_default_model_instance.return_value = None + mock_model_instance.get_default_provider_model_name.return_value = ("openai", "gpt-3.5-turbo") + + yield { + "feature_service": mock_feature_service, + "enterprise_service": mock_enterprise_service, + "model_manager": mock_model_manager, + "account_feature_service": mock_account_feature_service, + } + + def _create_test_app_and_account(self, db_session_with_containers, mock_external_service_dependencies): + """ + Helper method to create a test app and account for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + + Returns: + tuple: (app, account) - Created app and account instances + """ + fake = Faker() + + # Setup mocks for account creation + mock_external_service_dependencies[ + "account_feature_service" + ].get_system_features.return_value.is_allow_register = True + + # Create account and tenant + account = AccountService.create_account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + password=fake.password(length=12), + ) + TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) + tenant = account.current_tenant + + # Create app with realistic data + app_args = { + "name": fake.company(), + "description": fake.text(max_nb_chars=100), + "mode": "chat", + "icon_type": "emoji", + "icon": "🤖", + "icon_background": "#FF6B6B", + "api_rph": 100, + "api_rpm": 10, + } + + app_service = AppService() + app = app_service.create_app(tenant.id, app_args, account) + + return app, account + + def _create_test_end_user(self, db_session_with_containers, app): + """ + Helper method to create a test end user for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + app: App instance + + Returns: + EndUser: Created end user instance + """ + fake = Faker() + + end_user = EndUser( + session_id=fake.uuid4(), + app_id=app.id, + type="normal", + is_anonymous=False, + tenant_id=app.tenant_id, + ) + + from extensions.ext_database import db + + db.session.add(end_user) + db.session.commit() + + return end_user + + def _create_test_conversation(self, db_session_with_containers, app, user, fake): + """ + Helper method to create a test conversation for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + app: App instance + user: User instance (Account or EndUser) + fake: Faker instance + + Returns: + Conversation: Created conversation instance + """ + conversation = Conversation( + app_id=app.id, + app_model_config_id=app.app_model_config_id, + model_provider="openai", + model_id="gpt-3.5-turbo", + mode="chat", + name=fake.sentence(nb_words=3), + summary=fake.text(max_nb_chars=100), + inputs={}, + introduction=fake.text(max_nb_chars=200), + system_instruction=fake.text(max_nb_chars=300), + system_instruction_tokens=50, + status="normal", + invoke_from=InvokeFrom.WEB_APP.value, + from_source="console" if isinstance(user, Account) else "api", + from_end_user_id=user.id if isinstance(user, EndUser) else None, + from_account_id=user.id if isinstance(user, Account) else None, + dialogue_count=0, + is_deleted=False, + ) + + from extensions.ext_database import db + + db.session.add(conversation) + db.session.commit() + + return conversation + + def test_pagination_by_last_id_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful pagination by last ID with basic parameters. 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create multiple conversations + conversations = [] + for i in range(5): + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + conversations.append(conversation) + + # Test pagination without pinned filter + result = WebConversationService.pagination_by_last_id( + session=db_session_with_containers, + app_model=app, + user=account, + last_id=None, + limit=3, + invoke_from=InvokeFrom.WEB_APP, + pinned=None, + sort_by="-updated_at", + ) + + # Verify results + assert result.limit == 3 + assert len(result.data) == 3 + assert result.has_more is True + + # Verify conversations are in descending order by updated_at + assert result.data[0].updated_at >= result.data[1].updated_at + assert result.data[1].updated_at >= result.data[2].updated_at + + def test_pagination_by_last_id_with_pinned_filter( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test pagination by last ID with pinned conversation filter. 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create conversations + conversations = [] + for i in range(5): + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + conversations.append(conversation) + + # Pin some conversations + pinned_conversation1 = PinnedConversation( + app_id=app.id, + conversation_id=conversations[0].id, + created_by_role="account", + created_by=account.id, + ) + pinned_conversation2 = PinnedConversation( + app_id=app.id, + conversation_id=conversations[2].id, + created_by_role="account", + created_by=account.id, + ) + + from extensions.ext_database import db + + db.session.add(pinned_conversation1) + db.session.add(pinned_conversation2) + db.session.commit() + + # Test pagination with pinned filter + result = WebConversationService.pagination_by_last_id( + session=db_session_with_containers, + app_model=app, + user=account, + last_id=None, + limit=10, + invoke_from=InvokeFrom.WEB_APP, + pinned=True, + sort_by="-updated_at", + ) + + # Verify only pinned conversations are returned + assert result.limit == 10 + assert len(result.data) == 2 + assert result.has_more is False + + # Verify the returned conversations are the pinned ones + returned_ids = [conv.id for conv in result.data] + expected_ids = [conversations[0].id, conversations[2].id] + assert set(returned_ids) == set(expected_ids) + + def test_pagination_by_last_id_with_unpinned_filter( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test pagination by last ID with unpinned conversation filter. 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create conversations + conversations = [] + for i in range(5): + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + conversations.append(conversation) + + # Pin one conversation + pinned_conversation = PinnedConversation( + app_id=app.id, + conversation_id=conversations[0].id, + created_by_role="account", + created_by=account.id, + ) + + from extensions.ext_database import db + + db.session.add(pinned_conversation) + db.session.commit() + + # Test pagination with unpinned filter + result = WebConversationService.pagination_by_last_id( + session=db_session_with_containers, + app_model=app, + user=account, + last_id=None, + limit=10, + invoke_from=InvokeFrom.WEB_APP, + pinned=False, + sort_by="-updated_at", + ) + + # Verify unpinned conversations are returned (should be 4 out of 5) + assert result.limit == 10 + assert len(result.data) == 4 + assert result.has_more is False + + # Verify the pinned conversation is not in the results + returned_ids = [conv.id for conv in result.data] + assert conversations[0].id not in returned_ids + + # Verify all other conversations are in the results + expected_unpinned_ids = [conv.id for conv in conversations[1:]] + assert set(returned_ids) == set(expected_unpinned_ids) + + def test_pin_conversation_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful pinning of a conversation. 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create a conversation + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + + # Pin the conversation + WebConversationService.pin(app, conversation.id, account) + + # Verify the conversation was pinned + from extensions.ext_database import db + + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "account", + PinnedConversation.created_by == account.id, + ) + .first() + ) + + assert pinned_conversation is not None + assert pinned_conversation.app_id == app.id + assert pinned_conversation.conversation_id == conversation.id + assert pinned_conversation.created_by_role == "account" + assert pinned_conversation.created_by == account.id + + def test_pin_conversation_already_pinned(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test pinning a conversation that is already pinned (should not create duplicate). 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create a conversation + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + + # Pin the conversation first time + WebConversationService.pin(app, conversation.id, account) + + # Pin the conversation again + WebConversationService.pin(app, conversation.id, account) + + # Verify only one pinned conversation record exists + from extensions.ext_database import db + + pinned_conversations = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "account", + PinnedConversation.created_by == account.id, + ) + .all() + ) + + assert len(pinned_conversations) == 1 + + def test_pin_conversation_with_end_user(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test pinning a conversation with an end user. 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create an end user + end_user = self._create_test_end_user(db_session_with_containers, app) + + # Create a conversation for the end user + conversation = self._create_test_conversation(db_session_with_containers, app, end_user, fake) + + # Pin the conversation + WebConversationService.pin(app, conversation.id, end_user) + + # Verify the conversation was pinned + from extensions.ext_database import db + + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "end_user", + PinnedConversation.created_by == end_user.id, + ) + .first() + ) + + assert pinned_conversation is not None + assert pinned_conversation.app_id == app.id + assert pinned_conversation.conversation_id == conversation.id + assert pinned_conversation.created_by_role == "end_user" + assert pinned_conversation.created_by == end_user.id + + def test_unpin_conversation_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful unpinning of a conversation. 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create a conversation + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + + # Pin the conversation first + WebConversationService.pin(app, conversation.id, account) + + # Verify it was pinned + from extensions.ext_database import db + + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "account", + PinnedConversation.created_by == account.id, + ) + .first() + ) + + assert pinned_conversation is not None + + # Unpin the conversation + WebConversationService.unpin(app, conversation.id, account) + + # Verify it was unpinned + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "account", + PinnedConversation.created_by == account.id, + ) + .first() + ) + + assert pinned_conversation is None + + def test_unpin_conversation_not_pinned(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test unpinning a conversation that is not pinned (should not cause error). 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create a conversation + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + + # Try to unpin a conversation that was never pinned + WebConversationService.unpin(app, conversation.id, account) + + # Verify no pinned conversation record exists + from extensions.ext_database import db + + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "account", + PinnedConversation.created_by == account.id, + ) + .first() + ) + + assert pinned_conversation is None + + def test_pagination_by_last_id_user_required_error( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test that pagination_by_last_id raises ValueError when user is None. + """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Test with None user + with pytest.raises(ValueError, match="User is required"): + WebConversationService.pagination_by_last_id( + session=db_session_with_containers, + app_model=app, + user=None, + last_id=None, + limit=10, + invoke_from=InvokeFrom.WEB_APP, + pinned=None, + sort_by="-updated_at", + ) + + def test_pin_conversation_user_none(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test that pin method returns early when user is None. 
+ """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create a conversation + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + + # Try to pin with None user + WebConversationService.pin(app, conversation.id, None) + + # Verify no pinned conversation was created + from extensions.ext_database import db + + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + ) + .first() + ) + + assert pinned_conversation is None + + def test_unpin_conversation_user_none(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test that unpin method returns early when user is None. + """ + fake = Faker() + app, account = self._create_test_app_and_account(db_session_with_containers, mock_external_service_dependencies) + + # Create a conversation + conversation = self._create_test_conversation(db_session_with_containers, app, account, fake) + + # Pin the conversation first + WebConversationService.pin(app, conversation.id, account) + + # Verify it was pinned + from extensions.ext_database import db + + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "account", + PinnedConversation.created_by == account.id, + ) + .first() + ) + + assert pinned_conversation is not None + + # Try to unpin with None user + WebConversationService.unpin(app, conversation.id, None) + + # Verify the conversation is still pinned + pinned_conversation = ( + db.session.query(PinnedConversation) + .where( + PinnedConversation.app_id == app.id, + PinnedConversation.conversation_id == conversation.id, + PinnedConversation.created_by_role == "account", + PinnedConversation.created_by == 
account.id, + ) + .first() + ) + + assert pinned_conversation is not None diff --git a/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py b/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py new file mode 100644 index 0000000000..666b083ba6 --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py @@ -0,0 +1,877 @@ +from unittest.mock import patch + +import pytest +from faker import Faker +from werkzeug.exceptions import NotFound, Unauthorized + +from libs.password import hash_password +from models.account import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole +from models.model import App, Site +from services.errors.account import AccountLoginError, AccountNotFoundError, AccountPasswordError +from services.webapp_auth_service import WebAppAuthService, WebAppAuthType + + +class TestWebAppAuthService: + """Integration tests for WebAppAuthService using testcontainers.""" + + @pytest.fixture + def mock_external_service_dependencies(self): + """Mock setup for external service dependencies.""" + with ( + patch("services.webapp_auth_service.PassportService") as mock_passport_service, + patch("services.webapp_auth_service.TokenManager") as mock_token_manager, + patch("services.webapp_auth_service.send_email_code_login_mail_task") as mock_mail_task, + patch("services.webapp_auth_service.AppService") as mock_app_service, + patch("services.webapp_auth_service.EnterpriseService") as mock_enterprise_service, + ): + # Setup default mock returns + mock_passport_service.return_value.issue.return_value = "mock_jwt_token" + mock_token_manager.generate_token.return_value = "mock_token" + mock_token_manager.get_token_data.return_value = {"code": "123456"} + mock_mail_task.delay.return_value = None + mock_app_service.get_app_id_by_code.return_value = "mock_app_id" + mock_enterprise_service.WebAppAuth.get_app_access_mode_by_id.return_value = type( + 
"MockWebAppAuth", (), {"access_mode": "private"} + )() + mock_enterprise_service.WebAppAuth.get_app_access_mode_by_code.return_value = type( + "MockWebAppAuth", (), {"access_mode": "private"} + )() + + yield { + "passport_service": mock_passport_service, + "token_manager": mock_token_manager, + "mail_task": mock_mail_task, + "app_service": mock_app_service, + "enterprise_service": mock_enterprise_service, + } + + def _create_test_account_and_tenant(self, db_session_with_containers, mock_external_service_dependencies): + """ + Helper method to create a test account and tenant for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + + Returns: + tuple: (account, tenant) - Created account and tenant instances + """ + fake = Faker() + + # Create account + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Create tenant for the account + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db.session.add(tenant) + db.session.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER.value, + current=True, + ) + db.session.add(join) + db.session.commit() + + # Set current tenant for account + account.current_tenant = tenant + + return account, tenant + + def _create_test_account_with_password(self, db_session_with_containers, mock_external_service_dependencies): + """ + Helper method to create a test account with password for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + + Returns: + tuple: (account, tenant, password) - Created account, tenant and password + """ + fake = Faker() + password = fake.password(length=12) + + # Create account with password + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + + # Hash password + salt = b"test_salt_16_bytes" + password_hash = hash_password(password, salt) + + # Convert to base64 for storage + import base64 + + account.password = base64.b64encode(password_hash).decode() + account.password_salt = base64.b64encode(salt).decode() + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Create tenant for the account + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db.session.add(tenant) + db.session.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER.value, + current=True, + ) + db.session.add(join) + db.session.commit() + + # Set current tenant for account + account.current_tenant = tenant + + return account, tenant, password + + def _create_test_app_and_site(self, db_session_with_containers, mock_external_service_dependencies, tenant): + """ + Helper method to create a test app and site for testing. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + tenant: Tenant instance to associate with + + Returns: + tuple: (app, site) - Created app and site instances + """ + fake = Faker() + + # Create app + app = App( + tenant_id=tenant.id, + name=fake.company(), + description=fake.text(max_nb_chars=100), + mode="chat", + icon_type="emoji", + icon="🤖", + icon_background="#FF6B6B", + api_rph=100, + api_rpm=10, + enable_site=True, + enable_api=True, + ) + + from extensions.ext_database import db + + db.session.add(app) + db.session.commit() + + # Create site + site = Site( + app_id=app.id, + title=fake.company(), + code=fake.unique.lexify(text="??????"), + description=fake.text(max_nb_chars=100), + default_language="en-US", + status="normal", + customize_token_strategy="not_allow", + ) + db.session.add(site) + db.session.commit() + + return app, site + + def test_authenticate_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful authentication with valid email and password. 
+ + This test verifies: + - Proper authentication with valid credentials + - Correct account return + - Database state consistency + """ + # Arrange: Create test data + account, tenant, password = self._create_test_account_with_password( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute authentication + result = WebAppAuthService.authenticate(account.email, password) + + # Assert: Verify successful authentication + assert result is not None + assert result.id == account.id + assert result.email == account.email + assert result.name == account.name + assert result.status == AccountStatus.ACTIVE.value + + # Verify database state + from extensions.ext_database import db + + db.session.refresh(result) + assert result.id is not None + assert result.password is not None + assert result.password_salt is not None + + def test_authenticate_account_not_found(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test authentication with non-existent email. + + This test verifies: + - Proper error handling for non-existent accounts + - Correct exception type and message + """ + # Arrange: Use non-existent email + fake = Faker() + non_existent_email = fake.email() + + # Act & Assert: Verify proper error handling + with pytest.raises(AccountNotFoundError): + WebAppAuthService.authenticate(non_existent_email, "any_password") + + def test_authenticate_account_banned(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test authentication with banned account. 
+ + This test verifies: + - Proper error handling for banned accounts + - Correct exception type and message + """ + # Arrange: Create banned account + fake = Faker() + password = fake.password(length=12) + + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status=AccountStatus.BANNED.value, + ) + + # Hash password + salt = b"test_salt_16_bytes" + password_hash = hash_password(password, salt) + + # Convert to base64 for storage + import base64 + + account.password = base64.b64encode(password_hash).decode() + account.password_salt = base64.b64encode(salt).decode() + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Act & Assert: Verify proper error handling + with pytest.raises(AccountLoginError) as exc_info: + WebAppAuthService.authenticate(account.email, password) + + assert "Account is banned." in str(exc_info.value) + + def test_authenticate_invalid_password(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test authentication with invalid password. + + This test verifies: + - Proper error handling for invalid passwords + - Correct exception type and message + """ + # Arrange: Create account with password + account, tenant, correct_password = self._create_test_account_with_password( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act & Assert: Verify proper error handling with wrong password + with pytest.raises(AccountPasswordError) as exc_info: + WebAppAuthService.authenticate(account.email, "wrong_password") + + assert "Invalid email or password." in str(exc_info.value) + + def test_authenticate_account_without_password( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test authentication for account without password. 
+ + This test verifies: + - Proper error handling for accounts without password + - Correct exception type and message + """ + # Arrange: Create account without password + fake = Faker() + + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Act & Assert: Verify proper error handling + with pytest.raises(AccountPasswordError) as exc_info: + WebAppAuthService.authenticate(account.email, "any_password") + + assert "Invalid email or password." in str(exc_info.value) + + def test_login_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful login and JWT token generation. + + This test verifies: + - Proper JWT token generation + - Correct token format and content + - Mock service integration + """ + # Arrange: Create test account + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute login + result = WebAppAuthService.login(account) + + # Assert: Verify successful login + assert result is not None + assert result == "mock_jwt_token" + + # Verify mock service was called correctly + mock_external_service_dependencies["passport_service"].return_value.issue.assert_called_once() + call_args = mock_external_service_dependencies["passport_service"].return_value.issue.call_args[0][0] + + assert call_args["sub"] == "Web API Passport" + assert call_args["user_id"] == account.id + assert call_args["session_id"] == account.email + assert call_args["token_source"] == "webapp_login_token" + assert call_args["auth_type"] == "internal" + assert "exp" in call_args + + def test_get_user_through_email_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful user retrieval through email. 
+ + This test verifies: + - Proper user retrieval by email + - Correct account return + - Database state consistency + """ + # Arrange: Create test data + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute user retrieval + result = WebAppAuthService.get_user_through_email(account.email) + + # Assert: Verify successful retrieval + assert result is not None + assert result.id == account.id + assert result.email == account.email + assert result.name == account.name + assert result.status == AccountStatus.ACTIVE.value + + # Verify database state + from extensions.ext_database import db + + db.session.refresh(result) + assert result.id is not None + + def test_get_user_through_email_not_found(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test user retrieval with non-existent email. + + This test verifies: + - Proper handling for non-existent users + - Correct return value (None) + """ + # Arrange: Use non-existent email + fake = Faker() + non_existent_email = fake.email() + + # Act: Execute user retrieval + result = WebAppAuthService.get_user_through_email(non_existent_email) + + # Assert: Verify proper handling + assert result is None + + def test_get_user_through_email_banned(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test user retrieval with banned account. 
+ + This test verifies: + - Proper error handling for banned accounts + - Correct exception type and message + """ + # Arrange: Create banned account + fake = Faker() + + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status=AccountStatus.BANNED.value, + ) + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Act & Assert: Verify proper error handling + with pytest.raises(Unauthorized) as exc_info: + WebAppAuthService.get_user_through_email(account.email) + + assert "Account is banned." in str(exc_info.value) + + def test_send_email_code_login_email_with_account( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test sending email code login email with account. + + This test verifies: + - Proper email code generation + - Token generation with correct data + - Mail task scheduling + - Mock service integration + """ + # Arrange: Create test account + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + + # Act: Execute email code login email sending + result = WebAppAuthService.send_email_code_login_email(account=account, language="en-US") + + # Assert: Verify successful email sending + assert result is not None + assert result == "mock_token" + + # Verify mock services were called correctly + mock_external_service_dependencies["token_manager"].generate_token.assert_called_once() + mock_external_service_dependencies["mail_task"].delay.assert_called_once() + + # Verify token generation parameters + token_call_args = mock_external_service_dependencies["token_manager"].generate_token.call_args + assert token_call_args[1]["account"] == account + assert token_call_args[1]["email"] == account.email + assert token_call_args[1]["token_type"] == "email_code_login" + assert "code" in token_call_args[1]["additional_data"] + + # Verify mail task parameters + mail_call_args = 
mock_external_service_dependencies["mail_task"].delay.call_args + assert mail_call_args[1]["language"] == "en-US" + assert mail_call_args[1]["to"] == account.email + assert "code" in mail_call_args[1] + + def test_send_email_code_login_email_with_email_only( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test sending email code login email with email only. + + This test verifies: + - Proper email code generation without account + - Token generation with email only + - Mail task scheduling + - Mock service integration + """ + # Arrange: Use test email + fake = Faker() + test_email = fake.email() + + # Act: Execute email code login email sending + result = WebAppAuthService.send_email_code_login_email(email=test_email, language="zh-Hans") + + # Assert: Verify successful email sending + assert result is not None + assert result == "mock_token" + + # Verify mock services were called correctly + mock_external_service_dependencies["token_manager"].generate_token.assert_called_once() + mock_external_service_dependencies["mail_task"].delay.assert_called_once() + + # Verify token generation parameters + token_call_args = mock_external_service_dependencies["token_manager"].generate_token.call_args + assert token_call_args[1]["account"] is None + assert token_call_args[1]["email"] == test_email + assert token_call_args[1]["token_type"] == "email_code_login" + assert "code" in token_call_args[1]["additional_data"] + + # Verify mail task parameters + mail_call_args = mock_external_service_dependencies["mail_task"].delay.call_args + assert mail_call_args[1]["language"] == "zh-Hans" + assert mail_call_args[1]["to"] == test_email + assert "code" in mail_call_args[1] + + def test_send_email_code_login_email_no_email_provided( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test sending email code login email without providing email. 
+ + This test verifies: + - Proper error handling when no email is provided + - Correct exception type and message + """ + # Arrange: No email provided + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebAppAuthService.send_email_code_login_email() + + assert "Email must be provided." in str(exc_info.value) + + def test_get_email_code_login_data_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful retrieval of email code login data. + + This test verifies: + - Proper token data retrieval + - Correct data format + - Mock service integration + """ + # Arrange: Setup mock return + expected_data = {"code": "123456", "email": "test@example.com"} + mock_external_service_dependencies["token_manager"].get_token_data.return_value = expected_data + + # Act: Execute data retrieval + result = WebAppAuthService.get_email_code_login_data("mock_token") + + # Assert: Verify successful retrieval + assert result is not None + assert result == expected_data + assert result["code"] == "123456" + assert result["email"] == "test@example.com" + + # Verify mock service was called correctly + mock_external_service_dependencies["token_manager"].get_token_data.assert_called_once_with( + "mock_token", "email_code_login" + ) + + def test_get_email_code_login_data_no_data(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test email code login data retrieval when no data exists. 
+ + This test verifies: + - Proper handling when no token data exists + - Correct return value (None) + - Mock service integration + """ + # Arrange: Setup mock return for no data + mock_external_service_dependencies["token_manager"].get_token_data.return_value = None + + # Act: Execute data retrieval + result = WebAppAuthService.get_email_code_login_data("invalid_token") + + # Assert: Verify proper handling + assert result is None + + # Verify mock service was called correctly + mock_external_service_dependencies["token_manager"].get_token_data.assert_called_once_with( + "invalid_token", "email_code_login" + ) + + def test_revoke_email_code_login_token_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful revocation of email code login token. + + This test verifies: + - Proper token revocation + - Mock service integration + """ + # Arrange: Setup mock + + # Act: Execute token revocation + WebAppAuthService.revoke_email_code_login_token("mock_token") + + # Assert: Verify mock service was called correctly + mock_external_service_dependencies["token_manager"].revoke_token.assert_called_once_with( + "mock_token", "email_code_login" + ) + + def test_create_end_user_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful end user creation. 
+ + This test verifies: + - Proper end user creation with valid app code + - Correct database state after creation + - Proper relationship establishment + - Mock service integration + """ + # Arrange: Create test data + account, tenant = self._create_test_account_and_tenant( + db_session_with_containers, mock_external_service_dependencies + ) + app, site = self._create_test_app_and_site( + db_session_with_containers, mock_external_service_dependencies, tenant + ) + + # Act: Execute end user creation + result = WebAppAuthService.create_end_user(site.code, "test@example.com") + + # Assert: Verify successful creation + assert result is not None + assert result.tenant_id == app.tenant_id + assert result.app_id == app.id + assert result.type == "browser" + assert result.is_anonymous is False + assert result.session_id == "test@example.com" + assert result.name == "enterpriseuser" + assert result.external_user_id == "enterpriseuser" + + # Verify database state + from extensions.ext_database import db + + db.session.refresh(result) + assert result.id is not None + assert result.created_at is not None + assert result.updated_at is not None + + def test_create_end_user_site_not_found(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test end user creation with non-existent site code. + + This test verifies: + - Proper error handling for non-existent sites + - Correct exception type and message + """ + # Arrange: Use non-existent site code + fake = Faker() + non_existent_code = fake.unique.lexify(text="??????") + + # Act & Assert: Verify proper error handling + with pytest.raises(NotFound) as exc_info: + WebAppAuthService.create_end_user(non_existent_code, "test@example.com") + + assert "Site not found." in str(exc_info.value) + + def test_create_end_user_app_not_found(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test end user creation when app is not found. 
+ + This test verifies: + - Proper error handling when app is missing + - Correct exception type and message + """ + # Arrange: Create site without app + fake = Faker() + tenant = Tenant( + name=fake.company(), + status="normal", + ) + + from extensions.ext_database import db + + db.session.add(tenant) + db.session.commit() + + site = Site( + app_id="00000000-0000-0000-0000-000000000000", + title=fake.company(), + code=fake.unique.lexify(text="??????"), + description=fake.text(max_nb_chars=100), + default_language="en-US", + status="normal", + customize_token_strategy="not_allow", + ) + db.session.add(site) + db.session.commit() + + # Act & Assert: Verify proper error handling + with pytest.raises(NotFound) as exc_info: + WebAppAuthService.create_end_user(site.code, "test@example.com") + + assert "App not found." in str(exc_info.value) + + def test_is_app_require_permission_check_with_access_mode_private( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test permission check requirement for private access mode. + + This test verifies: + - Proper permission check requirement for private mode + - Correct return value + - Mock service integration + """ + # Arrange: Setup test with private access mode + + # Act: Execute permission check requirement test + result = WebAppAuthService.is_app_require_permission_check(access_mode="private") + + # Assert: Verify correct result + assert result is True + + def test_is_app_require_permission_check_with_access_mode_public( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test permission check requirement for public access mode. 
+ + This test verifies: + - Proper permission check requirement for public mode + - Correct return value + - Mock service integration + """ + # Arrange: Setup test with public access mode + + # Act: Execute permission check requirement test + result = WebAppAuthService.is_app_require_permission_check(access_mode="public") + + # Assert: Verify correct result + assert result is False + + def test_is_app_require_permission_check_with_app_code( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test permission check requirement using app code. + + This test verifies: + - Proper permission check requirement using app code + - Correct return value + - Mock service integration + """ + # Arrange: Setup mock for app service + mock_external_service_dependencies["app_service"].get_app_id_by_code.return_value = "mock_app_id" + + # Act: Execute permission check requirement test + result = WebAppAuthService.is_app_require_permission_check(app_code="mock_app_code") + + # Assert: Verify correct result + assert result is True + + # Verify mock service was called correctly + mock_external_service_dependencies["app_service"].get_app_id_by_code.assert_called_once_with("mock_app_code") + mock_external_service_dependencies[ + "enterprise_service" + ].WebAppAuth.get_app_access_mode_by_id.assert_called_once_with("mock_app_id") + + def test_is_app_require_permission_check_no_parameters( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test permission check requirement with no parameters. + + This test verifies: + - Proper error handling when no parameters provided + - Correct exception type and message + """ + # Arrange: No parameters provided + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebAppAuthService.is_app_require_permission_check() + + assert "Either app_code or app_id must be provided." 
in str(exc_info.value) + + def test_get_app_auth_type_with_access_mode_public( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test app authentication type for public access mode. + + This test verifies: + - Proper authentication type determination for public mode + - Correct return value + - Mock service integration + """ + # Arrange: Setup test with public access mode + + # Act: Execute authentication type determination + result = WebAppAuthService.get_app_auth_type(access_mode="public") + + # Assert: Verify correct result + assert result == WebAppAuthType.PUBLIC + + def test_get_app_auth_type_with_access_mode_private( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test app authentication type for private access mode. + + This test verifies: + - Proper authentication type determination for private mode + - Correct return value + - Mock service integration + """ + # Arrange: Setup test with private access mode + + # Act: Execute authentication type determination + result = WebAppAuthService.get_app_auth_type(access_mode="private") + + # Assert: Verify correct result + assert result == WebAppAuthType.INTERNAL + + def test_get_app_auth_type_with_app_code(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test app authentication type using app code. 
+ + This test verifies: + - Proper authentication type determination using app code + - Correct return value + - Mock service integration + """ + # Arrange: Setup mock for enterprise service + mock_webapp_auth = type("MockWebAppAuth", (), {"access_mode": "sso_verified"})() + mock_external_service_dependencies[ + "enterprise_service" + ].WebAppAuth.get_app_access_mode_by_code.return_value = mock_webapp_auth + + # Act: Execute authentication type determination + result = WebAppAuthService.get_app_auth_type(app_code="mock_app_code") + + # Assert: Verify correct result + assert result == WebAppAuthType.EXTERNAL + + # Verify mock service was called correctly + mock_external_service_dependencies[ + "enterprise_service" + ].WebAppAuth.get_app_access_mode_by_code.assert_called_once_with("mock_app_code") + + def test_get_app_auth_type_no_parameters(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test app authentication type with no parameters. + + This test verifies: + - Proper error handling when no parameters provided + - Correct exception type and message + """ + # Arrange: No parameters provided + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebAppAuthService.get_app_auth_type() + + assert "Either app_code or access_mode must be provided." 
in str(exc_info.value) diff --git a/api/tests/test_containers_integration_tests/services/test_website_service.py b/api/tests/test_containers_integration_tests/services/test_website_service.py new file mode 100644 index 0000000000..ec2f1556af --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_website_service.py @@ -0,0 +1,1437 @@ +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +from faker import Faker + +from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole +from services.website_service import ( + CrawlOptions, + ScrapeRequest, + WebsiteCrawlApiRequest, + WebsiteCrawlStatusApiRequest, + WebsiteService, +) + + +class TestWebsiteService: + """Integration tests for WebsiteService using testcontainers.""" + + @pytest.fixture + def mock_external_service_dependencies(self): + """Mock setup for external service dependencies.""" + with ( + patch("services.website_service.ApiKeyAuthService") as mock_api_key_auth_service, + patch("services.website_service.FirecrawlApp") as mock_firecrawl_app, + patch("services.website_service.WaterCrawlProvider") as mock_watercrawl_provider, + patch("services.website_service.requests") as mock_requests, + patch("services.website_service.redis_client") as mock_redis_client, + patch("services.website_service.storage") as mock_storage, + patch("services.website_service.encrypter") as mock_encrypter, + ): + # Setup default mock returns + mock_api_key_auth_service.get_auth_credentials.return_value = { + "config": {"api_key": "encrypted_api_key", "base_url": "https://api.example.com"} + } + mock_encrypter.decrypt_token.return_value = "decrypted_api_key" + + # Mock FirecrawlApp + mock_firecrawl_instance = MagicMock() + mock_firecrawl_instance.crawl_url.return_value = "test_job_id_123" + mock_firecrawl_instance.check_crawl_status.return_value = { + "status": "completed", + "total": 5, + "current": 5, + "data": [{"source_url": "https://example.com", 
"title": "Test Page"}], + } + mock_firecrawl_app.return_value = mock_firecrawl_instance + + # Mock WaterCrawlProvider + mock_watercrawl_instance = MagicMock() + mock_watercrawl_instance.crawl_url.return_value = {"status": "active", "job_id": "watercrawl_job_123"} + mock_watercrawl_instance.get_crawl_status.return_value = { + "status": "completed", + "job_id": "watercrawl_job_123", + "total": 3, + "current": 3, + "data": [], + } + mock_watercrawl_instance.get_crawl_url_data.return_value = { + "title": "WaterCrawl Page", + "source_url": "https://example.com", + "description": "Test description", + "markdown": "# Test Content", + } + mock_watercrawl_instance.scrape_url.return_value = { + "title": "Scraped Page", + "content": "Test content", + "url": "https://example.com", + } + mock_watercrawl_provider.return_value = mock_watercrawl_instance + + # Mock requests + mock_response = MagicMock() + mock_response.json.return_value = {"code": 200, "data": {"taskId": "jina_job_123"}} + mock_requests.get.return_value = mock_response + mock_requests.post.return_value = mock_response + + # Mock Redis + mock_redis_client.setex.return_value = None + mock_redis_client.get.return_value = str(datetime.now().timestamp()) + mock_redis_client.delete.return_value = None + + # Mock Storage + mock_storage.exists.return_value = False + mock_storage.load_once.return_value = None + + yield { + "api_key_auth_service": mock_api_key_auth_service, + "firecrawl_app": mock_firecrawl_app, + "watercrawl_provider": mock_watercrawl_provider, + "requests": mock_requests, + "redis_client": mock_redis_client, + "storage": mock_storage, + "encrypter": mock_encrypter, + } + + def _create_test_account(self, db_session_with_containers, mock_external_service_dependencies): + """ + Helper method to create a test account with proper tenant setup. 
+ + Args: + db_session_with_containers: Database session from testcontainers infrastructure + mock_external_service_dependencies: Mock dependencies + + Returns: + Account: Created account instance + """ + fake = Faker() + + # Create account + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + + from extensions.ext_database import db + + db.session.add(account) + db.session.commit() + + # Create tenant for the account + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db.session.add(tenant) + db.session.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER.value, + current=True, + ) + db.session.add(join) + db.session.commit() + + # Set current tenant for account + account.current_tenant = tenant + + return account + + def test_document_create_args_validate_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful argument validation for document creation. + + This test verifies: + - Valid arguments are accepted without errors + - All required fields are properly validated + - Optional fields are handled correctly + """ + # Arrange: Prepare valid arguments + valid_args = { + "provider": "firecrawl", + "url": "https://example.com", + "options": { + "limit": 5, + "crawl_sub_pages": True, + "only_main_content": False, + "includes": "blog,news", + "excludes": "admin,private", + "max_depth": 3, + "use_sitemap": True, + }, + } + + # Act: Validate arguments + WebsiteService.document_create_args_validate(valid_args) + + # Assert: No exception should be raised + # If we reach here, validation passed successfully + + def test_document_create_args_validate_missing_provider( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test argument validation fails when provider is missing. 
+ + This test verifies: + - Missing provider raises ValueError + - Proper error message is provided + - Validation stops at first missing required field + """ + # Arrange: Prepare arguments without provider + invalid_args = {"url": "https://example.com", "options": {"limit": 5, "crawl_sub_pages": True}} + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.document_create_args_validate(invalid_args) + + assert "Provider is required" in str(exc_info.value) + + def test_document_create_args_validate_missing_url( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test argument validation fails when URL is missing. + + This test verifies: + - Missing URL raises ValueError + - Proper error message is provided + - Validation continues after provider check + """ + # Arrange: Prepare arguments without URL + invalid_args = {"provider": "firecrawl", "options": {"limit": 5, "crawl_sub_pages": True}} + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.document_create_args_validate(invalid_args) + + assert "URL is required" in str(exc_info.value) + + def test_crawl_url_firecrawl_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful URL crawling with Firecrawl provider. 
+ + This test verifies: + - Firecrawl provider is properly initialized + - API credentials are retrieved and decrypted + - Crawl parameters are correctly formatted + - Job ID is returned with active status + - Redis cache is properly set + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + fake = Faker() + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request + api_request = WebsiteCrawlApiRequest( + provider="firecrawl", + url="https://example.com", + options={ + "limit": 10, + "crawl_sub_pages": True, + "only_main_content": True, + "includes": "blog,news", + "excludes": "admin,private", + "max_depth": 2, + "use_sitemap": True, + }, + ) + + # Act: Execute crawl operation + result = WebsiteService.crawl_url(api_request) + + # Assert: Verify successful operation + assert result is not None + assert result["status"] == "active" + assert result["job_id"] == "test_job_id_123" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "firecrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + mock_external_service_dependencies["firecrawl_app"].assert_called_once_with( + api_key="decrypted_api_key", base_url="https://api.example.com" + ) + + # Verify Redis cache was set + mock_external_service_dependencies["redis_client"].setex.assert_called_once() + + def test_crawl_url_watercrawl_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful URL crawling with WaterCrawl provider. 
+ + This test verifies: + - WaterCrawl provider is properly initialized + - API credentials are retrieved and decrypted + - Crawl options are correctly passed to provider + - Provider returns expected response format + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request + api_request = WebsiteCrawlApiRequest( + provider="watercrawl", + url="https://example.com", + options={ + "limit": 5, + "crawl_sub_pages": False, + "only_main_content": False, + "includes": None, + "excludes": None, + "max_depth": None, + "use_sitemap": False, + }, + ) + + # Act: Execute crawl operation + result = WebsiteService.crawl_url(api_request) + + # Assert: Verify successful operation + assert result is not None + assert result["status"] == "active" + assert result["job_id"] == "watercrawl_job_123" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "watercrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with( + api_key="decrypted_api_key", base_url="https://api.example.com" + ) + + def test_crawl_url_jinareader_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful URL crawling with JinaReader provider. 
+ + This test verifies: + - JinaReader provider handles single page crawling + - API credentials are retrieved and decrypted + - HTTP requests are made with proper headers + - Response is properly parsed and returned + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request for single page crawling + api_request = WebsiteCrawlApiRequest( + provider="jinareader", + url="https://example.com", + options={ + "limit": 1, + "crawl_sub_pages": False, + "only_main_content": True, + "includes": None, + "excludes": None, + "max_depth": None, + "use_sitemap": False, + }, + ) + + # Act: Execute crawl operation + result = WebsiteService.crawl_url(api_request) + + # Assert: Verify successful operation + assert result is not None + assert result["status"] == "active" + assert result["data"] is not None + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "jinareader" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify HTTP request was made + mock_external_service_dependencies["requests"].get.assert_called_once_with( + "https://r.jina.ai/https://example.com", + headers={"Accept": "application/json", "Authorization": "Bearer decrypted_api_key"}, + ) + + def test_crawl_url_invalid_provider(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test crawl operation fails with invalid provider. 
+ + This test verifies: + - Invalid provider raises ValueError + - Proper error message is provided + - Service handles unsupported providers gracefully + """ + # Arrange: Create test account and prepare request with invalid provider + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request with invalid provider + api_request = WebsiteCrawlApiRequest( + provider="invalid_provider", + url="https://example.com", + options={"limit": 5, "crawl_sub_pages": False, "only_main_content": False}, + ) + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.crawl_url(api_request) + + assert "Invalid provider" in str(exc_info.value) + + def test_get_crawl_status_firecrawl_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful crawl status retrieval with Firecrawl provider. 
+ + This test verifies: + - Firecrawl status is properly retrieved + - API credentials are retrieved and decrypted + - Status data includes all required fields + - Redis cache is properly managed for completed jobs + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request + api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123") + + # Act: Get crawl status + result = WebsiteService.get_crawl_status_typed(api_request) + + # Assert: Verify successful operation + assert result is not None + assert result["status"] == "completed" + assert result["job_id"] == "test_job_id_123" + assert result["total"] == 5 + assert result["current"] == 5 + assert "data" in result + assert "time_consuming" in result + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "firecrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify Redis cache was accessed and cleaned up + mock_external_service_dependencies["redis_client"].get.assert_called_once() + mock_external_service_dependencies["redis_client"].delete.assert_called_once() + + def test_get_crawl_status_watercrawl_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful crawl status retrieval with WaterCrawl provider. 
+ + This test verifies: + - WaterCrawl status is properly retrieved + - API credentials are retrieved and decrypted + - Provider returns expected status format + - All required status fields are present + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request + api_request = WebsiteCrawlStatusApiRequest(provider="watercrawl", job_id="watercrawl_job_123") + + # Act: Get crawl status + result = WebsiteService.get_crawl_status_typed(api_request) + + # Assert: Verify successful operation + assert result is not None + assert result["status"] == "completed" + assert result["job_id"] == "watercrawl_job_123" + assert result["total"] == 3 + assert result["current"] == 3 + assert "data" in result + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "watercrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + def test_get_crawl_status_jinareader_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful crawl status retrieval with JinaReader provider. 
+ + This test verifies: + - JinaReader status is properly retrieved + - API credentials are retrieved and decrypted + - HTTP requests are made with proper parameters + - Status data is properly formatted and returned + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request + api_request = WebsiteCrawlStatusApiRequest(provider="jinareader", job_id="jina_job_123") + + # Act: Get crawl status + result = WebsiteService.get_crawl_status_typed(api_request) + + # Assert: Verify successful operation + assert result is not None + assert result["status"] == "active" + assert result["job_id"] == "jina_job_123" + assert "total" in result + assert "current" in result + assert "data" in result + assert "time_consuming" in result + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "jinareader" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify HTTP request was made + mock_external_service_dependencies["requests"].post.assert_called_once() + + def test_get_crawl_status_invalid_provider(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test crawl status retrieval fails with invalid provider. 
+ + This test verifies: + - Invalid provider raises ValueError + - Proper error message is provided + - Service handles unsupported providers gracefully + """ + # Arrange: Create test account and prepare request with invalid provider + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request with invalid provider + api_request = WebsiteCrawlStatusApiRequest(provider="invalid_provider", job_id="test_job_id_123") + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.get_crawl_status_typed(api_request) + + assert "Invalid provider" in str(exc_info.value) + + def test_get_crawl_status_missing_credentials(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test crawl status retrieval fails when credentials are missing. 
+ + This test verifies: + - Missing credentials raises ValueError + - Proper error message is provided + - Service handles authentication failures gracefully + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Mock missing credentials + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.return_value = None + + # Create API request + api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123") + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.get_crawl_status_typed(api_request) + + assert "No valid credentials found for the provider" in str(exc_info.value) + + def test_get_crawl_status_missing_api_key(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test crawl status retrieval fails when API key is missing from config. 
+ + This test verifies: + - Missing API key raises ValueError + - Proper error message is provided + - Service handles configuration failures gracefully + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Mock missing API key in config + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.return_value = { + "config": {"base_url": "https://api.example.com"} + } + + # Create API request + api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="test_job_id_123") + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.get_crawl_status_typed(api_request) + + assert "API key not found in configuration" in str(exc_info.value) + + def test_get_crawl_url_data_firecrawl_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test successful URL data retrieval with Firecrawl provider. 
+ + This test verifies: + - Firecrawl URL data is properly retrieved + - API credentials are retrieved and decrypted + - Data is returned for matching URL + - Storage fallback works when needed + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock storage to return existing data + mock_external_service_dependencies["storage"].exists.return_value = True + mock_external_service_dependencies["storage"].load_once.return_value = ( + b"[" + b'{"source_url": "https://example.com", "title": "Test Page", ' + b'"description": "Test Description", "markdown": "# Test Content"}' + b"]" + ) + + # Act: Get URL data + result = WebsiteService.get_crawl_url_data( + job_id="test_job_id_123", + provider="firecrawl", + url="https://example.com", + tenant_id=account.current_tenant.id, + ) + + # Assert: Verify successful operation + assert result is not None + assert result["source_url"] == "https://example.com" + assert result["title"] == "Test Page" + assert result["description"] == "Test Description" + assert result["markdown"] == "# Test Content" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "firecrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify storage was accessed + mock_external_service_dependencies["storage"].exists.assert_called_once() + mock_external_service_dependencies["storage"].load_once.assert_called_once() + + def test_get_crawl_url_data_watercrawl_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful URL data retrieval with WaterCrawl provider. 
+ + This test verifies: + - WaterCrawl URL data is properly retrieved + - API credentials are retrieved and decrypted + - Provider returns expected data format + - All required data fields are present + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Act: Get URL data + result = WebsiteService.get_crawl_url_data( + job_id="watercrawl_job_123", + provider="watercrawl", + url="https://example.com", + tenant_id=account.current_tenant.id, + ) + + # Assert: Verify successful operation + assert result is not None + assert result["title"] == "WaterCrawl Page" + assert result["source_url"] == "https://example.com" + assert result["description"] == "Test description" + assert result["markdown"] == "# Test Content" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "watercrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + def test_get_crawl_url_data_jinareader_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful URL data retrieval with JinaReader provider. 
+ + This test verifies: + - JinaReader URL data is properly retrieved + - API credentials are retrieved and decrypted + - HTTP requests are made with proper parameters + - Data is properly formatted and returned + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock successful response for JinaReader + mock_response = MagicMock() + mock_response.json.return_value = { + "code": 200, + "data": { + "title": "JinaReader Page", + "url": "https://example.com", + "description": "Test description", + "content": "# Test Content", + }, + } + mock_external_service_dependencies["requests"].get.return_value = mock_response + + # Act: Get URL data without job_id (single page scraping) + result = WebsiteService.get_crawl_url_data( + job_id="", provider="jinareader", url="https://example.com", tenant_id=account.current_tenant.id + ) + + # Assert: Verify successful operation + assert result is not None + assert result["title"] == "JinaReader Page" + assert result["url"] == "https://example.com" + assert result["description"] == "Test description" + assert result["content"] == "# Test Content" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "jinareader" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify HTTP request was made + mock_external_service_dependencies["requests"].get.assert_called_once_with( + "https://r.jina.ai/https://example.com", + headers={"Accept": "application/json", "Authorization": "Bearer decrypted_api_key"}, + ) + + def test_get_scrape_url_data_firecrawl_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful URL scraping with 
Firecrawl provider. + + This test verifies: + - Firecrawl scraping is properly executed + - API credentials are retrieved and decrypted + - Scraping parameters are correctly passed + - Scraped data is returned in expected format + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock FirecrawlApp scraping response + mock_firecrawl_instance = MagicMock() + mock_firecrawl_instance.scrape_url.return_value = { + "title": "Scraped Page Title", + "content": "This is the scraped content", + "url": "https://example.com", + "description": "Page description", + } + mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance + + # Act: Scrape URL + result = WebsiteService.get_scrape_url_data( + provider="firecrawl", url="https://example.com", tenant_id=account.current_tenant.id, only_main_content=True + ) + + # Assert: Verify successful operation + assert result is not None + assert result["title"] == "Scraped Page Title" + assert result["content"] == "This is the scraped content" + assert result["url"] == "https://example.com" + assert result["description"] == "Page description" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "firecrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify FirecrawlApp was called with correct parameters + mock_external_service_dependencies["firecrawl_app"].assert_called_once_with( + api_key="decrypted_api_key", base_url="https://api.example.com" + ) + mock_firecrawl_instance.scrape_url.assert_called_once_with( + url="https://example.com", params={"onlyMainContent": True} + ) + + def test_get_scrape_url_data_watercrawl_success( + self, 
db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful URL scraping with WaterCrawl provider. + + This test verifies: + - WaterCrawl scraping is properly executed + - API credentials are retrieved and decrypted + - Provider returns expected scraping format + - All required data fields are present + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Act: Scrape URL + result = WebsiteService.get_scrape_url_data( + provider="watercrawl", + url="https://example.com", + tenant_id=account.current_tenant.id, + only_main_content=False, + ) + + # Assert: Verify successful operation + assert result is not None + assert result["title"] == "Scraped Page" + assert result["content"] == "Test content" + assert result["url"] == "https://example.com" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "watercrawl" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify WaterCrawlProvider was called with correct parameters + mock_external_service_dependencies["watercrawl_provider"].assert_called_once_with( + api_key="decrypted_api_key", base_url="https://api.example.com" + ) + + def test_get_scrape_url_data_invalid_provider(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test URL scraping fails with invalid provider. 
+ + This test verifies: + - Invalid provider raises ValueError + - Proper error message is provided + - Service handles unsupported providers gracefully + """ + # Arrange: Create test account and prepare request with invalid provider + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.get_scrape_url_data( + provider="invalid_provider", + url="https://example.com", + tenant_id=account.current_tenant.id, + only_main_content=False, + ) + + assert "Invalid provider" in str(exc_info.value) + + def test_crawl_options_include_exclude_paths(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test CrawlOptions include and exclude path methods. + + This test verifies: + - Include paths are properly parsed from comma-separated string + - Exclude paths are properly parsed from comma-separated string + - Empty or None values are handled correctly + - Path lists are returned in expected format + """ + # Arrange: Create CrawlOptions with various path configurations + options_with_paths = CrawlOptions(includes="blog,news,articles", excludes="admin,private,test") + + options_without_paths = CrawlOptions(includes=None, excludes="") + + # Act: Get include and exclude paths + include_paths = options_with_paths.get_include_paths() + exclude_paths = options_with_paths.get_exclude_paths() + + empty_include_paths = options_without_paths.get_include_paths() + empty_exclude_paths = options_without_paths.get_exclude_paths() + + # Assert: Verify path parsing + assert include_paths == ["blog", "news", "articles"] + assert exclude_paths == ["admin", "private", "test"] + assert empty_include_paths == [] + assert empty_exclude_paths == [] + + def test_website_crawl_api_request_conversion(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test WebsiteCrawlApiRequest conversion to 
CrawlRequest. + + This test verifies: + - API request is properly converted to internal CrawlRequest + - All options are correctly mapped + - Default values are applied when options are missing + - Conversion maintains data integrity + """ + # Arrange: Create API request with various options + api_request = WebsiteCrawlApiRequest( + provider="firecrawl", + url="https://example.com", + options={ + "limit": 10, + "crawl_sub_pages": True, + "only_main_content": True, + "includes": "blog,news", + "excludes": "admin,private", + "max_depth": 3, + "use_sitemap": False, + }, + ) + + # Act: Convert to CrawlRequest + crawl_request = api_request.to_crawl_request() + + # Assert: Verify conversion + assert crawl_request.url == "https://example.com" + assert crawl_request.provider == "firecrawl" + assert crawl_request.options.limit == 10 + assert crawl_request.options.crawl_sub_pages is True + assert crawl_request.options.only_main_content is True + assert crawl_request.options.includes == "blog,news" + assert crawl_request.options.excludes == "admin,private" + assert crawl_request.options.max_depth == 3 + assert crawl_request.options.use_sitemap is False + + def test_website_crawl_api_request_from_args(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test WebsiteCrawlApiRequest creation from Flask arguments. 
+ + This test verifies: + - Request is properly created from parsed arguments + - Required fields are validated + - Optional fields are handled correctly + - Validation errors are properly raised + """ + # Arrange: Prepare valid arguments + valid_args = {"provider": "watercrawl", "url": "https://example.com", "options": {"limit": 5}} + + # Act: Create request from args + request = WebsiteCrawlApiRequest.from_args(valid_args) + + # Assert: Verify request creation + assert request.provider == "watercrawl" + assert request.url == "https://example.com" + assert request.options == {"limit": 5} + + # Test missing provider + invalid_args = {"url": "https://example.com", "options": {}} + with pytest.raises(ValueError) as exc_info: + WebsiteCrawlApiRequest.from_args(invalid_args) + assert "Provider is required" in str(exc_info.value) + + # Test missing URL + invalid_args = {"provider": "watercrawl", "options": {}} + with pytest.raises(ValueError) as exc_info: + WebsiteCrawlApiRequest.from_args(invalid_args) + assert "URL is required" in str(exc_info.value) + + # Test missing options + invalid_args = {"provider": "watercrawl", "url": "https://example.com"} + with pytest.raises(ValueError) as exc_info: + WebsiteCrawlApiRequest.from_args(invalid_args) + assert "Options are required" in str(exc_info.value) + + def test_crawl_url_jinareader_sub_pages_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test successful URL crawling with JinaReader provider for sub-pages. 
+ + This test verifies: + - JinaReader provider handles sub-page crawling correctly + - HTTP POST request is made with proper parameters + - Job ID is returned for multi-page crawling + - All required parameters are passed correctly + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request for sub-page crawling + api_request = WebsiteCrawlApiRequest( + provider="jinareader", + url="https://example.com", + options={ + "limit": 5, + "crawl_sub_pages": True, + "only_main_content": False, + "includes": None, + "excludes": None, + "max_depth": None, + "use_sitemap": True, + }, + ) + + # Act: Execute crawl operation + result = WebsiteService.crawl_url(api_request) + + # Assert: Verify successful operation + assert result is not None + assert result["status"] == "active" + assert result["job_id"] == "jina_job_123" + + # Verify external service interactions + mock_external_service_dependencies["api_key_auth_service"].get_auth_credentials.assert_called_once_with( + account.current_tenant.id, "website", "jinareader" + ) + mock_external_service_dependencies["encrypter"].decrypt_token.assert_called_once_with( + tenant_id=account.current_tenant.id, token="encrypted_api_key" + ) + + # Verify HTTP POST request was made for sub-page crawling + mock_external_service_dependencies["requests"].post.assert_called_once_with( + "https://adaptivecrawl-kir3wx7b3a-uc.a.run.app", + json={"url": "https://example.com", "maxPages": 5, "useSitemap": True}, + headers={"Content-Type": "application/json", "Authorization": "Bearer decrypted_api_key"}, + ) + + def test_crawl_url_jinareader_failed_response(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test JinaReader 
crawling fails when API returns error. + + This test verifies: + - Failed API response raises ValueError + - Proper error message is provided + - Service handles API failures gracefully + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock failed response + mock_failed_response = MagicMock() + mock_failed_response.json.return_value = {"code": 500, "error": "Internal server error"} + mock_external_service_dependencies["requests"].get.return_value = mock_failed_response + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request + api_request = WebsiteCrawlApiRequest( + provider="jinareader", + url="https://example.com", + options={"limit": 1, "crawl_sub_pages": False, "only_main_content": True}, + ) + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.crawl_url(api_request) + + assert "Failed to crawl" in str(exc_info.value) + + def test_get_crawl_status_firecrawl_active_job( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test Firecrawl status retrieval for active (not completed) job. 
+ + This test verifies: + - Active job status is properly returned + - Redis cache is not deleted for active jobs + - Time consuming is not calculated for active jobs + - All required status fields are present + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock active job status + mock_firecrawl_instance = MagicMock() + mock_firecrawl_instance.check_crawl_status.return_value = { + "status": "active", + "total": 10, + "current": 3, + "data": [], + } + mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance + + # Mock current_user for the test + with patch("services.website_service.current_user") as mock_current_user: + mock_current_user.current_tenant_id = account.current_tenant.id + + # Create API request + api_request = WebsiteCrawlStatusApiRequest(provider="firecrawl", job_id="active_job_123") + + # Act: Get crawl status + result = WebsiteService.get_crawl_status_typed(api_request) + + # Assert: Verify active job status + assert result is not None + assert result["status"] == "active" + assert result["job_id"] == "active_job_123" + assert result["total"] == 10 + assert result["current"] == 3 + assert "data" in result + assert "time_consuming" not in result + + # Verify Redis cache was not accessed for active jobs + mock_external_service_dependencies["redis_client"].get.assert_not_called() + mock_external_service_dependencies["redis_client"].delete.assert_not_called() + + def test_get_crawl_url_data_firecrawl_storage_fallback( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test Firecrawl URL data retrieval with storage fallback. 
+ + This test verifies: + - Storage fallback works when storage has data + - API call is not made when storage has data + - Data is properly parsed from storage + - Correct URL data is returned + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock storage to return existing data + mock_external_service_dependencies["storage"].exists.return_value = True + mock_external_service_dependencies["storage"].load_once.return_value = ( + b"[" + b'{"source_url": "https://example.com/page1", ' + b'"title": "Page 1", "description": "Description 1", "markdown": "# Page 1"}, ' + b'{"source_url": "https://example.com/page2", "title": "Page 2", ' + b'"description": "Description 2", "markdown": "# Page 2"}' + b"]" + ) + + # Act: Get URL data for specific URL + result = WebsiteService.get_crawl_url_data( + job_id="test_job_id_123", + provider="firecrawl", + url="https://example.com/page1", + tenant_id=account.current_tenant.id, + ) + + # Assert: Verify successful operation + assert result is not None + assert result["source_url"] == "https://example.com/page1" + assert result["title"] == "Page 1" + assert result["description"] == "Description 1" + assert result["markdown"] == "# Page 1" + + # Verify storage was accessed + mock_external_service_dependencies["storage"].exists.assert_called_once() + mock_external_service_dependencies["storage"].load_once.assert_called_once() + + def test_get_crawl_url_data_firecrawl_api_fallback( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test Firecrawl URL data retrieval with API fallback when storage is empty. 
+ + This test verifies: + - API fallback works when storage has no data + - FirecrawlApp is called to get data + - Completed job status is checked + - Data is returned from API response + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock storage to return no data + mock_external_service_dependencies["storage"].exists.return_value = False + + # Mock FirecrawlApp for API fallback + mock_firecrawl_instance = MagicMock() + mock_firecrawl_instance.check_crawl_status.return_value = { + "status": "completed", + "data": [ + { + "source_url": "https://example.com/api_page", + "title": "API Page", + "description": "API Description", + "markdown": "# API Content", + } + ], + } + mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance + + # Act: Get URL data + result = WebsiteService.get_crawl_url_data( + job_id="test_job_id_123", + provider="firecrawl", + url="https://example.com/api_page", + tenant_id=account.current_tenant.id, + ) + + # Assert: Verify successful operation + assert result is not None + assert result["source_url"] == "https://example.com/api_page" + assert result["title"] == "API Page" + assert result["description"] == "API Description" + assert result["markdown"] == "# API Content" + + # Verify API was called + mock_external_service_dependencies["firecrawl_app"].assert_called_once() + + def test_get_crawl_url_data_firecrawl_incomplete_job( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test Firecrawl URL data retrieval fails for incomplete job. 
+ + This test verifies: + - Incomplete job raises ValueError + - Proper error message is provided + - Service handles incomplete jobs gracefully + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock storage to return no data + mock_external_service_dependencies["storage"].exists.return_value = False + + # Mock incomplete job status + mock_firecrawl_instance = MagicMock() + mock_firecrawl_instance.check_crawl_status.return_value = {"status": "active", "data": []} + mock_external_service_dependencies["firecrawl_app"].return_value = mock_firecrawl_instance + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.get_crawl_url_data( + job_id="test_job_id_123", + provider="firecrawl", + url="https://example.com/page", + tenant_id=account.current_tenant.id, + ) + + assert "Crawl job is not completed" in str(exc_info.value) + + def test_get_crawl_url_data_jinareader_with_job_id( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test JinaReader URL data retrieval with job ID for multi-page crawling. 
+ + This test verifies: + - JinaReader handles job ID-based data retrieval + - Status check is performed before data retrieval + - Processed data is properly formatted + - Correct URL data is returned + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock successful status response + mock_status_response = MagicMock() + mock_status_response.json.return_value = { + "code": 200, + "data": { + "status": "completed", + "processed": { + "https://example.com/page1": { + "data": { + "title": "Page 1", + "url": "https://example.com/page1", + "description": "Description 1", + "content": "# Content 1", + } + } + }, + }, + } + mock_external_service_dependencies["requests"].post.return_value = mock_status_response + + # Act: Get URL data with job ID + result = WebsiteService.get_crawl_url_data( + job_id="jina_job_123", + provider="jinareader", + url="https://example.com/page1", + tenant_id=account.current_tenant.id, + ) + + # Assert: Verify successful operation + assert result is not None + assert result["title"] == "Page 1" + assert result["url"] == "https://example.com/page1" + assert result["description"] == "Description 1" + assert result["content"] == "# Content 1" + + # Verify HTTP requests were made + assert mock_external_service_dependencies["requests"].post.call_count == 2 + + def test_get_crawl_url_data_jinareader_incomplete_job( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test JinaReader URL data retrieval fails for incomplete job. 
+ + This test verifies: + - Incomplete job raises ValueError + - Proper error message is provided + - Service handles incomplete jobs gracefully + """ + # Arrange: Create test account and prepare request + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock incomplete job status + mock_status_response = MagicMock() + mock_status_response.json.return_value = {"code": 200, "data": {"status": "active", "processed": {}}} + mock_external_service_dependencies["requests"].post.return_value = mock_status_response + + # Act & Assert: Verify proper error handling + with pytest.raises(ValueError) as exc_info: + WebsiteService.get_crawl_url_data( + job_id="jina_job_123", + provider="jinareader", + url="https://example.com/page", + tenant_id=account.current_tenant.id, + ) + + assert "Crawl job is not completed" in str(exc_info.value) + + def test_crawl_options_default_values(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test CrawlOptions default values and initialization. 
+ + This test verifies: + - Default values are properly set + - Optional fields can be None + - Boolean fields have correct defaults + - Integer fields have correct defaults + """ + # Arrange: Create CrawlOptions with minimal parameters + options = CrawlOptions() + + # Assert: Verify default values + assert options.limit == 1 + assert options.crawl_sub_pages is False + assert options.only_main_content is False + assert options.includes is None + assert options.excludes is None + assert options.max_depth is None + assert options.use_sitemap is True + + # Test with custom values + custom_options = CrawlOptions( + limit=10, + crawl_sub_pages=True, + only_main_content=True, + includes="blog,news", + excludes="admin", + max_depth=3, + use_sitemap=False, + ) + + assert custom_options.limit == 10 + assert custom_options.crawl_sub_pages is True + assert custom_options.only_main_content is True + assert custom_options.includes == "blog,news" + assert custom_options.excludes == "admin" + assert custom_options.max_depth == 3 + assert custom_options.use_sitemap is False + + def test_website_crawl_status_api_request_from_args( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test WebsiteCrawlStatusApiRequest creation from Flask arguments. 
+ + This test verifies: + - Request is properly created from parsed arguments + - Required fields are validated + - Job ID is properly handled + - Validation errors are properly raised + """ + # Arrange: Prepare valid arguments + valid_args = {"provider": "firecrawl"} + job_id = "test_job_123" + + # Act: Create request from args + request = WebsiteCrawlStatusApiRequest.from_args(valid_args, job_id) + + # Assert: Verify request creation + assert request.provider == "firecrawl" + assert request.job_id == "test_job_123" + + # Test missing provider + invalid_args = {} + with pytest.raises(ValueError) as exc_info: + WebsiteCrawlStatusApiRequest.from_args(invalid_args, job_id) + assert "Provider is required" in str(exc_info.value) + + # Test missing job ID + with pytest.raises(ValueError) as exc_info: + WebsiteCrawlStatusApiRequest.from_args(valid_args, "") + assert "Job ID is required" in str(exc_info.value) + + def test_scrape_request_initialization(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test ScrapeRequest dataclass initialization and properties. 
+ + This test verifies: + - ScrapeRequest is properly initialized + - All fields are correctly set + - Boolean field works correctly + - String fields are properly assigned + """ + # Arrange: Create ScrapeRequest + request = ScrapeRequest( + provider="firecrawl", url="https://example.com", tenant_id="tenant_123", only_main_content=True + ) + + # Assert: Verify initialization + assert request.provider == "firecrawl" + assert request.url == "https://example.com" + assert request.tenant_id == "tenant_123" + assert request.only_main_content is True + + # Test with different values + request2 = ScrapeRequest( + provider="watercrawl", url="https://test.com", tenant_id="tenant_456", only_main_content=False + ) + + assert request2.provider == "watercrawl" + assert request2.url == "https://test.com" + assert request2.tenant_id == "tenant_456" + assert request2.only_main_content is False diff --git a/api/tests/unit_tests/controllers/console/app/workflow_draft_variables_test.py b/api/tests/unit_tests/controllers/console/app/workflow_draft_variables_test.py index f26be6702a..ac3c8e45c9 100644 --- a/api/tests/unit_tests/controllers/console/app/workflow_draft_variables_test.py +++ b/api/tests/unit_tests/controllers/console/app/workflow_draft_variables_test.py @@ -1,9 +1,8 @@ -import datetime import uuid from collections import OrderedDict from typing import Any, NamedTuple -from flask_restful import marshal +from flask_restx import marshal from controllers.console.app.workflow_draft_variable import ( _WORKFLOW_DRAFT_VARIABLE_FIELDS, @@ -13,6 +12,7 @@ from controllers.console.app.workflow_draft_variable import ( ) from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID from factories.variable_factory import build_segment +from libs.datetime_utils import naive_utc_now from models.workflow import WorkflowDraftVariable from services.workflow_draft_variable_service import WorkflowDraftVariableList @@ -57,7 +57,7 @@ class 
TestWorkflowDraftVariableFields: ) sys_var.id = str(uuid.uuid4()) - sys_var.last_edited_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + sys_var.last_edited_at = naive_utc_now() sys_var.visible = True expected_without_value = OrderedDict( @@ -88,7 +88,7 @@ class TestWorkflowDraftVariableFields: ) node_var.id = str(uuid.uuid4()) - node_var.last_edited_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + node_var.last_edited_at = naive_utc_now() expected_without_value: OrderedDict[str, Any] = OrderedDict( { diff --git a/api/tests/unit_tests/core/mcp/client/test_sse.py b/api/tests/unit_tests/core/mcp/client/test_sse.py index 880a0d4940..aadd366762 100644 --- a/api/tests/unit_tests/core/mcp/client/test_sse.py +++ b/api/tests/unit_tests/core/mcp/client/test_sse.py @@ -1,3 +1,4 @@ +import contextlib import json import queue import threading @@ -124,13 +125,10 @@ def test_sse_client_connection_validation(): mock_event_source.iter_sse.return_value = [endpoint_event] # Test connection - try: + with contextlib.suppress(Exception): with sse_client(test_url) as (read_queue, write_queue): assert read_queue is not None assert write_queue is not None - except Exception as e: - # Connection might fail due to mocking, but we're testing the validation logic - pass def test_sse_client_error_handling(): @@ -178,7 +176,7 @@ def test_sse_client_timeout_configuration(): mock_event_source.iter_sse.return_value = [] mock_sse_connect.return_value.__enter__.return_value = mock_event_source - try: + with contextlib.suppress(Exception): with sse_client( test_url, headers=custom_headers, timeout=custom_timeout, sse_read_timeout=custom_sse_timeout ) as (read_queue, write_queue): @@ -190,9 +188,6 @@ def test_sse_client_timeout_configuration(): assert call_args is not None timeout_arg = call_args[1]["timeout"] assert timeout_arg.read == custom_sse_timeout - except Exception: - # Connection might fail due to mocking, but we tested the configuration - pass def 
test_sse_transport_endpoint_validation(): @@ -251,12 +246,10 @@ def test_sse_client_queue_cleanup(): # Mock connection that raises an exception mock_sse_connect.side_effect = Exception("Connection failed") - try: + with contextlib.suppress(Exception): with sse_client(test_url) as (rq, wq): read_queue = rq write_queue = wq - except Exception: - pass # Expected to fail # Queues should be cleaned up even on exception # Note: In real implementation, cleanup should put None to signal shutdown @@ -283,11 +276,9 @@ def test_sse_client_headers_propagation(): mock_event_source.iter_sse.return_value = [] mock_sse_connect.return_value.__enter__.return_value = mock_event_source - try: + with contextlib.suppress(Exception): with sse_client(test_url, headers=custom_headers): pass - except Exception: - pass # Expected due to mocking # Verify headers were passed to client factory mock_client_factory.assert_called_with(headers=custom_headers) diff --git a/api/tests/unit_tests/core/model_runtime/entities/test_llm_entities.py b/api/tests/unit_tests/core/model_runtime/entities/test_llm_entities.py new file mode 100644 index 0000000000..c10f7b89c3 --- /dev/null +++ b/api/tests/unit_tests/core/model_runtime/entities/test_llm_entities.py @@ -0,0 +1,148 @@ +"""Tests for LLMUsage entity.""" + +from decimal import Decimal + +from core.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata + + +class TestLLMUsage: + """Test cases for LLMUsage class.""" + + def test_from_metadata_with_all_tokens(self): + """Test from_metadata when all token types are provided.""" + metadata: LLMUsageMetadata = { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150, + "prompt_unit_price": 0.001, + "completion_unit_price": 0.002, + "total_price": 0.2, + "currency": "USD", + "latency": 1.5, + } + + usage = LLMUsage.from_metadata(metadata) + + assert usage.prompt_tokens == 100 + assert usage.completion_tokens == 50 + assert usage.total_tokens == 150 + assert 
usage.prompt_unit_price == Decimal("0.001") + assert usage.completion_unit_price == Decimal("0.002") + assert usage.total_price == Decimal("0.2") + assert usage.currency == "USD" + assert usage.latency == 1.5 + + def test_from_metadata_with_prompt_tokens_only(self): + """Test from_metadata when only prompt_tokens is provided.""" + metadata: LLMUsageMetadata = { + "prompt_tokens": 100, + "total_tokens": 100, + } + + usage = LLMUsage.from_metadata(metadata) + + assert usage.prompt_tokens == 100 + assert usage.completion_tokens == 0 + assert usage.total_tokens == 100 + + def test_from_metadata_with_completion_tokens_only(self): + """Test from_metadata when only completion_tokens is provided.""" + metadata: LLMUsageMetadata = { + "completion_tokens": 50, + "total_tokens": 50, + } + + usage = LLMUsage.from_metadata(metadata) + + assert usage.prompt_tokens == 0 + assert usage.completion_tokens == 50 + assert usage.total_tokens == 50 + + def test_from_metadata_calculates_total_when_missing(self): + """Test from_metadata calculates total_tokens when not provided.""" + metadata: LLMUsageMetadata = { + "prompt_tokens": 100, + "completion_tokens": 50, + } + + usage = LLMUsage.from_metadata(metadata) + + assert usage.prompt_tokens == 100 + assert usage.completion_tokens == 50 + assert usage.total_tokens == 150 # Should be calculated + + def test_from_metadata_with_total_but_no_completion(self): + """ + Test from_metadata when total_tokens is provided but completion_tokens is 0. + This tests the fix for issue #24360 - prompt tokens should NOT be assigned to completion_tokens. 
+ """ + metadata: LLMUsageMetadata = { + "prompt_tokens": 479, + "completion_tokens": 0, + "total_tokens": 521, + } + + usage = LLMUsage.from_metadata(metadata) + + # This is the key fix - prompt tokens should remain as prompt tokens + assert usage.prompt_tokens == 479 + assert usage.completion_tokens == 0 + assert usage.total_tokens == 521 + + def test_from_metadata_with_empty_metadata(self): + """Test from_metadata with empty metadata.""" + metadata: LLMUsageMetadata = {} + + usage = LLMUsage.from_metadata(metadata) + + assert usage.prompt_tokens == 0 + assert usage.completion_tokens == 0 + assert usage.total_tokens == 0 + assert usage.currency == "USD" + assert usage.latency == 0.0 + + def test_from_metadata_preserves_zero_completion_tokens(self): + """ + Test that zero completion_tokens are preserved when explicitly set. + This is important for agent nodes that only use prompt tokens. + """ + metadata: LLMUsageMetadata = { + "prompt_tokens": 1000, + "completion_tokens": 0, + "total_tokens": 1000, + "prompt_unit_price": 0.15, + "completion_unit_price": 0.60, + "prompt_price": 0.00015, + "completion_price": 0, + "total_price": 0.00015, + } + + usage = LLMUsage.from_metadata(metadata) + + assert usage.prompt_tokens == 1000 + assert usage.completion_tokens == 0 + assert usage.total_tokens == 1000 + assert usage.prompt_price == Decimal("0.00015") + assert usage.completion_price == Decimal(0) + assert usage.total_price == Decimal("0.00015") + + def test_from_metadata_with_decimal_values(self): + """Test from_metadata handles decimal values correctly.""" + metadata: LLMUsageMetadata = { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150, + "prompt_unit_price": "0.001", + "completion_unit_price": "0.002", + "prompt_price": "0.1", + "completion_price": "0.1", + "total_price": "0.2", + } + + usage = LLMUsage.from_metadata(metadata) + + assert usage.prompt_unit_price == Decimal("0.001") + assert usage.completion_unit_price == Decimal("0.002") + 
assert usage.prompt_price == Decimal("0.1") + assert usage.completion_price == Decimal("0.1") + assert usage.total_price == Decimal("0.2") diff --git a/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py b/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py index f6d22690d1..8abed0a3f9 100644 --- a/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py +++ b/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py @@ -164,7 +164,7 @@ def test__get_chat_model_prompt_messages_with_files_no_memory(get_chat_model_arg ) assert isinstance(prompt_messages[3].content, list) assert len(prompt_messages[3].content) == 2 - assert prompt_messages[3].content[1].data == files[0].remote_url + assert prompt_messages[3].content[0].data == files[0].remote_url @pytest.fixture diff --git a/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py b/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py index 450501c256..e7733b2317 100644 --- a/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py +++ b/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py @@ -5,7 +5,6 @@ These tests verify the Celery-based asynchronous storage functionality for workflow execution data. 
""" -from datetime import UTC, datetime from unittest.mock import Mock, patch from uuid import uuid4 @@ -13,6 +12,7 @@ import pytest from core.repositories.celery_workflow_execution_repository import CeleryWorkflowExecutionRepository from core.workflow.entities.workflow_execution import WorkflowExecution, WorkflowType +from libs.datetime_utils import naive_utc_now from models import Account, EndUser from models.enums import WorkflowRunTriggeredFrom @@ -56,7 +56,7 @@ def sample_workflow_execution(): workflow_version="1.0", graph={"nodes": [], "edges": []}, inputs={"input1": "value1"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) @@ -199,7 +199,7 @@ class TestCeleryWorkflowExecutionRepository: workflow_version="1.0", graph={"nodes": [], "edges": []}, inputs={"input1": "value1"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) exec2 = WorkflowExecution.new( id_=str(uuid4()), @@ -208,7 +208,7 @@ class TestCeleryWorkflowExecutionRepository: workflow_version="1.0", graph={"nodes": [], "edges": []}, inputs={"input2": "value2"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) # Save both executions @@ -235,7 +235,7 @@ class TestCeleryWorkflowExecutionRepository: workflow_version="1.0", graph={"nodes": [], "edges": []}, inputs={"input1": "value1"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) repo.save(execution) diff --git a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py index b38d994f03..0c6fdc8f92 100644 --- a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py +++ b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py @@ -5,7 +5,6 @@ These tests verify the Celery-based asynchronous storage functionality for workflow 
node execution data. """ -from datetime import UTC, datetime from unittest.mock import Mock, patch from uuid import uuid4 @@ -18,6 +17,7 @@ from core.workflow.entities.workflow_node_execution import ( ) from core.workflow.nodes.enums import NodeType from core.workflow.repositories.workflow_node_execution_repository import OrderConfig +from libs.datetime_utils import naive_utc_now from models import Account, EndUser from models.workflow import WorkflowNodeExecutionTriggeredFrom @@ -65,7 +65,7 @@ def sample_workflow_node_execution(): title="Test Node", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.now(UTC).replace(tzinfo=None), + created_at=naive_utc_now(), ) @@ -263,7 +263,7 @@ class TestCeleryWorkflowNodeExecutionRepository: title="Node 1", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.now(UTC).replace(tzinfo=None), + created_at=naive_utc_now(), ) exec2 = WorkflowNodeExecution( id=str(uuid4()), @@ -276,7 +276,7 @@ class TestCeleryWorkflowNodeExecutionRepository: title="Node 2", inputs={"input2": "value2"}, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.now(UTC).replace(tzinfo=None), + created_at=naive_utc_now(), ) # Save both executions @@ -314,7 +314,7 @@ class TestCeleryWorkflowNodeExecutionRepository: title="Node 2", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.now(UTC).replace(tzinfo=None), + created_at=naive_utc_now(), ) exec2 = WorkflowNodeExecution( id=str(uuid4()), @@ -327,7 +327,7 @@ class TestCeleryWorkflowNodeExecutionRepository: title="Node 1", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.now(UTC).replace(tzinfo=None), + created_at=naive_utc_now(), ) # Save in random order diff --git a/api/tests/unit_tests/core/repositories/test_factory.py b/api/tests/unit_tests/core/repositories/test_factory.py index 5146e82e8f..30f51902ef 100644 --- 
a/api/tests/unit_tests/core/repositories/test_factory.py +++ b/api/tests/unit_tests/core/repositories/test_factory.py @@ -2,19 +2,19 @@ Unit tests for the RepositoryFactory. This module tests the factory pattern implementation for creating repository instances -based on configuration, including error handling and validation. +based on configuration, including error handling. """ from unittest.mock import MagicMock, patch import pytest -from pytest_mock import MockerFixture from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker from core.repositories.factory import DifyCoreRepositoryFactory, RepositoryImportError from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from libs.module_loading import import_string from models import Account, EndUser from models.enums import WorkflowRunTriggeredFrom from models.workflow import WorkflowNodeExecutionTriggeredFrom @@ -23,98 +23,30 @@ from models.workflow import WorkflowNodeExecutionTriggeredFrom class TestRepositoryFactory: """Test cases for RepositoryFactory.""" - def test_import_class_success(self): + def test_import_string_success(self): """Test successful class import.""" # Test importing a real class class_path = "unittest.mock.MagicMock" - result = DifyCoreRepositoryFactory._import_class(class_path) + result = import_string(class_path) assert result is MagicMock - def test_import_class_invalid_path(self): + def test_import_string_invalid_path(self): """Test import with invalid module path.""" - with pytest.raises(RepositoryImportError) as exc_info: - DifyCoreRepositoryFactory._import_class("invalid.module.path") - assert "Cannot import repository class" in str(exc_info.value) + with pytest.raises(ImportError) as exc_info: + import_string("invalid.module.path") + assert "No module named" in str(exc_info.value) - def 
test_import_class_invalid_class_name(self): + def test_import_string_invalid_class_name(self): """Test import with invalid class name.""" - with pytest.raises(RepositoryImportError) as exc_info: - DifyCoreRepositoryFactory._import_class("unittest.mock.NonExistentClass") - assert "Cannot import repository class" in str(exc_info.value) + with pytest.raises(ImportError) as exc_info: + import_string("unittest.mock.NonExistentClass") + assert "does not define" in str(exc_info.value) - def test_import_class_malformed_path(self): + def test_import_string_malformed_path(self): """Test import with malformed path (no dots).""" - with pytest.raises(RepositoryImportError) as exc_info: - DifyCoreRepositoryFactory._import_class("invalidpath") - assert "Cannot import repository class" in str(exc_info.value) - - def test_validate_repository_interface_success(self): - """Test successful interface validation.""" - - # Create a mock class that implements the required methods - class MockRepository: - def save(self): - pass - - def get_by_id(self): - pass - - # Create a mock interface class - class MockInterface: - def save(self): - pass - - def get_by_id(self): - pass - - # Should not raise an exception when all methods are present - DifyCoreRepositoryFactory._validate_repository_interface(MockRepository, MockInterface) - - def test_validate_repository_interface_missing_methods(self): - """Test interface validation with missing methods.""" - - # Create a mock class that's missing required methods - class IncompleteRepository: - def save(self): - pass - - # Missing get_by_id method - - # Create a mock interface that requires both methods - class MockInterface: - def save(self): - pass - - def get_by_id(self): - pass - - def missing_method(self): - pass - - with pytest.raises(RepositoryImportError) as exc_info: - DifyCoreRepositoryFactory._validate_repository_interface(IncompleteRepository, MockInterface) - assert "does not implement required methods" in str(exc_info.value) - - def 
test_validate_repository_interface_with_private_methods(self): - """Test that private methods are ignored during interface validation.""" - - class MockRepository: - def save(self): - pass - - def _private_method(self): - pass - - # Create a mock interface with private methods - class MockInterface: - def save(self): - pass - - def _private_method(self): - pass - - # Should not raise exception - private methods should be ignored - DifyCoreRepositoryFactory._validate_repository_interface(MockRepository, MockInterface) + with pytest.raises(ImportError) as exc_info: + import_string("invalidpath") + assert "doesn't look like a module path" in str(exc_info.value) @patch("core.repositories.factory.dify_config") def test_create_workflow_execution_repository_success(self, mock_config): @@ -133,11 +65,8 @@ class TestRepositoryFactory: mock_repository_instance = MagicMock(spec=WorkflowExecutionRepository) mock_repository_class.return_value = mock_repository_instance - # Mock the validation methods - with ( - patch.object(DifyCoreRepositoryFactory, "_import_class", return_value=mock_repository_class), - patch.object(DifyCoreRepositoryFactory, "_validate_repository_interface"), - ): + # Mock import_string + with patch("core.repositories.factory.import_string", return_value=mock_repository_class): result = DifyCoreRepositoryFactory.create_workflow_execution_repository( session_factory=mock_session_factory, user=mock_user, @@ -170,34 +99,7 @@ class TestRepositoryFactory: app_id="test-app-id", triggered_from=WorkflowRunTriggeredFrom.APP_RUN, ) - assert "Cannot import repository class" in str(exc_info.value) - - @patch("core.repositories.factory.dify_config") - def test_create_workflow_execution_repository_validation_error(self, mock_config, mocker: MockerFixture): - """Test WorkflowExecutionRepository creation with validation error.""" - # Setup mock configuration - mock_config.CORE_WORKFLOW_EXECUTION_REPOSITORY = "unittest.mock.MagicMock" - - mock_session_factory = 
MagicMock(spec=sessionmaker) - mock_user = MagicMock(spec=Account) - - # Mock the import to succeed but validation to fail - mock_repository_class = MagicMock() - mocker.patch.object(DifyCoreRepositoryFactory, "_import_class", return_value=mock_repository_class) - mocker.patch.object( - DifyCoreRepositoryFactory, - "_validate_repository_interface", - side_effect=RepositoryImportError("Interface validation failed"), - ) - - with pytest.raises(RepositoryImportError) as exc_info: - DifyCoreRepositoryFactory.create_workflow_execution_repository( - session_factory=mock_session_factory, - user=mock_user, - app_id="test-app-id", - triggered_from=WorkflowRunTriggeredFrom.APP_RUN, - ) - assert "Interface validation failed" in str(exc_info.value) + assert "Failed to create WorkflowExecutionRepository" in str(exc_info.value) @patch("core.repositories.factory.dify_config") def test_create_workflow_execution_repository_instantiation_error(self, mock_config): @@ -212,11 +114,8 @@ class TestRepositoryFactory: mock_repository_class = MagicMock() mock_repository_class.side_effect = Exception("Instantiation failed") - # Mock the validation methods to succeed - with ( - patch.object(DifyCoreRepositoryFactory, "_import_class", return_value=mock_repository_class), - patch.object(DifyCoreRepositoryFactory, "_validate_repository_interface"), - ): + # Mock import_string to return a failing class + with patch("core.repositories.factory.import_string", return_value=mock_repository_class): with pytest.raises(RepositoryImportError) as exc_info: DifyCoreRepositoryFactory.create_workflow_execution_repository( session_factory=mock_session_factory, @@ -243,11 +142,8 @@ class TestRepositoryFactory: mock_repository_instance = MagicMock(spec=WorkflowNodeExecutionRepository) mock_repository_class.return_value = mock_repository_instance - # Mock the validation methods - with ( - patch.object(DifyCoreRepositoryFactory, "_import_class", return_value=mock_repository_class), - 
patch.object(DifyCoreRepositoryFactory, "_validate_repository_interface"), - ): + # Mock import_string + with patch("core.repositories.factory.import_string", return_value=mock_repository_class): result = DifyCoreRepositoryFactory.create_workflow_node_execution_repository( session_factory=mock_session_factory, user=mock_user, @@ -280,34 +176,7 @@ class TestRepositoryFactory: app_id="test-app-id", triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, ) - assert "Cannot import repository class" in str(exc_info.value) - - @patch("core.repositories.factory.dify_config") - def test_create_workflow_node_execution_repository_validation_error(self, mock_config, mocker: MockerFixture): - """Test WorkflowNodeExecutionRepository creation with validation error.""" - # Setup mock configuration - mock_config.CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY = "unittest.mock.MagicMock" - - mock_session_factory = MagicMock(spec=sessionmaker) - mock_user = MagicMock(spec=EndUser) - - # Mock the import to succeed but validation to fail - mock_repository_class = MagicMock() - mocker.patch.object(DifyCoreRepositoryFactory, "_import_class", return_value=mock_repository_class) - mocker.patch.object( - DifyCoreRepositoryFactory, - "_validate_repository_interface", - side_effect=RepositoryImportError("Interface validation failed"), - ) - - with pytest.raises(RepositoryImportError) as exc_info: - DifyCoreRepositoryFactory.create_workflow_node_execution_repository( - session_factory=mock_session_factory, - user=mock_user, - app_id="test-app-id", - triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, - ) - assert "Interface validation failed" in str(exc_info.value) + assert "Failed to create WorkflowNodeExecutionRepository" in str(exc_info.value) @patch("core.repositories.factory.dify_config") def test_create_workflow_node_execution_repository_instantiation_error(self, mock_config): @@ -322,11 +191,8 @@ class TestRepositoryFactory: mock_repository_class = MagicMock() 
mock_repository_class.side_effect = Exception("Instantiation failed") - # Mock the validation methods to succeed - with ( - patch.object(DifyCoreRepositoryFactory, "_import_class", return_value=mock_repository_class), - patch.object(DifyCoreRepositoryFactory, "_validate_repository_interface"), - ): + # Mock import_string to return a failing class + with patch("core.repositories.factory.import_string", return_value=mock_repository_class): with pytest.raises(RepositoryImportError) as exc_info: DifyCoreRepositoryFactory.create_workflow_node_execution_repository( session_factory=mock_session_factory, @@ -359,11 +225,8 @@ class TestRepositoryFactory: mock_repository_instance = MagicMock(spec=WorkflowExecutionRepository) mock_repository_class.return_value = mock_repository_instance - # Mock the validation methods - with ( - patch.object(DifyCoreRepositoryFactory, "_import_class", return_value=mock_repository_class), - patch.object(DifyCoreRepositoryFactory, "_validate_repository_interface"), - ): + # Mock import_string + with patch("core.repositories.factory.import_string", return_value=mock_repository_class): result = DifyCoreRepositoryFactory.create_workflow_execution_repository( session_factory=mock_engine, # Using Engine instead of sessionmaker user=mock_user, diff --git a/api/tests/unit_tests/core/tools/utils/test_encryption.py b/api/tests/unit_tests/core/tools/utils/test_encryption.py new file mode 100644 index 0000000000..6425ab0b8d --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_encryption.py @@ -0,0 +1,181 @@ +import copy +from unittest.mock import patch + +import pytest + +from core.entities.provider_entities import BasicProviderConfig +from core.tools.utils.encryption import ProviderConfigEncrypter + + +# --------------------------- +# A no-op cache +# --------------------------- +class NoopCache: + """Simple cache stub: always returns None, does nothing for set/delete.""" + + def get(self): + return None + + def set(self, config): + pass + + 
def delete(self): + pass + + +@pytest.fixture +def secret_field() -> BasicProviderConfig: + """A SECRET_INPUT field named 'password'.""" + return BasicProviderConfig( + name="password", + type=BasicProviderConfig.Type.SECRET_INPUT, + ) + + +@pytest.fixture +def normal_field() -> BasicProviderConfig: + """A TEXT_INPUT field named 'username'.""" + return BasicProviderConfig( + name="username", + type=BasicProviderConfig.Type.TEXT_INPUT, + ) + + +@pytest.fixture +def encrypter_obj(secret_field, normal_field): + """ + Build ProviderConfigEncrypter with: + - tenant_id = tenant123 + - one secret field (password) and one normal field (username) + - NoopCache as cache + """ + return ProviderConfigEncrypter( + tenant_id="tenant123", + config=[secret_field, normal_field], + provider_config_cache=NoopCache(), + ) + + +# ============================================================ +# ProviderConfigEncrypter.encrypt() +# ============================================================ + + +def test_encrypt_only_secret_is_encrypted_and_non_secret_unchanged(encrypter_obj): + """ + Secret field should be encrypted, non-secret field unchanged. + Verify encrypt_token called only for secret field. + Also check deep copy (input not modified). 
+ """ + data_in = {"username": "alice", "password": "plain_pwd"} + data_copy = copy.deepcopy(data_in) + + with patch("core.tools.utils.encryption.encrypter.encrypt_token", return_value="CIPHERTEXT") as mock_encrypt: + out = encrypter_obj.encrypt(data_in) + + assert out["username"] == "alice" + assert out["password"] == "CIPHERTEXT" + mock_encrypt.assert_called_once_with("tenant123", "plain_pwd") + assert data_in == data_copy # deep copy semantics + + +def test_encrypt_missing_secret_key_is_ok(encrypter_obj): + """If secret field missing in input, no error and no encryption called.""" + with patch("core.tools.utils.encryption.encrypter.encrypt_token") as mock_encrypt: + out = encrypter_obj.encrypt({"username": "alice"}) + assert out["username"] == "alice" + mock_encrypt.assert_not_called() + + +# ============================================================ +# ProviderConfigEncrypter.mask_tool_credentials() +# ============================================================ + + +@pytest.mark.parametrize( + ("raw", "prefix", "suffix"), + [ + ("longsecret", "lo", "et"), + ("abcdefg", "ab", "fg"), + ("1234567", "12", "67"), + ], +) +def test_mask_tool_credentials_long_secret(encrypter_obj, raw, prefix, suffix): + """ + For length > 6: keep first 2 and last 2, mask middle with '*'. + """ + data_in = {"username": "alice", "password": raw} + data_copy = copy.deepcopy(data_in) + + out = encrypter_obj.mask_tool_credentials(data_in) + masked = out["password"] + + assert masked.startswith(prefix) + assert masked.endswith(suffix) + assert "*" in masked + assert len(masked) == len(raw) + assert data_in == data_copy # deep copy semantics + + +@pytest.mark.parametrize("raw", ["", "1", "12", "123", "123456"]) +def test_mask_tool_credentials_short_secret(encrypter_obj, raw): + """ + For length <= 6: fully mask with '*' of same length. 
+ """ + out = encrypter_obj.mask_tool_credentials({"password": raw}) + assert out["password"] == ("*" * len(raw)) + + +def test_mask_tool_credentials_missing_key_noop(encrypter_obj): + """If secret key missing, leave other fields unchanged.""" + data_in = {"username": "alice"} + data_copy = copy.deepcopy(data_in) + + out = encrypter_obj.mask_tool_credentials(data_in) + assert out["username"] == "alice" + assert data_in == data_copy + + +# ============================================================ +# ProviderConfigEncrypter.decrypt() +# ============================================================ + + +def test_decrypt_normal_flow(encrypter_obj): + """ + Normal decrypt flow: + - decrypt_token called for secret field + - secret replaced with decrypted value + - non-secret unchanged + """ + data_in = {"username": "alice", "password": "ENC"} + data_copy = copy.deepcopy(data_in) + + with patch("core.tools.utils.encryption.encrypter.decrypt_token", return_value="PLAIN") as mock_decrypt: + out = encrypter_obj.decrypt(data_in) + + assert out["username"] == "alice" + assert out["password"] == "PLAIN" + mock_decrypt.assert_called_once_with("tenant123", "ENC") + assert data_in == data_copy # deep copy semantics + + +@pytest.mark.parametrize("empty_val", ["", None]) +def test_decrypt_skip_empty_values(encrypter_obj, empty_val): + """Skip decrypt if value is empty or None, keep original.""" + with patch("core.tools.utils.encryption.encrypter.decrypt_token") as mock_decrypt: + out = encrypter_obj.decrypt({"password": empty_val}) + + mock_decrypt.assert_not_called() + assert out["password"] == empty_val + + +def test_decrypt_swallow_exception_and_keep_original(encrypter_obj): + """ + If decrypt_token raises, exception should be swallowed, + and original value preserved. 
+ """ + with patch("core.tools.utils.encryption.encrypter.decrypt_token", side_effect=Exception("boom")): + out = encrypter_obj.decrypt({"password": "ENC_ERR"}) + + assert out["password"] == "ENC_ERR" diff --git a/api/tests/unit_tests/core/tools/utils/test_web_reader_tool.py b/api/tests/unit_tests/core/tools/utils/test_web_reader_tool.py new file mode 100644 index 0000000000..20f753786d --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_web_reader_tool.py @@ -0,0 +1,312 @@ +import pytest + +from core.tools.utils.web_reader_tool import ( + extract_using_readabilipy, + get_image_upload_file_ids, + get_url, + page_result, +) + + +class FakeResponse: + """Minimal fake response object for ssrf_proxy / cloudscraper.""" + + def __init__(self, *, status_code=200, headers=None, content=b"", text=""): + self.status_code = status_code + self.headers = headers or {} + self.content = content + self.text = text if text else content.decode("utf-8", errors="ignore") + + +# --------------------------- +# Tests: page_result +# --------------------------- +@pytest.mark.parametrize( + ("text", "cursor", "maxlen", "expected"), + [ + ("abcdef", 0, 3, "abc"), + ("abcdef", 2, 10, "cdef"), # maxlen beyond end + ("abcdef", 6, 5, ""), # cursor at end + ("abcdef", 7, 5, ""), # cursor beyond end + ("", 0, 5, ""), # empty text + ], +) +def test_page_result(text, cursor, maxlen, expected): + assert page_result(text, cursor, maxlen) == expected + + +# --------------------------- +# Tests: get_url +# --------------------------- +@pytest.fixture +def stub_support_types(monkeypatch): + """Stub supported content types list.""" + import core.tools.utils.web_reader_tool as mod + + # e.g. 
binary types supported by ExtractProcessor + monkeypatch.setattr(mod.extract_processor, "SUPPORT_URL_CONTENT_TYPES", ["application/pdf", "text/plain"]) + return mod + + +def test_get_url_unsupported_content_type(monkeypatch, stub_support_types): + # HEAD 200 but content-type not supported and not text/html + def fake_head(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse( + status_code=200, + headers={"Content-Type": "image/png"}, # not supported + ) + + monkeypatch.setattr(stub_support_types.ssrf_proxy, "head", fake_head) + + result = get_url("https://x.test/file.png") + assert result == "Unsupported content-type [image/png] of URL." + + +def test_get_url_supported_binary_type_uses_extract_processor(monkeypatch, stub_support_types): + """ + When content-type is in SUPPORT_URL_CONTENT_TYPES, + should call ExtractProcessor.load_from_url and return its text. + """ + calls = {"load": 0} + + def fake_head(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse( + status_code=200, + headers={"Content-Type": "application/pdf"}, + ) + + def fake_load_from_url(url, return_text=False): + calls["load"] += 1 + assert return_text is True + return "PDF extracted text" + + monkeypatch.setattr(stub_support_types.ssrf_proxy, "head", fake_head) + monkeypatch.setattr(stub_support_types.ExtractProcessor, "load_from_url", staticmethod(fake_load_from_url)) + + result = get_url("https://x.test/doc.pdf") + assert calls["load"] == 1 + assert result == "PDF extracted text" + + +def test_get_url_html_flow_with_chardet_and_readability(monkeypatch, stub_support_types): + """200 + text/html → GET, chardet detects encoding, readability returns article which is templated.""" + + def fake_head(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse(status_code=200, headers={"Content-Type": "text/html"}) + + def fake_get(url, headers=None, follow_redirects=True, timeout=None): + html = b"xhello" + return 
FakeResponse(status_code=200, headers={"Content-Type": "text/html"}, content=html) + + # chardet.detect returns utf-8 + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod.ssrf_proxy, "head", fake_head) + monkeypatch.setattr(mod.ssrf_proxy, "get", fake_get) + monkeypatch.setattr(mod.chardet, "detect", lambda b: {"encoding": "utf-8"}) + + # readability → a dict that maps to Article, then FULL_TEMPLATE + def fake_simple_json_from_html_string(html, use_readability=True): + return { + "title": "My Title", + "byline": "Bob", + "plain_text": [{"type": "text", "text": "Hello world"}], + } + + monkeypatch.setattr(mod, "simple_json_from_html_string", fake_simple_json_from_html_string) + + out = get_url("https://x.test/page") + assert "TITLE: My Title" in out + assert "AUTHOR: Bob" in out + assert "Hello world" in out + + +def test_get_url_html_flow_empty_article_text_returns_empty(monkeypatch, stub_support_types): + """If readability returns no text, should return empty string.""" + + def fake_head(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse(status_code=200, headers={"Content-Type": "text/html"}) + + def fake_get(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse(status_code=200, headers={"Content-Type": "text/html"}, content=b"") + + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod.ssrf_proxy, "head", fake_head) + monkeypatch.setattr(mod.ssrf_proxy, "get", fake_get) + monkeypatch.setattr(mod.chardet, "detect", lambda b: {"encoding": "utf-8"}) + # readability returns empty plain_text + monkeypatch.setattr(mod, "simple_json_from_html_string", lambda html, use_readability=True: {"plain_text": []}) + + out = get_url("https://x.test/empty") + assert out == "" + + +def test_get_url_403_cloudscraper_fallback(monkeypatch, stub_support_types): + """HEAD 403 → use cloudscraper.get via ssrf_proxy.make_request, then proceed.""" + + def fake_head(url, headers=None, 
follow_redirects=True, timeout=None): + return FakeResponse(status_code=403, headers={}) + + # cloudscraper.create_scraper() → object with .get() + class FakeScraper: + def __init__(self): + pass # removed unused attribute + + def get(self, url, headers=None, follow_redirects=True, timeout=None): + # mimic html 200 + html = b"hi" + return FakeResponse(status_code=200, headers={"Content-Type": "text/html"}, content=html) + + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod.ssrf_proxy, "head", fake_head) + monkeypatch.setattr(mod.cloudscraper, "create_scraper", lambda: FakeScraper()) + monkeypatch.setattr(mod.chardet, "detect", lambda b: {"encoding": "utf-8"}) + monkeypatch.setattr( + mod, + "simple_json_from_html_string", + lambda html, use_readability=True: {"title": "T", "byline": "A", "plain_text": [{"type": "text", "text": "X"}]}, + ) + + out = get_url("https://x.test/403") + assert "TITLE: T" in out + assert "AUTHOR: A" in out + assert "X" in out + + +def test_get_url_head_non_200_returns_status(monkeypatch, stub_support_types): + """HEAD returns non-200 and non-403 → should directly return code message.""" + + def fake_head(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse(status_code=500) + + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod.ssrf_proxy, "head", fake_head) + + out = get_url("https://x.test/fail") + assert out == "URL returned status code 500." + + +def test_get_url_content_disposition_filename_detection(monkeypatch, stub_support_types): + """ + If HEAD 200 with no Content-Type but Content-Disposition filename suggests a supported type, + it should route to ExtractProcessor.load_from_url. 
+ """ + calls = {"load": 0} + + def fake_head(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse(status_code=200, headers={"Content-Disposition": 'attachment; filename="doc.pdf"'}) + + def fake_load_from_url(url, return_text=False): + calls["load"] += 1 + return "From ExtractProcessor via filename" + + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod.ssrf_proxy, "head", fake_head) + monkeypatch.setattr(mod.ExtractProcessor, "load_from_url", staticmethod(fake_load_from_url)) + + out = get_url("https://x.test/fname") + assert calls["load"] == 1 + assert out == "From ExtractProcessor via filename" + + +def test_get_url_html_encoding_fallback_when_decode_fails(monkeypatch, stub_support_types): + """ + If chardet returns an encoding but content.decode raises, should fallback to response.text. + """ + + def fake_head(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse(status_code=200, headers={"Content-Type": "text/html"}) + + # Return bytes that will raise with the chosen encoding + def fake_get(url, headers=None, follow_redirects=True, timeout=None): + return FakeResponse( + status_code=200, + headers={"Content-Type": "text/html"}, + content=b"\xff\xfe\xfa", # likely to fail under utf-8 + text="fallback text", + ) + + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod.ssrf_proxy, "head", fake_head) + monkeypatch.setattr(mod.ssrf_proxy, "get", fake_get) + monkeypatch.setattr(mod.chardet, "detect", lambda b: {"encoding": "utf-8"}) + monkeypatch.setattr( + mod, + "simple_json_from_html_string", + lambda html, use_readability=True: {"title": "", "byline": "", "plain_text": [{"type": "text", "text": "ok"}]}, + ) + + out = get_url("https://x.test/enc-fallback") + assert "ok" in out + + +# --------------------------- +# Tests: extract_using_readabilipy +# --------------------------- + + +def test_extract_using_readabilipy_field_mapping_and_defaults(monkeypatch): + # stub 
readabilipy.simple_json_from_html_string + def fake_simple_json_from_html_string(html, use_readability=True): + return { + "title": "Hello", + "byline": "Alice", + "plain_text": [{"type": "text", "text": "world"}], + } + + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod, "simple_json_from_html_string", fake_simple_json_from_html_string) + + article = extract_using_readabilipy("...") + assert article.title == "Hello" + assert article.author == "Alice" + assert isinstance(article.text, list) + assert article.text + assert article.text[0]["text"] == "world" + + +def test_extract_using_readabilipy_defaults_when_missing(monkeypatch): + def fake_simple_json_from_html_string(html, use_readability=True): + return {} # all missing + + import core.tools.utils.web_reader_tool as mod + + monkeypatch.setattr(mod, "simple_json_from_html_string", fake_simple_json_from_html_string) + + article = extract_using_readabilipy("...") + assert article.title == "" + assert article.author == "" + assert article.text == [] + + +# --------------------------- +# Tests: get_image_upload_file_ids +# --------------------------- +def test_get_image_upload_file_ids(): + # should extract id from https + file-preview + content = "![image](https://example.com/a/b/files/abc123/file-preview)" + assert get_image_upload_file_ids(content) == ["abc123"] + + # should extract id from http + image-preview + content = "![image](http://host/files/xyz789/image-preview)" + assert get_image_upload_file_ids(content) == ["xyz789"] + + # should not match invalid scheme 'htt://' + content = "![image](htt://host/files/bad/file-preview)" + assert get_image_upload_file_ids(content) == [] + + # should extract multiple ids in order + content = """ + some text + ![image](https://h/files/id1/file-preview) + middle + ![image](http://h/files/id2/image-preview) + end + """ + assert get_image_upload_file_ids(content) == ["id1", "id2"] diff --git 
a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer_stream_processor.py b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer_stream_processor.py index 137e8b889d..8b1b9a55bc 100644 --- a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer_stream_processor.py +++ b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer_stream_processor.py @@ -1,6 +1,5 @@ import uuid from collections.abc import Generator -from datetime import UTC, datetime from core.workflow.entities.variable_pool import VariablePool from core.workflow.graph_engine.entities.event import ( @@ -15,6 +14,7 @@ from core.workflow.nodes.answer.answer_stream_processor import AnswerStreamProce from core.workflow.nodes.enums import NodeType from core.workflow.nodes.start.entities import StartNodeData from core.workflow.system_variable import SystemVariable +from libs.datetime_utils import naive_utc_now def _recursive_process(graph: Graph, next_node_id: str) -> Generator[GraphEngineEvent, None, None]: @@ -29,7 +29,7 @@ def _recursive_process(graph: Graph, next_node_id: str) -> Generator[GraphEngine def _publish_events(graph: Graph, next_node_id: str) -> Generator[GraphEngineEvent, None, None]: - route_node_state = RouteNodeState(node_id=next_node_id, start_at=datetime.now(UTC).replace(tzinfo=None)) + route_node_state = RouteNodeState(node_id=next_node_id, start_at=naive_utc_now()) parallel_id = graph.node_parallel_mapping.get(next_node_id) parallel_start_node_id = None @@ -68,7 +68,7 @@ def _publish_events(graph: Graph, next_node_id: str) -> Generator[GraphEngineEve ) route_node_state.status = RouteNodeState.Status.SUCCESS - route_node_state.finished_at = datetime.now(UTC).replace(tzinfo=None) + route_node_state.finished_at = naive_utc_now() yield NodeRunSucceededEvent( id=node_execution_id, node_id=next_node_id, diff --git a/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py b/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py index 
4866db1fdb..1d2eba1e71 100644 --- a/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py +++ b/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py @@ -1,5 +1,4 @@ import json -from datetime import UTC, datetime from unittest.mock import MagicMock import pytest @@ -23,6 +22,7 @@ from core.workflow.repositories.workflow_execution_repository import WorkflowExe from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository from core.workflow.system_variable import SystemVariable from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager +from libs.datetime_utils import naive_utc_now from models.enums import CreatorUserRole from models.model import AppMode from models.workflow import Workflow, WorkflowRun @@ -145,8 +145,8 @@ def real_workflow(): workflow.graph = json.dumps(graph_data) workflow.features = json.dumps({"file_upload": {"enabled": False}}) workflow.created_by = "test-user-id" - workflow.created_at = datetime.now(UTC).replace(tzinfo=None) - workflow.updated_at = datetime.now(UTC).replace(tzinfo=None) + workflow.created_at = naive_utc_now() + workflow.updated_at = naive_utc_now() workflow._environment_variables = "{}" workflow._conversation_variables = "{}" @@ -169,7 +169,7 @@ def real_workflow_run(): workflow_run.outputs = json.dumps({"answer": "test answer"}) workflow_run.created_by_role = CreatorUserRole.ACCOUNT workflow_run.created_by = "test-user-id" - workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None) + workflow_run.created_at = naive_utc_now() return workflow_run @@ -211,7 +211,7 @@ def test_handle_workflow_run_success(workflow_cycle_manager, mock_workflow_execu workflow_type=WorkflowType.CHAT, graph={"nodes": [], "edges": []}, inputs={"query": "test query"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) # Pre-populate the cache with the workflow execution @@ -245,7 +245,7 @@ def 
test_handle_workflow_run_failed(workflow_cycle_manager, mock_workflow_execut workflow_type=WorkflowType.CHAT, graph={"nodes": [], "edges": []}, inputs={"query": "test query"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) # Pre-populate the cache with the workflow execution @@ -282,7 +282,7 @@ def test_handle_node_execution_start(workflow_cycle_manager, mock_workflow_execu workflow_type=WorkflowType.CHAT, graph={"nodes": [], "edges": []}, inputs={"query": "test query"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) # Pre-populate the cache with the workflow execution @@ -335,7 +335,7 @@ def test_get_workflow_execution_or_raise_error(workflow_cycle_manager, mock_work workflow_type=WorkflowType.CHAT, graph={"nodes": [], "edges": []}, inputs={"query": "test query"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) # Pre-populate the cache with the workflow execution @@ -366,7 +366,7 @@ def test_handle_workflow_node_execution_success(workflow_cycle_manager): event.process_data = {"process": "test process"} event.outputs = {"output": "test output"} event.execution_metadata = {WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 100} - event.start_at = datetime.now(UTC).replace(tzinfo=None) + event.start_at = naive_utc_now() # Create a real node execution @@ -379,7 +379,7 @@ def test_handle_workflow_node_execution_success(workflow_cycle_manager): node_id="test-node-id", node_type=NodeType.LLM, title="Test Node", - created_at=datetime.now(UTC).replace(tzinfo=None), + created_at=naive_utc_now(), ) # Pre-populate the cache with the node execution @@ -409,7 +409,7 @@ def test_handle_workflow_run_partial_success(workflow_cycle_manager, mock_workfl workflow_type=WorkflowType.CHAT, graph={"nodes": [], "edges": []}, inputs={"query": "test query"}, - started_at=datetime.now(UTC).replace(tzinfo=None), + started_at=naive_utc_now(), ) # Pre-populate the cache with the workflow 
execution @@ -443,7 +443,7 @@ def test_handle_workflow_node_execution_failed(workflow_cycle_manager): event.process_data = {"process": "test process"} event.outputs = {"output": "test output"} event.execution_metadata = {WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 100} - event.start_at = datetime.now(UTC).replace(tzinfo=None) + event.start_at = naive_utc_now() event.error = "Test error message" # Create a real node execution @@ -457,7 +457,7 @@ def test_handle_workflow_node_execution_failed(workflow_cycle_manager): node_id="test-node-id", node_type=NodeType.LLM, title="Test Node", - created_at=datetime.now(UTC).replace(tzinfo=None), + created_at=naive_utc_now(), ) # Pre-populate the cache with the node execution diff --git a/api/tests/unit_tests/models/test_workflow.py b/api/tests/unit_tests/models/test_workflow.py index 5bc77ad0ef..4c61320c29 100644 --- a/api/tests/unit_tests/models/test_workflow.py +++ b/api/tests/unit_tests/models/test_workflow.py @@ -9,7 +9,6 @@ from core.file.models import File from core.variables import FloatVariable, IntegerVariable, SecretVariable, StringVariable from core.variables.segments import IntegerSegment, Segment from factories.variable_factory import build_segment -from models.model import EndUser from models.workflow import Workflow, WorkflowDraftVariable, WorkflowNodeExecutionModel, is_system_variable_editable @@ -43,14 +42,9 @@ def test_environment_variables(): {"name": "var4", "value": 3.14, "id": str(uuid4()), "selector": ["env", "var4"]} ) - # Mock current_user as an EndUser - mock_user = mock.Mock(spec=EndUser) - mock_user.tenant_id = "tenant_id" - with ( mock.patch("core.helper.encrypter.encrypt_token", return_value="encrypted_token"), mock.patch("core.helper.encrypter.decrypt_token", return_value="secret"), - mock.patch("models.workflow.current_user", mock_user), ): # Set the environment_variables property of the Workflow instance variables = [variable1, variable2, variable3, variable4] @@ -90,14 +84,9 @@ def 
test_update_environment_variables(): {"name": "var4", "value": 3.14, "id": str(uuid4()), "selector": ["env", "var4"]} ) - # Mock current_user as an EndUser - mock_user = mock.Mock(spec=EndUser) - mock_user.tenant_id = "tenant_id" - with ( mock.patch("core.helper.encrypter.encrypt_token", return_value="encrypted_token"), mock.patch("core.helper.encrypter.decrypt_token", return_value="secret"), - mock.patch("models.workflow.current_user", mock_user), ): variables = [variable1, variable2, variable3, variable4] @@ -136,14 +125,9 @@ def test_to_dict(): # Create some EnvironmentVariable instances - # Mock current_user as an EndUser - mock_user = mock.Mock(spec=EndUser) - mock_user.tenant_id = "tenant_id" - with ( mock.patch("core.helper.encrypter.encrypt_token", return_value="encrypted_token"), mock.patch("core.helper.encrypter.decrypt_token", return_value="secret"), - mock.patch("models.workflow.current_user", mock_user), ): # Set the environment_variables property of the Workflow instance workflow.environment_variables = [ diff --git a/api/tests/unit_tests/services/test_dataset_service_batch_update_document_status.py b/api/tests/unit_tests/services/test_dataset_service_batch_update_document_status.py index dc09aca5b2..1881ceac26 100644 --- a/api/tests/unit_tests/services/test_dataset_service_batch_update_document_status.py +++ b/api/tests/unit_tests/services/test_dataset_service_batch_update_document_status.py @@ -93,16 +93,15 @@ class TestDatasetServiceBatchUpdateDocumentStatus: with ( patch("services.dataset_service.DocumentService.get_document") as mock_get_doc, patch("extensions.ext_database.db.session") as mock_db, - patch("services.dataset_service.datetime") as mock_datetime, + patch("services.dataset_service.naive_utc_now") as mock_naive_utc_now, ): current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) - mock_datetime.datetime.now.return_value = current_time - mock_datetime.UTC = datetime.UTC + mock_naive_utc_now.return_value = current_time yield { 
"get_document": mock_get_doc, "db_session": mock_db, - "datetime": mock_datetime, + "naive_utc_now": mock_naive_utc_now, "current_time": current_time, } @@ -120,21 +119,21 @@ class TestDatasetServiceBatchUpdateDocumentStatus: assert document.enabled == True assert document.disabled_at is None assert document.disabled_by is None - assert document.updated_at == current_time.replace(tzinfo=None) + assert document.updated_at == current_time def _assert_document_disabled(self, document: Mock, user_id: str, current_time: datetime.datetime): """Helper method to verify document was disabled correctly.""" assert document.enabled == False - assert document.disabled_at == current_time.replace(tzinfo=None) + assert document.disabled_at == current_time assert document.disabled_by == user_id - assert document.updated_at == current_time.replace(tzinfo=None) + assert document.updated_at == current_time def _assert_document_archived(self, document: Mock, user_id: str, current_time: datetime.datetime): """Helper method to verify document was archived correctly.""" assert document.archived == True - assert document.archived_at == current_time.replace(tzinfo=None) + assert document.archived_at == current_time assert document.archived_by == user_id - assert document.updated_at == current_time.replace(tzinfo=None) + assert document.updated_at == current_time def _assert_document_unarchived(self, document: Mock): """Helper method to verify document was unarchived correctly.""" @@ -430,7 +429,7 @@ class TestDatasetServiceBatchUpdateDocumentStatus: # Verify document attributes were updated correctly self._assert_document_unarchived(archived_doc) - assert archived_doc.updated_at == mock_document_service_dependencies["current_time"].replace(tzinfo=None) + assert archived_doc.updated_at == mock_document_service_dependencies["current_time"] # Verify Redis cache was set (because document is enabled) redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1) @@ -495,9 +494,7 @@ 
class TestDatasetServiceBatchUpdateDocumentStatus: # Verify document was unarchived self._assert_document_unarchived(archived_disabled_doc) - assert archived_disabled_doc.updated_at == mock_document_service_dependencies["current_time"].replace( - tzinfo=None - ) + assert archived_disabled_doc.updated_at == mock_document_service_dependencies["current_time"] # Verify no Redis cache was set (document is disabled) redis_mock.setex.assert_not_called() diff --git a/api/tests/unit_tests/services/test_metadata_bug_complete.py b/api/tests/unit_tests/services/test_metadata_bug_complete.py index c4c7579e83..0fc36510b9 100644 --- a/api/tests/unit_tests/services/test_metadata_bug_complete.py +++ b/api/tests/unit_tests/services/test_metadata_bug_complete.py @@ -1,7 +1,7 @@ from unittest.mock import Mock, patch import pytest -from flask_restful import reqparse +from flask_restx import reqparse from werkzeug.exceptions import BadRequest from services.entities.knowledge_entities.knowledge_entities import MetadataArgs diff --git a/api/tests/unit_tests/services/test_metadata_nullable_bug.py b/api/tests/unit_tests/services/test_metadata_nullable_bug.py index ef4d05c1d9..7f6344f942 100644 --- a/api/tests/unit_tests/services/test_metadata_nullable_bug.py +++ b/api/tests/unit_tests/services/test_metadata_nullable_bug.py @@ -1,7 +1,7 @@ from unittest.mock import Mock, patch import pytest -from flask_restful import reqparse +from flask_restx import reqparse from services.entities.knowledge_entities.knowledge_entities import MetadataArgs from services.metadata_service import MetadataService diff --git a/api/uv.lock b/api/uv.lock index cecce2bc43..45b020e1dd 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.11, <3.13" resolution-markers = [ "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'", @@ -741,6 +741,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/c9/af/0dcccc7fdcdf170f9a1585e5e96b6fb0ba1749ef6be8c89a6202284759bd/celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525", size = 438775, upload-time = "2025-06-01T11:08:09.94Z" }, ] +[[package]] +name = "celery-types" +version = "0.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/d1/0823e71c281e4ad0044e278cf1577d1a68e05f2809424bf94e1614925c5d/celery_types-0.23.0.tar.gz", hash = "sha256:402ed0555aea3cd5e1e6248f4632e4f18eec8edb2435173f9e6dc08449fa101e", size = 31479, upload-time = "2025-03-03T23:56:51.547Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/8b/92bb54dd74d145221c3854aa245c84f4dc04cc9366147496182cec8e88e3/celery_types-0.23.0-py3-none-any.whl", hash = "sha256:0cc495b8d7729891b7e070d0ec8d4906d2373209656a6e8b8276fe1ed306af9a", size = 50189, upload-time = "2025-03-03T23:56:50.458Z" }, +] + [[package]] name = "certifi" version = "2025.6.15" @@ -1254,7 +1266,7 @@ dependencies = [ { name = "flask-login" }, { name = "flask-migrate" }, { name = "flask-orjson" }, - { name = "flask-restful" }, + { name = "flask-restx" }, { name = "flask-sqlalchemy" }, { name = "gevent" }, { name = "gmpy2" }, @@ -1326,6 +1338,7 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "boto3-stubs" }, + { name = "celery-types" }, { name = "coverage" }, { name = "dotenv-linter" }, { name = "faker" }, @@ -1436,13 +1449,13 @@ requires-dist = [ { name = "cachetools", specifier = "~=5.3.0" }, { name = "celery", specifier = "~=5.5.2" }, { name = "chardet", specifier = "~=5.1.0" }, - { name = "flask", specifier = "~=3.1.0" }, + { name = "flask", specifier = "~=3.1.2" }, { name = "flask-compress", specifier = "~=1.17" }, { name = "flask-cors", specifier = "~=6.0.0" }, { name = "flask-login", specifier = "~=0.6.3" }, { name = "flask-migrate", 
specifier = "~=4.0.7" }, { name = "flask-orjson", specifier = "~=2.0.0" }, - { name = "flask-restful", specifier = "~=0.3.10" }, + { name = "flask-restx", specifier = ">=1.3.0" }, { name = "flask-sqlalchemy", specifier = "~=3.1.1" }, { name = "gevent", specifier = "~=24.11.1" }, { name = "gmpy2", specifier = "~=2.2.1" }, @@ -1514,12 +1527,13 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "boto3-stubs", specifier = ">=1.38.20" }, + { name = "celery-types", specifier = ">=0.23.0" }, { name = "coverage", specifier = "~=7.2.4" }, { name = "dotenv-linter", specifier = "~=0.5.0" }, { name = "faker", specifier = "~=32.1.0" }, { name = "hypothesis", specifier = ">=6.131.15" }, { name = "lxml-stubs", specifier = "~=0.5.1" }, - { name = "mypy", specifier = "~=1.16.0" }, + { name = "mypy", specifier = "~=1.17.1" }, { name = "pandas-stubs", specifier = "~=2.2.3" }, { name = "pytest", specifier = "~=8.3.2" }, { name = "pytest-benchmark", specifier = "~=4.0.0" }, @@ -1602,7 +1616,7 @@ vdb = [ { name = "pgvector", specifier = "==0.2.5" }, { name = "pymilvus", specifier = "~=2.5.0" }, { name = "pymochow", specifier = "==1.3.1" }, - { name = "pyobvector", specifier = "~=0.1.6" }, + { name = "pyobvector", specifier = "~=0.2.15" }, { name = "qdrant-client", specifier = "==1.9.0" }, { name = "tablestore", specifier = "==6.2.0" }, { name = "tcvectordb", specifier = "~=1.6.4" }, @@ -1790,7 +1804,7 @@ wheels = [ [[package]] name = "flask" -version = "3.1.1" +version = "3.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "blinker" }, @@ -1800,9 +1814,9 @@ dependencies = [ { name = "markupsafe" }, { name = "werkzeug" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c0/de/e47735752347f4128bcf354e0da07ef311a78244eba9e3dc1d4a5ab21a98/flask-3.1.1.tar.gz", hash = "sha256:284c7b8f2f58cb737f0cf1c30fd7eaf0ccfcde196099d24ecede3fc2005aa59e", size = 753440, upload-time = "2025-05-13T15:01:17.447Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/dc/6d/cfe3c0fcc5e477df242b98bfe186a4c34357b4847e87ecaef04507332dab/flask-3.1.2.tar.gz", hash = "sha256:bf656c15c80190ed628ad08cdfd3aaa35beb087855e2f494910aa3774cc4fd87", size = 720160, upload-time = "2025-08-19T21:03:21.205Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl", hash = "sha256:07aae2bb5eaf77993ef57e357491839f5fd9f4dc281593a81a9e4d79a24f295c", size = 103305, upload-time = "2025-05-13T15:01:15.591Z" }, + { url = "https://files.pythonhosted.org/packages/ec/f9/7f9263c5695f4bd0023734af91bedb2ff8209e8de6ead162f35d8dc762fd/flask-3.1.2-py3-none-any.whl", hash = "sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c", size = 103308, upload-time = "2025-08-19T21:03:19.499Z" }, ] [[package]] @@ -1875,18 +1889,20 @@ wheels = [ ] [[package]] -name = "flask-restful" -version = "0.3.10" +name = "flask-restx" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aniso8601" }, { name = "flask" }, + { name = "importlib-resources" }, + { name = "jsonschema" }, { name = "pytz" }, - { name = "six" }, + { name = "werkzeug" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c0/ce/a0a133db616ea47f78a41e15c4c68b9f08cab3df31eb960f61899200a119/Flask-RESTful-0.3.10.tar.gz", hash = "sha256:fe4af2ef0027df8f9b4f797aba20c5566801b6ade995ac63b588abf1a59cec37", size = 110453, upload-time = "2023-05-21T03:58:55.781Z" } +sdist = { url = "https://files.pythonhosted.org/packages/45/4c/2e7d84e2b406b47cf3bf730f521efe474977b404ee170d8ea68dc37e6733/flask-restx-1.3.0.tar.gz", hash = "sha256:4f3d3fa7b6191fcc715b18c201a12cd875176f92ba4acc61626ccfd571ee1728", size = 2814072, upload-time = "2023-12-10T14:48:55.575Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d7/7b/f0b45f0df7d2978e5ae51804bb5939b7897b2ace24306009da0cc34d8d1f/Flask_RESTful-0.3.10-py2.py3-none-any.whl", hash = "sha256:1cf93c535172f112e080b0d4503a8d15f93a48c88bdd36dd87269bdaf405051b", size = 26217, upload-time = "2023-05-21T03:58:54.004Z" }, + { url = "https://files.pythonhosted.org/packages/a5/bf/1907369f2a7ee614dde5152ff8f811159d357e77962aa3f8c2e937f63731/flask_restx-1.3.0-py2.py3-none-any.whl", hash = "sha256:636c56c3fb3f2c1df979e748019f084a938c4da2035a3e535a4673e4fc177691", size = 2798683, upload-time = "2023-12-10T14:48:53.293Z" }, ] [[package]] @@ -3272,28 +3288,28 @@ wheels = [ [[package]] name = "mypy" -version = "1.16.1" +version = "1.17.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mypy-extensions" }, { name = "pathspec" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/81/69/92c7fa98112e4d9eb075a239caa4ef4649ad7d441545ccffbd5e34607cbb/mypy-1.16.1.tar.gz", hash = "sha256:6bd00a0a2094841c5e47e7374bb42b83d64c527a502e3334e1173a0c24437bab", size = 3324747, upload-time = "2025-06-16T16:51:35.145Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/22/ea637422dedf0bf36f3ef238eab4e455e2a0dcc3082b5cc067615347ab8e/mypy-1.17.1.tar.gz", hash = "sha256:25e01ec741ab5bb3eec8ba9cdb0f769230368a22c959c4937360efb89b7e9f01", size = 3352570, upload-time = "2025-07-31T07:54:19.204Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/61/ec1245aa1c325cb7a6c0f8570a2eee3bfc40fa90d19b1267f8e50b5c8645/mypy-1.16.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:472e4e4c100062488ec643f6162dd0d5208e33e2f34544e1fc931372e806c0cc", size = 10890557, upload-time = "2025-06-16T16:37:21.421Z" }, - { url = "https://files.pythonhosted.org/packages/6b/bb/6eccc0ba0aa0c7a87df24e73f0ad34170514abd8162eb0c75fd7128171fb/mypy-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:ea16e2a7d2714277e349e24d19a782a663a34ed60864006e8585db08f8ad1782", size = 10012921, upload-time = "2025-06-16T16:51:28.659Z" }, - { url = "https://files.pythonhosted.org/packages/5f/80/b337a12e2006715f99f529e732c5f6a8c143bb58c92bb142d5ab380963a5/mypy-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08e850ea22adc4d8a4014651575567b0318ede51e8e9fe7a68f25391af699507", size = 11802887, upload-time = "2025-06-16T16:50:53.627Z" }, - { url = "https://files.pythonhosted.org/packages/d9/59/f7af072d09793d581a745a25737c7c0a945760036b16aeb620f658a017af/mypy-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22d76a63a42619bfb90122889b903519149879ddbf2ba4251834727944c8baca", size = 12531658, upload-time = "2025-06-16T16:33:55.002Z" }, - { url = "https://files.pythonhosted.org/packages/82/c4/607672f2d6c0254b94a646cfc45ad589dd71b04aa1f3d642b840f7cce06c/mypy-1.16.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2c7ce0662b6b9dc8f4ed86eb7a5d505ee3298c04b40ec13b30e572c0e5ae17c4", size = 12732486, upload-time = "2025-06-16T16:37:03.301Z" }, - { url = "https://files.pythonhosted.org/packages/b6/5e/136555ec1d80df877a707cebf9081bd3a9f397dedc1ab9750518d87489ec/mypy-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:211287e98e05352a2e1d4e8759c5490925a7c784ddc84207f4714822f8cf99b6", size = 9479482, upload-time = "2025-06-16T16:47:37.48Z" }, - { url = "https://files.pythonhosted.org/packages/b4/d6/39482e5fcc724c15bf6280ff5806548c7185e0c090712a3736ed4d07e8b7/mypy-1.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:af4792433f09575d9eeca5c63d7d90ca4aeceda9d8355e136f80f8967639183d", size = 11066493, upload-time = "2025-06-16T16:47:01.683Z" }, - { url = "https://files.pythonhosted.org/packages/e6/e5/26c347890efc6b757f4d5bb83f4a0cf5958b8cf49c938ac99b8b72b420a6/mypy-1.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:66df38405fd8466ce3517eda1f6640611a0b8e70895e2a9462d1d4323c5eb4b9", size = 10081687, upload-time = "2025-06-16T16:48:19.367Z" }, - { url = "https://files.pythonhosted.org/packages/44/c7/b5cb264c97b86914487d6a24bd8688c0172e37ec0f43e93b9691cae9468b/mypy-1.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44e7acddb3c48bd2713994d098729494117803616e116032af192871aed80b79", size = 11839723, upload-time = "2025-06-16T16:49:20.912Z" }, - { url = "https://files.pythonhosted.org/packages/15/f8/491997a9b8a554204f834ed4816bda813aefda31cf873bb099deee3c9a99/mypy-1.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ab5eca37b50188163fa7c1b73c685ac66c4e9bdee4a85c9adac0e91d8895e15", size = 12722980, upload-time = "2025-06-16T16:37:40.929Z" }, - { url = "https://files.pythonhosted.org/packages/df/f0/2bd41e174b5fd93bc9de9a28e4fb673113633b8a7f3a607fa4a73595e468/mypy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb6229b2c9086247e21a83c309754b9058b438704ad2f6807f0d8227f6ebdd", size = 12903328, upload-time = "2025-06-16T16:34:35.099Z" }, - { url = "https://files.pythonhosted.org/packages/61/81/5572108a7bec2c46b8aff7e9b524f371fe6ab5efb534d38d6b37b5490da8/mypy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:1f0435cf920e287ff68af3d10a118a73f212deb2ce087619eb4e648116d1fe9b", size = 9562321, upload-time = "2025-06-16T16:48:58.823Z" }, - { url = "https://files.pythonhosted.org/packages/cf/d3/53e684e78e07c1a2bf7105715e5edd09ce951fc3f47cf9ed095ec1b7a037/mypy-1.16.1-py3-none-any.whl", hash = "sha256:5fc2ac4027d0ef28d6ba69a0343737a23c4d1b83672bf38d1fe237bdc0643b37", size = 2265923, upload-time = "2025-06-16T16:48:02.366Z" }, + { url = "https://files.pythonhosted.org/packages/46/cf/eadc80c4e0a70db1c08921dcc220357ba8ab2faecb4392e3cebeb10edbfa/mypy-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:ad37544be07c5d7fba814eb370e006df58fed8ad1ef33ed1649cb1889ba6ff58", size = 10921009, upload-time = "2025-07-31T07:53:23.037Z" }, + { url = "https://files.pythonhosted.org/packages/5d/c1/c869d8c067829ad30d9bdae051046561552516cfb3a14f7f0347b7d973ee/mypy-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:064e2ff508e5464b4bd807a7c1625bc5047c5022b85c70f030680e18f37273a5", size = 10047482, upload-time = "2025-07-31T07:53:26.151Z" }, + { url = "https://files.pythonhosted.org/packages/98/b9/803672bab3fe03cee2e14786ca056efda4bb511ea02dadcedde6176d06d0/mypy-1.17.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70401bbabd2fa1aa7c43bb358f54037baf0586f41e83b0ae67dd0534fc64edfd", size = 11832883, upload-time = "2025-07-31T07:53:47.948Z" }, + { url = "https://files.pythonhosted.org/packages/88/fb/fcdac695beca66800918c18697b48833a9a6701de288452b6715a98cfee1/mypy-1.17.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e92bdc656b7757c438660f775f872a669b8ff374edc4d18277d86b63edba6b8b", size = 12566215, upload-time = "2025-07-31T07:54:04.031Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/a932da3d3dace99ee8eb2043b6ab03b6768c36eb29a02f98f46c18c0da0e/mypy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c1fdf4abb29ed1cb091cf432979e162c208a5ac676ce35010373ff29247bcad5", size = 12751956, upload-time = "2025-07-31T07:53:36.263Z" }, + { url = "https://files.pythonhosted.org/packages/8c/cf/6438a429e0f2f5cab8bc83e53dbebfa666476f40ee322e13cac5e64b79e7/mypy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:ff2933428516ab63f961644bc49bc4cbe42bbffb2cd3b71cc7277c07d16b1a8b", size = 9507307, upload-time = "2025-07-31T07:53:59.734Z" }, + { url = "https://files.pythonhosted.org/packages/17/a2/7034d0d61af8098ec47902108553122baa0f438df8a713be860f7407c9e6/mypy-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:69e83ea6553a3ba79c08c6e15dbd9bfa912ec1e493bf75489ef93beb65209aeb", size = 11086295, upload-time = "2025-07-31T07:53:28.124Z" }, + { url = "https://files.pythonhosted.org/packages/14/1f/19e7e44b594d4b12f6ba8064dbe136505cec813549ca3e5191e40b1d3cc2/mypy-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b16708a66d38abb1e6b5702f5c2c87e133289da36f6a1d15f6a5221085c6403", size = 10112355, upload-time = "2025-07-31T07:53:21.121Z" }, + { url = "https://files.pythonhosted.org/packages/5b/69/baa33927e29e6b4c55d798a9d44db5d394072eef2bdc18c3e2048c9ed1e9/mypy-1.17.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89e972c0035e9e05823907ad5398c5a73b9f47a002b22359b177d40bdaee7056", size = 11875285, upload-time = "2025-07-31T07:53:55.293Z" }, + { url = "https://files.pythonhosted.org/packages/90/13/f3a89c76b0a41e19490b01e7069713a30949d9a6c147289ee1521bcea245/mypy-1.17.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03b6d0ed2b188e35ee6d5c36b5580cffd6da23319991c49ab5556c023ccf1341", size = 12737895, upload-time = "2025-07-31T07:53:43.623Z" }, + { url = "https://files.pythonhosted.org/packages/23/a1/c4ee79ac484241301564072e6476c5a5be2590bc2e7bfd28220033d2ef8f/mypy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c837b896b37cd103570d776bda106eabb8737aa6dd4f248451aecf53030cdbeb", size = 12931025, upload-time = "2025-07-31T07:54:17.125Z" }, + { url = "https://files.pythonhosted.org/packages/89/b8/7409477be7919a0608900e6320b155c72caab4fef46427c5cc75f85edadd/mypy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:665afab0963a4b39dff7c1fa563cc8b11ecff7910206db4b2e64dd1ba25aed19", size = 9584664, upload-time = "2025-07-31T07:54:12.842Z" }, + { url = "https://files.pythonhosted.org/packages/1d/f3/8fcd2af0f5b806f6cf463efaffd3c9548a28f84220493ecd38d127b6b66d/mypy-1.17.1-py3-none-any.whl", hash = 
"sha256:a9f52c0351c21fe24c21d8c0eb1f62967b262d6729393397b6f443c3b773c3b9", size = 2283411, upload-time = "2025-07-31T07:53:24.664Z" }, ] [[package]] @@ -4569,17 +4585,19 @@ wheels = [ [[package]] name = "pyobvector" -version = "0.1.14" +version = "0.2.15" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiomysql" }, { name = "numpy" }, + { name = "pydantic" }, { name = "pymysql" }, { name = "sqlalchemy" }, + { name = "sqlglot" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dc/59/7d762061808948dd6aad165a000b34e22163dc83fb5014184eeacc0fabe5/pyobvector-0.1.14.tar.gz", hash = "sha256:4f85cdd63064d040e94c0a96099a0cd5cda18ce625865382e89429f28422fc02", size = 26780, upload-time = "2024-11-20T11:46:18.017Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/7d/3f3aac6acf1fdd1782042d6eecd48efaa2ee355af0dbb61e93292d629391/pyobvector-0.2.15.tar.gz", hash = "sha256:5de258c1e952c88b385b5661e130c1cf8262c498c1f8a4a348a35962d379fce4", size = 39611, upload-time = "2025-08-18T02:49:26.683Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/68/ecb21b74c974e7be7f9034e205d08db62d614ff5c221581ae96d37ef853e/pyobvector-0.1.14-py3-none-any.whl", hash = "sha256:828e0bec49a177355b70c7a1270af3b0bf5239200ee0d096e4165b267eeff97c", size = 35526, upload-time = "2024-11-20T11:46:16.809Z" }, + { url = "https://files.pythonhosted.org/packages/5f/1f/a62754ba9b8a02c038d2a96cb641b71d3809f34d2ba4f921fecd7840d7fb/pyobvector-0.2.15-py3-none-any.whl", hash = "sha256:feeefe849ee5400e72a9a4d3844e425a58a99053dd02abe06884206923065ebb", size = 52680, upload-time = "2025-08-18T02:49:25.452Z" }, ] [[package]] @@ -5432,6 +5450,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, ] 
+[[package]] +name = "sqlglot" +version = "26.33.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/9d/fcd59b4612d5ad1e2257c67c478107f073b19e1097d3bfde2fb517884416/sqlglot-26.33.0.tar.gz", hash = "sha256:2817278779fa51d6def43aa0d70690b93a25c83eb18ec97130fdaf707abc0d73", size = 5353340, upload-time = "2025-07-01T13:09:06.311Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/8d/f1d9cb5b18e06aa45689fbeaaea6ebab66d5f01d1e65029a8f7657c06be5/sqlglot-26.33.0-py3-none-any.whl", hash = "sha256:031cee20c0c796a83d26d079a47fdce667604df430598c7eabfa4e4dfd147033", size = 477610, upload-time = "2025-07-01T13:09:03.926Z" }, +] + [[package]] name = "sseclient-py" version = "1.8.0" diff --git a/docker/.env.example b/docker/.env.example index 743a1e8bba..711898016e 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -215,6 +215,8 @@ DB_DATABASE=dify # The size of the database connection pool. # The default is 30 connections, which can be appropriately increased. SQLALCHEMY_POOL_SIZE=30 +# The default is 10 connections, which allows temporary overflow beyond the pool size. +SQLALCHEMY_MAX_OVERFLOW=10 # Database connection pool recycling time, the default is 3600 seconds. SQLALCHEMY_POOL_RECYCLE=3600 # Whether to print SQL, default is false. @@ -887,6 +889,14 @@ API_WORKFLOW_RUN_REPOSITORY=repositories.sqlalchemy_api_workflow_run_repository. 
# API workflow node execution repository implementation API_WORKFLOW_NODE_EXECUTION_REPOSITORY=repositories.sqlalchemy_api_workflow_node_execution_repository.DifyAPISQLAlchemyWorkflowNodeExecutionRepository +# Workflow log cleanup configuration +# Enable automatic cleanup of workflow run logs to manage database size +WORKFLOW_LOG_CLEANUP_ENABLED=false +# Number of days to retain workflow run logs (default: 30 days) +WORKFLOW_LOG_RETENTION_DAYS=30 +# Batch size for workflow log cleanup operations (default: 100) +WORKFLOW_LOG_CLEANUP_BATCH_SIZE=100 + # HTTP request node in workflow configuration HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760 HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576 diff --git a/docker/README.md b/docker/README.md index 22dfe2c91c..b5c46eb9fc 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,7 +4,7 @@ Welcome to the new `docker` directory for deploying Dify using Docker Compose. T ### What's Updated -- **Certbot Container**: `docker-compose.yaml` now contains `certbot` for managing SSL certificates. This container automatically renews certificates and ensures secure HTTPS connections. +- **Certbot Container**: `docker-compose.yaml` now contains `certbot` for managing SSL certificates. This container automatically renews certificates and ensures secure HTTPS connections.\ For more information, refer `docker/certbot/README.md`. - **Persistent Environment Variables**: Environment variables are now managed through a `.env` file, ensuring that your configurations persist across deployments. @@ -13,43 +13,44 @@ Welcome to the new `docker` directory for deploying Dify using Docker Compose. T > The `.env` file is a crucial component in Docker and Docker Compose environments, serving as a centralized configuration file where you can define environment variables that are accessible to the containers at runtime. 
This file simplifies the management of environment settings across different stages of development, testing, and production, providing consistency and ease of configuration to deployments. - **Unified Vector Database Services**: All vector database services are now managed from a single Docker Compose file `docker-compose.yaml`. You can switch between different vector databases by setting the `VECTOR_STORE` environment variable in your `.env` file. + - **Mandatory .env File**: A `.env` file is now required to run `docker compose up`. This file is crucial for configuring your deployment and for any custom settings to persist through upgrades. ### How to Deploy Dify with `docker-compose.yaml` 1. **Prerequisites**: Ensure Docker and Docker Compose are installed on your system. -2. **Environment Setup**: - - Navigate to the `docker` directory. - - Copy the `.env.example` file to a new file named `.env` by running `cp .env.example .env`. - - Customize the `.env` file as needed. Refer to the `.env.example` file for detailed configuration options. -3. **Running the Services**: - - Execute `docker compose up` from the `docker` directory to start the services. - - To specify a vector database, set the `VECTOR_STORE` variable in your `.env` file to your desired vector database service, such as `milvus`, `weaviate`, or `opensearch`. -4. **SSL Certificate Setup**: - - Refer `docker/certbot/README.md` to set up SSL certificates using Certbot. -5. **OpenTelemetry Collector Setup**: +1. **Environment Setup**: + - Navigate to the `docker` directory. + - Copy the `.env.example` file to a new file named `.env` by running `cp .env.example .env`. + - Customize the `.env` file as needed. Refer to the `.env.example` file for detailed configuration options. +1. **Running the Services**: + - Execute `docker compose up` from the `docker` directory to start the services. 
+ - To specify a vector database, set the `VECTOR_STORE` variable in your `.env` file to your desired vector database service, such as `milvus`, `weaviate`, or `opensearch`. +1. **SSL Certificate Setup**: + - Refer `docker/certbot/README.md` to set up SSL certificates using Certbot. +1. **OpenTelemetry Collector Setup**: - Change `ENABLE_OTEL` to `true` in `.env`. - Configure `OTLP_BASE_ENDPOINT` properly. ### How to Deploy Middleware for Developing Dify 1. **Middleware Setup**: - - Use the `docker-compose.middleware.yaml` for setting up essential middleware services like databases and caches. - - Navigate to the `docker` directory. - - Ensure the `middleware.env` file is created by running `cp middleware.env.example middleware.env` (refer to the `middleware.env.example` file). -2. **Running Middleware Services**: - - Navigate to the `docker` directory. - - Execute `docker compose -f docker-compose.middleware.yaml --profile weaviate -p dify up -d` to start the middleware services. (Change the profile to other vector database if you are not using weaviate) + - Use the `docker-compose.middleware.yaml` for setting up essential middleware services like databases and caches. + - Navigate to the `docker` directory. + - Ensure the `middleware.env` file is created by running `cp middleware.env.example middleware.env` (refer to the `middleware.env.example` file). +1. **Running Middleware Services**: + - Navigate to the `docker` directory. + - Execute `docker compose -f docker-compose.middleware.yaml --profile weaviate -p dify up -d` to start the middleware services. (Change the profile to other vector database if you are not using weaviate) ### Migration for Existing Users For users migrating from the `docker-legacy` setup: 1. **Review Changes**: Familiarize yourself with the new `.env` configuration and Docker Compose setup. -2. 
**Transfer Customizations**: - - If you have customized configurations such as `docker-compose.yaml`, `ssrf_proxy/squid.conf`, or `nginx/conf.d/default.conf`, you will need to reflect these changes in the `.env` file you create. -3. **Data Migration**: - - Ensure that data from services like databases and caches is backed up and migrated appropriately to the new structure if necessary. +1. **Transfer Customizations**: + - If you have customized configurations such as `docker-compose.yaml`, `ssrf_proxy/squid.conf`, or `nginx/conf.d/default.conf`, you will need to reflect these changes in the `.env` file you create. +1. **Data Migration**: + - Ensure that data from services like databases and caches is backed up and migrated appropriately to the new structure if necessary. ### Overview of `.env` @@ -64,39 +65,49 @@ For users migrating from the `docker-legacy` setup: The `.env.example` file provided in the Docker setup is extensive and covers a wide range of configuration options. It is structured into several sections, each pertaining to different aspects of the application and its services. Here are some of the key sections and variables: 1. **Common Variables**: - - `CONSOLE_API_URL`, `SERVICE_API_URL`: URLs for different API services. - - `APP_WEB_URL`: Frontend application URL. - - `FILES_URL`: Base URL for file downloads and previews. -2. **Server Configuration**: - - `LOG_LEVEL`, `DEBUG`, `FLASK_DEBUG`: Logging and debug settings. - - `SECRET_KEY`: A key for encrypting session cookies and other sensitive data. + - `CONSOLE_API_URL`, `SERVICE_API_URL`: URLs for different API services. + - `APP_WEB_URL`: Frontend application URL. + - `FILES_URL`: Base URL for file downloads and previews. -3. **Database Configuration**: - - `DB_USERNAME`, `DB_PASSWORD`, `DB_HOST`, `DB_PORT`, `DB_DATABASE`: PostgreSQL database credentials and connection details. +1. **Server Configuration**: -4. 
**Redis Configuration**: - - `REDIS_HOST`, `REDIS_PORT`, `REDIS_PASSWORD`: Redis server connection settings. + - `LOG_LEVEL`, `DEBUG`, `FLASK_DEBUG`: Logging and debug settings. + - `SECRET_KEY`: A key for encrypting session cookies and other sensitive data. -5. **Celery Configuration**: - - `CELERY_BROKER_URL`: Configuration for Celery message broker. +1. **Database Configuration**: -6. **Storage Configuration**: - - `STORAGE_TYPE`, `S3_BUCKET_NAME`, `AZURE_BLOB_ACCOUNT_NAME`: Settings for file storage options like local, S3, Azure Blob, etc. + - `DB_USERNAME`, `DB_PASSWORD`, `DB_HOST`, `DB_PORT`, `DB_DATABASE`: PostgreSQL database credentials and connection details. -7. **Vector Database Configuration**: - - `VECTOR_STORE`: Type of vector database (e.g., `weaviate`, `milvus`). - - Specific settings for each vector store like `WEAVIATE_ENDPOINT`, `MILVUS_URI`. +1. **Redis Configuration**: -8. **CORS Configuration**: - - `WEB_API_CORS_ALLOW_ORIGINS`, `CONSOLE_CORS_ALLOW_ORIGINS`: Settings for cross-origin resource sharing. + - `REDIS_HOST`, `REDIS_PORT`, `REDIS_PASSWORD`: Redis server connection settings. -9. **OpenTelemetry Configuration**: - - `ENABLE_OTEL`: Enable OpenTelemetry collector in api. - - `OTLP_BASE_ENDPOINT`: Endpoint for your OTLP exporter. - -10. **Other Service-Specific Environment Variables**: - - Each service like `nginx`, `redis`, `db`, and vector databases have specific environment variables that are directly referenced in the `docker-compose.yaml`. +1. **Celery Configuration**: + + - `CELERY_BROKER_URL`: Configuration for Celery message broker. + +1. **Storage Configuration**: + + - `STORAGE_TYPE`, `S3_BUCKET_NAME`, `AZURE_BLOB_ACCOUNT_NAME`: Settings for file storage options like local, S3, Azure Blob, etc. + +1. **Vector Database Configuration**: + + - `VECTOR_STORE`: Type of vector database (e.g., `weaviate`, `milvus`). + - Specific settings for each vector store like `WEAVIATE_ENDPOINT`, `MILVUS_URI`. + +1. 
**CORS Configuration**: + + - `WEB_API_CORS_ALLOW_ORIGINS`, `CONSOLE_CORS_ALLOW_ORIGINS`: Settings for cross-origin resource sharing. + +1. **OpenTelemetry Configuration**: + + - `ENABLE_OTEL`: Enable OpenTelemetry collector in api. + - `OTLP_BASE_ENDPOINT`: Endpoint for your OTLP exporter. + +1. **Other Service-Specific Environment Variables**: + + - Each service like `nginx`, `redis`, `db`, and vector databases have specific environment variables that are directly referenced in the `docker-compose.yaml`. ### Additional Information diff --git a/docker/certbot/README.md b/docker/certbot/README.md index 21be34b33a..62b1eee395 100644 --- a/docker/certbot/README.md +++ b/docker/certbot/README.md @@ -2,12 +2,12 @@ ## Short description -docker compose certbot configurations with Backward compatibility (without certbot container). +docker compose certbot configurations with Backward compatibility (without certbot container).\ Use `docker compose --profile certbot up` to use this features. ## The simplest way for launching new servers with SSL certificates -1. Get letsencrypt certs +1. Get letsencrypt certs\ set `.env` values ```properties NGINX_SSL_CERT_FILENAME=fullchain.pem @@ -25,7 +25,7 @@ Use `docker compose --profile certbot up` to use this features. ```shell docker compose exec -it certbot /bin/sh /update-cert.sh ``` -2. Edit `.env` file and `docker compose --profile certbot up` again. +1. Edit `.env` file and `docker compose --profile certbot up` again.\ set `.env` value additionally ```properties NGINX_HTTPS_ENABLED=true @@ -34,7 +34,7 @@ Use `docker compose --profile certbot up` to use this features. ```shell docker compose --profile certbot up -d --no-deps --force-recreate nginx ``` - Then you can access your serve with HTTPS. 
+ Then you can access your serve with HTTPS.\ [https://your_domain.com](https://your_domain.com) ## SSL certificates renewal diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index bcf9588dff..d3b75d93af 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -57,6 +57,7 @@ x-shared-env: &shared-api-worker-env DB_PORT: ${DB_PORT:-5432} DB_DATABASE: ${DB_DATABASE:-dify} SQLALCHEMY_POOL_SIZE: ${SQLALCHEMY_POOL_SIZE:-30} + SQLALCHEMY_MAX_OVERFLOW: ${SQLALCHEMY_MAX_OVERFLOW:-10} SQLALCHEMY_POOL_RECYCLE: ${SQLALCHEMY_POOL_RECYCLE:-3600} SQLALCHEMY_ECHO: ${SQLALCHEMY_ECHO:-false} SQLALCHEMY_POOL_PRE_PING: ${SQLALCHEMY_POOL_PRE_PING:-false} @@ -396,6 +397,9 @@ x-shared-env: &shared-api-worker-env CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY: ${CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY:-core.repositories.sqlalchemy_workflow_node_execution_repository.SQLAlchemyWorkflowNodeExecutionRepository} API_WORKFLOW_RUN_REPOSITORY: ${API_WORKFLOW_RUN_REPOSITORY:-repositories.sqlalchemy_api_workflow_run_repository.DifyAPISQLAlchemyWorkflowRunRepository} API_WORKFLOW_NODE_EXECUTION_REPOSITORY: ${API_WORKFLOW_NODE_EXECUTION_REPOSITORY:-repositories.sqlalchemy_api_workflow_node_execution_repository.DifyAPISQLAlchemyWorkflowNodeExecutionRepository} + WORKFLOW_LOG_CLEANUP_ENABLED: ${WORKFLOW_LOG_CLEANUP_ENABLED:-false} + WORKFLOW_LOG_RETENTION_DAYS: ${WORKFLOW_LOG_RETENTION_DAYS:-30} + WORKFLOW_LOG_CLEANUP_BATCH_SIZE: ${WORKFLOW_LOG_CLEANUP_BATCH_SIZE:-100} HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760} HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576} HTTP_REQUEST_NODE_SSL_VERIFY: ${HTTP_REQUEST_NODE_SSL_VERIFY:-True} diff --git a/sdks/nodejs-client/README.md b/sdks/nodejs-client/README.md index 37b5ca2d0a..3a5688bcbe 100644 --- a/sdks/nodejs-client/README.md +++ b/sdks/nodejs-client/README.md @@ -1,12 +1,15 @@ # Dify Node.js SDK + This is the Node.js SDK for the Dify API, which allows you to 
easily integrate Dify into your Node.js applications. ## Install + ```bash npm install dify-client ``` ## Usage + After installing the SDK, you can use it in your project like this: ```js @@ -60,4 +63,5 @@ client.messageFeedback(messageId, rating, user) Replace 'your-api-key-here' with your actual Dify API key.Replace 'your-app-id-here' with your actual Dify APP ID. ## License + This SDK is released under the MIT License. diff --git a/sdks/php-client/README.md b/sdks/php-client/README.md index 91e77ad9ff..444b16a565 100644 --- a/sdks/php-client/README.md +++ b/sdks/php-client/README.md @@ -11,7 +11,7 @@ This is the PHP SDK for the Dify API, which allows you to easily integrate Dify If you want to try the example, you can run `composer install` in this directory. -In exist project, copy the `dify-client.php` to you project, and merge the following to your `composer.json` file, then run `composer install && composer dump-autoload` to install. Guzzle does not require 7.9, other versions have not been tested, but you can try. +In exist project, copy the `dify-client.php` to you project, and merge the following to your `composer.json` file, then run `composer install && composer dump-autoload` to install. Guzzle does not require 7.9, other versions have not been tested, but you can try. ```json { diff --git a/sdks/python-client/README.md b/sdks/python-client/README.md index 7401fd2fd4..34b14b3a94 100644 --- a/sdks/python-client/README.md +++ b/sdks/python-client/README.md @@ -141,8 +141,6 @@ with open(file_path, "rb") as file: result = response.json() print(f'upload_file_id: {result.get("id")}') ``` - - - Others @@ -184,7 +182,8 @@ print('[rename result]') print(rename_conversation_response.json()) ``` -* Using the Workflow Client +- Using the Workflow Client + ```python import json import requests diff --git a/web/Dockerfile b/web/Dockerfile index d284efca87..1376dec749 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -34,7 +34,7 @@ COPY --from=packages /app/web/ . 
COPY . . ENV NODE_OPTIONS="--max-old-space-size=4096" -RUN pnpm build +RUN pnpm build:docker # production stage diff --git a/web/README.md b/web/README.md index 3d9fd2de87..a47cfab041 100644 --- a/web/README.md +++ b/web/README.md @@ -7,6 +7,7 @@ This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next ### Run by source code Before starting the web frontend service, please make sure the following environment is ready. + - [Node.js](https://nodejs.org) >= v22.11.x - [pnpm](https://pnpm.io) v10.x @@ -103,11 +104,9 @@ pnpm run test ``` If you are not familiar with writing tests, here is some code to refer to: -* [classnames.spec.ts](./utils/classnames.spec.ts) -* [index.spec.tsx](./app/components/base/button/index.spec.tsx) - - +- [classnames.spec.ts](./utils/classnames.spec.ts) +- [index.spec.tsx](./app/components/base/button/index.spec.tsx) ## Documentation diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index f1bb5d9156..0d41691dfd 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -1858,10 +1858,10 @@ ___ title="Request" tag="DELETE" label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" - targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} + targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} > ```bash {{ title: 'cURL' }} - curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ + curl --location --request DELETE 
'${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ --header 'Authorization: Bearer {api_key}' ``` diff --git a/web/app/(commonLayout)/datasets/template/template.ja.mdx b/web/app/(commonLayout)/datasets/template/template.ja.mdx index 3011cecbc1..5c7a752c11 100644 --- a/web/app/(commonLayout)/datasets/template/template.ja.mdx +++ b/web/app/(commonLayout)/datasets/template/template.ja.mdx @@ -1614,10 +1614,10 @@ ___ title="リクエスト" tag="DELETE" label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" - targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} + targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} > ```bash {{ title: 'cURL' }} - curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ + curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ --header 'Authorization: Bearer {api_key}' ``` diff --git a/web/app/(commonLayout)/education-apply/page.tsx b/web/app/(commonLayout)/education-apply/page.tsx index 873034452e..5dd3c35519 100644 --- a/web/app/(commonLayout)/education-apply/page.tsx +++ b/web/app/(commonLayout)/education-apply/page.tsx @@ -13,12 +13,12 @@ import { useProviderContext } from '@/context/provider-context' export default function EducationApply() { const router = useRouter() - const { enableEducationPlan, isEducationAccount } = useProviderContext() + const { enableEducationPlan } = useProviderContext() const searchParams = useSearchParams() const token = 
searchParams.get('token') const showEducationApplyPage = useMemo(() => { - return enableEducationPlan && !isEducationAccount && token - }, [enableEducationPlan, isEducationAccount, token]) + return enableEducationPlan && token + }, [enableEducationPlan, token]) useEffect(() => { if (!showEducationApplyPage) diff --git a/web/app/account/account-page/AvatarWithEdit.tsx b/web/app/account/account-page/AvatarWithEdit.tsx index 41a6971bf5..88e3a7b343 100644 --- a/web/app/account/account-page/AvatarWithEdit.tsx +++ b/web/app/account/account-page/AvatarWithEdit.tsx @@ -4,7 +4,7 @@ import type { Area } from 'react-easy-crop' import React, { useCallback, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' -import { RiPencilLine } from '@remixicon/react' +import { RiDeleteBin5Line, RiPencilLine } from '@remixicon/react' import { updateUserProfile } from '@/service/common' import { ToastContext } from '@/app/components/base/toast' import ImageInput, { type OnImageInput } from '@/app/components/base/app-icon-picker/ImageInput' @@ -27,6 +27,8 @@ const AvatarWithEdit = ({ onSave, ...props }: AvatarWithEditProps) => { const [inputImageInfo, setInputImageInfo] = useState() const [isShowAvatarPicker, setIsShowAvatarPicker] = useState(false) const [uploading, setUploading] = useState(false) + const [isShowDeleteConfirm, setIsShowDeleteConfirm] = useState(false) + const [hoverArea, setHoverArea] = useState('left') const handleImageInput: OnImageInput = useCallback(async (isCropped: boolean, fileOrTempUrl: string | File, croppedAreaPixels?: Area, fileName?: string) => { setInputImageInfo( @@ -48,6 +50,18 @@ const AvatarWithEdit = ({ onSave, ...props }: AvatarWithEditProps) => { } }, [notify, onSave, t]) + const handleDeleteAvatar = useCallback(async () => { + try { + await updateUserProfile({ url: 'account/avatar', body: { avatar: '' } }) + notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') 
}) + setIsShowDeleteConfirm(false) + onSave?.() + } + catch (e) { + notify({ type: 'error', message: (e as Error).message }) + } + }, [notify, onSave, t]) + const { handleLocalFileUpload } = useLocalFileUploader({ limit: 3, disabled: false, @@ -86,12 +100,21 @@ const AvatarWithEdit = ({ onSave, ...props }: AvatarWithEditProps) => {
{ setIsShowAvatarPicker(true) }} className="absolute inset-0 flex cursor-pointer items-center justify-center rounded-full bg-black/50 opacity-0 transition-opacity group-hover:opacity-100" + onClick={() => hoverArea === 'right' ? setIsShowDeleteConfirm(true) : setIsShowAvatarPicker(true)} + onMouseMove={(e) => { + const rect = e.currentTarget.getBoundingClientRect() + const x = e.clientX - rect.left + const isRight = x > rect.width / 2 + setHoverArea(isRight ? 'right' : 'left') + }} > - + {hoverArea === 'right' ? + + : - + } +
@@ -115,6 +138,26 @@ const AvatarWithEdit = ({ onSave, ...props }: AvatarWithEditProps) => { + + setIsShowDeleteConfirm(false)} + > +
{t('common.avatar.deleteTitle')}
+

{t('common.avatar.deleteDescription')}

+ +
+ + + +
+
) } diff --git a/web/app/components/app-sidebar/index.tsx b/web/app/components/app-sidebar/index.tsx index cf32339b8a..c3ff45d6a6 100644 --- a/web/app/components/app-sidebar/index.tsx +++ b/web/app/components/app-sidebar/index.tsx @@ -107,7 +107,7 @@ const AppDetailNav = ({ title, desc, isExternal, icon, icon_background, navigati )}
-
+