mirror of https://github.com/langgenius/dify.git
Mergin main into fix/chore-fix
This commit is contained in:
commit
ae1eeb9b2a
|
|
@ -39,7 +39,7 @@ jobs:
|
|||
api/pyproject.toml
|
||||
api/poetry.lock
|
||||
|
||||
- name: Poetry check
|
||||
- name: Check Poetry lockfile
|
||||
run: |
|
||||
poetry check -C api --lock
|
||||
poetry show -C api
|
||||
|
|
@ -47,6 +47,9 @@ jobs:
|
|||
- name: Install dependencies
|
||||
run: poetry install -C api --with dev
|
||||
|
||||
- name: Check dependencies in pyproject.toml
|
||||
run: poetry run -C api bash dev/pytest/pytest_artifacts.sh
|
||||
|
||||
- name: Run Unit tests
|
||||
run: poetry run -C api bash dev/pytest/pytest_unit_tests.sh
|
||||
|
||||
|
|
|
|||
10
README.md
10
README.md
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat on Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on Twitter"></a>
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -196,10 +196,14 @@ If you'd like to configure a highly-available setup, there are community-contrib
|
|||
|
||||
#### Using Terraform for Deployment
|
||||
|
||||
Deploy Dify to Cloud Platform with a single click using [terraform](https://www.terraform.io/)
|
||||
|
||||
##### Azure Global
|
||||
Deploy Dify to Azure with a single click using [terraform](https://www.terraform.io/).
|
||||
- [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## Contributing
|
||||
|
||||
For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
|
||||
|
|
@ -219,7 +223,7 @@ At the same time, please consider supporting Dify by sharing it on social media
|
|||
* [Github Discussion](https://github.com/langgenius/dify/discussions). Best for: sharing feedback and asking questions.
|
||||
* [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
|
||||
* [Discord](https://discord.gg/FngNHpbcY7). Best for: sharing your applications and hanging out with the community.
|
||||
* [Twitter](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community.
|
||||
* [X(Twitter)](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community.
|
||||
|
||||
## Star history
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat on Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on Twitter"></a>
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -179,10 +179,13 @@ docker compose up -d
|
|||
|
||||
#### استخدام Terraform للتوزيع
|
||||
|
||||
انشر Dify إلى منصة السحابة بنقرة واحدة باستخدام [terraform](https://www.terraform.io/)
|
||||
|
||||
##### Azure Global
|
||||
استخدم [terraform](https://www.terraform.io/) لنشر Dify على Azure بنقرة واحدة.
|
||||
- [Azure Terraform بواسطة @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform بواسطة @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## المساهمة
|
||||
|
||||
|
|
|
|||
10
README_CN.md
10
README_CN.md
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat on Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on Twitter"></a>
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -202,10 +202,14 @@ docker compose up -d
|
|||
|
||||
#### 使用 Terraform 部署
|
||||
|
||||
使用 [terraform](https://www.terraform.io/) 一键将 Dify 部署到云平台
|
||||
|
||||
##### Azure Global
|
||||
使用 [terraform](https://www.terraform.io/) 一键部署 Dify 到 Azure。
|
||||
- [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## Star History
|
||||
|
||||
[](https://star-history.com/#langgenius/dify&Date)
|
||||
|
|
@ -232,7 +236,7 @@ docker compose up -d
|
|||
- [GitHub Issues](https://github.com/langgenius/dify/issues)。👉:使用 Dify.AI 时遇到的错误和问题,请参阅[贡献指南](CONTRIBUTING.md)。
|
||||
- [电子邮件支持](mailto:hello@dify.ai?subject=[GitHub]Questions%20About%20Dify)。👉:关于使用 Dify.AI 的问题。
|
||||
- [Discord](https://discord.gg/FngNHpbcY7)。👉:分享您的应用程序并与社区交流。
|
||||
- [Twitter](https://twitter.com/dify_ai)。👉:分享您的应用程序并与社区交流。
|
||||
- [X(Twitter)](https://twitter.com/dify_ai)。👉:分享您的应用程序并与社区交流。
|
||||
- [商业许可](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry)。👉:有关商业用途许可 Dify.AI 的商业咨询。
|
||||
- [微信]() 👉:扫描下方二维码,添加微信好友,备注 Dify,我们将邀请您加入 Dify 社区。
|
||||
<img src="./images/wechat.png" alt="wechat" width="100"/>
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat en Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="seguir en Twitter"></a>
|
||||
alt="seguir en X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Descargas de Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -204,10 +204,13 @@ Si desea configurar una configuración de alta disponibilidad, la comunidad prop
|
|||
|
||||
#### Uso de Terraform para el despliegue
|
||||
|
||||
Despliega Dify en una plataforma en la nube con un solo clic utilizando [terraform](https://www.terraform.io/)
|
||||
|
||||
##### Azure Global
|
||||
Utiliza [terraform](https://www.terraform.io/) para desplegar Dify en Azure con un solo clic.
|
||||
- [Azure Terraform por @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform por @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## Contribuir
|
||||
|
||||
|
|
@ -228,7 +231,7 @@ Al mismo tiempo, considera apoyar a Dify compartiéndolo en redes sociales y en
|
|||
* [Discusión en GitHub](https://github.com/langgenius/dify/discussions). Lo mejor para: compartir comentarios y hacer preguntas.
|
||||
* [Reporte de problemas en GitHub](https://github.com/langgenius/dify/issues). Lo mejor para: errores que encuentres usando Dify.AI y propuestas de características. Consulta nuestra [Guía de contribución](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
|
||||
* [Discord](https://discord.gg/FngNHpbcY7). Lo mejor para: compartir tus aplicaciones y pasar el rato con la comunidad.
|
||||
* [Twitter](https://twitter.com/dify_ai). Lo mejor para: compartir tus aplicaciones y pasar el rato con la comunidad.
|
||||
* [X(Twitter)](https://twitter.com/dify_ai). Lo mejor para: compartir tus aplicaciones y pasar el rato con la comunidad.
|
||||
|
||||
## Historial de Estrellas
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat sur Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="suivre sur Twitter"></a>
|
||||
alt="suivre sur X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Tirages Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -202,10 +202,13 @@ Si vous souhaitez configurer une configuration haute disponibilité, la communau
|
|||
|
||||
#### Utilisation de Terraform pour le déploiement
|
||||
|
||||
Déployez Dify sur une plateforme cloud en un clic en utilisant [terraform](https://www.terraform.io/)
|
||||
|
||||
##### Azure Global
|
||||
Utilisez [terraform](https://www.terraform.io/) pour déployer Dify sur Azure en un clic.
|
||||
- [Azure Terraform par @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform par @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## Contribuer
|
||||
|
||||
|
|
@ -226,7 +229,7 @@ Dans le même temps, veuillez envisager de soutenir Dify en le partageant sur le
|
|||
* [Discussion GitHub](https://github.com/langgenius/dify/discussions). Meilleur pour: partager des commentaires et poser des questions.
|
||||
* [Problèmes GitHub](https://github.com/langgenius/dify/issues). Meilleur pour: les bogues que vous rencontrez en utilisant Dify.AI et les propositions de fonctionnalités. Consultez notre [Guide de contribution](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
|
||||
* [Discord](https://discord.gg/FngNHpbcY7). Meilleur pour: partager vos applications et passer du temps avec la communauté.
|
||||
* [Twitter](https://twitter.com/dify_ai). Meilleur pour: partager vos applications et passer du temps avec la communauté.
|
||||
* [X(Twitter)](https://twitter.com/dify_ai). Meilleur pour: partager vos applications et passer du temps avec la communauté.
|
||||
|
||||
## Historique des étoiles
|
||||
|
||||
|
|
|
|||
15
README_JA.md
15
README_JA.md
|
|
@ -17,7 +17,7 @@
|
|||
alt="Discordでチャット"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="Twitterでフォロー"></a>
|
||||
alt="X(Twitter)でフォロー"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -68,7 +68,7 @@ DifyはオープンソースのLLMアプリケーション開発プラットフ
|
|||
プロンプトの作成、モデルパフォーマンスの比較が行え、チャットベースのアプリに音声合成などの機能も追加できます。
|
||||
|
||||
**4. RAGパイプライン**:
|
||||
ドキュメントの取り込みから検索までをカバーする広範なRAG機能ができます。ほかにもPDF、PPT、その他の一般的なドキュメントフォーマットからのテキスト抽出のサーポイントも提供します。
|
||||
ドキュメントの取り込みから検索までをカバーする広範なRAG機能ができます。ほかにもPDF、PPT、その他の一般的なドキュメントフォーマットからのテキスト抽出のサポートも提供します。
|
||||
|
||||
**5. エージェント機能**:
|
||||
LLM Function CallingやReActに基づくエージェントの定義が可能で、AIエージェント用のプリビルトまたはカスタムツールを追加できます。Difyには、Google検索、DALL·E、Stable Diffusion、WolframAlphaなどのAIエージェント用の50以上の組み込みツールが提供します。
|
||||
|
|
@ -201,10 +201,13 @@ docker compose up -d
|
|||
|
||||
#### Terraformを使用したデプロイ
|
||||
|
||||
##### Azure Global
|
||||
[terraform](https://www.terraform.io/) を使用して、AzureにDifyをワンクリックでデプロイします。
|
||||
- [nikawangのAzure Terraform](https://github.com/nikawang/dify-azure-terraform)
|
||||
[terraform](https://www.terraform.io/) を使用して、ワンクリックでDifyをクラウドプラットフォームにデプロイします
|
||||
|
||||
##### Azure Global
|
||||
- [@nikawangによるAzure Terraform](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [@sotazumによるGoogle Cloud Terraform](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## 貢献
|
||||
|
||||
|
|
@ -225,7 +228,7 @@ docker compose up -d
|
|||
* [Github Discussion](https://github.com/langgenius/dify/discussions). 主に: フィードバックの共有や質問。
|
||||
* [GitHub Issues](https://github.com/langgenius/dify/issues). 主に: Dify.AIを使用する際に発生するエラーや問題については、[貢献ガイド](CONTRIBUTING_JA.md)を参照してください
|
||||
* [Discord](https://discord.gg/FngNHpbcY7). 主に: アプリケーションの共有やコミュニティとの交流。
|
||||
* [Twitter](https://twitter.com/dify_ai). 主に: アプリケーションの共有やコミュニティとの交流。
|
||||
* [X(Twitter)](https://twitter.com/dify_ai). 主に: アプリケーションの共有やコミュニティとの交流。
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
13
README_KL.md
13
README_KL.md
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat on Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on Twitter"></a>
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -202,10 +202,13 @@ If you'd like to configure a highly-available setup, there are community-contrib
|
|||
|
||||
#### Terraform atorlugu pilersitsineq
|
||||
|
||||
##### Azure Global
|
||||
Atoruk [terraform](https://www.terraform.io/) Dify-mik Azure-mut ataatsikkut ikkussuilluarlugu.
|
||||
- [Azure Terraform atorlugu @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
wa'logh nIqHom neH ghun deployment toy'wI' [terraform](https://www.terraform.io/) lo'laH.
|
||||
|
||||
##### Azure Global
|
||||
- [Azure Terraform mung @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform qachlot @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## Contributing
|
||||
|
||||
|
|
@ -228,7 +231,7 @@ At the same time, please consider supporting Dify by sharing it on social media
|
|||
). Best for: sharing feedback and asking questions.
|
||||
* [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
|
||||
* [Discord](https://discord.gg/FngNHpbcY7). Best for: sharing your applications and hanging out with the community.
|
||||
* [Twitter](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community.
|
||||
* [X(Twitter)](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community.
|
||||
|
||||
## Star History
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat on Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on Twitter"></a>
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -39,7 +39,6 @@
|
|||
<a href="./README_AR.md"><img alt="README بالعربية" src="https://img.shields.io/badge/العربية-d9d9d9"></a>
|
||||
<a href="./README_TR.md"><img alt="Türkçe README" src="https://img.shields.io/badge/Türkçe-d9d9d9"></a>
|
||||
<a href="./README_VI.md"><img alt="README Tiếng Việt" src="https://img.shields.io/badge/Ti%E1%BA%BFng%20Vi%E1%BB%87t-d9d9d9"></a>
|
||||
|
||||
</p>
|
||||
|
||||
|
||||
|
|
@ -195,10 +194,14 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했
|
|||
|
||||
#### Terraform을 사용한 배포
|
||||
|
||||
[terraform](https://www.terraform.io/)을 사용하여 단 한 번의 클릭으로 Dify를 클라우드 플랫폼에 배포하십시오
|
||||
|
||||
##### Azure Global
|
||||
[terraform](https://www.terraform.io/)을 사용하여 Azure에 Dify를 원클릭으로 배포하세요.
|
||||
- [nikawang의 Azure Terraform](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [sotazum의 Google Cloud Terraform](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## 기여
|
||||
|
||||
코드에 기여하고 싶은 분들은 [기여 가이드](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)를 참조하세요.
|
||||
|
|
|
|||
12
README_TR.md
12
README_TR.md
|
|
@ -17,7 +17,7 @@
|
|||
alt="Discord'da sohbet et"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="Twitter'da takip et"></a>
|
||||
alt="X(Twitter)'da takip et"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Çekmeleri" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -200,9 +200,13 @@ Yüksek kullanılabilirliğe sahip bir kurulum yapılandırmak isterseniz, Dify'
|
|||
|
||||
#### Dağıtım için Terraform Kullanımı
|
||||
|
||||
Dify'ı bulut platformuna tek tıklamayla dağıtın [terraform](https://www.terraform.io/) kullanarak
|
||||
|
||||
##### Azure Global
|
||||
[Terraform](https://www.terraform.io/) kullanarak Dify'ı Azure'a tek tıklamayla dağıtın.
|
||||
- [@nikawang tarafından Azure Terraform](https://github.com/nikawang/dify-azure-terraform)
|
||||
- [Azure Terraform tarafından @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform tarafından @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## Katkıda Bulunma
|
||||
|
||||
|
|
@ -222,7 +226,7 @@ Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda p
|
|||
* [Github Tartışmaları](https://github.com/langgenius/dify/discussions). En uygun: geri bildirim paylaşmak ve soru sormak için.
|
||||
* [GitHub Sorunları](https://github.com/langgenius/dify/issues). En uygun: Dify.AI kullanırken karşılaştığınız hatalar ve özellik önerileri için. [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakın.
|
||||
* [Discord](https://discord.gg/FngNHpbcY7). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için.
|
||||
* [Twitter](https://twitter.com/dify_ai). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için.
|
||||
* [X(Twitter)](https://twitter.com/dify_ai). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için.
|
||||
|
||||
## Star history
|
||||
|
||||
|
|
|
|||
10
README_VI.md
10
README_VI.md
|
|
@ -17,7 +17,7 @@
|
|||
alt="chat trên Discord"></a>
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="theo dõi trên Twitter"></a>
|
||||
alt="theo dõi trên X(Twitter)"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
|
@ -196,10 +196,14 @@ Nếu bạn muốn cấu hình một cài đặt có độ sẵn sàng cao, có
|
|||
|
||||
#### Sử dụng Terraform để Triển khai
|
||||
|
||||
Triển khai Dify lên nền tảng đám mây với một cú nhấp chuột bằng cách sử dụng [terraform](https://www.terraform.io/)
|
||||
|
||||
##### Azure Global
|
||||
Triển khai Dify lên Azure chỉ với một cú nhấp chuột bằng cách sử dụng [terraform](https://www.terraform.io/).
|
||||
- [Azure Terraform bởi @nikawang](https://github.com/nikawang/dify-azure-terraform)
|
||||
|
||||
##### Google Cloud
|
||||
- [Google Cloud Terraform bởi @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
|
||||
|
||||
## Đóng góp
|
||||
|
||||
Đối với những người muốn đóng góp mã, xem [Hướng dẫn Đóng góp](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) của chúng tôi.
|
||||
|
|
@ -219,7 +223,7 @@ Triển khai Dify lên Azure chỉ với một cú nhấp chuột bằng cách s
|
|||
* [Thảo luận GitHub](https://github.com/langgenius/dify/discussions). Tốt nhất cho: chia sẻ phản hồi và đặt câu hỏi.
|
||||
* [Vấn đề GitHub](https://github.com/langgenius/dify/issues). Tốt nhất cho: lỗi bạn gặp phải khi sử dụng Dify.AI và đề xuất tính năng. Xem [Hướng dẫn Đóng góp](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) của chúng tôi.
|
||||
* [Discord](https://discord.gg/FngNHpbcY7). Tốt nhất cho: chia sẻ ứng dụng của bạn và giao lưu với cộng đồng.
|
||||
* [Twitter](https://twitter.com/dify_ai). Tốt nhất cho: chia sẻ ứng dụng của bạn và giao lưu với cộng đồng.
|
||||
* [X(Twitter)](https://twitter.com/dify_ai). Tốt nhất cho: chia sẻ ứng dụng của bạn và giao lưu với cộng đồng.
|
||||
|
||||
## Lịch sử Yêu thích
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ FILES_URL=http://127.0.0.1:5001
|
|||
# The time in seconds after the signature is rejected
|
||||
FILES_ACCESS_TIMEOUT=300
|
||||
|
||||
# Access token expiration time in minutes
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES=60
|
||||
|
||||
# celery configuration
|
||||
CELERY_BROKER_URL=redis://:difyai123456@localhost:6379/1
|
||||
|
||||
|
|
@ -39,7 +42,7 @@ DB_DATABASE=dify
|
|||
|
||||
# Storage configuration
|
||||
# use for store upload files, private keys...
|
||||
# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos
|
||||
# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos, baidu-obs, supabase
|
||||
STORAGE_TYPE=local
|
||||
STORAGE_LOCAL_PATH=storage
|
||||
S3_USE_AWS_MANAGED_IAM=false
|
||||
|
|
@ -79,6 +82,12 @@ HUAWEI_OBS_SECRET_KEY=your-secret-key
|
|||
HUAWEI_OBS_ACCESS_KEY=your-access-key
|
||||
HUAWEI_OBS_SERVER=your-server-url
|
||||
|
||||
# Baidu OBS Storage Configuration
|
||||
BAIDU_OBS_BUCKET_NAME=your-bucket-name
|
||||
BAIDU_OBS_SECRET_KEY=your-secret-key
|
||||
BAIDU_OBS_ACCESS_KEY=your-access-key
|
||||
BAIDU_OBS_ENDPOINT=your-server-url
|
||||
|
||||
# OCI Storage configuration
|
||||
OCI_ENDPOINT=your-endpoint
|
||||
OCI_BUCKET_NAME=your-bucket-name
|
||||
|
|
@ -93,11 +102,16 @@ VOLCENGINE_TOS_ACCESS_KEY=your-access-key
|
|||
VOLCENGINE_TOS_SECRET_KEY=your-secret-key
|
||||
VOLCENGINE_TOS_REGION=your-region
|
||||
|
||||
# Supabase Storage Configuration
|
||||
SUPABASE_BUCKET_NAME=your-bucket-name
|
||||
SUPABASE_API_KEY=your-access-key
|
||||
SUPABASE_URL=your-server-url
|
||||
|
||||
# CORS configuration
|
||||
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||
|
||||
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector
|
||||
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, vikingdb
|
||||
VECTOR_STORE=weaviate
|
||||
|
||||
# Weaviate configuration
|
||||
|
|
@ -197,6 +211,24 @@ OPENSEARCH_USER=admin
|
|||
OPENSEARCH_PASSWORD=admin
|
||||
OPENSEARCH_SECURE=true
|
||||
|
||||
# Baidu configuration
|
||||
BAIDU_VECTOR_DB_ENDPOINT=http://127.0.0.1:5287
|
||||
BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS=30000
|
||||
BAIDU_VECTOR_DB_ACCOUNT=root
|
||||
BAIDU_VECTOR_DB_API_KEY=dify
|
||||
BAIDU_VECTOR_DB_DATABASE=dify
|
||||
BAIDU_VECTOR_DB_SHARD=1
|
||||
BAIDU_VECTOR_DB_REPLICAS=3
|
||||
|
||||
# ViKingDB configuration
|
||||
VIKINGDB_ACCESS_KEY=your-ak
|
||||
VIKINGDB_SECRET_KEY=your-sk
|
||||
VIKINGDB_REGION=cn-shanghai
|
||||
VIKINGDB_HOST=api-vikingdb.xxx.volces.com
|
||||
VIKINGDB_SCHEMA=http
|
||||
VIKINGDB_CONNECTION_TIMEOUT=30
|
||||
VIKINGDB_SOCKET_TIMEOUT=30
|
||||
|
||||
# Upload configuration
|
||||
UPLOAD_FILE_SIZE_LIMIT=15
|
||||
UPLOAD_FILE_BATCH_LIMIT=5
|
||||
|
|
@ -265,6 +297,9 @@ HTTP_REQUEST_MAX_WRITE_TIMEOUT=600
|
|||
HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
|
||||
HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576
|
||||
|
||||
# Respect X-* headers to redirect clients
|
||||
RESPECT_XFORWARD_HEADERS_ENABLED=false
|
||||
|
||||
# Log file path
|
||||
LOG_FILE=
|
||||
|
||||
|
|
|
|||
14
api/app.py
14
api/app.py
|
|
@ -26,7 +26,7 @@ from commands import register_commands
|
|||
from configs import dify_config
|
||||
|
||||
# DO NOT REMOVE BELOW
|
||||
from events import event_handlers
|
||||
from events import event_handlers # noqa: F401
|
||||
from extensions import (
|
||||
ext_celery,
|
||||
ext_code_based_extension,
|
||||
|
|
@ -36,6 +36,7 @@ from extensions import (
|
|||
ext_login,
|
||||
ext_mail,
|
||||
ext_migrate,
|
||||
ext_proxy_fix,
|
||||
ext_redis,
|
||||
ext_sentry,
|
||||
ext_storage,
|
||||
|
|
@ -45,7 +46,7 @@ from extensions.ext_login import login_manager
|
|||
from libs.passport import PassportService
|
||||
|
||||
# TODO: Find a way to avoid importing models here
|
||||
from models import account, dataset, model, source, task, tool, tools, web
|
||||
from models import account, dataset, model, source, task, tool, tools, web # noqa: F401
|
||||
from services.account_service import AccountService
|
||||
|
||||
# DO NOT REMOVE ABOVE
|
||||
|
|
@ -156,6 +157,7 @@ def initialize_extensions(app):
|
|||
ext_mail.init_app(app)
|
||||
ext_hosting_provider.init_app(app)
|
||||
ext_sentry.init_app(app)
|
||||
ext_proxy_fix.init_app(app)
|
||||
|
||||
|
||||
# Flask-Login configuration
|
||||
|
|
@ -181,10 +183,10 @@ def load_user_from_request(request_from_flask_login):
|
|||
decoded = PassportService().verify(auth_token)
|
||||
user_id = decoded.get("user_id")
|
||||
|
||||
account = AccountService.load_logged_in_account(account_id=user_id, token=auth_token)
|
||||
if account:
|
||||
contexts.tenant_id.set(account.current_tenant_id)
|
||||
return account
|
||||
logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
|
||||
if logged_in_account:
|
||||
contexts.tenant_id.set(logged_in_account.current_tenant_id)
|
||||
return logged_in_account
|
||||
|
||||
|
||||
@login_manager.unauthorized_handler
|
||||
|
|
|
|||
|
|
@ -347,6 +347,14 @@ def migrate_knowledge_vector_database():
|
|||
index_name = Dataset.gen_collection_name_by_id(dataset_id)
|
||||
index_struct_dict = {"type": "elasticsearch", "vector_store": {"class_prefix": index_name}}
|
||||
dataset.index_struct = json.dumps(index_struct_dict)
|
||||
elif vector_type == VectorType.BAIDU:
|
||||
dataset_id = dataset.id
|
||||
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
|
||||
index_struct_dict = {
|
||||
"type": VectorType.BAIDU,
|
||||
"vector_store": {"class_prefix": collection_name},
|
||||
}
|
||||
dataset.index_struct = json.dumps(index_struct_dict)
|
||||
else:
|
||||
raise ValueError(f"Vector store {vector_type} is not supported.")
|
||||
|
||||
|
|
|
|||
|
|
@ -289,6 +289,12 @@ class HttpConfig(BaseSettings):
|
|||
default=None,
|
||||
)
|
||||
|
||||
RESPECT_XFORWARD_HEADERS_ENABLED: bool = Field(
|
||||
description="Enable or disable the X-Forwarded-For Proxy Fix middleware from Werkzeug"
|
||||
" to respect X-* headers to redirect clients",
|
||||
default=False,
|
||||
)
|
||||
|
||||
|
||||
class InnerAPIConfig(BaseSettings):
|
||||
"""
|
||||
|
|
@ -396,9 +402,9 @@ class WorkflowConfig(BaseSettings):
|
|||
)
|
||||
|
||||
|
||||
class OAuthConfig(BaseSettings):
|
||||
class AuthConfig(BaseSettings):
|
||||
"""
|
||||
Configuration for OAuth authentication
|
||||
Configuration for authentication and OAuth
|
||||
"""
|
||||
|
||||
OAUTH_REDIRECT_PATH: str = Field(
|
||||
|
|
@ -407,7 +413,7 @@ class OAuthConfig(BaseSettings):
|
|||
)
|
||||
|
||||
GITHUB_CLIENT_ID: Optional[str] = Field(
|
||||
description="GitHub OAuth client secret",
|
||||
description="GitHub OAuth client ID",
|
||||
default=None,
|
||||
)
|
||||
|
||||
|
|
@ -426,6 +432,11 @@ class OAuthConfig(BaseSettings):
|
|||
default=None,
|
||||
)
|
||||
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES: PositiveInt = Field(
|
||||
description="Expiration time for access tokens in minutes",
|
||||
default=60,
|
||||
)
|
||||
|
||||
|
||||
class ModerationConfig(BaseSettings):
|
||||
"""
|
||||
|
|
@ -643,6 +654,7 @@ class PositionConfig(BaseSettings):
|
|||
class FeatureConfig(
|
||||
# place the configs in alphabet order
|
||||
AppExecutionConfig,
|
||||
AuthConfig, # Changed from OAuthConfig to AuthConfig
|
||||
BillingConfig,
|
||||
CodeExecutionSandboxConfig,
|
||||
PluginConfig,
|
||||
|
|
@ -659,14 +671,13 @@ class FeatureConfig(
|
|||
MailConfig,
|
||||
ModelLoadBalanceConfig,
|
||||
ModerationConfig,
|
||||
OAuthConfig,
|
||||
PositionConfig,
|
||||
RagEtlConfig,
|
||||
SecurityConfig,
|
||||
ToolConfig,
|
||||
UpdateConfig,
|
||||
WorkflowConfig,
|
||||
WorkspaceConfig,
|
||||
PositionConfig,
|
||||
# hosted services config
|
||||
HostedServiceConfig,
|
||||
CeleryBeatConfig,
|
||||
|
|
|
|||
|
|
@ -8,9 +8,11 @@ from configs.middleware.cache.redis_config import RedisConfig
|
|||
from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
|
||||
from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
|
||||
from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
|
||||
from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
|
||||
from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
|
||||
from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
|
||||
from configs.middleware.storage.oci_storage_config import OCIStorageConfig
|
||||
from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
|
||||
from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
|
||||
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
|
||||
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
|
||||
|
|
@ -26,6 +28,7 @@ from configs.middleware.vdb.qdrant_config import QdrantConfig
|
|||
from configs.middleware.vdb.relyt_config import RelytConfig
|
||||
from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
|
||||
from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
|
||||
from configs.middleware.vdb.vikingdb_config import VikingDBConfig
|
||||
from configs.middleware.vdb.weaviate_config import WeaviateConfig
|
||||
|
||||
|
||||
|
|
@ -190,6 +193,22 @@ class CeleryConfig(DatabaseConfig):
|
|||
return self.CELERY_BROKER_URL.startswith("rediss://") if self.CELERY_BROKER_URL else False
|
||||
|
||||
|
||||
class InternalTestConfig(BaseSettings):
|
||||
"""
|
||||
Configuration settings for Internal Test
|
||||
"""
|
||||
|
||||
AWS_SECRET_ACCESS_KEY: Optional[str] = Field(
|
||||
description="Internal test AWS secret access key",
|
||||
default=None,
|
||||
)
|
||||
|
||||
AWS_ACCESS_KEY_ID: Optional[str] = Field(
|
||||
description="Internal test AWS access key ID",
|
||||
default=None,
|
||||
)
|
||||
|
||||
|
||||
class MiddlewareConfig(
|
||||
# place the configs in alphabet order
|
||||
CeleryConfig,
|
||||
|
|
@ -200,12 +219,14 @@ class MiddlewareConfig(
|
|||
StorageConfig,
|
||||
AliyunOSSStorageConfig,
|
||||
AzureBlobStorageConfig,
|
||||
BaiduOBSStorageConfig,
|
||||
GoogleCloudStorageConfig,
|
||||
TencentCloudCOSStorageConfig,
|
||||
HuaweiCloudOBSStorageConfig,
|
||||
VolcengineTOSStorageConfig,
|
||||
S3StorageConfig,
|
||||
OCIStorageConfig,
|
||||
S3StorageConfig,
|
||||
SupabaseStorageConfig,
|
||||
TencentCloudCOSStorageConfig,
|
||||
VolcengineTOSStorageConfig,
|
||||
# configs of vdb and vdb providers
|
||||
VectorStoreConfig,
|
||||
AnalyticdbConfig,
|
||||
|
|
@ -222,5 +243,7 @@ class MiddlewareConfig(
|
|||
TiDBVectorConfig,
|
||||
WeaviateConfig,
|
||||
ElasticsearchConfig,
|
||||
InternalTestConfig,
|
||||
VikingDBConfig,
|
||||
):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class BaiduOBSStorageConfig(BaseModel):
|
||||
"""
|
||||
Configuration settings for Baidu Object Storage Service (OBS)
|
||||
"""
|
||||
|
||||
BAIDU_OBS_BUCKET_NAME: Optional[str] = Field(
|
||||
description="Name of the Baidu OBS bucket to store and retrieve objects (e.g., 'my-obs-bucket')",
|
||||
default=None,
|
||||
)
|
||||
|
||||
BAIDU_OBS_ACCESS_KEY: Optional[str] = Field(
|
||||
description="Access Key ID for authenticating with Baidu OBS",
|
||||
default=None,
|
||||
)
|
||||
|
||||
BAIDU_OBS_SECRET_KEY: Optional[str] = Field(
|
||||
description="Secret Access Key for authenticating with Baidu OBS",
|
||||
default=None,
|
||||
)
|
||||
|
||||
BAIDU_OBS_ENDPOINT: Optional[str] = Field(
|
||||
description="URL of the Baidu OSS endpoint for your chosen region (e.g., 'https://.bj.bcebos.com')",
|
||||
default=None,
|
||||
)
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class SupabaseStorageConfig(BaseModel):
|
||||
"""
|
||||
Configuration settings for Supabase Object Storage Service
|
||||
"""
|
||||
|
||||
SUPABASE_BUCKET_NAME: Optional[str] = Field(
|
||||
description="Name of the Supabase bucket to store and retrieve objects (e.g., 'dify-bucket')",
|
||||
default=None,
|
||||
)
|
||||
|
||||
SUPABASE_API_KEY: Optional[str] = Field(
|
||||
description="API KEY for authenticating with Supabase",
|
||||
default=None,
|
||||
)
|
||||
|
||||
SUPABASE_URL: Optional[str] = Field(
|
||||
description="URL of the Supabase",
|
||||
default=None,
|
||||
)
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import Field, NonNegativeInt, PositiveInt
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class BaiduVectorDBConfig(BaseSettings):
|
||||
"""
|
||||
Configuration settings for Baidu Vector Database
|
||||
"""
|
||||
|
||||
BAIDU_VECTOR_DB_ENDPOINT: Optional[str] = Field(
|
||||
description="URL of the Baidu Vector Database service (e.g., 'http://vdb.bj.baidubce.com')",
|
||||
default=None,
|
||||
)
|
||||
|
||||
BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS: PositiveInt = Field(
|
||||
description="Timeout in milliseconds for Baidu Vector Database operations (default is 30000 milliseconds)",
|
||||
default=30000,
|
||||
)
|
||||
|
||||
BAIDU_VECTOR_DB_ACCOUNT: Optional[str] = Field(
|
||||
description="Account for authenticating with the Baidu Vector Database",
|
||||
default=None,
|
||||
)
|
||||
|
||||
BAIDU_VECTOR_DB_API_KEY: Optional[str] = Field(
|
||||
description="API key for authenticating with the Baidu Vector Database service",
|
||||
default=None,
|
||||
)
|
||||
|
||||
BAIDU_VECTOR_DB_DATABASE: Optional[str] = Field(
|
||||
description="Name of the specific Baidu Vector Database to connect to",
|
||||
default=None,
|
||||
)
|
||||
|
||||
BAIDU_VECTOR_DB_SHARD: PositiveInt = Field(
|
||||
description="Number of shards for the Baidu Vector Database (default is 1)",
|
||||
default=1,
|
||||
)
|
||||
|
||||
BAIDU_VECTOR_DB_REPLICAS: NonNegativeInt = Field(
|
||||
description="Number of replicas for the Baidu Vector Database (default is 3)",
|
||||
default=3,
|
||||
)
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class VikingDBConfig(BaseModel):
|
||||
"""
|
||||
Configuration for connecting to Volcengine VikingDB.
|
||||
Refer to the following documentation for details on obtaining credentials:
|
||||
https://www.volcengine.com/docs/6291/65568
|
||||
"""
|
||||
|
||||
VIKINGDB_ACCESS_KEY: Optional[str] = Field(
|
||||
default=None, description="The Access Key provided by Volcengine VikingDB for API authentication."
|
||||
)
|
||||
VIKINGDB_SECRET_KEY: Optional[str] = Field(
|
||||
default=None, description="The Secret Key provided by Volcengine VikingDB for API authentication."
|
||||
)
|
||||
VIKINGDB_REGION: Optional[str] = Field(
|
||||
default="cn-shanghai",
|
||||
description="The region of the Volcengine VikingDB service.(e.g., 'cn-shanghai', 'cn-beijing').",
|
||||
)
|
||||
VIKINGDB_HOST: Optional[str] = Field(
|
||||
default="api-vikingdb.mlp.cn-shanghai.volces.com",
|
||||
description="The host of the Volcengine VikingDB service.(e.g., 'api-vikingdb.volces.com', \
|
||||
'api-vikingdb.mlp.cn-shanghai.volces.com')",
|
||||
)
|
||||
VIKINGDB_SCHEME: Optional[str] = Field(
|
||||
default="http",
|
||||
description="The scheme of the Volcengine VikingDB service.(e.g., 'http', 'https').",
|
||||
)
|
||||
VIKINGDB_CONNECTION_TIMEOUT: Optional[int] = Field(
|
||||
default=30, description="The connection timeout of the Volcengine VikingDB service."
|
||||
)
|
||||
VIKINGDB_SOCKET_TIMEOUT: Optional[int] = Field(
|
||||
default=30, description="The socket timeout of the Volcengine VikingDB service."
|
||||
)
|
||||
|
|
@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
|
|||
|
||||
CURRENT_VERSION: str = Field(
|
||||
description="Dify version",
|
||||
default="0.8.3",
|
||||
default="0.9.1",
|
||||
)
|
||||
|
||||
COMMIT_SHA: str = Field(
|
||||
|
|
|
|||
|
|
@ -37,7 +37,16 @@ from .auth import activate, data_source_bearer_auth, data_source_oauth, forgot_p
|
|||
from .billing import billing
|
||||
|
||||
# Import datasets controllers
|
||||
from .datasets import data_source, datasets, datasets_document, datasets_segments, file, hit_testing, website
|
||||
from .datasets import (
|
||||
data_source,
|
||||
datasets,
|
||||
datasets_document,
|
||||
datasets_segments,
|
||||
external,
|
||||
file,
|
||||
hit_testing,
|
||||
website,
|
||||
)
|
||||
|
||||
# Import explore controllers
|
||||
from .explore import (
|
||||
|
|
|
|||
|
|
@ -188,6 +188,7 @@ class ChatConversationApi(Resource):
|
|||
subquery.c.from_end_user_session_id.ilike(keyword_filter),
|
||||
),
|
||||
)
|
||||
.group_by(Conversation.id)
|
||||
)
|
||||
|
||||
account = current_user
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from flask_restful import Resource, reqparse
|
|||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.setup import setup_required
|
||||
from libs.helper import email, get_remote_ip
|
||||
from libs.helper import email, extract_remote_ip
|
||||
from libs.password import valid_password
|
||||
from models.account import Account
|
||||
from services.account_service import AccountService, TenantService
|
||||
|
|
@ -40,17 +40,16 @@ class LoginApi(Resource):
|
|||
"data": "workspace not found, please contact system admin to invite you to join in a workspace",
|
||||
}
|
||||
|
||||
token = AccountService.login(account, ip_address=get_remote_ip(request))
|
||||
token_pair = AccountService.login(account=account, ip_address=extract_remote_ip(request))
|
||||
|
||||
return {"result": "success", "data": token}
|
||||
return {"result": "success", "data": token_pair.model_dump()}
|
||||
|
||||
|
||||
class LogoutApi(Resource):
|
||||
@setup_required
|
||||
def get(self):
|
||||
account = cast(Account, flask_login.current_user)
|
||||
token = request.headers.get("Authorization", "").split(" ")[1]
|
||||
AccountService.logout(account=account, token=token)
|
||||
AccountService.logout(account=account)
|
||||
flask_login.logout_user()
|
||||
return {"result": "success"}
|
||||
|
||||
|
|
@ -106,5 +105,19 @@ class ResetPasswordApi(Resource):
|
|||
return {"result": "success"}
|
||||
|
||||
|
||||
class RefreshTokenApi(Resource):
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("refresh_token", type=str, required=True, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
new_token_pair = AccountService.refresh_token(args["refresh_token"])
|
||||
return {"result": "success", "data": new_token_pair.model_dump()}
|
||||
except Exception as e:
|
||||
return {"result": "fail", "data": str(e)}, 401
|
||||
|
||||
|
||||
api.add_resource(LoginApi, "/login")
|
||||
api.add_resource(LogoutApi, "/logout")
|
||||
api.add_resource(RefreshTokenApi, "/refresh-token")
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from flask_restful import Resource
|
|||
from configs import dify_config
|
||||
from constants.languages import languages
|
||||
from extensions.ext_database import db
|
||||
from libs.helper import get_remote_ip
|
||||
from libs.helper import extract_remote_ip
|
||||
from libs.oauth import GitHubOAuth, GoogleOAuth, OAuthUserInfo
|
||||
from models.account import Account, AccountStatus
|
||||
from services.account_service import AccountService, RegisterService, TenantService
|
||||
|
|
@ -81,9 +81,14 @@ class OAuthCallback(Resource):
|
|||
|
||||
TenantService.create_owner_tenant_if_not_exist(account)
|
||||
|
||||
token = AccountService.login(account, ip_address=get_remote_ip(request))
|
||||
token_pair = AccountService.login(
|
||||
account=account,
|
||||
ip_address=extract_remote_ip(request),
|
||||
)
|
||||
|
||||
return redirect(f"{dify_config.CONSOLE_WEB_URL}?console_token={token}")
|
||||
return redirect(
|
||||
f"{dify_config.CONSOLE_WEB_URL}?access_token={token_pair.access_token}&refresh_token={token_pair.refresh_token}"
|
||||
)
|
||||
|
||||
|
||||
def _get_account_by_openid_or_email(provider: str, user_info: OAuthUserInfo) -> Optional[Account]:
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ class DatasetListApi(Resource):
|
|||
page = request.args.get("page", default=1, type=int)
|
||||
limit = request.args.get("limit", default=20, type=int)
|
||||
ids = request.args.getlist("ids")
|
||||
provider = request.args.get("provider", default="vendor")
|
||||
# provider = request.args.get("provider", default="vendor")
|
||||
search = request.args.get("keyword", default=None, type=str)
|
||||
tag_ids = request.args.getlist("tag_ids")
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ class DatasetListApi(Resource):
|
|||
datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
|
||||
else:
|
||||
datasets, total = DatasetService.get_datasets(
|
||||
page, limit, provider, current_user.current_tenant_id, current_user, search, tag_ids
|
||||
page, limit, current_user.current_tenant_id, current_user, search, tag_ids
|
||||
)
|
||||
|
||||
# check embedding setting
|
||||
|
|
@ -110,6 +110,26 @@ class DatasetListApi(Resource):
|
|||
nullable=True,
|
||||
help="Invalid indexing technique.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"external_knowledge_api_id",
|
||||
type=str,
|
||||
nullable=True,
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"provider",
|
||||
type=str,
|
||||
nullable=True,
|
||||
choices=Dataset.PROVIDER_LIST,
|
||||
required=False,
|
||||
default="vendor",
|
||||
)
|
||||
parser.add_argument(
|
||||
"external_knowledge_id",
|
||||
type=str,
|
||||
nullable=True,
|
||||
required=False,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
|
||||
|
|
@ -123,6 +143,9 @@ class DatasetListApi(Resource):
|
|||
indexing_technique=args["indexing_technique"],
|
||||
account=current_user,
|
||||
permission=DatasetPermissionEnum.ONLY_ME,
|
||||
provider=args["provider"],
|
||||
external_knowledge_api_id=args["external_knowledge_api_id"],
|
||||
external_knowledge_id=args["external_knowledge_id"],
|
||||
)
|
||||
except services.errors.dataset.DatasetNameDuplicateError:
|
||||
raise DatasetNameDuplicateError()
|
||||
|
|
@ -211,6 +234,33 @@ class DatasetApi(Resource):
|
|||
)
|
||||
parser.add_argument("retrieval_model", type=dict, location="json", help="Invalid retrieval model.")
|
||||
parser.add_argument("partial_member_list", type=list, location="json", help="Invalid parent user list.")
|
||||
|
||||
parser.add_argument(
|
||||
"external_retrieval_model",
|
||||
type=dict,
|
||||
required=False,
|
||||
nullable=True,
|
||||
location="json",
|
||||
help="Invalid external retrieval model.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"external_knowledge_id",
|
||||
type=str,
|
||||
required=False,
|
||||
nullable=True,
|
||||
location="json",
|
||||
help="Invalid external knowledge id.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"external_knowledge_api_id",
|
||||
type=str,
|
||||
required=False,
|
||||
nullable=True,
|
||||
location="json",
|
||||
help="Invalid external knowledge api id.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
data = request.get_json()
|
||||
|
||||
|
|
@ -567,6 +617,8 @@ class DatasetRetrievalSettingApi(Resource):
|
|||
| VectorType.CHROMA
|
||||
| VectorType.TENCENT
|
||||
| VectorType.PGVECTO_RS
|
||||
| VectorType.BAIDU
|
||||
| VectorType.VIKINGDB
|
||||
):
|
||||
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
|
||||
case (
|
||||
|
|
@ -603,6 +655,8 @@ class DatasetRetrievalSettingMockApi(Resource):
|
|||
| VectorType.CHROMA
|
||||
| VectorType.TENCENT
|
||||
| VectorType.PGVECTO_RS
|
||||
| VectorType.BAIDU
|
||||
| VectorType.VIKINGDB
|
||||
):
|
||||
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
|
||||
case (
|
||||
|
|
|
|||
|
|
@ -0,0 +1,263 @@
|
|||
from flask import request
|
||||
from flask_login import current_user
|
||||
from flask_restful import Resource, marshal, reqparse
|
||||
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.datasets.error import DatasetNameDuplicateError
|
||||
from controllers.console.setup import setup_required
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from fields.dataset_fields import dataset_detail_fields
|
||||
from libs.login import login_required
|
||||
from services.dataset_service import DatasetService
|
||||
from services.external_knowledge_service import ExternalDatasetService
|
||||
from services.hit_testing_service import HitTestingService
|
||||
from services.knowledge_service import ExternalDatasetTestService
|
||||
|
||||
|
||||
def _validate_name(name):
|
||||
if not name or len(name) < 1 or len(name) > 100:
|
||||
raise ValueError("Name must be between 1 to 100 characters.")
|
||||
return name
|
||||
|
||||
|
||||
def _validate_description_length(description):
|
||||
if description and len(description) > 400:
|
||||
raise ValueError("Description cannot exceed 400 characters.")
|
||||
return description
|
||||
|
||||
|
||||
class ExternalApiTemplateListApi(Resource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def get(self):
|
||||
page = request.args.get("page", default=1, type=int)
|
||||
limit = request.args.get("limit", default=20, type=int)
|
||||
search = request.args.get("keyword", default=None, type=str)
|
||||
|
||||
external_knowledge_apis, total = ExternalDatasetService.get_external_knowledge_apis(
|
||||
page, limit, current_user.current_tenant_id, search
|
||||
)
|
||||
response = {
|
||||
"data": [item.to_dict() for item in external_knowledge_apis],
|
||||
"has_more": len(external_knowledge_apis) == limit,
|
||||
"limit": limit,
|
||||
"total": total,
|
||||
"page": page,
|
||||
}
|
||||
return response, 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument(
|
||||
"name",
|
||||
nullable=False,
|
||||
required=True,
|
||||
help="Name is required. Name must be between 1 to 100 characters.",
|
||||
type=_validate_name,
|
||||
)
|
||||
parser.add_argument(
|
||||
"settings",
|
||||
type=dict,
|
||||
location="json",
|
||||
nullable=False,
|
||||
required=True,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
ExternalDatasetService.validate_api_list(args["settings"])
|
||||
|
||||
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
|
||||
if not current_user.is_dataset_editor:
|
||||
raise Forbidden()
|
||||
|
||||
try:
|
||||
external_knowledge_api = ExternalDatasetService.create_external_knowledge_api(
|
||||
tenant_id=current_user.current_tenant_id, user_id=current_user.id, args=args
|
||||
)
|
||||
except services.errors.dataset.DatasetNameDuplicateError:
|
||||
raise DatasetNameDuplicateError()
|
||||
|
||||
return external_knowledge_api.to_dict(), 201
|
||||
|
||||
|
||||
class ExternalApiTemplateApi(Resource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def get(self, external_knowledge_api_id):
|
||||
external_knowledge_api_id = str(external_knowledge_api_id)
|
||||
external_knowledge_api = ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id)
|
||||
if external_knowledge_api is None:
|
||||
raise NotFound("API template not found.")
|
||||
|
||||
return external_knowledge_api.to_dict(), 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def patch(self, external_knowledge_api_id):
|
||||
external_knowledge_api_id = str(external_knowledge_api_id)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument(
|
||||
"name",
|
||||
nullable=False,
|
||||
required=True,
|
||||
help="type is required. Name must be between 1 to 100 characters.",
|
||||
type=_validate_name,
|
||||
)
|
||||
parser.add_argument(
|
||||
"settings",
|
||||
type=dict,
|
||||
location="json",
|
||||
nullable=False,
|
||||
required=True,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
ExternalDatasetService.validate_api_list(args["settings"])
|
||||
|
||||
external_knowledge_api = ExternalDatasetService.update_external_knowledge_api(
|
||||
tenant_id=current_user.current_tenant_id,
|
||||
user_id=current_user.id,
|
||||
external_knowledge_api_id=external_knowledge_api_id,
|
||||
args=args,
|
||||
)
|
||||
|
||||
return external_knowledge_api.to_dict(), 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def delete(self, external_knowledge_api_id):
|
||||
external_knowledge_api_id = str(external_knowledge_api_id)
|
||||
|
||||
# The role of the current user in the ta table must be admin, owner, or editor
|
||||
if not current_user.is_editor or current_user.is_dataset_operator:
|
||||
raise Forbidden()
|
||||
|
||||
ExternalDatasetService.delete_external_knowledge_api(current_user.current_tenant_id, external_knowledge_api_id)
|
||||
return {"result": "success"}, 200
|
||||
|
||||
|
||||
class ExternalApiUseCheckApi(Resource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def get(self, external_knowledge_api_id):
|
||||
external_knowledge_api_id = str(external_knowledge_api_id)
|
||||
|
||||
external_knowledge_api_is_using, count = ExternalDatasetService.external_knowledge_api_use_check(
|
||||
external_knowledge_api_id
|
||||
)
|
||||
return {"is_using": external_knowledge_api_is_using, "count": count}, 200
|
||||
|
||||
|
||||
class ExternalDatasetCreateApi(Resource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def post(self):
|
||||
# The role of the current user in the ta table must be admin, owner, or editor
|
||||
if not current_user.is_editor:
|
||||
raise Forbidden()
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("external_knowledge_api_id", type=str, required=True, nullable=False, location="json")
|
||||
parser.add_argument("external_knowledge_id", type=str, required=True, nullable=False, location="json")
|
||||
parser.add_argument(
|
||||
"name",
|
||||
nullable=False,
|
||||
required=True,
|
||||
help="name is required. Name must be between 1 to 100 characters.",
|
||||
type=_validate_name,
|
||||
)
|
||||
parser.add_argument("description", type=str, required=False, nullable=True, location="json")
|
||||
parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
|
||||
if not current_user.is_dataset_editor:
|
||||
raise Forbidden()
|
||||
|
||||
try:
|
||||
dataset = ExternalDatasetService.create_external_dataset(
|
||||
tenant_id=current_user.current_tenant_id,
|
||||
user_id=current_user.id,
|
||||
args=args,
|
||||
)
|
||||
except services.errors.dataset.DatasetNameDuplicateError:
|
||||
raise DatasetNameDuplicateError()
|
||||
|
||||
return marshal(dataset, dataset_detail_fields), 201
|
||||
|
||||
|
||||
class ExternalKnowledgeHitTestingApi(Resource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def post(self, dataset_id):
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
|
||||
try:
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
except services.errors.account.NoPermissionError as e:
|
||||
raise Forbidden(str(e))
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("query", type=str, location="json")
|
||||
parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
HitTestingService.hit_testing_args_check(args)
|
||||
|
||||
try:
|
||||
response = HitTestingService.external_retrieve(
|
||||
dataset=dataset,
|
||||
query=args["query"],
|
||||
account=current_user,
|
||||
external_retrieval_model=args["external_retrieval_model"],
|
||||
)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise InternalServerError(str(e))
|
||||
|
||||
|
||||
class BedrockRetrievalApi(Resource):
|
||||
# this api is only for internal testing
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("retrieval_setting", nullable=False, required=True, type=dict, location="json")
|
||||
parser.add_argument(
|
||||
"query",
|
||||
nullable=False,
|
||||
required=True,
|
||||
type=str,
|
||||
)
|
||||
parser.add_argument("knowledge_id", nullable=False, required=True, type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
# Call the knowledge retrieval service
|
||||
result = ExternalDatasetTestService.knowledge_retrieval(
|
||||
args["retrieval_setting"], args["query"], args["knowledge_id"]
|
||||
)
|
||||
return result, 200
|
||||
|
||||
|
||||
api.add_resource(ExternalKnowledgeHitTestingApi, "/datasets/<uuid:dataset_id>/external-hit-testing")
|
||||
api.add_resource(ExternalDatasetCreateApi, "/datasets/external")
|
||||
api.add_resource(ExternalApiTemplateListApi, "/datasets/external-knowledge-api")
|
||||
api.add_resource(ExternalApiTemplateApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>")
|
||||
api.add_resource(ExternalApiUseCheckApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>/use-check")
|
||||
# this api is only for internal test
|
||||
api.add_resource(BedrockRetrievalApi, "/test/retrieval")
|
||||
|
|
@ -47,6 +47,7 @@ class HitTestingApi(Resource):
|
|||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("query", type=str, location="json")
|
||||
parser.add_argument("retrieval_model", type=dict, required=False, location="json")
|
||||
parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
HitTestingService.hit_testing_args_check(args)
|
||||
|
|
@ -57,6 +58,7 @@ class HitTestingApi(Resource):
|
|||
query=args["query"],
|
||||
account=current_user,
|
||||
retrieval_model=args["retrieval_model"],
|
||||
external_retrieval_model=args["external_retrieval_model"],
|
||||
limit=10,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,9 @@ class WebsiteCrawlApi(Resource):
|
|||
@account_initialization_required
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("provider", type=str, choices=["firecrawl"], required=True, nullable=True, location="json")
|
||||
parser.add_argument(
|
||||
"provider", type=str, choices=["firecrawl", "jinareader"], required=True, nullable=True, location="json"
|
||||
)
|
||||
parser.add_argument("url", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
|
||||
args = parser.parse_args()
|
||||
|
|
@ -33,7 +35,7 @@ class WebsiteCrawlStatusApi(Resource):
|
|||
@account_initialization_required
|
||||
def get(self, job_id: str):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("provider", type=str, choices=["firecrawl"], required=True, location="args")
|
||||
parser.add_argument("provider", type=str, choices=["firecrawl", "jinareader"], required=True, location="args")
|
||||
args = parser.parse_args()
|
||||
# get crawl status
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from flask import request
|
|||
from flask_restful import Resource, reqparse
|
||||
|
||||
from configs import dify_config
|
||||
from libs.helper import StrLen, email, get_remote_ip
|
||||
from libs.helper import StrLen, email, extract_remote_ip
|
||||
from libs.password import valid_password
|
||||
from models.model import DifySetup, db
|
||||
from services.account_service import RegisterService, TenantService
|
||||
|
|
@ -46,7 +46,7 @@ class SetupApi(Resource):
|
|||
|
||||
# setup
|
||||
RegisterService.setup(
|
||||
email=args["email"], name=args["name"], password=args["password"], ip_address=get_remote_ip(request)
|
||||
email=args["email"], name=args["name"], password=args["password"], ip_address=extract_remote_ip(request)
|
||||
)
|
||||
|
||||
return {"result": "success"}, 201
|
||||
|
|
|
|||
|
|
@ -38,11 +38,52 @@ class VersionApi(Resource):
|
|||
return result
|
||||
|
||||
content = json.loads(response.content)
|
||||
result["version"] = content["version"]
|
||||
result["release_date"] = content["releaseDate"]
|
||||
result["release_notes"] = content["releaseNotes"]
|
||||
result["can_auto_update"] = content["canAutoUpdate"]
|
||||
if _has_new_version(latest_version=content["version"], current_version=f"{args.get('current_version')}"):
|
||||
result["version"] = content["version"]
|
||||
result["release_date"] = content["releaseDate"]
|
||||
result["release_notes"] = content["releaseNotes"]
|
||||
result["can_auto_update"] = content["canAutoUpdate"]
|
||||
return result
|
||||
|
||||
|
||||
def _has_new_version(*, latest_version: str, current_version: str) -> bool:
|
||||
def parse_version(version: str) -> tuple:
|
||||
# Split version into parts and pre-release suffix if any
|
||||
parts = version.split("-")
|
||||
version_parts = parts[0].split(".")
|
||||
pre_release = parts[1] if len(parts) > 1 else None
|
||||
|
||||
# Validate version format
|
||||
if len(version_parts) != 3:
|
||||
raise ValueError(f"Invalid version format: {version}")
|
||||
|
||||
try:
|
||||
# Convert version parts to integers
|
||||
major, minor, patch = map(int, version_parts)
|
||||
return (major, minor, patch, pre_release)
|
||||
except ValueError:
|
||||
raise ValueError(f"Invalid version format: {version}")
|
||||
|
||||
latest = parse_version(latest_version)
|
||||
current = parse_version(current_version)
|
||||
|
||||
# Compare major, minor, and patch versions
|
||||
for latest_part, current_part in zip(latest[:3], current[:3]):
|
||||
if latest_part > current_part:
|
||||
return True
|
||||
elif latest_part < current_part:
|
||||
return False
|
||||
|
||||
# If versions are equal, check pre-release suffixes
|
||||
if latest[3] is None and current[3] is not None:
|
||||
return True
|
||||
elif latest[3] is not None and current[3] is None:
|
||||
return False
|
||||
elif latest[3] is not None and current[3] is not None:
|
||||
# Simple string comparison for pre-release versions
|
||||
return latest[3] > current[3]
|
||||
|
||||
return False
|
||||
|
||||
|
||||
api.add_resource(VersionApi, "/version")
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class ModelProviderValidateApi(Resource):
|
|||
response = {"result": "success" if result else "error"}
|
||||
|
||||
if not result:
|
||||
response["error"] = error
|
||||
response["error"] = error or "Unknown error"
|
||||
|
||||
return response
|
||||
|
||||
|
|
|
|||
|
|
@ -72,8 +72,9 @@ class DefaultModelApi(Resource):
|
|||
provider=model_setting["provider"],
|
||||
model=model_setting["model"],
|
||||
)
|
||||
except Exception:
|
||||
logging.warning(f"{model_setting['model_type']} save error")
|
||||
except Exception as ex:
|
||||
logging.exception(f"{model_setting['model_type']} save error: {ex}")
|
||||
raise ex
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,7 @@
|
|||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class UnsupportedFileTypeError(BaseHTTPException):
|
||||
error_code = "unsupported_file_type"
|
||||
description = "File type not allowed."
|
||||
code = 415
|
||||
|
|
@ -4,7 +4,7 @@ from werkzeug.exceptions import NotFound
|
|||
|
||||
import services
|
||||
from controllers.files import api
|
||||
from libs.exception import BaseHTTPException
|
||||
from controllers.files.error import UnsupportedFileTypeError
|
||||
from services.account_service import TenantService
|
||||
from services.file_service import FileService
|
||||
|
||||
|
|
@ -50,9 +50,3 @@ class WorkspaceWebappLogoApi(Resource):
|
|||
|
||||
api.add_resource(ImagePreviewApi, "/files/<uuid:file_id>/image-preview")
|
||||
api.add_resource(WorkspaceWebappLogoApi, "/files/workspaces/<uuid:workspace_id>/webapp-logo")
|
||||
|
||||
|
||||
class UnsupportedFileTypeError(BaseHTTPException):
|
||||
error_code = "unsupported_file_type"
|
||||
description = "File type not allowed."
|
||||
code = 415
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ from flask_restful import Resource, reqparse
|
|||
from werkzeug.exceptions import Forbidden, NotFound
|
||||
|
||||
from controllers.files import api
|
||||
from controllers.files.error import UnsupportedFileTypeError
|
||||
from core.tools.tool_file_manager import ToolFileManager
|
||||
from libs.exception import BaseHTTPException
|
||||
|
||||
|
||||
class ToolFilePreviewApi(Resource):
|
||||
|
|
@ -43,9 +43,3 @@ class ToolFilePreviewApi(Resource):
|
|||
|
||||
|
||||
api.add_resource(ToolFilePreviewApi, "/files/tools/<uuid:file_id>.<string:extension>")
|
||||
|
||||
|
||||
class UnsupportedFileTypeError(BaseHTTPException):
|
||||
error_code = "unsupported_file_type"
|
||||
description = "File type not allowed."
|
||||
code = 415
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from flask_restful import Resource, reqparse
|
|||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from constants import UUID_NIL
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app.error import (
|
||||
AppUnavailableError,
|
||||
|
|
@ -107,6 +108,7 @@ class ChatApi(Resource):
|
|||
parser.add_argument("conversation_id", type=uuid_value, location="json")
|
||||
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
|
||||
parser.add_argument("auto_generate_name", type=bool, required=False, default=True, location="json")
|
||||
parser.add_argument("parent_message_id", type=uuid_value, required=False, default=UUID_NIL, location="json")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
|||
|
|
@ -28,11 +28,11 @@ class DatasetListApi(DatasetApiResource):
|
|||
|
||||
page = request.args.get("page", default=1, type=int)
|
||||
limit = request.args.get("limit", default=20, type=int)
|
||||
provider = request.args.get("provider", default="vendor")
|
||||
# provider = request.args.get("provider", default="vendor")
|
||||
search = request.args.get("keyword", default=None, type=str)
|
||||
tag_ids = request.args.getlist("tag_ids")
|
||||
|
||||
datasets, total = DatasetService.get_datasets(page, limit, provider, tenant_id, current_user, search, tag_ids)
|
||||
datasets, total = DatasetService.get_datasets(page, limit, tenant_id, current_user, search, tag_ids)
|
||||
# check embedding setting
|
||||
provider_manager = ProviderManager()
|
||||
configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)
|
||||
|
|
@ -82,6 +82,26 @@ class DatasetListApi(DatasetApiResource):
|
|||
required=False,
|
||||
nullable=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"external_knowledge_api_id",
|
||||
type=str,
|
||||
nullable=True,
|
||||
required=False,
|
||||
default="_validate_name",
|
||||
)
|
||||
parser.add_argument(
|
||||
"provider",
|
||||
type=str,
|
||||
nullable=True,
|
||||
required=False,
|
||||
default="vendor",
|
||||
)
|
||||
parser.add_argument(
|
||||
"external_knowledge_id",
|
||||
type=str,
|
||||
nullable=True,
|
||||
required=False,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
|
|
@ -91,6 +111,9 @@ class DatasetListApi(DatasetApiResource):
|
|||
indexing_technique=args["indexing_technique"],
|
||||
account=current_user,
|
||||
permission=args["permission"],
|
||||
provider=args["provider"],
|
||||
external_knowledge_api_id=args["external_knowledge_api_id"],
|
||||
external_knowledge_id=args["external_knowledge_id"],
|
||||
)
|
||||
except services.errors.dataset.DatasetNameDuplicateError:
|
||||
raise DatasetNameDuplicateError()
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import json
|
||||
from typing import Optional
|
||||
|
||||
from core.agent.cot_agent_runner import CotAgentRunner
|
||||
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage, UserPromptMessage
|
||||
|
|
@ -21,7 +22,7 @@ class CotCompletionAgentRunner(CotAgentRunner):
|
|||
|
||||
return system_prompt
|
||||
|
||||
def _organize_historic_prompt(self, current_session_messages: list[PromptMessage] = None) -> str:
|
||||
def _organize_historic_prompt(self, current_session_messages: Optional[list[PromptMessage]] = None) -> str:
|
||||
"""
|
||||
Organize historic prompt
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ class CotAgentOutputParser:
|
|||
) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]:
|
||||
def parse_action(json_str):
|
||||
try:
|
||||
action = json.loads(json_str)
|
||||
action = json.loads(json_str, strict=False)
|
||||
action_name = None
|
||||
action_input = None
|
||||
|
||||
|
|
|
|||
|
|
@ -124,6 +124,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
|
|||
# always enable retriever resource in debugger mode
|
||||
app_config.additional_features.show_retrieve_source = True
|
||||
|
||||
workflow_run_id = str(uuid.uuid4())
|
||||
# init application generate entity
|
||||
application_generate_entity = AdvancedChatAppGenerateEntity(
|
||||
task_id=str(uuid.uuid4()),
|
||||
|
|
@ -138,6 +139,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
|
|||
invoke_from=invoke_from,
|
||||
extras=extras,
|
||||
trace_manager=trace_manager,
|
||||
workflow_run_id=workflow_run_id,
|
||||
)
|
||||
contexts.tenant_id.set(application_generate_entity.app_config.tenant_id)
|
||||
|
||||
|
|
|
|||
|
|
@ -149,6 +149,9 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
|
|||
SystemVariableKey.CONVERSATION_ID: self.conversation.id,
|
||||
SystemVariableKey.USER_ID: user_id,
|
||||
SystemVariableKey.DIALOGUE_COUNT: conversation_dialogue_count,
|
||||
SystemVariableKey.APP_ID: app_config.app_id,
|
||||
SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
|
||||
SystemVariableKey.WORKFLOW_RUN_ID: self.application_generate_entity.workflow_run_id,
|
||||
}
|
||||
|
||||
# init variable pool
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ from core.app.entities.task_entities import (
|
|||
from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline
|
||||
from core.app.task_pipeline.message_cycle_manage import MessageCycleManage
|
||||
from core.app.task_pipeline.workflow_cycle_manage import WorkflowCycleManage
|
||||
from core.model_runtime.entities.llm_entities import LLMUsage
|
||||
from core.model_runtime.utils.encoders import jsonable_encoder
|
||||
from core.ops.ops_trace_manager import TraceQueueManager
|
||||
from core.workflow.enums import SystemVariableKey
|
||||
|
|
@ -55,6 +56,7 @@ from models.account import Account
|
|||
from models.model import Conversation, EndUser, Message
|
||||
from models.workflow import (
|
||||
Workflow,
|
||||
WorkflowNodeExecution,
|
||||
WorkflowRunStatus,
|
||||
)
|
||||
|
||||
|
|
@ -71,6 +73,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
|
|||
_workflow: Workflow
|
||||
_user: Union[Account, EndUser]
|
||||
_workflow_system_variables: dict[SystemVariableKey, Any]
|
||||
_wip_workflow_node_executions: dict[str, WorkflowNodeExecution]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -107,9 +110,14 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
|
|||
SystemVariableKey.FILES: application_generate_entity.files,
|
||||
SystemVariableKey.CONVERSATION_ID: conversation.id,
|
||||
SystemVariableKey.USER_ID: user_id,
|
||||
SystemVariableKey.DIALOGUE_COUNT: conversation.dialogue_count,
|
||||
SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id,
|
||||
SystemVariableKey.WORKFLOW_ID: workflow.id,
|
||||
SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id,
|
||||
}
|
||||
|
||||
self._task_state = WorkflowTaskState()
|
||||
self._wip_workflow_node_executions = {}
|
||||
|
||||
self._conversation_name_generate_thread = None
|
||||
|
||||
|
|
@ -505,6 +513,10 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
|
|||
self._message.total_price = usage.total_price
|
||||
self._message.currency = usage.currency
|
||||
|
||||
self._task_state.metadata["usage"] = jsonable_encoder(usage)
|
||||
else:
|
||||
self._task_state.metadata["usage"] = jsonable_encoder(LLMUsage.empty_usage())
|
||||
|
||||
db.session.commit()
|
||||
|
||||
message_was_created.send(
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
|
|||
user_id = user.id if isinstance(user, Account) else user.session_id
|
||||
trace_manager = TraceQueueManager(app_model.id, user_id)
|
||||
|
||||
workflow_run_id = str(uuid.uuid4())
|
||||
# init application generate entity
|
||||
application_generate_entity = WorkflowAppGenerateEntity(
|
||||
task_id=str(uuid.uuid4()),
|
||||
|
|
@ -122,6 +123,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
|
|||
invoke_from=invoke_from,
|
||||
call_depth=call_depth,
|
||||
trace_manager=trace_manager,
|
||||
workflow_run_id=workflow_run_id,
|
||||
)
|
||||
contexts.tenant_id.set(application_generate_entity.app_config.tenant_id)
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,9 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
|
|||
system_inputs = {
|
||||
SystemVariableKey.FILES: files,
|
||||
SystemVariableKey.USER_ID: user_id,
|
||||
SystemVariableKey.APP_ID: app_config.app_id,
|
||||
SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
|
||||
SystemVariableKey.WORKFLOW_RUN_ID: self.application_generate_entity.workflow_run_id,
|
||||
}
|
||||
|
||||
variable_pool = VariablePool(
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ from models.workflow import (
|
|||
Workflow,
|
||||
WorkflowAppLog,
|
||||
WorkflowAppLogCreatedFrom,
|
||||
WorkflowNodeExecution,
|
||||
WorkflowRun,
|
||||
WorkflowRunStatus,
|
||||
)
|
||||
|
|
@ -69,6 +70,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
|
|||
_task_state: WorkflowTaskState
|
||||
_application_generate_entity: WorkflowAppGenerateEntity
|
||||
_workflow_system_variables: dict[SystemVariableKey, Any]
|
||||
_wip_workflow_node_executions: dict[str, WorkflowNodeExecution]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -97,9 +99,13 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
|
|||
self._workflow_system_variables = {
|
||||
SystemVariableKey.FILES: application_generate_entity.files,
|
||||
SystemVariableKey.USER_ID: user_id,
|
||||
SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id,
|
||||
SystemVariableKey.WORKFLOW_ID: workflow.id,
|
||||
SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id,
|
||||
}
|
||||
|
||||
self._task_state = WorkflowTaskState()
|
||||
self._wip_workflow_node_executions = {}
|
||||
|
||||
def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -152,6 +152,7 @@ class AdvancedChatAppGenerateEntity(AppGenerateEntity):
|
|||
|
||||
conversation_id: Optional[str] = None
|
||||
parent_message_id: Optional[str] = None
|
||||
workflow_run_id: Optional[str] = None
|
||||
query: str
|
||||
|
||||
class SingleIterationRunEntity(BaseModel):
|
||||
|
|
@ -172,6 +173,7 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
|
|||
|
||||
# app config
|
||||
app_config: WorkflowUIBasedAppConfig
|
||||
workflow_run_id: Optional[str] = None
|
||||
|
||||
class SingleIterationRunEntity(BaseModel):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
class VariableError(Exception):
|
||||
class VariableError(ValueError):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
import logging
|
||||
from threading import Thread
|
||||
from typing import Optional, Union
|
||||
|
||||
from flask import Flask, current_app
|
||||
|
||||
from configs import dify_config
|
||||
from core.app.entities.app_invoke_entities import (
|
||||
AdvancedChatAppGenerateEntity,
|
||||
AgentChatAppGenerateEntity,
|
||||
|
|
@ -82,7 +84,9 @@ class MessageCycleManage:
|
|||
try:
|
||||
name = LLMGenerator.generate_conversation_name(app_model.tenant_id, query)
|
||||
conversation.name = name
|
||||
except:
|
||||
except Exception as e:
|
||||
if dify_config.DEBUG:
|
||||
logging.exception(f"generate conversation name failed: {e}")
|
||||
pass
|
||||
|
||||
db.session.merge(conversation)
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ class WorkflowCycleManage:
|
|||
_user: Union[Account, EndUser]
|
||||
_task_state: WorkflowTaskState
|
||||
_workflow_system_variables: dict[SystemVariableKey, Any]
|
||||
_wip_workflow_node_executions: dict[str, WorkflowNodeExecution]
|
||||
|
||||
def _handle_workflow_run_start(self) -> WorkflowRun:
|
||||
max_sequence = (
|
||||
|
|
@ -85,6 +86,9 @@ class WorkflowCycleManage:
|
|||
|
||||
# init workflow run
|
||||
workflow_run = WorkflowRun()
|
||||
workflow_run_id = self._workflow_system_variables[SystemVariableKey.WORKFLOW_RUN_ID]
|
||||
if workflow_run_id:
|
||||
workflow_run.id = workflow_run_id
|
||||
workflow_run.tenant_id = self._workflow.tenant_id
|
||||
workflow_run.app_id = self._workflow.app_id
|
||||
workflow_run.sequence_number = new_sequence_number
|
||||
|
|
@ -248,6 +252,8 @@ class WorkflowCycleManage:
|
|||
db.session.refresh(workflow_node_execution)
|
||||
db.session.close()
|
||||
|
||||
self._wip_workflow_node_executions[workflow_node_execution.node_execution_id] = workflow_node_execution
|
||||
|
||||
return workflow_node_execution
|
||||
|
||||
def _handle_workflow_node_execution_success(self, event: QueueNodeSucceededEvent) -> WorkflowNodeExecution:
|
||||
|
|
@ -260,20 +266,36 @@ class WorkflowCycleManage:
|
|||
|
||||
inputs = WorkflowEntry.handle_special_values(event.inputs)
|
||||
outputs = WorkflowEntry.handle_special_values(event.outputs)
|
||||
execution_metadata = (
|
||||
json.dumps(jsonable_encoder(event.execution_metadata)) if event.execution_metadata else None
|
||||
)
|
||||
finished_at = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||
elapsed_time = (finished_at - event.start_at).total_seconds()
|
||||
|
||||
db.session.query(WorkflowNodeExecution).filter(WorkflowNodeExecution.id == workflow_node_execution.id).update(
|
||||
{
|
||||
WorkflowNodeExecution.status: WorkflowNodeExecutionStatus.SUCCEEDED.value,
|
||||
WorkflowNodeExecution.inputs: json.dumps(inputs) if inputs else None,
|
||||
WorkflowNodeExecution.process_data: json.dumps(event.process_data) if event.process_data else None,
|
||||
WorkflowNodeExecution.outputs: json.dumps(outputs) if outputs else None,
|
||||
WorkflowNodeExecution.execution_metadata: execution_metadata,
|
||||
WorkflowNodeExecution.finished_at: finished_at,
|
||||
WorkflowNodeExecution.elapsed_time: elapsed_time,
|
||||
}
|
||||
)
|
||||
|
||||
db.session.commit()
|
||||
db.session.close()
|
||||
|
||||
workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value
|
||||
workflow_node_execution.inputs = json.dumps(inputs) if inputs else None
|
||||
workflow_node_execution.process_data = json.dumps(event.process_data) if event.process_data else None
|
||||
workflow_node_execution.outputs = json.dumps(outputs) if outputs else None
|
||||
workflow_node_execution.execution_metadata = (
|
||||
json.dumps(jsonable_encoder(event.execution_metadata)) if event.execution_metadata else None
|
||||
)
|
||||
workflow_node_execution.finished_at = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||
workflow_node_execution.elapsed_time = (workflow_node_execution.finished_at - event.start_at).total_seconds()
|
||||
workflow_node_execution.execution_metadata = execution_metadata
|
||||
workflow_node_execution.finished_at = finished_at
|
||||
workflow_node_execution.elapsed_time = elapsed_time
|
||||
|
||||
db.session.commit()
|
||||
db.session.refresh(workflow_node_execution)
|
||||
db.session.close()
|
||||
self._wip_workflow_node_executions.pop(workflow_node_execution.node_execution_id)
|
||||
|
||||
return workflow_node_execution
|
||||
|
||||
|
|
@ -287,18 +309,33 @@ class WorkflowCycleManage:
|
|||
|
||||
inputs = WorkflowEntry.handle_special_values(event.inputs)
|
||||
outputs = WorkflowEntry.handle_special_values(event.outputs)
|
||||
finished_at = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||
elapsed_time = (finished_at - event.start_at).total_seconds()
|
||||
|
||||
db.session.query(WorkflowNodeExecution).filter(WorkflowNodeExecution.id == workflow_node_execution.id).update(
|
||||
{
|
||||
WorkflowNodeExecution.status: WorkflowNodeExecutionStatus.FAILED.value,
|
||||
WorkflowNodeExecution.error: event.error,
|
||||
WorkflowNodeExecution.inputs: json.dumps(inputs) if inputs else None,
|
||||
WorkflowNodeExecution.process_data: json.dumps(event.process_data) if event.process_data else None,
|
||||
WorkflowNodeExecution.outputs: json.dumps(outputs) if outputs else None,
|
||||
WorkflowNodeExecution.finished_at: finished_at,
|
||||
WorkflowNodeExecution.elapsed_time: elapsed_time,
|
||||
}
|
||||
)
|
||||
|
||||
db.session.commit()
|
||||
db.session.close()
|
||||
|
||||
workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value
|
||||
workflow_node_execution.error = event.error
|
||||
workflow_node_execution.finished_at = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||
workflow_node_execution.inputs = json.dumps(inputs) if inputs else None
|
||||
workflow_node_execution.process_data = json.dumps(event.process_data) if event.process_data else None
|
||||
workflow_node_execution.outputs = json.dumps(outputs) if outputs else None
|
||||
workflow_node_execution.elapsed_time = (workflow_node_execution.finished_at - event.start_at).total_seconds()
|
||||
workflow_node_execution.finished_at = finished_at
|
||||
workflow_node_execution.elapsed_time = elapsed_time
|
||||
|
||||
db.session.commit()
|
||||
db.session.refresh(workflow_node_execution)
|
||||
db.session.close()
|
||||
self._wip_workflow_node_executions.pop(workflow_node_execution.node_execution_id)
|
||||
|
||||
return workflow_node_execution
|
||||
|
||||
|
|
@ -675,17 +712,7 @@ class WorkflowCycleManage:
|
|||
:param node_execution_id: workflow node execution id
|
||||
:return:
|
||||
"""
|
||||
workflow_node_execution = (
|
||||
db.session.query(WorkflowNodeExecution)
|
||||
.filter(
|
||||
WorkflowNodeExecution.tenant_id == self._application_generate_entity.app_config.tenant_id,
|
||||
WorkflowNodeExecution.app_id == self._application_generate_entity.app_config.app_id,
|
||||
WorkflowNodeExecution.workflow_id == self._workflow.id,
|
||||
WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value,
|
||||
WorkflowNodeExecution.node_execution_id == node_execution_id,
|
||||
)
|
||||
.first()
|
||||
)
|
||||
workflow_node_execution = self._wip_workflow_node_executions.get(node_execution_id)
|
||||
|
||||
if not workflow_node_execution:
|
||||
raise Exception(f"Workflow node execution not found: {node_execution_id}")
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
import os
|
||||
from collections.abc import Iterable, Mapping
|
||||
from typing import Any, Optional, TextIO, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from configs import dify_config
|
||||
from core.ops.entities.trace_entity import TraceTaskName
|
||||
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
|
|
@ -50,7 +50,8 @@ class DifyAgentCallbackHandler(BaseModel):
|
|||
tool_inputs: Mapping[str, Any],
|
||||
) -> None:
|
||||
"""Do nothing."""
|
||||
print_text("\n[on_tool_start] ToolCall:" + tool_name + "\n" + str(tool_inputs) + "\n", color=self.color)
|
||||
if dify_config.DEBUG:
|
||||
print_text("\n[on_tool_start] ToolCall:" + tool_name + "\n" + str(tool_inputs) + "\n", color=self.color)
|
||||
|
||||
def on_tool_end(
|
||||
self,
|
||||
|
|
@ -62,11 +63,12 @@ class DifyAgentCallbackHandler(BaseModel):
|
|||
trace_manager: Optional[TraceQueueManager] = None,
|
||||
) -> None:
|
||||
"""If not the final action, print out observation."""
|
||||
print_text("\n[on_tool_end]\n", color=self.color)
|
||||
print_text("Tool: " + tool_name + "\n", color=self.color)
|
||||
print_text("Inputs: " + str(tool_inputs) + "\n", color=self.color)
|
||||
print_text("Outputs: " + str(tool_outputs)[:1000] + "\n", color=self.color)
|
||||
print_text("\n")
|
||||
if dify_config.DEBUG:
|
||||
print_text("\n[on_tool_end]\n", color=self.color)
|
||||
print_text("Tool: " + tool_name + "\n", color=self.color)
|
||||
print_text("Inputs: " + str(tool_inputs) + "\n", color=self.color)
|
||||
print_text("Outputs: " + str(tool_outputs)[:1000] + "\n", color=self.color)
|
||||
print_text("\n")
|
||||
|
||||
if trace_manager:
|
||||
trace_manager.add_trace_task(
|
||||
|
|
@ -82,30 +84,33 @@ class DifyAgentCallbackHandler(BaseModel):
|
|||
|
||||
def on_tool_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> None:
|
||||
"""Do nothing."""
|
||||
print_text("\n[on_tool_error] Error: " + str(error) + "\n", color="red")
|
||||
if dify_config.DEBUG:
|
||||
print_text("\n[on_tool_error] Error: " + str(error) + "\n", color="red")
|
||||
|
||||
def on_agent_start(self, thought: str) -> None:
|
||||
"""Run on agent start."""
|
||||
if thought:
|
||||
print_text(
|
||||
"\n[on_agent_start] \nCurrent Loop: " + str(self.current_loop) + "\nThought: " + thought + "\n",
|
||||
color=self.color,
|
||||
)
|
||||
else:
|
||||
print_text("\n[on_agent_start] \nCurrent Loop: " + str(self.current_loop) + "\n", color=self.color)
|
||||
if dify_config.DEBUG:
|
||||
if thought:
|
||||
print_text(
|
||||
"\n[on_agent_start] \nCurrent Loop: " + str(self.current_loop) + "\nThought: " + thought + "\n",
|
||||
color=self.color,
|
||||
)
|
||||
else:
|
||||
print_text("\n[on_agent_start] \nCurrent Loop: " + str(self.current_loop) + "\n", color=self.color)
|
||||
|
||||
def on_agent_finish(self, color: Optional[str] = None, **kwargs: Any) -> None:
|
||||
"""Run on agent end."""
|
||||
print_text("\n[on_agent_finish]\n Loop: " + str(self.current_loop) + "\n", color=self.color)
|
||||
if dify_config.DEBUG:
|
||||
print_text("\n[on_agent_finish]\n Loop: " + str(self.current_loop) + "\n", color=self.color)
|
||||
|
||||
self.current_loop += 1
|
||||
|
||||
@property
|
||||
def ignore_agent(self) -> bool:
|
||||
"""Whether to ignore agent callbacks."""
|
||||
return not os.environ.get("DEBUG") or os.environ.get("DEBUG", "").lower() != "true"
|
||||
return not dify_config.DEBUG
|
||||
|
||||
@property
|
||||
def ignore_chat_model(self) -> bool:
|
||||
"""Whether to ignore chat model callbacks."""
|
||||
return not os.environ.get("DEBUG") or os.environ.get("DEBUG", "").lower() != "true"
|
||||
return not dify_config.DEBUG
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ class DatasetIndexToolCallbackHandler:
|
|||
DocumentSegment.index_node_id == document.metadata["doc_id"]
|
||||
)
|
||||
|
||||
# if 'dataset_id' in document.metadata:
|
||||
if "dataset_id" in document.metadata:
|
||||
query = query.filter(DocumentSegment.dataset_id == document.metadata["dataset_id"])
|
||||
|
||||
|
|
@ -59,7 +58,7 @@ class DatasetIndexToolCallbackHandler:
|
|||
for item in resource:
|
||||
dataset_retriever_resource = DatasetRetrieverResource(
|
||||
message_id=self._message_id,
|
||||
position=item.get("position"),
|
||||
position=item.get("position") or 0,
|
||||
dataset_id=item.get("dataset_id"),
|
||||
dataset_name=item.get("dataset_name"),
|
||||
document_id=item.get("document_id"),
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from typing import Optional, cast
|
|||
import numpy as np
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from configs import dify_config
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.entities.model_entities import ModelPropertyKey
|
||||
|
|
@ -110,6 +111,8 @@ class CacheEmbedding(Embeddings):
|
|||
embedding_results = embedding_result.embeddings[0]
|
||||
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
|
||||
except Exception as ex:
|
||||
if dify_config.DEBUG:
|
||||
logging.exception(f"Failed to embed query text: {ex}")
|
||||
raise ex
|
||||
|
||||
try:
|
||||
|
|
@ -122,6 +125,8 @@ class CacheEmbedding(Embeddings):
|
|||
encoded_str = encoded_vector.decode("utf-8")
|
||||
redis_client.setex(embedding_cache_key, 600, encoded_str)
|
||||
except Exception as ex:
|
||||
logging.exception("Failed to add embedding to redis %s", ex)
|
||||
if dify_config.DEBUG:
|
||||
logging.exception("Failed to add embedding to redis %s", ex)
|
||||
raise ex
|
||||
|
||||
return embedding_results
|
||||
|
|
|
|||
|
|
@ -198,16 +198,34 @@ class MessageFileParser:
|
|||
if "amazonaws.com" not in parsed_url.netloc:
|
||||
return False
|
||||
query_params = parse_qs(parsed_url.query)
|
||||
required_params = ["Signature", "Expires"]
|
||||
for param in required_params:
|
||||
if param not in query_params:
|
||||
|
||||
def check_presign_v2(query_params):
|
||||
required_params = ["Signature", "Expires"]
|
||||
for param in required_params:
|
||||
if param not in query_params:
|
||||
return False
|
||||
if not query_params["Expires"][0].isdigit():
|
||||
return False
|
||||
if not query_params["Expires"][0].isdigit():
|
||||
return False
|
||||
signature = query_params["Signature"][0]
|
||||
if not re.match(r"^[A-Za-z0-9+/]+={0,2}$", signature):
|
||||
return False
|
||||
return True
|
||||
signature = query_params["Signature"][0]
|
||||
if not re.match(r"^[A-Za-z0-9+/]+={0,2}$", signature):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def check_presign_v4(query_params):
|
||||
required_params = ["X-Amz-Signature", "X-Amz-Expires"]
|
||||
for param in required_params:
|
||||
if param not in query_params:
|
||||
return False
|
||||
if not query_params["X-Amz-Expires"][0].isdigit():
|
||||
return False
|
||||
signature = query_params["X-Amz-Signature"][0]
|
||||
if not re.match(r"^[A-Za-z0-9+/]+={0,2}$", signature):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
return check_presign_v4(query_params) or check_presign_v2(query_params)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
|
|
|||
|
|
@ -211,9 +211,9 @@ class IndexingRunner:
|
|||
tenant_id: str,
|
||||
extract_settings: list[ExtractSetting],
|
||||
tmp_processing_rule: dict,
|
||||
doc_form: str = None,
|
||||
doc_form: Optional[str] = None,
|
||||
doc_language: str = "English",
|
||||
dataset_id: str = None,
|
||||
dataset_id: Optional[str] = None,
|
||||
indexing_technique: str = "economy",
|
||||
) -> dict:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -58,7 +58,11 @@ class TokenBufferMemory:
|
|||
# instead of all messages from the conversation, we only need to extract messages
|
||||
# that belong to the thread of last message
|
||||
thread_messages = extract_thread_messages(messages)
|
||||
thread_messages.pop(0)
|
||||
|
||||
# for newly created message, its answer is temporarily empty, we don't need to add it to memory
|
||||
if thread_messages and not thread_messages[0].answer:
|
||||
thread_messages.pop(0)
|
||||
|
||||
messages = list(reversed(thread_messages))
|
||||
|
||||
message_file_parser = MessageFileParser(tenant_id=app_record.tenant_id, app_id=app_record.id)
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ class LargeLanguageModel(AIModel):
|
|||
)
|
||||
|
||||
try:
|
||||
<<<<<<< HEAD
|
||||
plugin_model_manager = PluginModelManager()
|
||||
result = plugin_model_manager.invoke_llm(
|
||||
tenant_id=self.tenant_id,
|
||||
|
|
@ -116,6 +117,10 @@ class LargeLanguageModel(AIModel):
|
|||
break
|
||||
|
||||
result = LLMResult(
|
||||
=======
|
||||
if "response_format" in model_parameters and model_parameters["response_format"] in {"JSON", "XML"}:
|
||||
result = self._code_block_mode_wrapper(
|
||||
>>>>>>> main
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
message=AssistantPromptMessage(content=content or content_list),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,11 @@
|
|||
import logging
|
||||
<<<<<<< HEAD
|
||||
from typing import Optional
|
||||
=======
|
||||
import re
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Optional
|
||||
>>>>>>> main
|
||||
|
||||
from pydantic import ConfigDict
|
||||
|
||||
|
|
@ -59,6 +65,7 @@ class TTSModel(AIModel):
|
|||
:param credentials: model credentials
|
||||
:return: voices lists
|
||||
"""
|
||||
<<<<<<< HEAD
|
||||
plugin_model_manager = PluginModelManager()
|
||||
return plugin_model_manager.get_tts_model_voices(
|
||||
tenant_id=self.tenant_id,
|
||||
|
|
@ -69,3 +76,85 @@ class TTSModel(AIModel):
|
|||
credentials=credentials,
|
||||
language=language,
|
||||
)
|
||||
=======
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
|
||||
voices = model_schema.model_properties[ModelPropertyKey.VOICES]
|
||||
if language:
|
||||
return [
|
||||
{"name": d["name"], "value": d["mode"]}
|
||||
for d in voices
|
||||
if language and language in d.get("language")
|
||||
]
|
||||
else:
|
||||
return [{"name": d["name"], "value": d["mode"]} for d in voices]
|
||||
|
||||
def _get_model_default_voice(self, model: str, credentials: dict) -> Any:
|
||||
"""
|
||||
Get voice for given tts model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return: voice
|
||||
"""
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.DEFAULT_VOICE in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.DEFAULT_VOICE]
|
||||
|
||||
def _get_model_audio_type(self, model: str, credentials: dict) -> str:
|
||||
"""
|
||||
Get audio type for given tts model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return: voice
|
||||
"""
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.AUDIO_TYPE in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.AUDIO_TYPE]
|
||||
|
||||
def _get_model_word_limit(self, model: str, credentials: dict) -> int:
|
||||
"""
|
||||
Get audio type for given tts model
|
||||
:return: audio type
|
||||
"""
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.WORD_LIMIT in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.WORD_LIMIT]
|
||||
|
||||
def _get_model_workers_limit(self, model: str, credentials: dict) -> int:
|
||||
"""
|
||||
Get audio max workers for given tts model
|
||||
:return: audio type
|
||||
"""
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.MAX_WORKERS in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.MAX_WORKERS]
|
||||
|
||||
@staticmethod
|
||||
def _split_text_into_sentences(org_text, max_length=2000, pattern=r"[。.!?]"):
|
||||
match = re.compile(pattern)
|
||||
tx = match.finditer(org_text)
|
||||
start = 0
|
||||
result = []
|
||||
one_sentence = ""
|
||||
for i in tx:
|
||||
end = i.regs[0][1]
|
||||
tmp = org_text[start:end]
|
||||
if len(one_sentence + tmp) > max_length:
|
||||
result.append(one_sentence)
|
||||
one_sentence = ""
|
||||
one_sentence += tmp
|
||||
start = end
|
||||
last_sens = org_text[start:]
|
||||
if last_sens:
|
||||
one_sentence += last_sens
|
||||
if one_sentence != "":
|
||||
result.append(one_sentence)
|
||||
return result
|
||||
>>>>>>> main
|
||||
|
|
|
|||
|
|
@ -1,168 +0,0 @@
|
|||
from collections.abc import Mapping
|
||||
from typing import Optional
|
||||
|
||||
import openai
|
||||
from httpx import Timeout
|
||||
from openai import OpenAI
|
||||
from openai.types import ModerationCreateResponse
|
||||
|
||||
from core.model_runtime.entities.model_entities import ModelPropertyKey
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
|
||||
|
||||
|
||||
class OpenAIModerationModel(ModerationModel):
|
||||
"""
|
||||
Model class for OpenAI text moderation model.
|
||||
"""
|
||||
|
||||
def _invoke(self, model: str, credentials: dict, text: str, user: Optional[str] = None) -> bool:
|
||||
"""
|
||||
Invoke moderation model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param text: text to moderate
|
||||
:param user: unique user id
|
||||
:return: false if text is safe, true otherwise
|
||||
"""
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
|
||||
# init model client
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
# chars per chunk
|
||||
length = self._get_max_characters_per_chunk(model, credentials)
|
||||
text_chunks = [text[i : i + length] for i in range(0, len(text), length)]
|
||||
|
||||
max_text_chunks = self._get_max_chunks(model, credentials)
|
||||
chunks = [text_chunks[i : i + max_text_chunks] for i in range(0, len(text_chunks), max_text_chunks)]
|
||||
|
||||
for text_chunk in chunks:
|
||||
moderation_result = self._moderation_invoke(model=model, client=client, texts=text_chunk)
|
||||
|
||||
for result in moderation_result.results:
|
||||
if result.flagged is True:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
# call moderation model
|
||||
self._moderation_invoke(
|
||||
model=model,
|
||||
client=client,
|
||||
texts=["ping"],
|
||||
)
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _moderation_invoke(self, model: str, client: OpenAI, texts: list[str]) -> ModerationCreateResponse:
|
||||
"""
|
||||
Invoke moderation model
|
||||
|
||||
:param model: model name
|
||||
:param client: model client
|
||||
:param texts: texts to moderate
|
||||
:return: false if text is safe, true otherwise
|
||||
"""
|
||||
# call moderation model
|
||||
moderation_result = client.moderations.create(model=model, input=texts)
|
||||
|
||||
return moderation_result
|
||||
|
||||
def _get_max_characters_per_chunk(self, model: str, credentials: dict) -> int:
|
||||
"""
|
||||
Get max characters per chunk
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return: max characters per chunk
|
||||
"""
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.MAX_CHARACTERS_PER_CHUNK in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.MAX_CHARACTERS_PER_CHUNK]
|
||||
|
||||
return 2000
|
||||
|
||||
def _get_max_chunks(self, model: str, credentials: dict) -> int:
|
||||
"""
|
||||
Get max chunks for given embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return: max chunks
|
||||
"""
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.MAX_CHUNKS in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.MAX_CHUNKS]
|
||||
|
||||
return 1
|
||||
|
||||
def _to_credential_kwargs(self, credentials: Mapping) -> dict:
|
||||
"""
|
||||
Transform credentials to kwargs for model instance
|
||||
|
||||
:param credentials:
|
||||
:return:
|
||||
"""
|
||||
credentials_kwargs = {
|
||||
"api_key": credentials["openai_api_key"],
|
||||
"timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
|
||||
"max_retries": 1,
|
||||
}
|
||||
|
||||
if credentials.get("openai_api_base"):
|
||||
openai_api_base = credentials["openai_api_base"].rstrip("/")
|
||||
credentials_kwargs["base_url"] = openai_api_base + "/v1"
|
||||
|
||||
if "openai_organization" in credentials:
|
||||
credentials_kwargs["organization"] = credentials["openai_organization"]
|
||||
|
||||
return credentials_kwargs
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
|
||||
InvokeServerUnavailableError: [openai.InternalServerError],
|
||||
InvokeRateLimitError: [openai.RateLimitError],
|
||||
InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError],
|
||||
InvokeBadRequestError: [
|
||||
openai.BadRequestError,
|
||||
openai.NotFoundError,
|
||||
openai.UnprocessableEntityError,
|
||||
openai.APIError,
|
||||
],
|
||||
}
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
<svg version="1.0" xmlns="http://www.w3.org/2000/svg" width="100.000000pt" height="19.000000pt" viewBox="0 0 300.000000 57.000000" preserveAspectRatio="xMidYMid meet"><g transform="translate(0.000000,57.000000) scale(0.100000,-0.100000)" fill="#000000" stroke="none"><path d="M2505 368 c-38 -84 -86 -188 -106 -230 l-38 -78 27 0 c24 0 30 7 55
|
||||
75 l28 75 100 0 100 0 25 -55 c13 -31 24 -64 24 -75 0 -17 7 -20 44 -20 l43 0
|
||||
-37 73 c-20 39 -68 143 -106 229 -38 87 -74 158 -80 158 -5 0 -41 -69 -79
|
||||
-152z m110 -30 c22 -51 41 -95 42 -98 2 -3 -36 -6 -83 -7 -76 -1 -85 0 -81 15
|
||||
12 40 72 182 77 182 3 0 24 -41 45 -92z"/><path d="M63 493 c19 -61 197 -438 209 -440 10 -2 147 282 216 449 2 4 -10 8
|
||||
-27 8 -23 0 -31 -5 -31 -17 0 -16 -142 -365 -146 -360 -8 11 -144 329 -149
|
||||
350 -6 23 -12 27 -42 27 -29 0 -34 -3 -30 -17z"/><path d="M2855 285 l0 -225 30 0 30 0 0 225 0 225 -30 0 -30 0 0 -225z"/><path d="M588 380 c-55 -30 -82 -74 -86 -145 -3 -50 0 -66 20 -95 39 -58 82
|
||||
-80 153 -80 68 0 110 21 149 73 32 43 30 150 -3 196 -47 66 -158 90 -233 51z
|
||||
m133 -16 c59 -30 89 -156 54 -224 -45 -87 -162 -78 -201 16 -18 44 -18 128 1
|
||||
164 28 55 90 73 146 44z"/><path d="M935 303 l76 -98 -7 -72 -6 -73 33 0 34 0 -3 78 -4 77 71 93 c65 85
|
||||
68 92 46 92 -15 0 -29 -9 -36 -22 -18 -33 -90 -128 -98 -128 -6 1 -67 85 -88
|
||||
122 -8 15 -24 23 -53 25 l-41 4 76 -98z"/><path d="M1257 230 c-82 -169 -83 -170 -57 -170 17 0 27 6 27 15 0 8 7 31 17
|
||||
52 l17 38 79 0 78 1 16 -34 c9 -18 16 -42 16 -52 0 -17 7 -20 41 -20 22 0 39
|
||||
3 37 8 -2 4 -39 80 -83 170 -43 89 -84 162 -92 162 -7 0 -50 -76 -96 -170z
|
||||
m90 -38 c-33 -2 -61 -1 -63 1 -2 2 10 34 26 71 l31 68 33 -68 33 -69 -60 -3z"/><path d="M1665 386 c-37 -16 -84 -63 -97 -96 -13 -35 -12 -104 2 -132 49 -94
|
||||
182 -134 280 -83 24 12 29 22 32 64 3 49 3 49 -30 53 l-33 4 3 -45 c4 -61 -5
|
||||
-71 -60 -71 -93 0 -142 57 -142 164 0 44 5 60 25 85 47 55 136 65 184 20 30
|
||||
-28 35 -20 11 19 -19 31 -22 32 -82 32 -35 -1 -76 -7 -93 -14z"/><path d="M1955 230 l0 -170 91 0 c76 0 93 3 98 16 4 9 5 18 4 20 -2 1 -31 -1
|
||||
-66 -5 -34 -4 -64 -5 -67 -3 -3 3 -5 36 -5 73 l0 68 55 -6 c49 -5 55 -4 55 13
|
||||
0 17 -6 19 -55 16 l-55 -4 0 61 0 61 64 0 c48 0 65 4 70 15 4 13 -10 15 -92
|
||||
15 l-97 0 0 -170z"/></g></svg>
|
||||
|
Before Width: | Height: | Size: 2.2 KiB |
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg width="64px" height="64px" viewBox="0 0 64 64" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<title>voyage</title>
|
||||
<g id="voyage" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
|
||||
<rect id="矩形" fill="#333333" x="0" y="0" width="64" height="64" rx="12"></rect>
|
||||
<path d="M12.1128004,51.4376727 C13.8950799,45.8316747 30.5922254,11.1847688 31.7178757,11.0009656 C32.6559176,10.8171624 45.5070913,36.9172188 51.9795803,52.2647871 C52.1671887,52.6323936 51.0415384,53 49.4468672,53 C47.2893709,53 46.5389374,52.540492 46.5389374,51.4376727 C46.5389374,49.967247 33.2187427,17.8935861 32.8435259,18.3530942 C32.0930924,19.3640118 19.3357228,48.5887229 18.8667019,50.5186566 C18.3038768,52.6323936 17.7410516,53 14.926926,53 C12.2066045,53 11.7375836,52.7242952 12.1128004,51.4376727 Z" id="路径" fill="#FFFFFF" transform="translate(32, 32) scale(1, -1) translate(-32, -32)"></path>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1.0 KiB |
|
|
@ -1,4 +0,0 @@
|
|||
model: rerank-1
|
||||
model_type: rerank
|
||||
model_properties:
|
||||
context_size: 8000
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
model: rerank-lite-1
|
||||
model_type: rerank
|
||||
model_properties:
|
||||
context_size: 4000
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
|
||||
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
|
||||
|
||||
|
||||
class VoyageRerankModel(RerankModel):
|
||||
"""
|
||||
Model class for Voyage rerank model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
query: str,
|
||||
docs: list[str],
|
||||
score_threshold: Optional[float] = None,
|
||||
top_n: Optional[int] = None,
|
||||
user: Optional[str] = None,
|
||||
) -> RerankResult:
|
||||
"""
|
||||
Invoke rerank model
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param query: search query
|
||||
:param docs: docs for reranking
|
||||
:param score_threshold: score threshold
|
||||
:param top_n: top n documents to return
|
||||
:param user: unique user id
|
||||
:return: rerank result
|
||||
"""
|
||||
if len(docs) == 0:
|
||||
return RerankResult(model=model, docs=[])
|
||||
|
||||
base_url = credentials.get("base_url", "https://api.voyageai.com/v1")
|
||||
base_url = base_url.removesuffix("/")
|
||||
|
||||
try:
|
||||
response = httpx.post(
|
||||
base_url + "/rerank",
|
||||
json={"model": model, "query": query, "documents": docs, "top_k": top_n, "return_documents": True},
|
||||
headers={"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
results = response.json()
|
||||
|
||||
rerank_documents = []
|
||||
for result in results["data"]:
|
||||
rerank_document = RerankDocument(
|
||||
index=result["index"],
|
||||
text=result["document"],
|
||||
score=result["relevance_score"],
|
||||
)
|
||||
if score_threshold is None or result["relevance_score"] >= score_threshold:
|
||||
rerank_documents.append(rerank_document)
|
||||
|
||||
return RerankResult(model=model, docs=rerank_documents)
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise InvokeServerUnavailableError(str(e))
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
query="What is the capital of the United States?",
|
||||
docs=[
|
||||
"Carson City is the capital city of the American state of Nevada. At the 2010 United States "
|
||||
"Census, Carson City had a population of 55,274.",
|
||||
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
|
||||
"are a political division controlled by the United States. Its capital is Saipan.",
|
||||
],
|
||||
score_threshold=0.8,
|
||||
)
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [httpx.ConnectError],
|
||||
InvokeServerUnavailableError: [httpx.RemoteProtocolError],
|
||||
InvokeRateLimitError: [],
|
||||
InvokeAuthorizationError: [httpx.HTTPStatusError],
|
||||
InvokeBadRequestError: [httpx.RequestError],
|
||||
}
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.RERANK,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "8000"))},
|
||||
)
|
||||
|
||||
return entity
|
||||
|
|
@ -1,172 +0,0 @@
|
|||
import time
|
||||
from json import JSONDecodeError, dumps
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
|
||||
class VoyageTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Voyage text embedding model.
|
||||
"""
|
||||
|
||||
api_base: str = "https://api.voyageai.com/v1"
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
if not api_key:
|
||||
raise CredentialsValidateFailedError("api_key is required")
|
||||
|
||||
base_url = credentials.get("base_url", self.api_base)
|
||||
base_url = base_url.removesuffix("/")
|
||||
|
||||
url = base_url + "/embeddings"
|
||||
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
|
||||
voyage_input_type = "null"
|
||||
if input_type is not None:
|
||||
voyage_input_type = input_type.value
|
||||
data = {"model": model, "input": texts, "input_type": voyage_input_type}
|
||||
|
||||
try:
|
||||
response = requests.post(url, headers=headers, data=dumps(data))
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
try:
|
||||
resp = response.json()
|
||||
msg = resp["detail"]
|
||||
if response.status_code == 401:
|
||||
raise InvokeAuthorizationError(msg)
|
||||
elif response.status_code == 429:
|
||||
raise InvokeRateLimitError(msg)
|
||||
elif response.status_code == 500:
|
||||
raise InvokeServerUnavailableError(msg)
|
||||
else:
|
||||
raise InvokeBadRequestError(msg)
|
||||
except JSONDecodeError as e:
|
||||
raise InvokeServerUnavailableError(
|
||||
f"Failed to convert response to json: {e} with text: {response.text}"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = response.json()
|
||||
embeddings = resp["data"]
|
||||
usage = resp["usage"]
|
||||
except Exception as e:
|
||||
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError, InvokeBadRequestError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size"))},
|
||||
)
|
||||
|
||||
return entity
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
model: voyage-3-lite
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 32000
|
||||
pricing:
|
||||
input: '0.00002'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
model: voyage-3
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 32000
|
||||
pricing:
|
||||
input: '0.00006'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
import logging
|
||||
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VoyageProvider(ModelProvider):
|
||||
def validate_provider_credentials(self, credentials: dict) -> None:
|
||||
"""
|
||||
Validate provider credentials
|
||||
if validate failed, raise exception
|
||||
|
||||
:param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
|
||||
"""
|
||||
try:
|
||||
model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
|
||||
|
||||
# Use `voyage-3` model for validate,
|
||||
# no matter what model you pass in, text completion model or chat model
|
||||
model_instance.validate_credentials(model="voyage-3", credentials=credentials)
|
||||
except CredentialsValidateFailedError as ex:
|
||||
raise ex
|
||||
except Exception as ex:
|
||||
logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
|
||||
raise ex
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
provider: voyage
|
||||
label:
|
||||
en_US: Voyage
|
||||
description:
|
||||
en_US: Embedding and Rerank Model Supported
|
||||
icon_small:
|
||||
en_US: icon_s_en.svg
|
||||
icon_large:
|
||||
en_US: icon_l_en.svg
|
||||
background: "#EFFDFD"
|
||||
help:
|
||||
title:
|
||||
en_US: Get your API key from Voyage AI
|
||||
zh_Hans: 从 Voyage 获取 API Key
|
||||
url:
|
||||
en_US: https://dash.voyageai.com/
|
||||
supported_model_types:
|
||||
- text-embedding
|
||||
- rerank
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
|
|
@ -18,8 +18,12 @@ class KeywordsModeration(Moderation):
|
|||
if not config.get("keywords"):
|
||||
raise ValueError("keywords is required")
|
||||
|
||||
if len(config.get("keywords")) > 1000:
|
||||
raise ValueError("keywords length must be less than 1000")
|
||||
if len(config.get("keywords")) > 10000:
|
||||
raise ValueError("keywords length must be less than 10000")
|
||||
|
||||
keywords_row_len = config["keywords"].split("\n")
|
||||
if len(keywords_row_len) > 100:
|
||||
raise ValueError("the number of rows for the keywords must be less than 100")
|
||||
|
||||
def moderation_for_inputs(self, inputs: dict, query: str = "") -> ModerationInputsResult:
|
||||
flagged = False
|
||||
|
|
|
|||
|
|
@ -110,26 +110,35 @@ class LangFuseDataTrace(BaseTraceInstance):
|
|||
self.add_trace(langfuse_trace_data=trace_data)
|
||||
|
||||
# through workflow_run_id get all_nodes_execution
|
||||
workflow_nodes_executions = (
|
||||
db.session.query(
|
||||
WorkflowNodeExecution.id,
|
||||
WorkflowNodeExecution.tenant_id,
|
||||
WorkflowNodeExecution.app_id,
|
||||
WorkflowNodeExecution.title,
|
||||
WorkflowNodeExecution.node_type,
|
||||
WorkflowNodeExecution.status,
|
||||
WorkflowNodeExecution.inputs,
|
||||
WorkflowNodeExecution.outputs,
|
||||
WorkflowNodeExecution.created_at,
|
||||
WorkflowNodeExecution.elapsed_time,
|
||||
WorkflowNodeExecution.process_data,
|
||||
WorkflowNodeExecution.execution_metadata,
|
||||
)
|
||||
workflow_nodes_execution_id_records = (
|
||||
db.session.query(WorkflowNodeExecution.id)
|
||||
.filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
for node_execution in workflow_nodes_executions:
|
||||
for node_execution_id_record in workflow_nodes_execution_id_records:
|
||||
node_execution = (
|
||||
db.session.query(
|
||||
WorkflowNodeExecution.id,
|
||||
WorkflowNodeExecution.tenant_id,
|
||||
WorkflowNodeExecution.app_id,
|
||||
WorkflowNodeExecution.title,
|
||||
WorkflowNodeExecution.node_type,
|
||||
WorkflowNodeExecution.status,
|
||||
WorkflowNodeExecution.inputs,
|
||||
WorkflowNodeExecution.outputs,
|
||||
WorkflowNodeExecution.created_at,
|
||||
WorkflowNodeExecution.elapsed_time,
|
||||
WorkflowNodeExecution.process_data,
|
||||
WorkflowNodeExecution.execution_metadata,
|
||||
)
|
||||
.filter(WorkflowNodeExecution.id == node_execution_id_record.id)
|
||||
.first()
|
||||
)
|
||||
|
||||
if not node_execution:
|
||||
continue
|
||||
|
||||
node_execution_id = node_execution.id
|
||||
tenant_id = node_execution.tenant_id
|
||||
app_id = node_execution.app_id
|
||||
|
|
@ -159,6 +168,16 @@ class LangFuseDataTrace(BaseTraceInstance):
|
|||
"status": status,
|
||||
}
|
||||
)
|
||||
process_data = json.loads(node_execution.process_data) if node_execution.process_data else {}
|
||||
model_provider = process_data.get("model_provider", None)
|
||||
model_name = process_data.get("model_name", None)
|
||||
if model_provider is not None and model_name is not None:
|
||||
metadata.update(
|
||||
{
|
||||
"model_provider": model_provider,
|
||||
"model_name": model_name,
|
||||
}
|
||||
)
|
||||
|
||||
# add span
|
||||
if trace_info.message_id:
|
||||
|
|
@ -191,7 +210,6 @@ class LangFuseDataTrace(BaseTraceInstance):
|
|||
|
||||
self.add_span(langfuse_span_data=span_data)
|
||||
|
||||
process_data = json.loads(node_execution.process_data) if node_execution.process_data else {}
|
||||
if process_data and process_data.get("model_mode") == "chat":
|
||||
total_token = metadata.get("total_tokens", 0)
|
||||
# add generation
|
||||
|
|
|
|||
|
|
@ -100,26 +100,35 @@ class LangSmithDataTrace(BaseTraceInstance):
|
|||
self.add_run(langsmith_run)
|
||||
|
||||
# through workflow_run_id get all_nodes_execution
|
||||
workflow_nodes_executions = (
|
||||
db.session.query(
|
||||
WorkflowNodeExecution.id,
|
||||
WorkflowNodeExecution.tenant_id,
|
||||
WorkflowNodeExecution.app_id,
|
||||
WorkflowNodeExecution.title,
|
||||
WorkflowNodeExecution.node_type,
|
||||
WorkflowNodeExecution.status,
|
||||
WorkflowNodeExecution.inputs,
|
||||
WorkflowNodeExecution.outputs,
|
||||
WorkflowNodeExecution.created_at,
|
||||
WorkflowNodeExecution.elapsed_time,
|
||||
WorkflowNodeExecution.process_data,
|
||||
WorkflowNodeExecution.execution_metadata,
|
||||
)
|
||||
workflow_nodes_execution_id_records = (
|
||||
db.session.query(WorkflowNodeExecution.id)
|
||||
.filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
for node_execution in workflow_nodes_executions:
|
||||
for node_execution_id_record in workflow_nodes_execution_id_records:
|
||||
node_execution = (
|
||||
db.session.query(
|
||||
WorkflowNodeExecution.id,
|
||||
WorkflowNodeExecution.tenant_id,
|
||||
WorkflowNodeExecution.app_id,
|
||||
WorkflowNodeExecution.title,
|
||||
WorkflowNodeExecution.node_type,
|
||||
WorkflowNodeExecution.status,
|
||||
WorkflowNodeExecution.inputs,
|
||||
WorkflowNodeExecution.outputs,
|
||||
WorkflowNodeExecution.created_at,
|
||||
WorkflowNodeExecution.elapsed_time,
|
||||
WorkflowNodeExecution.process_data,
|
||||
WorkflowNodeExecution.execution_metadata,
|
||||
)
|
||||
.filter(WorkflowNodeExecution.id == node_execution_id_record.id)
|
||||
.first()
|
||||
)
|
||||
|
||||
if not node_execution:
|
||||
continue
|
||||
|
||||
node_execution_id = node_execution.id
|
||||
tenant_id = node_execution.tenant_id
|
||||
app_id = node_execution.app_id
|
||||
|
|
|
|||
|
|
@ -27,9 +27,11 @@ class BaseKeyword(ABC):
|
|||
def delete_by_ids(self, ids: list[str]) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def delete(self) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def search(self, query: str, **kwargs: Any) -> list[Document]:
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from core.rag.rerank.constants.rerank_mode import RerankMode
|
|||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset
|
||||
from services.external_knowledge_service import ExternalDatasetService
|
||||
|
||||
default_retrieval_model = {
|
||||
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
|
||||
|
|
@ -34,6 +35,9 @@ class RetrievalService:
|
|||
weights: Optional[dict] = None,
|
||||
):
|
||||
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
|
||||
if not dataset:
|
||||
return []
|
||||
|
||||
if not dataset or dataset.available_document_count == 0 or dataset.available_segment_count == 0:
|
||||
return []
|
||||
all_documents = []
|
||||
|
|
@ -108,6 +112,16 @@ class RetrievalService:
|
|||
)
|
||||
return all_documents
|
||||
|
||||
@classmethod
|
||||
def external_retrieve(cls, dataset_id: str, query: str, external_retrieval_model: Optional[dict] = None):
|
||||
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
|
||||
if not dataset:
|
||||
return []
|
||||
all_documents = ExternalDatasetService.fetch_external_knowledge_retrieval(
|
||||
dataset.tenant_id, dataset_id, query, external_retrieval_model
|
||||
)
|
||||
return all_documents
|
||||
|
||||
@classmethod
|
||||
def keyword_search(
|
||||
cls, flask_app: Flask, dataset_id: str, query: str, top_k: int, all_documents: list, exceptions: list
|
||||
|
|
|
|||
|
|
@ -0,0 +1,272 @@
|
|||
import json
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, model_validator
|
||||
from pymochow import MochowClient
|
||||
from pymochow.auth.bce_credentials import BceCredentials
|
||||
from pymochow.configuration import Configuration
|
||||
from pymochow.model.enum import FieldType, IndexState, IndexType, MetricType, TableState
|
||||
from pymochow.model.schema import Field, HNSWParams, Schema, VectorIndex
|
||||
from pymochow.model.table import AnnSearch, HNSWSearchParams, Partition, Row
|
||||
|
||||
from configs import dify_config
|
||||
from core.rag.datasource.entity.embedding import Embeddings
|
||||
from core.rag.datasource.vdb.vector_base import BaseVector
|
||||
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
|
||||
from core.rag.datasource.vdb.vector_type import VectorType
|
||||
from core.rag.models.document import Document
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.dataset import Dataset
|
||||
|
||||
|
||||
class BaiduConfig(BaseModel):
|
||||
endpoint: str
|
||||
connection_timeout_in_mills: int = 30 * 1000
|
||||
account: str
|
||||
api_key: str
|
||||
database: str
|
||||
index_type: str = "HNSW"
|
||||
metric_type: str = "L2"
|
||||
shard: int = 1
|
||||
replicas: int = 3
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def validate_config(cls, values: dict) -> dict:
|
||||
if not values["endpoint"]:
|
||||
raise ValueError("config BAIDU_VECTOR_DB_ENDPOINT is required")
|
||||
if not values["account"]:
|
||||
raise ValueError("config BAIDU_VECTOR_DB_ACCOUNT is required")
|
||||
if not values["api_key"]:
|
||||
raise ValueError("config BAIDU_VECTOR_DB_API_KEY is required")
|
||||
if not values["database"]:
|
||||
raise ValueError("config BAIDU_VECTOR_DB_DATABASE is required")
|
||||
return values
|
||||
|
||||
|
||||
class BaiduVector(BaseVector):
|
||||
field_id: str = "id"
|
||||
field_vector: str = "vector"
|
||||
field_text: str = "text"
|
||||
field_metadata: str = "metadata"
|
||||
field_app_id: str = "app_id"
|
||||
field_annotation_id: str = "annotation_id"
|
||||
index_vector: str = "vector_idx"
|
||||
|
||||
def __init__(self, collection_name: str, config: BaiduConfig):
|
||||
super().__init__(collection_name)
|
||||
self._client_config = config
|
||||
self._client = self._init_client(config)
|
||||
self._db = self._init_database()
|
||||
|
||||
def get_type(self) -> str:
|
||||
return VectorType.BAIDU
|
||||
|
||||
def to_index_struct(self) -> dict:
|
||||
return {"type": self.get_type(), "vector_store": {"class_prefix": self._collection_name}}
|
||||
|
||||
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
self._create_table(len(embeddings[0]))
|
||||
self.add_texts(texts, embeddings)
|
||||
|
||||
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
total_count = len(documents)
|
||||
batch_size = 1000
|
||||
|
||||
# upsert texts and embeddings batch by batch
|
||||
table = self._db.table(self._collection_name)
|
||||
for start in range(0, total_count, batch_size):
|
||||
end = min(start + batch_size, total_count)
|
||||
rows = []
|
||||
for i in range(start, end, 1):
|
||||
row = Row(
|
||||
id=metadatas[i].get("doc_id", str(uuid.uuid4())),
|
||||
vector=embeddings[i],
|
||||
text=texts[i],
|
||||
metadata=json.dumps(metadatas[i]),
|
||||
app_id=metadatas[i].get("app_id", ""),
|
||||
annotation_id=metadatas[i].get("annotation_id", ""),
|
||||
)
|
||||
rows.append(row)
|
||||
table.upsert(rows=rows)
|
||||
|
||||
# rebuild vector index after upsert finished
|
||||
table.rebuild_index(self.index_vector)
|
||||
while True:
|
||||
time.sleep(1)
|
||||
index = table.describe_index(self.index_vector)
|
||||
if index.state == IndexState.NORMAL:
|
||||
break
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
res = self._db.table(self._collection_name).query(primary_key={self.field_id: id})
|
||||
if res and res.code == 0:
|
||||
return True
|
||||
return False
|
||||
|
||||
def delete_by_ids(self, ids: list[str]) -> None:
|
||||
quoted_ids = [f"'{id}'" for id in ids]
|
||||
self._db.table(self._collection_name).delete(filter=f"id IN({', '.join(quoted_ids)})")
|
||||
|
||||
def delete_by_metadata_field(self, key: str, value: str) -> None:
|
||||
self._db.table(self._collection_name).delete(filter=f"{key} = '{value}'")
|
||||
|
||||
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
|
||||
anns = AnnSearch(
|
||||
vector_field=self.field_vector,
|
||||
vector_floats=query_vector,
|
||||
params=HNSWSearchParams(ef=kwargs.get("ef", 10), limit=kwargs.get("top_k", 4)),
|
||||
)
|
||||
res = self._db.table(self._collection_name).search(
|
||||
anns=anns,
|
||||
projections=[self.field_id, self.field_text, self.field_metadata],
|
||||
retrieve_vector=True,
|
||||
)
|
||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||
return self._get_search_res(res, score_threshold)
|
||||
|
||||
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
|
||||
# baidu vector database doesn't support bm25 search on current version
|
||||
return []
|
||||
|
||||
def _get_search_res(self, res, score_threshold):
|
||||
docs = []
|
||||
for row in res.rows:
|
||||
row_data = row.get("row", {})
|
||||
meta = row_data.get(self.field_metadata)
|
||||
if meta is not None:
|
||||
meta = json.loads(meta)
|
||||
score = row.get("score", 0.0)
|
||||
if score > score_threshold:
|
||||
meta["score"] = score
|
||||
doc = Document(page_content=row_data.get(self.field_text), metadata=meta)
|
||||
docs.append(doc)
|
||||
|
||||
return docs
|
||||
|
||||
def delete(self) -> None:
|
||||
self._db.drop_table(table_name=self._collection_name)
|
||||
|
||||
def _init_client(self, config) -> MochowClient:
|
||||
config = Configuration(credentials=BceCredentials(config.account, config.api_key), endpoint=config.endpoint)
|
||||
client = MochowClient(config)
|
||||
return client
|
||||
|
||||
def _init_database(self):
|
||||
exists = False
|
||||
for db in self._client.list_databases():
|
||||
if db.database_name == self._client_config.database:
|
||||
exists = True
|
||||
break
|
||||
# Create database if not existed
|
||||
if exists:
|
||||
return self._client.database(self._client_config.database)
|
||||
else:
|
||||
return self._client.create_database(database_name=self._client_config.database)
|
||||
|
||||
def _table_existed(self) -> bool:
|
||||
tables = self._db.list_table()
|
||||
return any(table.table_name == self._collection_name for table in tables)
|
||||
|
||||
def _create_table(self, dimension: int) -> None:
|
||||
# Try to grab distributed lock and create table
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
table_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
if redis_client.get(table_exist_cache_key):
|
||||
return
|
||||
|
||||
if self._table_existed():
|
||||
return
|
||||
|
||||
self.delete()
|
||||
|
||||
# check IndexType and MetricType
|
||||
index_type = None
|
||||
for k, v in IndexType.__members__.items():
|
||||
if k == self._client_config.index_type:
|
||||
index_type = v
|
||||
if index_type is None:
|
||||
raise ValueError("unsupported index_type")
|
||||
metric_type = None
|
||||
for k, v in MetricType.__members__.items():
|
||||
if k == self._client_config.metric_type:
|
||||
metric_type = v
|
||||
if metric_type is None:
|
||||
raise ValueError("unsupported metric_type")
|
||||
|
||||
# Construct field schema
|
||||
fields = []
|
||||
fields.append(
|
||||
Field(
|
||||
self.field_id,
|
||||
FieldType.STRING,
|
||||
primary_key=True,
|
||||
partition_key=True,
|
||||
auto_increment=False,
|
||||
not_null=True,
|
||||
)
|
||||
)
|
||||
fields.append(Field(self.field_metadata, FieldType.STRING, not_null=True))
|
||||
fields.append(Field(self.field_app_id, FieldType.STRING))
|
||||
fields.append(Field(self.field_annotation_id, FieldType.STRING))
|
||||
fields.append(Field(self.field_text, FieldType.TEXT, not_null=True))
|
||||
fields.append(Field(self.field_vector, FieldType.FLOAT_VECTOR, not_null=True, dimension=dimension))
|
||||
|
||||
# Construct vector index params
|
||||
indexes = []
|
||||
indexes.append(
|
||||
VectorIndex(
|
||||
index_name="vector_idx",
|
||||
index_type=index_type,
|
||||
field="vector",
|
||||
metric_type=metric_type,
|
||||
params=HNSWParams(m=16, efconstruction=200),
|
||||
)
|
||||
)
|
||||
|
||||
# Create table
|
||||
self._db.create_table(
|
||||
table_name=self._collection_name,
|
||||
replication=self._client_config.replicas,
|
||||
partition=Partition(partition_num=self._client_config.shard),
|
||||
schema=Schema(fields=fields, indexes=indexes),
|
||||
description="Table for Dify",
|
||||
)
|
||||
|
||||
redis_client.set(table_exist_cache_key, 1, ex=3600)
|
||||
|
||||
# Wait for table created
|
||||
while True:
|
||||
time.sleep(1)
|
||||
table = self._db.describe_table(self._collection_name)
|
||||
if table.state == TableState.NORMAL:
|
||||
break
|
||||
|
||||
|
||||
class BaiduVectorFactory(AbstractVectorFactory):
|
||||
def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> BaiduVector:
|
||||
if dataset.index_struct_dict:
|
||||
class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
|
||||
collection_name = class_prefix.lower()
|
||||
else:
|
||||
dataset_id = dataset.id
|
||||
collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
|
||||
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.BAIDU, collection_name))
|
||||
|
||||
return BaiduVector(
|
||||
collection_name=collection_name,
|
||||
config=BaiduConfig(
|
||||
endpoint=dify_config.BAIDU_VECTOR_DB_ENDPOINT,
|
||||
connection_timeout_in_mills=dify_config.BAIDU_VECTOR_DB_CONNECTION_TIMEOUT_MS,
|
||||
account=dify_config.BAIDU_VECTOR_DB_ACCOUNT,
|
||||
api_key=dify_config.BAIDU_VECTOR_DB_API_KEY,
|
||||
database=dify_config.BAIDU_VECTOR_DB_DATABASE,
|
||||
shard=dify_config.BAIDU_VECTOR_DB_SHARD,
|
||||
replicas=dify_config.BAIDU_VECTOR_DB_REPLICAS,
|
||||
),
|
||||
)
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import json
|
||||
import logging
|
||||
import math
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
|
@ -76,7 +77,7 @@ class ElasticSearchVector(BaseVector):
|
|||
raise ValueError("Elasticsearch vector database version must be greater than 8.0.0")
|
||||
|
||||
def get_type(self) -> str:
|
||||
return "elasticsearch"
|
||||
return VectorType.ELASTICSEARCH
|
||||
|
||||
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
uuids = self._get_uuids(documents)
|
||||
|
|
@ -112,7 +113,8 @@ class ElasticSearchVector(BaseVector):
|
|||
|
||||
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
|
||||
top_k = kwargs.get("top_k", 10)
|
||||
knn = {"field": Field.VECTOR.value, "query_vector": query_vector, "k": top_k}
|
||||
num_candidates = math.ceil(top_k * 1.5)
|
||||
knn = {"field": Field.VECTOR.value, "query_vector": query_vector, "k": top_k, "num_candidates": num_candidates}
|
||||
|
||||
results = self._client.search(index=self._collection_name, knn=knn, size=top_k)
|
||||
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ class PGVector(BaseVector):
|
|||
|
||||
with self._get_cursor() as cur:
|
||||
cur.execute(
|
||||
f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), to_tsquery(%s)) AS score
|
||||
f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score
|
||||
FROM {self.table_name}
|
||||
WHERE to_tsvector(text) @@ plainto_tsquery(%s)
|
||||
ORDER BY score DESC
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ class RelytVector(BaseVector):
|
|||
else:
|
||||
return None
|
||||
|
||||
def delete_by_uuids(self, ids: list[str] = None):
|
||||
def delete_by_uuids(self, ids: Optional[list[str]] = None):
|
||||
"""Delete by vector IDs.
|
||||
|
||||
Args:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
|
||||
from configs import dify_config
|
||||
from core.embedding.cached_embedding import CacheEmbedding
|
||||
|
|
@ -25,7 +25,7 @@ class AbstractVectorFactory(ABC):
|
|||
|
||||
|
||||
class Vector:
|
||||
def __init__(self, dataset: Dataset, attributes: list = None):
|
||||
def __init__(self, dataset: Dataset, attributes: Optional[list] = None):
|
||||
if attributes is None:
|
||||
attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"]
|
||||
self._dataset = dataset
|
||||
|
|
@ -103,10 +103,18 @@ class Vector:
|
|||
from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory
|
||||
|
||||
return AnalyticdbVectorFactory
|
||||
case VectorType.BAIDU:
|
||||
from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory
|
||||
|
||||
return BaiduVectorFactory
|
||||
case VectorType.VIKINGDB:
|
||||
from core.rag.datasource.vdb.vikingdb.vikingdb_vector import VikingDBVectorFactory
|
||||
|
||||
return VikingDBVectorFactory
|
||||
case _:
|
||||
raise ValueError(f"Vector store {vector_type} is not supported.")
|
||||
|
||||
def create(self, texts: list = None, **kwargs):
|
||||
def create(self, texts: Optional[list] = None, **kwargs):
|
||||
if texts:
|
||||
embeddings = self._embeddings.embed_documents([document.page_content for document in texts])
|
||||
self._vector_processor.create(texts=texts, embeddings=embeddings, **kwargs)
|
||||
|
|
|
|||
|
|
@ -16,3 +16,5 @@ class VectorType(str, Enum):
|
|||
TENCENT = "tencent"
|
||||
ORACLE = "oracle"
|
||||
ELASTICSEARCH = "elasticsearch"
|
||||
BAIDU = "baidu"
|
||||
VIKINGDB = "vikingdb"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,239 @@
|
|||
import json
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from volcengine.viking_db import (
|
||||
Data,
|
||||
DistanceType,
|
||||
Field,
|
||||
FieldType,
|
||||
IndexType,
|
||||
QuantType,
|
||||
VectorIndexParams,
|
||||
VikingDBService,
|
||||
)
|
||||
|
||||
from configs import dify_config
|
||||
from core.rag.datasource.entity.embedding import Embeddings
|
||||
from core.rag.datasource.vdb.field import Field as vdb_Field
|
||||
from core.rag.datasource.vdb.vector_base import BaseVector
|
||||
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
|
||||
from core.rag.datasource.vdb.vector_type import VectorType
|
||||
from core.rag.models.document import Document
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.dataset import Dataset
|
||||
|
||||
|
||||
class VikingDBConfig(BaseModel):
|
||||
access_key: str
|
||||
secret_key: str
|
||||
host: str
|
||||
region: str
|
||||
scheme: str
|
||||
connection_timeout: int
|
||||
socket_timeout: int
|
||||
index_type: str = IndexType.HNSW
|
||||
distance: str = DistanceType.L2
|
||||
quant: str = QuantType.Float
|
||||
|
||||
|
||||
class VikingDBVector(BaseVector):
|
||||
def __init__(self, collection_name: str, group_id: str, config: VikingDBConfig):
|
||||
super().__init__(collection_name)
|
||||
self._group_id = group_id
|
||||
self._client_config = config
|
||||
self._index_name = f"{self._collection_name}_idx"
|
||||
self._client = VikingDBService(
|
||||
host=config.host,
|
||||
region=config.region,
|
||||
scheme=config.scheme,
|
||||
connection_timeout=config.connection_timeout,
|
||||
socket_timeout=config.socket_timeout,
|
||||
ak=config.access_key,
|
||||
sk=config.secret_key,
|
||||
)
|
||||
|
||||
def _has_collection(self) -> bool:
|
||||
try:
|
||||
self._client.get_collection(self._collection_name)
|
||||
except Exception:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _has_index(self) -> bool:
|
||||
try:
|
||||
self._client.get_index(self._collection_name, self._index_name)
|
||||
except Exception:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _create_collection(self, dimension: int):
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
|
||||
if not self._has_collection():
|
||||
fields = [
|
||||
Field(field_name=vdb_Field.PRIMARY_KEY.value, field_type=FieldType.String, is_primary_key=True),
|
||||
Field(field_name=vdb_Field.METADATA_KEY.value, field_type=FieldType.String),
|
||||
Field(field_name=vdb_Field.GROUP_KEY.value, field_type=FieldType.String),
|
||||
Field(field_name=vdb_Field.CONTENT_KEY.value, field_type=FieldType.Text),
|
||||
Field(field_name=vdb_Field.VECTOR.value, field_type=FieldType.Vector, dim=dimension),
|
||||
]
|
||||
|
||||
self._client.create_collection(
|
||||
collection_name=self._collection_name,
|
||||
fields=fields,
|
||||
description="Collection For Dify",
|
||||
)
|
||||
|
||||
if not self._has_index():
|
||||
vector_index = VectorIndexParams(
|
||||
distance=self._client_config.distance,
|
||||
index_type=self._client_config.index_type,
|
||||
quant=self._client_config.quant,
|
||||
)
|
||||
|
||||
self._client.create_index(
|
||||
collection_name=self._collection_name,
|
||||
index_name=self._index_name,
|
||||
vector_index=vector_index,
|
||||
partition_by=vdb_Field.GROUP_KEY.value,
|
||||
description="Index For Dify",
|
||||
)
|
||||
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
||||
|
||||
def get_type(self) -> str:
|
||||
return VectorType.VIKINGDB
|
||||
|
||||
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
dimension = len(embeddings[0])
|
||||
self._create_collection(dimension)
|
||||
self.add_texts(texts, embeddings, **kwargs)
|
||||
|
||||
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
page_contents = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
docs = []
|
||||
|
||||
for i, page_content in enumerate(page_contents):
|
||||
metadata = {}
|
||||
if metadatas is not None:
|
||||
for key, val in metadatas[i].items():
|
||||
metadata[key] = val
|
||||
doc = Data(
|
||||
{
|
||||
vdb_Field.PRIMARY_KEY.value: metadatas[i]["doc_id"],
|
||||
vdb_Field.VECTOR.value: embeddings[i] if embeddings else None,
|
||||
vdb_Field.CONTENT_KEY.value: page_content,
|
||||
vdb_Field.METADATA_KEY.value: json.dumps(metadata),
|
||||
vdb_Field.GROUP_KEY.value: self._group_id,
|
||||
}
|
||||
)
|
||||
docs.append(doc)
|
||||
|
||||
self._client.get_collection(self._collection_name).upsert_data(docs)
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
docs = self._client.get_collection(self._collection_name).fetch_data(id)
|
||||
not_exists_str = "data does not exist"
|
||||
if docs is not None and not_exists_str not in docs.fields.get("message", ""):
|
||||
return True
|
||||
return False
|
||||
|
||||
def delete_by_ids(self, ids: list[str]) -> None:
|
||||
self._client.get_collection(self._collection_name).delete_data(ids)
|
||||
|
||||
def get_ids_by_metadata_field(self, key: str, value: str):
|
||||
# Note: Metadata field value is an dict, but vikingdb field
|
||||
# not support json type
|
||||
results = self._client.get_index(self._collection_name, self._index_name).search(
|
||||
filter={"op": "must", "field": vdb_Field.GROUP_KEY.value, "conds": [self._group_id]},
|
||||
# max value is 5000
|
||||
limit=5000,
|
||||
)
|
||||
|
||||
if not results:
|
||||
return []
|
||||
|
||||
ids = []
|
||||
for result in results:
|
||||
metadata = result.fields.get(vdb_Field.METADATA_KEY.value)
|
||||
if metadata is not None:
|
||||
metadata = json.loads(metadata)
|
||||
if metadata.get(key) == value:
|
||||
ids.append(result.id)
|
||||
return ids
|
||||
|
||||
def delete_by_metadata_field(self, key: str, value: str) -> None:
|
||||
ids = self.get_ids_by_metadata_field(key, value)
|
||||
self.delete_by_ids(ids)
|
||||
|
||||
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
|
||||
results = self._client.get_index(self._collection_name, self._index_name).search_by_vector(
|
||||
query_vector, limit=kwargs.get("top_k", 50)
|
||||
)
|
||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||
return self._get_search_res(results, score_threshold)
|
||||
|
||||
def _get_search_res(self, results, score_threshold):
|
||||
if len(results) == 0:
|
||||
return []
|
||||
|
||||
docs = []
|
||||
for result in results:
|
||||
metadata = result.fields.get(vdb_Field.METADATA_KEY.value)
|
||||
if metadata is not None:
|
||||
metadata = json.loads(metadata)
|
||||
if result.score > score_threshold:
|
||||
metadata["score"] = result.score
|
||||
doc = Document(page_content=result.fields.get(vdb_Field.CONTENT_KEY.value), metadata=metadata)
|
||||
docs.append(doc)
|
||||
docs = sorted(docs, key=lambda x: x.metadata["score"], reverse=True)
|
||||
return docs
|
||||
|
||||
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
|
||||
return []
|
||||
|
||||
def delete(self) -> None:
|
||||
if self._has_index():
|
||||
self._client.drop_index(self._collection_name, self._index_name)
|
||||
if self._has_collection():
|
||||
self._client.drop_collection(self._collection_name)
|
||||
|
||||
|
||||
class VikingDBVectorFactory(AbstractVectorFactory):
|
||||
def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> VikingDBVector:
|
||||
if dataset.index_struct_dict:
|
||||
class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
|
||||
collection_name = class_prefix.lower()
|
||||
else:
|
||||
dataset_id = dataset.id
|
||||
collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
|
||||
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.VIKINGDB, collection_name))
|
||||
|
||||
if dify_config.VIKINGDB_ACCESS_KEY is None:
|
||||
raise ValueError("VIKINGDB_ACCESS_KEY should not be None")
|
||||
if dify_config.VIKINGDB_SECRET_KEY is None:
|
||||
raise ValueError("VIKINGDB_SECRET_KEY should not be None")
|
||||
if dify_config.VIKINGDB_HOST is None:
|
||||
raise ValueError("VIKINGDB_HOST should not be None")
|
||||
if dify_config.VIKINGDB_REGION is None:
|
||||
raise ValueError("VIKINGDB_REGION should not be None")
|
||||
if dify_config.VIKINGDB_SCHEME is None:
|
||||
raise ValueError("VIKINGDB_SCHEME should not be None")
|
||||
return VikingDBVector(
|
||||
collection_name=collection_name,
|
||||
group_id=dataset.id,
|
||||
config=VikingDBConfig(
|
||||
access_key=dify_config.VIKINGDB_ACCESS_KEY,
|
||||
secret_key=dify_config.VIKINGDB_SECRET_KEY,
|
||||
host=dify_config.VIKINGDB_HOST,
|
||||
region=dify_config.VIKINGDB_REGION,
|
||||
scheme=dify_config.VIKINGDB_SCHEME,
|
||||
connection_timeout=dify_config.VIKINGDB_CONNECTION_TIMEOUT,
|
||||
socket_timeout=dify_config.VIKINGDB_SOCKET_TIMEOUT,
|
||||
),
|
||||
)
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class DocumentContext(BaseModel):
|
||||
"""
|
||||
Model class for document context.
|
||||
"""
|
||||
|
||||
content: str
|
||||
score: Optional[float] = None
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
from typing import Optional, Union
|
||||
from urllib.parse import unquote
|
||||
|
||||
from configs import dify_config
|
||||
|
|
@ -12,6 +12,7 @@ from core.rag.extractor.entity.extract_setting import ExtractSetting
|
|||
from core.rag.extractor.excel_extractor import ExcelExtractor
|
||||
from core.rag.extractor.firecrawl.firecrawl_web_extractor import FirecrawlWebExtractor
|
||||
from core.rag.extractor.html_extractor import HtmlExtractor
|
||||
from core.rag.extractor.jina_reader_extractor import JinaReaderWebExtractor
|
||||
from core.rag.extractor.markdown_extractor import MarkdownExtractor
|
||||
from core.rag.extractor.notion_extractor import NotionExtractor
|
||||
from core.rag.extractor.pdf_extractor import PdfExtractor
|
||||
|
|
@ -83,7 +84,7 @@ class ExtractProcessor:
|
|||
|
||||
@classmethod
|
||||
def extract(
|
||||
cls, extract_setting: ExtractSetting, is_automatic: bool = False, file_path: str = None
|
||||
cls, extract_setting: ExtractSetting, is_automatic: bool = False, file_path: Optional[str] = None
|
||||
) -> list[Document]:
|
||||
if extract_setting.datasource_type == DatasourceType.FILE.value:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
|
|
@ -171,6 +172,15 @@ class ExtractProcessor:
|
|||
only_main_content=extract_setting.website_info.only_main_content,
|
||||
)
|
||||
return extractor.extract()
|
||||
elif extract_setting.website_info.provider == "jinareader":
|
||||
extractor = JinaReaderWebExtractor(
|
||||
url=extract_setting.website_info.url,
|
||||
job_id=extract_setting.website_info.job_id,
|
||||
tenant_id=extract_setting.website_info.tenant_id,
|
||||
mode=extract_setting.website_info.mode,
|
||||
only_main_content=extract_setting.website_info.only_main_content,
|
||||
)
|
||||
return extractor.extract()
|
||||
else:
|
||||
raise ValueError(f"Unsupported website provider: {extract_setting.website_info.provider}")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.models.document import Document
|
||||
from services.website_service import WebsiteService
|
||||
|
||||
|
||||
class JinaReaderWebExtractor(BaseExtractor):
|
||||
"""
|
||||
Crawl and scrape websites and return content in clean llm-ready markdown.
|
||||
"""
|
||||
|
||||
def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = False):
|
||||
"""Initialize with url, api_key, base_url and mode."""
|
||||
self._url = url
|
||||
self.job_id = job_id
|
||||
self.tenant_id = tenant_id
|
||||
self.mode = mode
|
||||
self.only_main_content = only_main_content
|
||||
|
||||
def extract(self) -> list[Document]:
|
||||
"""Extract content from the URL."""
|
||||
documents = []
|
||||
if self.mode == "crawl":
|
||||
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "jinareader", self._url, self.tenant_id)
|
||||
if crawl_data is None:
|
||||
return []
|
||||
document = Document(
|
||||
page_content=crawl_data.get("content", ""),
|
||||
metadata={
|
||||
"source_url": crawl_data.get("url"),
|
||||
"description": crawl_data.get("description"),
|
||||
"title": crawl_data.get("title"),
|
||||
},
|
||||
)
|
||||
documents.append(document)
|
||||
return documents
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.models.document import Document
|
||||
|
|
@ -17,7 +18,7 @@ class UnstructuredEpubExtractor(BaseExtractor):
|
|||
def __init__(
|
||||
self,
|
||||
file_path: str,
|
||||
api_url: str = None,
|
||||
api_url: Optional[str] = None,
|
||||
):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ class Document(BaseModel):
|
|||
"""
|
||||
metadata: Optional[dict] = Field(default_factory=dict)
|
||||
|
||||
provider: Optional[str] = "dify"
|
||||
|
||||
|
||||
class BaseDocumentTransformer(ABC):
|
||||
"""Abstract base class for document transformation systems.
|
||||
|
|
|
|||
|
|
@ -28,11 +28,16 @@ class RerankModelRunner:
|
|||
docs = []
|
||||
doc_id = []
|
||||
unique_documents = []
|
||||
for document in documents:
|
||||
dify_documents = [item for item in documents if item.provider == "dify"]
|
||||
external_documents = [item for item in documents if item.provider == "external"]
|
||||
for document in dify_documents:
|
||||
if document.metadata["doc_id"] not in doc_id:
|
||||
doc_id.append(document.metadata["doc_id"])
|
||||
docs.append(document.page_content)
|
||||
unique_documents.append(document)
|
||||
for document in external_documents:
|
||||
docs.append(document.page_content)
|
||||
unique_documents.append(document)
|
||||
|
||||
documents = unique_documents
|
||||
|
||||
|
|
@ -46,14 +51,10 @@ class RerankModelRunner:
|
|||
# format document
|
||||
rerank_document = Document(
|
||||
page_content=result.text,
|
||||
metadata={
|
||||
"doc_id": documents[result.index].metadata["doc_id"],
|
||||
"doc_hash": documents[result.index].metadata["doc_hash"],
|
||||
"document_id": documents[result.index].metadata["document_id"],
|
||||
"dataset_id": documents[result.index].metadata["dataset_id"],
|
||||
"score": result.score,
|
||||
},
|
||||
metadata=documents[result.index].metadata,
|
||||
provider=documents[result.index].provider,
|
||||
)
|
||||
rerank_document.metadata["score"] = result.score
|
||||
rerank_documents.append(rerank_document)
|
||||
|
||||
return rerank_documents
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from core.ops.utils import measure_time
|
|||
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
|
||||
from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaKeywordTableHandler
|
||||
from core.rag.datasource.retrieval_service import RetrievalService
|
||||
from core.rag.entities.context_entities import DocumentContext
|
||||
from core.rag.models.document import Document
|
||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||
from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter
|
||||
|
|
@ -28,6 +29,7 @@ from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import Datas
|
|||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset, DatasetQuery, DocumentSegment
|
||||
from models.dataset import Document as DatasetDocument
|
||||
from services.external_knowledge_service import ExternalDatasetService
|
||||
|
||||
default_retrieval_model = {
|
||||
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
|
||||
|
|
@ -108,7 +110,7 @@ class DatasetRetrieval:
|
|||
continue
|
||||
|
||||
# pass if dataset is not available
|
||||
if dataset and dataset.available_document_count == 0:
|
||||
if dataset and dataset.available_document_count == 0 and dataset.provider != "external":
|
||||
continue
|
||||
|
||||
available_datasets.append(dataset)
|
||||
|
|
@ -144,69 +146,96 @@ class DatasetRetrieval:
|
|||
message_id,
|
||||
)
|
||||
|
||||
document_score_list = {}
|
||||
for item in all_documents:
|
||||
if item.metadata.get("score"):
|
||||
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
|
||||
|
||||
dify_documents = [item for item in all_documents if item.provider == "dify"]
|
||||
external_documents = [item for item in all_documents if item.provider == "external"]
|
||||
document_context_list = []
|
||||
index_node_ids = [document.metadata["doc_id"] for document in all_documents]
|
||||
segments = DocumentSegment.query.filter(
|
||||
DocumentSegment.dataset_id.in_(dataset_ids),
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == "completed",
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids),
|
||||
).all()
|
||||
retrieval_resource_list = []
|
||||
# deal with external documents
|
||||
for item in external_documents:
|
||||
document_context_list.append(DocumentContext(content=item.page_content, score=item.metadata.get("score")))
|
||||
source = {
|
||||
"dataset_id": item.metadata.get("dataset_id"),
|
||||
"dataset_name": item.metadata.get("dataset_name"),
|
||||
"document_name": item.metadata.get("title"),
|
||||
"data_source_type": "external",
|
||||
"retriever_from": invoke_from.to_source(),
|
||||
"score": item.metadata.get("score"),
|
||||
"content": item.page_content,
|
||||
}
|
||||
retrieval_resource_list.append(source)
|
||||
document_score_list = {}
|
||||
# deal with dify documents
|
||||
if dify_documents:
|
||||
for item in dify_documents:
|
||||
if item.metadata.get("score"):
|
||||
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(
|
||||
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
|
||||
)
|
||||
for segment in sorted_segments:
|
||||
if segment.answer:
|
||||
document_context_list.append(f"question:{segment.get_sign_content()} answer:{segment.answer}")
|
||||
else:
|
||||
document_context_list.append(segment.get_sign_content())
|
||||
if show_retrieve_source:
|
||||
context_list = []
|
||||
resource_number = 1
|
||||
index_node_ids = [document.metadata["doc_id"] for document in dify_documents]
|
||||
segments = DocumentSegment.query.filter(
|
||||
DocumentSegment.dataset_id.in_(dataset_ids),
|
||||
DocumentSegment.status == "completed",
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids),
|
||||
).all()
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(
|
||||
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
|
||||
)
|
||||
for segment in sorted_segments:
|
||||
dataset = Dataset.query.filter_by(id=segment.dataset_id).first()
|
||||
document = DatasetDocument.query.filter(
|
||||
DatasetDocument.id == segment.document_id,
|
||||
DatasetDocument.enabled == True,
|
||||
DatasetDocument.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
"position": resource_number,
|
||||
"dataset_id": dataset.id,
|
||||
"dataset_name": dataset.name,
|
||||
"document_id": document.id,
|
||||
"document_name": document.name,
|
||||
"data_source_type": document.data_source_type,
|
||||
"segment_id": segment.id,
|
||||
"retriever_from": invoke_from.to_source(),
|
||||
"score": document_score_list.get(segment.index_node_id, None),
|
||||
}
|
||||
if segment.answer:
|
||||
document_context_list.append(
|
||||
DocumentContext(
|
||||
content=f"question:{segment.get_sign_content()} answer:{segment.answer}",
|
||||
score=document_score_list.get(segment.index_node_id, None),
|
||||
)
|
||||
)
|
||||
else:
|
||||
document_context_list.append(
|
||||
DocumentContext(
|
||||
content=segment.get_sign_content(),
|
||||
score=document_score_list.get(segment.index_node_id, None),
|
||||
)
|
||||
)
|
||||
if show_retrieve_source:
|
||||
for segment in sorted_segments:
|
||||
dataset = Dataset.query.filter_by(id=segment.dataset_id).first()
|
||||
document = DatasetDocument.query.filter(
|
||||
DatasetDocument.id == segment.document_id,
|
||||
DatasetDocument.enabled == True,
|
||||
DatasetDocument.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
"dataset_id": dataset.id,
|
||||
"dataset_name": dataset.name,
|
||||
"document_id": document.id,
|
||||
"document_name": document.name,
|
||||
"data_source_type": document.data_source_type,
|
||||
"segment_id": segment.id,
|
||||
"retriever_from": invoke_from.to_source(),
|
||||
"score": document_score_list.get(segment.index_node_id, None),
|
||||
}
|
||||
|
||||
if invoke_from.to_source() == "dev":
|
||||
source["hit_count"] = segment.hit_count
|
||||
source["word_count"] = segment.word_count
|
||||
source["segment_position"] = segment.position
|
||||
source["index_node_hash"] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
|
||||
else:
|
||||
source["content"] = segment.content
|
||||
context_list.append(source)
|
||||
resource_number += 1
|
||||
if hit_callback:
|
||||
hit_callback.return_retriever_resource_info(context_list)
|
||||
|
||||
return str("\n".join(document_context_list))
|
||||
if invoke_from.to_source() == "dev":
|
||||
source["hit_count"] = segment.hit_count
|
||||
source["word_count"] = segment.word_count
|
||||
source["segment_position"] = segment.position
|
||||
source["index_node_hash"] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
|
||||
else:
|
||||
source["content"] = segment.content
|
||||
retrieval_resource_list.append(source)
|
||||
if hit_callback and retrieval_resource_list:
|
||||
retrieval_resource_list = sorted(retrieval_resource_list, key=lambda x: x.get("score"), reverse=True)
|
||||
for position, item in enumerate(retrieval_resource_list, start=1):
|
||||
item["position"] = position
|
||||
hit_callback.return_retriever_resource_info(retrieval_resource_list)
|
||||
if document_context_list:
|
||||
document_context_list = sorted(document_context_list, key=lambda x: x.score, reverse=True)
|
||||
return str("\n".join([document_context.content for document_context in document_context_list]))
|
||||
return ""
|
||||
|
||||
def single_retrieve(
|
||||
|
|
@ -254,36 +283,58 @@ class DatasetRetrieval:
|
|||
# get retrieval model config
|
||||
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
|
||||
if dataset:
|
||||
retrieval_model_config = dataset.retrieval_model or default_retrieval_model
|
||||
|
||||
# get top k
|
||||
top_k = retrieval_model_config["top_k"]
|
||||
# get retrieval method
|
||||
if dataset.indexing_technique == "economy":
|
||||
retrieval_method = "keyword_search"
|
||||
else:
|
||||
retrieval_method = retrieval_model_config["search_method"]
|
||||
# get reranking model
|
||||
reranking_model = (
|
||||
retrieval_model_config["reranking_model"] if retrieval_model_config["reranking_enable"] else None
|
||||
)
|
||||
# get score threshold
|
||||
score_threshold = 0.0
|
||||
score_threshold_enabled = retrieval_model_config.get("score_threshold_enabled")
|
||||
if score_threshold_enabled:
|
||||
score_threshold = retrieval_model_config.get("score_threshold")
|
||||
|
||||
with measure_time() as timer:
|
||||
results = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_method,
|
||||
dataset_id=dataset.id,
|
||||
results = []
|
||||
if dataset.provider == "external":
|
||||
external_documents = ExternalDatasetService.fetch_external_knowledge_retrieval(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset_id,
|
||||
query=query,
|
||||
top_k=top_k,
|
||||
score_threshold=score_threshold,
|
||||
reranking_model=reranking_model,
|
||||
reranking_mode=retrieval_model_config.get("reranking_mode", "reranking_model"),
|
||||
weights=retrieval_model_config.get("weights", None),
|
||||
external_retrieval_parameters=dataset.retrieval_model,
|
||||
)
|
||||
for external_document in external_documents:
|
||||
document = Document(
|
||||
page_content=external_document.get("content"),
|
||||
metadata=external_document.get("metadata"),
|
||||
provider="external",
|
||||
)
|
||||
document.metadata["score"] = external_document.get("score")
|
||||
document.metadata["title"] = external_document.get("title")
|
||||
document.metadata["dataset_id"] = dataset_id
|
||||
document.metadata["dataset_name"] = dataset.name
|
||||
results.append(document)
|
||||
else:
|
||||
retrieval_model_config = dataset.retrieval_model or default_retrieval_model
|
||||
|
||||
# get top k
|
||||
top_k = retrieval_model_config["top_k"]
|
||||
# get retrieval method
|
||||
if dataset.indexing_technique == "economy":
|
||||
retrieval_method = "keyword_search"
|
||||
else:
|
||||
retrieval_method = retrieval_model_config["search_method"]
|
||||
# get reranking model
|
||||
reranking_model = (
|
||||
retrieval_model_config["reranking_model"]
|
||||
if retrieval_model_config["reranking_enable"]
|
||||
else None
|
||||
)
|
||||
# get score threshold
|
||||
score_threshold = 0.0
|
||||
score_threshold_enabled = retrieval_model_config.get("score_threshold_enabled")
|
||||
if score_threshold_enabled:
|
||||
score_threshold = retrieval_model_config.get("score_threshold")
|
||||
|
||||
with measure_time() as timer:
|
||||
results = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_method,
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=top_k,
|
||||
score_threshold=score_threshold,
|
||||
reranking_model=reranking_model,
|
||||
reranking_mode=retrieval_model_config.get("reranking_mode", "reranking_model"),
|
||||
weights=retrieval_model_config.get("weights", None),
|
||||
)
|
||||
self._on_query(query, [dataset_id], app_id, user_from, user_id)
|
||||
|
||||
if results:
|
||||
|
|
@ -354,7 +405,8 @@ class DatasetRetrieval:
|
|||
self, documents: list[Document], message_id: Optional[str] = None, timer: Optional[dict] = None
|
||||
) -> None:
|
||||
"""Handle retrieval end."""
|
||||
for document in documents:
|
||||
dify_documents = [document for document in documents if document.provider == "dify"]
|
||||
for document in dify_documents:
|
||||
query = db.session.query(DocumentSegment).filter(
|
||||
DocumentSegment.index_node_id == document.metadata["doc_id"]
|
||||
)
|
||||
|
|
@ -407,35 +459,54 @@ class DatasetRetrieval:
|
|||
if not dataset:
|
||||
return []
|
||||
|
||||
# get retrieval model , if the model is not setting , using default
|
||||
retrieval_model = dataset.retrieval_model or default_retrieval_model
|
||||
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=top_k
|
||||
if dataset.provider == "external":
|
||||
external_documents = ExternalDatasetService.fetch_external_knowledge_retrieval(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset_id,
|
||||
query=query,
|
||||
external_retrieval_parameters=dataset.retrieval_model,
|
||||
)
|
||||
if documents:
|
||||
all_documents.extend(documents)
|
||||
else:
|
||||
if top_k > 0:
|
||||
# retrieval source
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_model["search_method"],
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=retrieval_model.get("top_k") or 2,
|
||||
score_threshold=retrieval_model.get("score_threshold", 0.0)
|
||||
if retrieval_model["score_threshold_enabled"]
|
||||
else 0.0,
|
||||
reranking_model=retrieval_model.get("reranking_model", None)
|
||||
if retrieval_model["reranking_enable"]
|
||||
else None,
|
||||
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
|
||||
weights=retrieval_model.get("weights", None),
|
||||
for external_document in external_documents:
|
||||
document = Document(
|
||||
page_content=external_document.get("content"),
|
||||
metadata=external_document.get("metadata"),
|
||||
provider="external",
|
||||
)
|
||||
document.metadata["score"] = external_document.get("score")
|
||||
document.metadata["title"] = external_document.get("title")
|
||||
document.metadata["dataset_id"] = dataset_id
|
||||
document.metadata["dataset_name"] = dataset.name
|
||||
all_documents.append(document)
|
||||
else:
|
||||
# get retrieval model , if the model is not setting , using default
|
||||
retrieval_model = dataset.retrieval_model or default_retrieval_model
|
||||
|
||||
all_documents.extend(documents)
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=top_k
|
||||
)
|
||||
if documents:
|
||||
all_documents.extend(documents)
|
||||
else:
|
||||
if top_k > 0:
|
||||
# retrieval source
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_model["search_method"],
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=retrieval_model.get("top_k") or 2,
|
||||
score_threshold=retrieval_model.get("score_threshold", 0.0)
|
||||
if retrieval_model["score_threshold_enabled"]
|
||||
else 0.0,
|
||||
reranking_model=retrieval_model.get("reranking_model", None)
|
||||
if retrieval_model["reranking_enable"]
|
||||
else None,
|
||||
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
|
||||
weights=retrieval_model.get("weights", None),
|
||||
)
|
||||
|
||||
all_documents.extend(documents)
|
||||
|
||||
def to_dataset_retriever_tool(
|
||||
self,
|
||||
|
|
@ -466,7 +537,7 @@ class DatasetRetrieval:
|
|||
continue
|
||||
|
||||
# pass if dataset is not available
|
||||
if dataset and dataset.available_document_count == 0:
|
||||
if dataset and dataset.provider != "external" and dataset.available_document_count == 0:
|
||||
continue
|
||||
|
||||
available_datasets.append(dataset)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
from datetime import datetime
|
||||
from typing import Any, Union
|
||||
|
||||
import pytz
|
||||
|
||||
from core.tools.builtin_tool.tool import BuiltinTool
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
from core.tools.errors import ToolInvokeError
|
||||
|
||||
|
||||
class LocaltimeToTimestampTool(BuiltinTool):
|
||||
def _invoke(
|
||||
self,
|
||||
user_id: str,
|
||||
tool_parameters: dict[str, Any],
|
||||
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
"""
|
||||
Convert localtime to timestamp
|
||||
"""
|
||||
localtime = tool_parameters.get("localtime")
|
||||
timezone = tool_parameters.get("timezone", "Asia/Shanghai")
|
||||
if not timezone:
|
||||
timezone = None
|
||||
time_format = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
timestamp = self.localtime_to_timestamp(localtime, time_format, timezone)
|
||||
if not timestamp:
|
||||
return self.create_text_message(f"Invalid localtime: {localtime}")
|
||||
|
||||
return self.create_text_message(f"{timestamp}")
|
||||
|
||||
@staticmethod
|
||||
def localtime_to_timestamp(localtime: str, time_format: str, local_tz=None) -> int | None:
|
||||
try:
|
||||
if local_tz is None:
|
||||
local_tz = datetime.now().astimezone().tzinfo
|
||||
if isinstance(local_tz, str):
|
||||
local_tz = pytz.timezone(local_tz)
|
||||
local_time = datetime.strptime(localtime, time_format)
|
||||
localtime = local_tz.localize(local_time)
|
||||
timestamp = int(localtime.timestamp())
|
||||
return timestamp
|
||||
except Exception as e:
|
||||
raise ToolInvokeError(str(e))
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
identity:
|
||||
name: localtime_to_timestamp
|
||||
author: zhuhao
|
||||
label:
|
||||
en_US: localtime to timestamp
|
||||
zh_Hans: 获取时间戳
|
||||
description:
|
||||
human:
|
||||
en_US: A tool for localtime convert to timestamp
|
||||
zh_Hans: 获取时间戳
|
||||
llm: A tool for localtime convert to timestamp
|
||||
parameters:
|
||||
- name: localtime
|
||||
type: string
|
||||
required: true
|
||||
form: llm
|
||||
label:
|
||||
en_US: localtime
|
||||
zh_Hans: 本地时间
|
||||
human_description:
|
||||
en_US: localtime, such as 2024-1-1 0:0:0
|
||||
zh_Hans: 本地时间, 比如2024-1-1 0:0:0
|
||||
- name: timezone
|
||||
type: string
|
||||
required: false
|
||||
form: llm
|
||||
label:
|
||||
en_US: Timezone
|
||||
zh_Hans: 时区
|
||||
human_description:
|
||||
en_US: Timezone, such as Asia/Shanghai
|
||||
zh_Hans: 时区, 比如Asia/Shanghai
|
||||
default: Asia/Shanghai
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
from datetime import datetime
|
||||
from typing import Any, Union
|
||||
|
||||
import pytz
|
||||
|
||||
from core.tools.builtin_tool.tool import BuiltinTool
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
from core.tools.errors import ToolInvokeError
|
||||
|
||||
|
||||
class TimestampToLocaltimeTool(BuiltinTool):
|
||||
def _invoke(
|
||||
self,
|
||||
user_id: str,
|
||||
tool_parameters: dict[str, Any],
|
||||
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
"""
|
||||
Convert timestamp to localtime
|
||||
"""
|
||||
timestamp = tool_parameters.get("timestamp")
|
||||
timezone = tool_parameters.get("timezone", "Asia/Shanghai")
|
||||
if not timezone:
|
||||
timezone = None
|
||||
time_format = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
locatime = self.timestamp_to_localtime(timestamp, timezone)
|
||||
if not locatime:
|
||||
return self.create_text_message(f"Invalid timestamp: {timestamp}")
|
||||
|
||||
localtime_format = locatime.strftime(time_format)
|
||||
|
||||
return self.create_text_message(f"{localtime_format}")
|
||||
|
||||
@staticmethod
|
||||
def timestamp_to_localtime(timestamp: int, local_tz=None) -> datetime | None:
|
||||
try:
|
||||
if local_tz is None:
|
||||
local_tz = datetime.now().astimezone().tzinfo
|
||||
if isinstance(local_tz, str):
|
||||
local_tz = pytz.timezone(local_tz)
|
||||
local_time = datetime.fromtimestamp(timestamp, local_tz)
|
||||
return local_time
|
||||
except Exception as e:
|
||||
raise ToolInvokeError(str(e))
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
identity:
|
||||
name: timestamp_to_localtime
|
||||
author: zhuhao
|
||||
label:
|
||||
en_US: Timestamp to localtime
|
||||
zh_Hans: 时间戳转换
|
||||
description:
|
||||
human:
|
||||
en_US: A tool for timestamp convert to localtime
|
||||
zh_Hans: 时间戳转换
|
||||
llm: A tool for timestamp convert to localtime
|
||||
parameters:
|
||||
- name: timestamp
|
||||
type: number
|
||||
required: true
|
||||
form: llm
|
||||
label:
|
||||
en_US: Timestamp
|
||||
zh_Hans: 时间戳
|
||||
human_description:
|
||||
en_US: Timestamp
|
||||
zh_Hans: 时间戳
|
||||
- name: timezone
|
||||
type: string
|
||||
required: false
|
||||
form: llm
|
||||
label:
|
||||
en_US: Timezone
|
||||
zh_Hans: 时区
|
||||
human_description:
|
||||
en_US: Timezone, such as Asia/Shanghai
|
||||
zh_Hans: 时区, 比如Asia/Shanghai
|
||||
default: Asia/Shanghai
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
from typing import Optional
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import PromptMessage, SystemPromptMessage, UserPromptMessage
|
||||
from core.tools.__base.tool import Tool
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from core.rag.datasource.retrieval_service import RetrievalService
|
||||
from core.rag.models.document import Document as RetrievalDocument
|
||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||
from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
from services.external_knowledge_service import ExternalDatasetService
|
||||
|
||||
default_retrieval_model = {
|
||||
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
|
||||
|
|
@ -53,97 +55,137 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool):
|
|||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.on_query(query, dataset.id)
|
||||
|
||||
# get retrieval model , if the model is not setting , using default
|
||||
retrieval_model = dataset.retrieval_model or default_retrieval_model
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=self.top_k
|
||||
if dataset.provider == "external":
|
||||
results = []
|
||||
external_documents = ExternalDatasetService.fetch_external_knowledge_retrieval(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
external_retrieval_parameters=dataset.retrieval_model,
|
||||
)
|
||||
return str("\n".join([document.page_content for document in documents]))
|
||||
else:
|
||||
if self.top_k > 0:
|
||||
# retrieval source
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_model.get("search_method", "semantic_search"),
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k,
|
||||
score_threshold=retrieval_model.get("score_threshold", 0.0)
|
||||
if retrieval_model["score_threshold_enabled"]
|
||||
else 0.0,
|
||||
reranking_model=retrieval_model.get("reranking_model", None)
|
||||
if retrieval_model["reranking_enable"]
|
||||
else None,
|
||||
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
|
||||
weights=retrieval_model.get("weights", None),
|
||||
for external_document in external_documents:
|
||||
document = RetrievalDocument(
|
||||
page_content=external_document.get("content"),
|
||||
metadata=external_document.get("metadata"),
|
||||
provider="external",
|
||||
)
|
||||
else:
|
||||
documents = []
|
||||
|
||||
document.metadata["score"] = external_document.get("score")
|
||||
document.metadata["title"] = external_document.get("title")
|
||||
document.metadata["dataset_id"] = dataset.id
|
||||
document.metadata["dataset_name"] = dataset.name
|
||||
results.append(document)
|
||||
# deal with external documents
|
||||
context_list = []
|
||||
for position, item in enumerate(results, start=1):
|
||||
source = {
|
||||
"position": position,
|
||||
"dataset_id": item.metadata.get("dataset_id"),
|
||||
"dataset_name": item.metadata.get("dataset_name"),
|
||||
"document_name": item.metadata.get("title"),
|
||||
"data_source_type": "external",
|
||||
"retriever_from": self.retriever_from,
|
||||
"score": item.metadata.get("score"),
|
||||
"title": item.metadata.get("title"),
|
||||
"content": item.page_content,
|
||||
}
|
||||
context_list.append(source)
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.on_tool_end(documents)
|
||||
document_score_list = {}
|
||||
if dataset.indexing_technique != "economy":
|
||||
for item in documents:
|
||||
if item.metadata.get("score"):
|
||||
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
|
||||
document_context_list = []
|
||||
index_node_ids = [document.metadata["doc_id"] for document in documents]
|
||||
segments = DocumentSegment.query.filter(
|
||||
DocumentSegment.dataset_id == self.dataset_id,
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == "completed",
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids),
|
||||
).all()
|
||||
hit_callback.return_retriever_resource_info(context_list)
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(
|
||||
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
|
||||
return str("\n".join([item.page_content for item in results]))
|
||||
else:
|
||||
# get retrieval model , if the model is not setting , using default
|
||||
retrieval_model = dataset.retrieval_model or default_retrieval_model
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=self.top_k
|
||||
)
|
||||
for segment in sorted_segments:
|
||||
if segment.answer:
|
||||
document_context_list.append(f"question:{segment.get_sign_content()} answer:{segment.answer}")
|
||||
else:
|
||||
document_context_list.append(segment.get_sign_content())
|
||||
if self.return_resource:
|
||||
context_list = []
|
||||
resource_number = 1
|
||||
return str("\n".join([document.page_content for document in documents]))
|
||||
else:
|
||||
if self.top_k > 0:
|
||||
# retrieval source
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_model.get("search_method", "semantic_search"),
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k,
|
||||
score_threshold=retrieval_model.get("score_threshold", 0.0)
|
||||
if retrieval_model["score_threshold_enabled"]
|
||||
else 0.0,
|
||||
reranking_model=retrieval_model.get("reranking_model", None)
|
||||
if retrieval_model["reranking_enable"]
|
||||
else None,
|
||||
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
|
||||
weights=retrieval_model.get("weights", None),
|
||||
)
|
||||
else:
|
||||
documents = []
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.on_tool_end(documents)
|
||||
document_score_list = {}
|
||||
if dataset.indexing_technique != "economy":
|
||||
for item in documents:
|
||||
if item.metadata.get("score"):
|
||||
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
|
||||
document_context_list = []
|
||||
index_node_ids = [document.metadata["doc_id"] for document in documents]
|
||||
segments = DocumentSegment.query.filter(
|
||||
DocumentSegment.dataset_id == self.dataset_id,
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == "completed",
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids),
|
||||
).all()
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(
|
||||
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
|
||||
)
|
||||
for segment in sorted_segments:
|
||||
context = {}
|
||||
document = Document.query.filter(
|
||||
Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
"position": resource_number,
|
||||
"dataset_id": dataset.id,
|
||||
"dataset_name": dataset.name,
|
||||
"document_id": document.id,
|
||||
"document_name": document.name,
|
||||
"data_source_type": document.data_source_type,
|
||||
"segment_id": segment.id,
|
||||
"retriever_from": self.retriever_from,
|
||||
"score": document_score_list.get(segment.index_node_id, None),
|
||||
}
|
||||
if self.retriever_from == "dev":
|
||||
source["hit_count"] = segment.hit_count
|
||||
source["word_count"] = segment.word_count
|
||||
source["segment_position"] = segment.position
|
||||
source["index_node_hash"] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
|
||||
else:
|
||||
source["content"] = segment.content
|
||||
context_list.append(source)
|
||||
resource_number += 1
|
||||
if segment.answer:
|
||||
document_context_list.append(
|
||||
f"question:{segment.get_sign_content()} answer:{segment.answer}"
|
||||
)
|
||||
else:
|
||||
document_context_list.append(segment.get_sign_content())
|
||||
if self.return_resource:
|
||||
context_list = []
|
||||
resource_number = 1
|
||||
for segment in sorted_segments:
|
||||
context = {}
|
||||
document = Document.query.filter(
|
||||
Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
"position": resource_number,
|
||||
"dataset_id": dataset.id,
|
||||
"dataset_name": dataset.name,
|
||||
"document_id": document.id,
|
||||
"document_name": document.name,
|
||||
"data_source_type": document.data_source_type,
|
||||
"segment_id": segment.id,
|
||||
"retriever_from": self.retriever_from,
|
||||
"score": document_score_list.get(segment.index_node_id, None),
|
||||
}
|
||||
if self.retriever_from == "dev":
|
||||
source["hit_count"] = segment.hit_count
|
||||
source["word_count"] = segment.word_count
|
||||
source["segment_position"] = segment.position
|
||||
source["index_node_hash"] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
|
||||
else:
|
||||
source["content"] = segment.content
|
||||
context_list.append(source)
|
||||
resource_number += 1
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.return_retriever_resource_info(context_list)
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.return_retriever_resource_info(context_list)
|
||||
|
||||
return str("\n".join(document_context_list))
|
||||
return str("\n".join(document_context_list))
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from core.tools.errors import ToolProviderCredentialValidationError
|
||||
|
|
@ -32,7 +34,12 @@ class FeishuRequest:
|
|||
return res.get("tenant_access_token")
|
||||
|
||||
def _send_request(
|
||||
self, url: str, method: str = "post", require_token: bool = True, payload: dict = None, params: dict = None
|
||||
self,
|
||||
url: str,
|
||||
method: str = "post",
|
||||
require_token: bool = True,
|
||||
payload: Optional[dict] = None,
|
||||
params: Optional[dict] = None,
|
||||
):
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import uuid
|
|||
from json import dumps as json_dumps
|
||||
from json import loads as json_loads
|
||||
from json.decoder import JSONDecodeError
|
||||
from typing import Optional
|
||||
|
||||
from requests import get
|
||||
from yaml import YAMLError, safe_load
|
||||
|
|
|
|||
|
|
@ -92,10 +92,13 @@ class WorkflowTool(Tool):
|
|||
if data.get("error"):
|
||||
raise Exception(data.get("error"))
|
||||
|
||||
outputs = data.get("outputs", {})
|
||||
outputs, files = self._extract_files(outputs)
|
||||
for file in files:
|
||||
yield self.create_file_var_message(file)
|
||||
outputs = data.get("outputs")
|
||||
if outputs == None:
|
||||
outputs = {}
|
||||
else:
|
||||
outputs, files = self._extract_files(outputs)
|
||||
for file in files:
|
||||
yield self.create_file_var_message(file)
|
||||
|
||||
yield self.create_text_message(json.dumps(outputs, ensure_ascii=False))
|
||||
yield self.create_json_message(outputs)
|
||||
|
|
|
|||
|
|
@ -11,3 +11,6 @@ class SystemVariableKey(str, Enum):
|
|||
CONVERSATION_ID = "conversation_id"
|
||||
USER_ID = "user_id"
|
||||
DIALOGUE_COUNT = "dialogue_count"
|
||||
APP_ID = "app_id"
|
||||
WORKFLOW_ID = "workflow_id"
|
||||
WORKFLOW_RUN_ID = "workflow_run_id"
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue