diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index a283f8d5ca..54f3f42a25 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -49,8 +49,8 @@ jobs: if: steps.changed-files.outputs.any_changed == 'true' run: | uv run --directory api ruff --version - uv run --directory api ruff check --diff ./ - uv run --directory api ruff format --check --diff ./ + uv run --directory api ruff check ./ + uv run --directory api ruff format --check ./ - name: Dotenv check if: steps.changed-files.outputs.any_changed == 'true' diff --git a/README.md b/README.md index 2909e0e6cf..16a1268cb1 100644 --- a/README.md +++ b/README.md @@ -241,7 +241,7 @@ One-Click deploy Dify to Alibaba Cloud with [Alibaba Cloud Data Management](http For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). At the same time, please consider supporting Dify by sharing it on social media and at events and conferences. -> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). +> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). ## Community & contact diff --git a/README_AR.md b/README_AR.md index e959ca0f78..d2cb0098a3 100644 --- a/README_AR.md +++ b/README_AR.md @@ -223,7 +223,7 @@ docker compose up -d لأولئك الذين يرغبون في المساهمة، انظر إلى [دليل المساهمة](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) لدينا. في الوقت نفسه، يرجى النظر في دعم Dify عن طريق مشاركته على وسائل التواصل الاجتماعي وفي الفعاليات والمؤتمرات. -> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c). +> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c). 
**المساهمون** diff --git a/README_BN.md b/README_BN.md index 29d7374ea5..f57413ec8b 100644 --- a/README_BN.md +++ b/README_BN.md @@ -241,7 +241,7 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন যারা কোড অবদান রাখতে চান, তাদের জন্য আমাদের [অবদান নির্দেশিকা] দেখুন (https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)। একই সাথে, সোশ্যাল মিডিয়া এবং ইভেন্ট এবং কনফারেন্সে এটি শেয়ার করে Dify কে সমর্থন করুন। -> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন। +> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন। ## কমিউনিটি এবং যোগাযোগ diff --git a/README_CN.md b/README_CN.md index 486a368c09..e9c73eb48b 100644 --- a/README_CN.md +++ b/README_CN.md @@ -244,7 +244,7 @@ docker compose up -d 对于那些想要贡献代码的人,请参阅我们的[贡献指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。 同时,请考虑通过社交媒体、活动和会议来支持 Dify 的分享。 -> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。 +> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。 **Contributors** diff --git a/README_DE.md b/README_DE.md index fce52c34c2..d31a56542d 100644 --- a/README_DE.md +++ b/README_DE.md @@ -236,7 +236,7 @@ Ein-Klick-Bereitstellung von Dify in der Alibaba Cloud mit [Alibaba Cloud Data M Falls Sie Code beitragen möchten, lesen Sie bitte unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Gleichzeitig bitten wir Sie, Dify zu unterstützen, indem Sie es in den sozialen Medien teilen und auf Veranstaltungen und Konferenzen präsentieren. -> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen – außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c). +> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen – außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c). 
## Gemeinschaft & Kontakt diff --git a/README_ES.md b/README_ES.md index 6fd6dfcee8..918bfe2286 100644 --- a/README_ES.md +++ b/README_ES.md @@ -237,7 +237,7 @@ Para aquellos que deseen contribuir con código, consulten nuestra [Guía de con Al mismo tiempo, considera apoyar a Dify compartiéndolo en redes sociales y en eventos y conferencias. -> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c). +> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c). **Contribuidores** diff --git a/README_FR.md b/README_FR.md index b2209fb495..56ca878aae 100644 --- a/README_FR.md +++ b/README_FR.md @@ -235,7 +235,7 @@ Pour ceux qui souhaitent contribuer du code, consultez notre [Guide de contribut Dans le même temps, veuillez envisager de soutenir Dify en le partageant sur les réseaux sociaux et lors d'événements et de conférences. -> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c). +> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c). **Contributeurs** diff --git a/README_JA.md b/README_JA.md index c658225f90..6d277a36ed 100644 --- a/README_JA.md +++ b/README_JA.md @@ -234,7 +234,7 @@ docker compose up -d 同時に、DifyをSNSやイベント、カンファレンスで共有してサポートしていただけると幸いです。 -> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。 +> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。 **貢献者** diff --git a/README_KL.md b/README_KL.md index bfafcc7407..dac67eeb29 100644 --- a/README_KL.md +++ b/README_KL.md @@ -235,7 +235,7 @@ For those who'd like to contribute code, see our [Contribution Guide](https://gi At the same time, please consider supporting Dify by sharing it on social media and at events and conferences. -> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. 
If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). +> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). **Contributors** diff --git a/README_KR.md b/README_KR.md index 282117e776..072481da02 100644 --- a/README_KR.md +++ b/README_KR.md @@ -229,7 +229,7 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했 동시에 Dify를 소셜 미디어와 행사 및 컨퍼런스에 공유하여 지원하는 것을 고려해 주시기 바랍니다. -> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요. +> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요. **기여자** diff --git a/README_PT.md b/README_PT.md index 576f6b48f7..1260f8e6fd 100644 --- a/README_PT.md +++ b/README_PT.md @@ -233,7 +233,7 @@ Implante o Dify na Alibaba Cloud com um clique usando o [Alibaba Cloud Data Mana Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em eventos e conferências. -> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c). +> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c). **Contribuidores** diff --git a/README_TR.md b/README_TR.md index 6e94e54fa0..37953f0de1 100644 --- a/README_TR.md +++ b/README_TR.md @@ -227,7 +227,7 @@ Dify'ı bulut platformuna tek tıklamayla dağıtın [terraform](https://www.ter Kod katkısında bulunmak isteyenler için [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakabilirsiniz. Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda paylaşarak desteklemeyi düşünün. -> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın. 
+> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın. **Katkıda Bulunanlar** diff --git a/README_TW.md b/README_TW.md index 6e3e22b5c1..f70d6a25f6 100644 --- a/README_TW.md +++ b/README_TW.md @@ -239,7 +239,7 @@ Dify 的所有功能都提供相應的 API,因此您可以輕鬆地將 Dify 對於想要貢獻程式碼的開發者,請參閱我們的[貢獻指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。 同時,也請考慮透過在社群媒體和各種活動與會議上分享 Dify 來支持我們。 -> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。 +> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。 ## 社群與聯絡方式 diff --git a/README_VI.md b/README_VI.md index 51314e6de5..ddd9aa95f6 100644 --- a/README_VI.md +++ b/README_VI.md @@ -231,7 +231,7 @@ Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng Đồng thời, vui lòng xem xét hỗ trợ Dify bằng cách chia sẻ nó trên mạng xã hội và tại các sự kiện và hội nghị. -> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi. +> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi. **Người đóng góp** diff --git a/api/.env.example b/api/.env.example index 80b1c12cd8..18f2dbf647 100644 --- a/api/.env.example +++ b/api/.env.example @@ -4,6 +4,11 @@ # Alternatively you can set it with `SECRET_KEY` environment variable. SECRET_KEY= +# Ensure UTF-8 encoding +LANG=en_US.UTF-8 +LC_ALL=en_US.UTF-8 +PYTHONIOENCODING=utf-8 + # Console API base URL CONSOLE_API_URL=http://localhost:5001 CONSOLE_WEB_URL=http://localhost:3000 diff --git a/api/.ruff.toml b/api/.ruff.toml index 0169613bf8..db6872b9c8 100644 --- a/api/.ruff.toml +++ b/api/.ruff.toml @@ -42,6 +42,8 @@ select = [ "S301", # suspicious-pickle-usage, disallow use of `pickle` and its wrappers. 
"S302", # suspicious-marshal-usage, disallow use of `marshal` module "S311", # suspicious-non-cryptographic-random-usage + "G001", # don't use str format to logging messages + "G004", # don't use f-strings to format logging messages ] ignore = [ diff --git a/api/Dockerfile b/api/Dockerfile index 8c7a1717b9..e097b5811e 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -37,6 +37,11 @@ EXPOSE 5001 # set timezone ENV TZ=UTC +# Set UTF-8 locale +ENV LANG=en_US.UTF-8 +ENV LC_ALL=en_US.UTF-8 +ENV PYTHONIOENCODING=utf-8 + WORKDIR /app/api RUN \ diff --git a/api/app_factory.py b/api/app_factory.py index 3a258be28f..81155cbacd 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -32,7 +32,7 @@ def create_app() -> DifyApp: initialize_extensions(app) end_time = time.perf_counter() if dify_config.DEBUG: - logging.info(f"Finished create_app ({round((end_time - start_time) * 1000, 2)} ms)") + logging.info("Finished create_app (%s ms)", round((end_time - start_time) * 1000, 2)) return app @@ -91,14 +91,14 @@ def initialize_extensions(app: DifyApp): is_enabled = ext.is_enabled() if hasattr(ext, "is_enabled") else True if not is_enabled: if dify_config.DEBUG: - logging.info(f"Skipped {short_name}") + logging.info("Skipped %s", short_name) continue start_time = time.perf_counter() ext.init_app(app) end_time = time.perf_counter() if dify_config.DEBUG: - logging.info(f"Loaded {short_name} ({round((end_time - start_time) * 1000, 2)} ms)") + logging.info("Loaded %s (%s ms)", short_name, round((end_time - start_time) * 1000, 2)) def create_migrations_app(): diff --git a/api/commands.py b/api/commands.py index c2e62ec261..79bb6713d0 100644 --- a/api/commands.py +++ b/api/commands.py @@ -53,13 +53,13 @@ def reset_password(email, new_password, password_confirm): account = db.session.query(Account).where(Account.email == email).one_or_none() if not account: - click.echo(click.style("Account not found for email: {}".format(email), fg="red")) + click.echo(click.style(f"Account not found for email: {email}", fg="red")) return try: valid_password(new_password) except: - click.echo(click.style("Invalid password. Must match {}".format(password_pattern), fg="red")) + click.echo(click.style(f"Invalid password. Must match {password_pattern}", fg="red")) return # generate password salt @@ -92,13 +92,13 @@ def reset_email(email, new_email, email_confirm): account = db.session.query(Account).where(Account.email == email).one_or_none() if not account: - click.echo(click.style("Account not found for email: {}".format(email), fg="red")) + click.echo(click.style(f"Account not found for email: {email}", fg="red")) return try: email_validate(new_email) except: - click.echo(click.style("Invalid email: {}".format(new_email), fg="red")) + click.echo(click.style(f"Invalid email: {new_email}", fg="red")) return account.email = new_email @@ -142,7 +142,7 @@ def reset_encrypt_key_pair(): click.echo( click.style( - "Congratulations! The asymmetric key pair of workspace {} has been reset.".format(tenant.id), + f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.", fg="green", ) ) @@ -190,14 +190,14 @@ def migrate_annotation_vector_database(): f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped." 
) try: - click.echo("Creating app annotation index: {}".format(app.id)) + click.echo(f"Creating app annotation index: {app.id}") app_annotation_setting = ( db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first() ) if not app_annotation_setting: skipped_count = skipped_count + 1 - click.echo("App annotation setting disabled: {}".format(app.id)) + click.echo(f"App annotation setting disabled: {app.id}") continue # get dataset_collection_binding info dataset_collection_binding = ( @@ -206,7 +206,7 @@ def migrate_annotation_vector_database(): .first() ) if not dataset_collection_binding: - click.echo("App annotation collection binding not found: {}".format(app.id)) + click.echo(f"App annotation collection binding not found: {app.id}") continue annotations = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app.id).all() dataset = Dataset( @@ -252,9 +252,7 @@ def migrate_annotation_vector_database(): create_count += 1 except Exception as e: click.echo( - click.style( - "Error creating app annotation index: {} {}".format(e.__class__.__name__, str(e)), fg="red" - ) + click.style(f"Error creating app annotation index: {e.__class__.__name__} {str(e)}", fg="red") ) continue @@ -319,7 +317,7 @@ def migrate_knowledge_vector_database(): f"Processing the {total_count} dataset {dataset.id}. {create_count} created, {skipped_count} skipped." ) try: - click.echo("Creating dataset vector database index: {}".format(dataset.id)) + click.echo(f"Creating dataset vector database index: {dataset.id}") if dataset.index_struct_dict: if dataset.index_struct_dict["type"] == vector_type: skipped_count = skipped_count + 1 @@ -423,9 +421,7 @@ def migrate_knowledge_vector_database(): create_count += 1 except Exception as e: db.session.rollback() - click.echo( - click.style("Error creating dataset index: {} {}".format(e.__class__.__name__, str(e)), fg="red") - ) + click.echo(click.style(f"Error creating dataset index: {e.__class__.__name__} {str(e)}", fg="red")) continue click.echo( @@ -476,7 +472,7 @@ def convert_to_agent_apps(): break for app in apps: - click.echo("Converting app: {}".format(app.id)) + click.echo(f"Converting app: {app.id}") try: app.mode = AppMode.AGENT_CHAT.value @@ -488,11 +484,11 @@ def convert_to_agent_apps(): ) db.session.commit() - click.echo(click.style("Converted app: {}".format(app.id), fg="green")) + click.echo(click.style(f"Converted app: {app.id}", fg="green")) except Exception as e: - click.echo(click.style("Convert app error: {} {}".format(e.__class__.__name__, str(e)), fg="red")) + click.echo(click.style(f"Convert app error: {e.__class__.__name__} {str(e)}", fg="red")) - click.echo(click.style("Conversion complete. Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green")) + click.echo(click.style(f"Conversion complete. 
Converted {len(proceeded_app_ids)} agent apps.", fg="green")) @click.command("add-qdrant-index", help="Add Qdrant index.") @@ -665,7 +661,7 @@ def create_tenant(email: str, language: Optional[str] = None, name: Optional[str click.echo( click.style( - "Account and tenant created.\nAccount: {}\nPassword: {}".format(email, new_password), + f"Account and tenant created.\nAccount: {email}\nPassword: {new_password}", fg="green", ) ) @@ -726,16 +722,16 @@ where sites.id is null limit 1000""" if tenant: accounts = tenant.get_accounts() if not accounts: - print("Fix failed for app {}".format(app.id)) + print(f"Fix failed for app {app.id}") continue account = accounts[0] - print("Fixing missing site for app {}".format(app.id)) + print(f"Fixing missing site for app {app.id}") app_was_created.send(app, account=account) except Exception: failed_app_ids.append(app_id) - click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red")) - logging.exception(f"Failed to fix app related site missing issue, app_id: {app_id}") + click.echo(click.style(f"Failed to fix missing site for app {app_id}", fg="red")) + logging.exception("Failed to fix app related site missing issue, app_id: %s", app_id) continue if not processed_count: diff --git a/api/configs/app_config.py b/api/configs/app_config.py index 20f8c40427..d3b1cf9d5b 100644 --- a/api/configs/app_config.py +++ b/api/configs/app_config.py @@ -41,7 +41,7 @@ class RemoteSettingsSourceFactory(PydanticBaseSettingsSource): case RemoteSettingsSourceName.NACOS: remote_source = NacosSettingsSource(current_state) case _: - logger.warning(f"Unsupported remote source: {remote_source_name}") + logger.warning("Unsupported remote source: %s", remote_source_name) return {} d: dict[str, Any] = {} diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index 587ea55ca7..68b16e48db 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -245,11 +245,7 @@ class CeleryConfig(DatabaseConfig): @computed_field def CELERY_RESULT_BACKEND(self) -> str | None: - return ( - "db+{}".format(self.SQLALCHEMY_DATABASE_URI) - if self.CELERY_BACKEND == "database" - else self.CELERY_BROKER_URL - ) + return f"db+{self.SQLALCHEMY_DATABASE_URI}" if self.CELERY_BACKEND == "database" else self.CELERY_BROKER_URL @property def BROKER_USE_SSL(self) -> bool: diff --git a/api/configs/remote_settings_sources/apollo/client.py b/api/configs/remote_settings_sources/apollo/client.py index 88b30d3987..877ff8409f 100644 --- a/api/configs/remote_settings_sources/apollo/client.py +++ b/api/configs/remote_settings_sources/apollo/client.py @@ -76,7 +76,7 @@ class ApolloClient: code, body = http_request(url, timeout=3, headers=self._sign_headers(url)) if code == 200: if not body: - logger.error(f"get_json_from_net load configs failed, body is {body}") + logger.error("get_json_from_net load configs failed, body is %s", body) return None data = json.loads(body) data = data["configurations"] @@ -207,7 +207,7 @@ class ApolloClient: # if the length is 0 it is returned directly if len(notifications) == 0: return - url = "{}/notifications/v2".format(self.config_url) + url = f"{self.config_url}/notifications/v2" params = { "appId": self.app_id, "cluster": self.cluster, @@ -222,7 +222,7 @@ class ApolloClient: return if http_code == 200: if not body: - logger.error(f"_long_poll load configs failed,body is {body}") + logger.error("_long_poll load configs failed,body is %s", body) return data = json.loads(body) for entry in data: @@ 
-273,12 +273,12 @@ class ApolloClient: time.sleep(60 * 10) # 10 minutes def _do_heart_beat(self, namespace): - url = "{}/configs/{}/{}/{}?ip={}".format(self.config_url, self.app_id, self.cluster, namespace, self.ip) + url = f"{self.config_url}/configs/{self.app_id}/{self.cluster}/{namespace}?ip={self.ip}" try: code, body = http_request(url, timeout=3, headers=self._sign_headers(url)) if code == 200: if not body: - logger.error(f"_do_heart_beat load configs failed,body is {body}") + logger.error("_do_heart_beat load configs failed,body is %s", body) return None data = json.loads(body) if self.last_release_key == data["releaseKey"]: diff --git a/api/configs/remote_settings_sources/apollo/utils.py b/api/configs/remote_settings_sources/apollo/utils.py index 6136112e03..f5b82908ee 100644 --- a/api/configs/remote_settings_sources/apollo/utils.py +++ b/api/configs/remote_settings_sources/apollo/utils.py @@ -24,7 +24,7 @@ def url_encode_wrapper(params): def no_key_cache_key(namespace, key): - return "{}{}{}".format(namespace, len(namespace), key) + return f"{namespace}{len(namespace)}{key}" # Returns whether the obtained value is obtained, and None if it does not diff --git a/api/constants/languages.py b/api/constants/languages.py index 1157ec4307..ab19392c59 100644 --- a/api/constants/languages.py +++ b/api/constants/languages.py @@ -28,5 +28,5 @@ def supported_language(lang): if lang in languages: return lang - error = "{lang} is not a valid language.".format(lang=lang) + error = f"{lang} is not a valid language." raise ValueError(error) diff --git a/api/controllers/console/app/annotation.py b/api/controllers/console/app/annotation.py index 2b48afd550..c2ba880405 100644 --- a/api/controllers/console/app/annotation.py +++ b/api/controllers/console/app/annotation.py @@ -86,7 +86,7 @@ class AnnotationReplyActionStatusApi(Resource): raise Forbidden() job_id = str(job_id) - app_annotation_job_key = "{}_app_annotation_job_{}".format(action, str(job_id)) + app_annotation_job_key = f"{action}_app_annotation_job_{str(job_id)}" cache_result = redis_client.get(app_annotation_job_key) if cache_result is None: raise ValueError("The job does not exist.") @@ -94,7 +94,7 @@ class AnnotationReplyActionStatusApi(Resource): job_status = cache_result.decode() error_msg = "" if job_status == "error": - app_annotation_error_key = "{}_app_annotation_error_{}".format(action, str(job_id)) + app_annotation_error_key = f"{action}_app_annotation_error_{str(job_id)}" error_msg = redis_client.get(app_annotation_error_key).decode() return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200 @@ -123,6 +123,17 @@ class AnnotationListApi(Resource): } return response, 200 + @setup_required + @login_required + @account_initialization_required + def delete(self, app_id): + if not current_user.is_editor: + raise Forbidden() + + app_id = str(app_id) + AppAnnotationService.clear_all_annotations(app_id) + return {"result": "success"}, 204 + class AnnotationExportApi(Resource): @setup_required @@ -223,14 +234,14 @@ class AnnotationBatchImportStatusApi(Resource): raise Forbidden() job_id = str(job_id) - indexing_cache_key = "app_annotation_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}" cache_result = redis_client.get(indexing_cache_key) if cache_result is None: raise ValueError("The job does not exist.") job_status = cache_result.decode() error_msg = "" if job_status == "error": - indexing_error_msg_key = 
"app_annotation_batch_import_error_msg_{}".format(str(job_id)) + indexing_error_msg_key = f"app_annotation_batch_import_error_msg_{str(job_id)}" error_msg = redis_client.get(indexing_error_msg_key).decode() return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200 diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py index b5b6d1f75b..6ddae6fad5 100644 --- a/api/controllers/console/app/conversation.py +++ b/api/controllers/console/app/conversation.py @@ -51,8 +51,8 @@ class CompletionConversationApi(Resource): if args["keyword"]: query = query.join(Message, Message.conversation_id == Conversation.id).where( or_( - Message.query.ilike("%{}%".format(args["keyword"])), - Message.answer.ilike("%{}%".format(args["keyword"])), + Message.query.ilike(f"%{args['keyword']}%"), + Message.answer.ilike(f"%{args['keyword']}%"), ) ) @@ -174,7 +174,7 @@ class ChatConversationApi(Resource): query = db.select(Conversation).where(Conversation.app_id == app_model.id) if args["keyword"]: - keyword_filter = "%{}%".format(args["keyword"]) + keyword_filter = f"%{args['keyword']}%" query = ( query.join( Message, diff --git a/api/controllers/console/app/generator.py b/api/controllers/console/app/generator.py index 790369c052..4847a2cab8 100644 --- a/api/controllers/console/app/generator.py +++ b/api/controllers/console/app/generator.py @@ -1,5 +1,3 @@ -import os - from flask_login import current_user from flask_restful import Resource, reqparse @@ -29,15 +27,12 @@ class RuleGenerateApi(Resource): args = parser.parse_args() account = current_user - PROMPT_GENERATION_MAX_TOKENS = int(os.getenv("PROMPT_GENERATION_MAX_TOKENS", "512")) - try: rules = LLMGenerator.generate_rule_config( tenant_id=account.current_tenant_id, instruction=args["instruction"], model_config=args["model_config"], no_variable=args["no_variable"], - rule_config_max_tokens=PROMPT_GENERATION_MAX_TOKENS, ) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) @@ -64,14 +59,12 @@ class RuleCodeGenerateApi(Resource): args = parser.parse_args() account = current_user - CODE_GENERATION_MAX_TOKENS = int(os.getenv("CODE_GENERATION_MAX_TOKENS", "1024")) try: code_result = LLMGenerator.generate_code( tenant_id=account.current_tenant_id, instruction=args["instruction"], model_config=args["model_config"], code_language=args["code_language"], - max_tokens=CODE_GENERATION_MAX_TOKENS, ) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) diff --git a/api/controllers/console/app/message.py b/api/controllers/console/app/message.py index 5e79e8dece..d4ce5921c2 100644 --- a/api/controllers/console/app/message.py +++ b/api/controllers/console/app/message.py @@ -5,7 +5,6 @@ from flask_restful import Resource, fields, marshal_with, reqparse from flask_restful.inputs import int_range from werkzeug.exceptions import Forbidden, InternalServerError, NotFound -import services from controllers.console import api from controllers.console.app.error import ( CompletionRequestError, @@ -133,7 +132,7 @@ class MessageFeedbackApi(Resource): rating=args.get("rating"), content=None, ) - except services.errors.message.MessageNotExistsError: + except MessageNotExistsError: raise NotFound("Message Not Exists.") return {"result": "success"} diff --git a/api/controllers/console/auth/data_source_oauth.py b/api/controllers/console/auth/data_source_oauth.py index 4c9697cc32..4940b48754 100644 --- a/api/controllers/console/auth/data_source_oauth.py +++ 
b/api/controllers/console/auth/data_source_oauth.py @@ -81,7 +81,7 @@ class OAuthDataSourceBinding(Resource): oauth_provider.get_access_token(code) except requests.exceptions.HTTPError as e: logging.exception( - f"An error occurred during the OAuthCallback process with {provider}: {e.response.text}" + "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text ) return {"error": "OAuth data source process failed"}, 400 @@ -103,7 +103,9 @@ class OAuthDataSourceSync(Resource): try: oauth_provider.sync_data_source(binding_id) except requests.exceptions.HTTPError as e: - logging.exception(f"An error occurred during the OAuthCallback process with {provider}: {e.response.text}") + logging.exception( + "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text + ) return {"error": "OAuth data source process failed"}, 400 return {"result": "success"}, 200 diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py index d0a4f3ff6d..4a6cb99390 100644 --- a/api/controllers/console/auth/oauth.py +++ b/api/controllers/console/auth/oauth.py @@ -80,7 +80,7 @@ class OAuthCallback(Resource): user_info = oauth_provider.get_user_info(token) except requests.exceptions.RequestException as e: error_text = e.response.text if e.response else str(e) - logging.exception(f"An error occurred during the OAuth process with {provider}: {error_text}") + logging.exception("An error occurred during the OAuth process with %s: %s", provider, error_text) return {"error": "OAuth process failed"}, 400 if invite_token and RegisterService.is_valid_invite_token(invite_token): diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index d14b208a4b..b6e91dd98e 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -970,7 +970,7 @@ class DocumentRetryApi(DocumentResource): raise DocumentAlreadyFinishedError() retry_documents.append(document) except Exception: - logging.exception(f"Failed to retry document, document id: {document_id}") + logging.exception("Failed to retry document, document id: %s", document_id) continue # retry document DocumentService.retry_document(dataset_id, retry_documents) diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index b3704ce8b1..8c429044d7 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -1,6 +1,5 @@ import uuid -import pandas as pd from flask import request from flask_login import current_user from flask_restful import Resource, marshal, reqparse @@ -14,8 +13,6 @@ from controllers.console.datasets.error import ( ChildChunkDeleteIndexError, ChildChunkIndexingError, InvalidActionError, - NoFileUploadedError, - TooManyFilesError, ) from controllers.console.wraps import ( account_initialization_required, @@ -32,6 +29,7 @@ from extensions.ext_redis import redis_client from fields.segment_fields import child_chunk_fields, segment_fields from libs.login import login_required from models.dataset import ChildChunk, DocumentSegment +from models.model import UploadFile from services.dataset_service import DatasetService, DocumentService, SegmentService from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs from services.errors.chunk import ChildChunkDeleteIndexError as 
ChildChunkDeleteIndexServiceError @@ -184,7 +182,7 @@ class DatasetDocumentSegmentApi(Resource): raise ProviderNotInitializeError(ex.description) segment_ids = request.args.getlist("segment_id") - document_indexing_cache_key = "document_{}_indexing".format(document.id) + document_indexing_cache_key = f"document_{document.id}_indexing" cache_result = redis_client.get(document_indexing_cache_key) if cache_result is not None: raise InvalidActionError("Document is being indexed, please try again later") @@ -365,37 +363,28 @@ class DatasetDocumentSegmentBatchImportApi(Resource): document = DocumentService.get_document(dataset_id, document_id) if not document: raise NotFound("Document not found.") - # get file from request - file = request.files["file"] - # check file - if "file" not in request.files: - raise NoFileUploadedError() - if len(request.files) > 1: - raise TooManyFilesError() + parser = reqparse.RequestParser() + parser.add_argument("upload_file_id", type=str, required=True, nullable=False, location="json") + args = parser.parse_args() + upload_file_id = args["upload_file_id"] + + upload_file = db.session.query(UploadFile).where(UploadFile.id == upload_file_id).first() + if not upload_file: + raise NotFound("UploadFile not found.") + # check file type - if not file.filename or not file.filename.lower().endswith(".csv"): + if not upload_file.name or not upload_file.name.lower().endswith(".csv"): raise ValueError("Invalid file type. Only CSV files are allowed") try: - # Skip the first row - df = pd.read_csv(file) - result = [] - for index, row in df.iterrows(): - if document.doc_form == "qa_model": - data = {"content": row.iloc[0], "answer": row.iloc[1]} - else: - data = {"content": row.iloc[0]} - result.append(data) - if len(result) == 0: - raise ValueError("The CSV file is empty.") # async job job_id = str(uuid.uuid4()) - indexing_cache_key = "segment_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"segment_batch_import_{str(job_id)}" # send batch add segments task redis_client.setnx(indexing_cache_key, "waiting") batch_create_segment_to_index_task.delay( - str(job_id), result, dataset_id, document_id, current_user.current_tenant_id, current_user.id + str(job_id), upload_file_id, dataset_id, document_id, current_user.current_tenant_id, current_user.id ) except Exception as e: return {"error": str(e)}, 500 @@ -406,7 +395,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource): @account_initialization_required def get(self, job_id): job_id = str(job_id) - indexing_cache_key = "segment_batch_import_{}".format(job_id) + indexing_cache_key = f"segment_batch_import_{job_id}" cache_result = redis_client.get(indexing_cache_key) if cache_result is None: raise ValueError("The job does not exist.") diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py index ffdf73c368..6d9f794307 100644 --- a/api/controllers/console/explore/installed_app.py +++ b/api/controllers/console/explore/installed_app.py @@ -74,7 +74,7 @@ class InstalledAppsListApi(Resource): ): res.append(installed_app) installed_app_list = res - logger.debug(f"installed_app_list: {installed_app_list}, user_id: {user_id}") + logger.debug("installed_app_list: %s, user_id: %s", installed_app_list, user_id) installed_app_list.sort( key=lambda app: ( diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index 822777604a..de95a9e7b0 100644 --- a/api/controllers/console/explore/message.py +++ 
b/api/controllers/console/explore/message.py @@ -5,7 +5,6 @@ from flask_restful import marshal_with, reqparse from flask_restful.inputs import int_range from werkzeug.exceptions import InternalServerError, NotFound -import services from controllers.console.app.error import ( AppMoreLikeThisDisabledError, CompletionRequestError, @@ -29,7 +28,11 @@ from models.model import AppMode from services.app_generate_service import AppGenerateService from services.errors.app import MoreLikeThisDisabledError from services.errors.conversation import ConversationNotExistsError -from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError +from services.errors.message import ( + FirstMessageNotExistsError, + MessageNotExistsError, + SuggestedQuestionsAfterAnswerDisabledError, +) from services.message_service import MessageService @@ -52,9 +55,9 @@ class MessageListApi(InstalledAppResource): return MessageService.pagination_by_first_id( app_model, current_user, args["conversation_id"], args["first_id"], args["limit"] ) - except services.errors.conversation.ConversationNotExistsError: + except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - except services.errors.message.FirstMessageNotExistsError: + except FirstMessageNotExistsError: raise NotFound("First Message Not Exists.") @@ -77,7 +80,7 @@ class MessageFeedbackApi(InstalledAppResource): rating=args.get("rating"), content=args.get("content"), ) - except services.errors.message.MessageNotExistsError: + except MessageNotExistsError: raise NotFound("Message Not Exists.") return {"result": "success"} diff --git a/api/controllers/console/version.py b/api/controllers/console/version.py index 447cc358f8..8237ea3cdc 100644 --- a/api/controllers/console/version.py +++ b/api/controllers/console/version.py @@ -34,7 +34,7 @@ class VersionApi(Resource): try: response = requests.get(check_update_url, {"current_version": args.get("current_version")}) except Exception as error: - logging.warning("Check update version error: {}.".format(str(error))) + logging.warning("Check update version error: %s.", str(error)) result["version"] = args.get("current_version") return result @@ -55,7 +55,7 @@ def _has_new_version(*, latest_version: str, current_version: str) -> bool: # Compare versions return latest > current except version.InvalidVersion: - logging.warning(f"Invalid version format: latest={latest_version}, current={current_version}") + logging.warning("Invalid version format: latest=%s, current=%s", latest_version, current_version) return False diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 5cd2e0cd2d..4d5357cd18 100644 --- a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -15,7 +15,7 @@ from controllers.console.auth.error import ( InvalidEmailError, InvalidTokenError, ) -from controllers.console.error import AccountNotFound, EmailSendIpLimitError +from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError from controllers.console.workspace.error import ( AccountAlreadyInitedError, CurrentPasswordIncorrectError, @@ -479,20 +479,27 @@ class ChangeEmailResetApi(Resource): parser.add_argument("token", type=str, required=True, nullable=False, location="json") args = parser.parse_args() + if AccountService.is_account_in_freeze(args["new_email"]): + raise AccountInFreezeError() + + if not AccountService.check_email_unique(args["new_email"]): + raise 
EmailAlreadyInUseError() + reset_data = AccountService.get_change_email_data(args["token"]) if not reset_data: raise InvalidTokenError() AccountService.revoke_change_email_token(args["token"]) - if not AccountService.check_email_unique(args["new_email"]): - raise EmailAlreadyInUseError() - old_email = reset_data.get("old_email", "") if current_user.email != old_email: raise AccountNotFound() - updated_account = AccountService.update_account(current_user, email=args["new_email"]) + updated_account = AccountService.update_account_email(current_user, email=args["new_email"]) + + AccountService.send_change_email_completed_notify_email( + email=args["new_email"], + ) return updated_account @@ -503,6 +510,8 @@ class CheckEmailUnique(Resource): parser = reqparse.RequestParser() parser.add_argument("email", type=email, required=True, location="json") args = parser.parse_args() + if AccountService.is_account_in_freeze(args["email"]): + raise AccountInFreezeError() if not AccountService.check_email_unique(args["email"]): raise EmailAlreadyInUseError() return {"result": "success"} diff --git a/api/controllers/console/workspace/models.py b/api/controllers/console/workspace/models.py index 37d0f6c764..514d1084c4 100644 --- a/api/controllers/console/workspace/models.py +++ b/api/controllers/console/workspace/models.py @@ -73,8 +73,9 @@ class DefaultModelApi(Resource): ) except Exception as ex: logging.exception( - f"Failed to update default model, model type: {model_setting['model_type']}," - f" model:{model_setting.get('model')}" + "Failed to update default model, model type: %s, model: %s", + model_setting["model_type"], + model_setting.get("model"), ) raise ex @@ -160,8 +161,10 @@ class ModelProviderModelApi(Resource): ) except CredentialsValidateFailedError as ex: logging.exception( - f"Failed to save model credentials, tenant_id: {tenant_id}," - f" model: {args.get('model')}, model_type: {args.get('model_type')}" + "Failed to save model credentials, tenant_id: %s, model: %s, model_type: %s", + tenant_id, + args.get("model"), + args.get("model_type"), ) raise ValueError(str(ex)) diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py index 595ae118ef..9b22c535f4 100644 --- a/api/controllers/service_api/app/annotation.py +++ b/api/controllers/service_api/app/annotation.py @@ -34,7 +34,7 @@ class AnnotationReplyActionStatusApi(Resource): @validate_app_token def get(self, app_model: App, job_id, action): job_id = str(job_id) - app_annotation_job_key = "{}_app_annotation_job_{}".format(action, str(job_id)) + app_annotation_job_key = f"{action}_app_annotation_job_{str(job_id)}" cache_result = redis_client.get(app_annotation_job_key) if cache_result is None: raise ValueError("The job does not exist.") @@ -42,7 +42,7 @@ class AnnotationReplyActionStatusApi(Resource): job_status = cache_result.decode() error_msg = "" if job_status == "error": - app_annotation_error_key = "{}_app_annotation_error_{}".format(action, str(job_id)) + app_annotation_error_key = f"{action}_app_annotation_error_{str(job_id)}" error_msg = redis_client.get(app_annotation_error_key).decode() return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200 diff --git a/api/controllers/service_api/app/completion.py b/api/controllers/service_api/app/completion.py index 7762672494..edc66cc5e9 100644 --- a/api/controllers/service_api/app/completion.py +++ b/api/controllers/service_api/app/completion.py @@ -47,6 +47,9 @@ class CompletionApi(Resource): 
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json") args = parser.parse_args() + external_trace_id = get_external_trace_id(request) + if external_trace_id: + args["external_trace_id"] = external_trace_id streaming = args["response_mode"] == "streaming" diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index d90fa2081f..a4f95cb1cb 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -15,7 +15,11 @@ from fields.message_fields import agent_thought_fields, feedback_fields from fields.raws import FilesContainedField from libs.helper import TimestampField, uuid_value from models.model import App, AppMode, EndUser -from services.errors.message import SuggestedQuestionsAfterAnswerDisabledError +from services.errors.message import ( + FirstMessageNotExistsError, + MessageNotExistsError, + SuggestedQuestionsAfterAnswerDisabledError, +) from services.message_service import MessageService @@ -65,7 +69,7 @@ class MessageListApi(Resource): ) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - except services.errors.message.FirstMessageNotExistsError: + except FirstMessageNotExistsError: raise NotFound("First Message Not Exists.") @@ -87,7 +91,7 @@ class MessageFeedbackApi(Resource): rating=args.get("rating"), content=args.get("content"), ) - except services.errors.message.MessageNotExistsError: + except MessageNotExistsError: raise NotFound("Message Not Exists.") return {"result": "success"} @@ -117,7 +121,7 @@ class MessageSuggestedApi(Resource): questions = MessageService.get_suggested_questions_after_answer( app_model=app_model, user=end_user, message_id=message_id, invoke_from=InvokeFrom.SERVICE_API ) - except services.errors.message.MessageNotExistsError: + except MessageNotExistsError: raise NotFound("Message Not Exists.") except SuggestedQuestionsAfterAnswerDisabledError: raise BadRequest("Suggested Questions Is Disabled.") diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index f2e1873601..7bb81cd0d3 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -4,7 +4,6 @@ from flask_restful import fields, marshal_with, reqparse from flask_restful.inputs import int_range from werkzeug.exceptions import InternalServerError, NotFound -import services from controllers.web import api from controllers.web.error import ( AppMoreLikeThisDisabledError, @@ -29,7 +28,11 @@ from models.model import AppMode from services.app_generate_service import AppGenerateService from services.errors.app import MoreLikeThisDisabledError from services.errors.conversation import ConversationNotExistsError -from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError +from services.errors.message import ( + FirstMessageNotExistsError, + MessageNotExistsError, + SuggestedQuestionsAfterAnswerDisabledError, +) from services.message_service import MessageService @@ -73,9 +76,9 @@ class MessageListApi(WebApiResource): return MessageService.pagination_by_first_id( app_model, end_user, args["conversation_id"], args["first_id"], args["limit"] ) - except services.errors.conversation.ConversationNotExistsError: + except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - except services.errors.message.FirstMessageNotExistsError: + except FirstMessageNotExistsError: raise NotFound("First Message Not Exists.") @@ -96,7 
+99,7 @@ class MessageFeedbackApi(WebApiResource): rating=args.get("rating"), content=args.get("content"), ) - except services.errors.message.MessageNotExistsError: + except MessageNotExistsError: raise NotFound("Message Not Exists.") return {"result": "success"} diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index 1f3c218d59..ad9b625350 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -280,7 +280,7 @@ class BaseAgentRunner(AppRunner): def create_agent_thought( self, message_id: str, message: str, tool_name: str, tool_input: str, messages_ids: list[str] - ) -> MessageAgentThought: + ) -> str: """ Create agent thought """ @@ -313,16 +313,15 @@ class BaseAgentRunner(AppRunner): db.session.add(thought) db.session.commit() - db.session.refresh(thought) + agent_thought_id = str(thought.id) + self.agent_thought_count += 1 db.session.close() - self.agent_thought_count += 1 - - return thought + return agent_thought_id def save_agent_thought( self, - agent_thought: MessageAgentThought, + agent_thought_id: str, tool_name: str | None, tool_input: Union[str, dict, None], thought: str | None, @@ -335,12 +334,9 @@ class BaseAgentRunner(AppRunner): """ Save agent thought """ - updated_agent_thought = ( - db.session.query(MessageAgentThought).where(MessageAgentThought.id == agent_thought.id).first() - ) - if not updated_agent_thought: + agent_thought = db.session.query(MessageAgentThought).where(MessageAgentThought.id == agent_thought_id).first() + if not agent_thought: raise ValueError("agent thought not found") - agent_thought = updated_agent_thought if thought: agent_thought.thought += thought @@ -355,7 +351,7 @@ class BaseAgentRunner(AppRunner): except Exception: tool_input = json.dumps(tool_input) - updated_agent_thought.tool_input = tool_input + agent_thought.tool_input = tool_input if observation: if isinstance(observation, dict): @@ -364,27 +360,27 @@ class BaseAgentRunner(AppRunner): except Exception: observation = json.dumps(observation) - updated_agent_thought.observation = observation + agent_thought.observation = observation if answer: agent_thought.answer = answer if messages_ids is not None and len(messages_ids) > 0: - updated_agent_thought.message_files = json.dumps(messages_ids) + agent_thought.message_files = json.dumps(messages_ids) if llm_usage: - updated_agent_thought.message_token = llm_usage.prompt_tokens - updated_agent_thought.message_price_unit = llm_usage.prompt_price_unit - updated_agent_thought.message_unit_price = llm_usage.prompt_unit_price - updated_agent_thought.answer_token = llm_usage.completion_tokens - updated_agent_thought.answer_price_unit = llm_usage.completion_price_unit - updated_agent_thought.answer_unit_price = llm_usage.completion_unit_price - updated_agent_thought.tokens = llm_usage.total_tokens - updated_agent_thought.total_price = llm_usage.total_price + agent_thought.message_token = llm_usage.prompt_tokens + agent_thought.message_price_unit = llm_usage.prompt_price_unit + agent_thought.message_unit_price = llm_usage.prompt_unit_price + agent_thought.answer_token = llm_usage.completion_tokens + agent_thought.answer_price_unit = llm_usage.completion_price_unit + agent_thought.answer_unit_price = llm_usage.completion_unit_price + agent_thought.tokens = llm_usage.total_tokens + agent_thought.total_price = llm_usage.total_price # check if tool labels is not empty - labels = updated_agent_thought.tool_labels or {} - tools = updated_agent_thought.tool.split(";") if 
updated_agent_thought.tool else [] + labels = agent_thought.tool_labels or {} + tools = agent_thought.tool.split(";") if agent_thought.tool else [] for tool in tools: if not tool: continue @@ -395,7 +391,7 @@ class BaseAgentRunner(AppRunner): else: labels[tool] = {"en_US": tool, "zh_Hans": tool} - updated_agent_thought.tool_labels_str = json.dumps(labels) + agent_thought.tool_labels_str = json.dumps(labels) if tool_invoke_meta is not None: if isinstance(tool_invoke_meta, dict): @@ -404,7 +400,7 @@ class BaseAgentRunner(AppRunner): except Exception: tool_invoke_meta = json.dumps(tool_invoke_meta) - updated_agent_thought.tool_meta_str = tool_invoke_meta + agent_thought.tool_meta_str = tool_invoke_meta db.session.commit() db.session.close() diff --git a/api/core/agent/cot_agent_runner.py b/api/core/agent/cot_agent_runner.py index 4979f63432..565fb42478 100644 --- a/api/core/agent/cot_agent_runner.py +++ b/api/core/agent/cot_agent_runner.py @@ -97,13 +97,13 @@ class CotAgentRunner(BaseAgentRunner, ABC): message_file_ids: list[str] = [] - agent_thought = self.create_agent_thought( + agent_thought_id = self.create_agent_thought( message_id=message.id, message="", tool_name="", tool_input="", messages_ids=message_file_ids ) if iteration_step > 1: self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) # recalc llm max tokens @@ -133,7 +133,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): # publish agent thought if it's first iteration if iteration_step == 1: self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) for chunk in react_chunks: @@ -168,7 +168,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): usage_dict["usage"] = LLMUsage.empty_usage() self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name=(scratchpad.action.action_name if scratchpad.action and not scratchpad.is_final() else ""), tool_input={scratchpad.action.action_name: scratchpad.action.action_input} if scratchpad.action else {}, tool_invoke_meta={}, @@ -181,7 +181,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): if not scratchpad.is_final(): self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) if not scratchpad.action: @@ -212,7 +212,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): scratchpad.agent_response = tool_invoke_response self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name=scratchpad.action.action_name, tool_input={scratchpad.action.action_name: scratchpad.action.action_input}, thought=scratchpad.thought or "", @@ -224,7 +224,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): ) self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) # update prompt tool message @@ -244,7 +244,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): # save agent thought self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name="", tool_input={}, tool_invoke_meta={}, diff --git 
a/api/core/agent/fc_agent_runner.py b/api/core/agent/fc_agent_runner.py index 5491689ece..4df71ce9de 100644 --- a/api/core/agent/fc_agent_runner.py +++ b/api/core/agent/fc_agent_runner.py @@ -80,7 +80,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): prompt_messages_tools = [] message_file_ids: list[str] = [] - agent_thought = self.create_agent_thought( + agent_thought_id = self.create_agent_thought( message_id=message.id, message="", tool_name="", tool_input="", messages_ids=message_file_ids ) @@ -114,7 +114,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): for chunk in chunks: if is_first_chunk: self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) is_first_chunk = False # check if there is any tool call @@ -172,7 +172,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): result.message.content = "" self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) yield LLMResultChunk( @@ -205,7 +205,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): # save thought self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name=tool_call_names, tool_input=tool_call_inputs, thought=response, @@ -216,7 +216,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): llm_usage=current_llm_usage, ) self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) final_answer += response + "\n" @@ -276,7 +276,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): if len(tool_responses) > 0: # save agent thought self.save_agent_thought( - agent_thought=agent_thought, + agent_thought_id=agent_thought_id, tool_name="", tool_input="", thought="", @@ -291,7 +291,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): messages_ids=message_file_ids, ) self.queue_manager.publish( - QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER + QueueAgentThoughtEvent(agent_thought_id=agent_thought_id), PublishFrom.APPLICATION_MANAGER ) # update prompt tool diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index 610a5bb278..52ae20ee16 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -600,5 +600,5 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: - logger.exception(f"Failed to process generate task pipeline, conversation_id: {conversation.id}") + logger.exception("Failed to process generate task pipeline, conversation_id: %s", conversation.id) raise e diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index dc27076a4d..abb8db34de 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -271,7 +271,7 @@ class AdvancedChatAppGenerateTaskPipeline: start_listener_time = time.time() yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id) except 
Exception: - logger.exception(f"Failed to listen audio message, task_id: {task_id}") + logger.exception("Failed to listen audio message, task_id: %s", task_id) break if tts_publisher: yield MessageAudioEndStreamResponse(audio="", task_id=task_id) diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index 7dd9904eeb..11c979765b 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -78,7 +78,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error raise GenerateTaskStoppedError() else: - logger.exception(f"Failed to handle response, conversation_id: {conversation.id}") + logger.exception("Failed to handle response, conversation_id: %s", conversation.id) raise e def _get_app_model_config(self, app_model: App, conversation: Optional[Conversation] = None) -> AppModelConfig: diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 4c36f63c71..22b0234604 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -483,7 +483,7 @@ class WorkflowAppGenerator(BaseAppGenerator): try: runner.run() except GenerateTaskStoppedError as e: - logger.warning(f"Task stopped: {str(e)}") + logger.warning("Task stopped: %s", str(e)) pass except InvokeAuthorizationError: queue_manager.publish_error( @@ -540,6 +540,6 @@ class WorkflowAppGenerator(BaseAppGenerator): raise GenerateTaskStoppedError() else: logger.exception( - f"Fails to process generate task pipeline, task_id: {application_generate_entity.task_id}" + "Fails to process generate task pipeline, task_id: %s", application_generate_entity.task_id ) raise e diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index e31a316c56..b1e9a340bd 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -246,7 +246,7 @@ class WorkflowAppGenerateTaskPipeline: else: yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id) except Exception: - logger.exception(f"Fails to get audio trunk, task_id: {task_id}") + logger.exception("Fails to get audio trunk, task_id: %s", task_id) break if tts_publisher: yield MessageAudioEndStreamResponse(audio="", task_id=task_id) diff --git a/api/core/app/features/annotation_reply/annotation_reply.py b/api/core/app/features/annotation_reply/annotation_reply.py index 54dc69302a..b829340401 100644 --- a/api/core/app/features/annotation_reply/annotation_reply.py +++ b/api/core/app/features/annotation_reply/annotation_reply.py @@ -83,7 +83,7 @@ class AnnotationReplyFeature: return annotation except Exception as e: - logger.warning(f"Query annotation failed, exception: {str(e)}.") + logger.warning("Query annotation failed, exception: %s.", str(e)) return None return None diff --git a/api/core/app/task_pipeline/message_cycle_manager.py b/api/core/app/task_pipeline/message_cycle_manager.py index 824da0b934..f0e9425e3f 100644 --- a/api/core/app/task_pipeline/message_cycle_manager.py +++ b/api/core/app/task_pipeline/message_cycle_manager.py @@ -97,7 +97,7 @@ class MessageCycleManager: conversation.name = name except Exception as e: if dify_config.DEBUG: - logging.exception(f"generate conversation name failed, conversation_id: {conversation_id}") + logging.exception("generate 
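Most of the remaining hunks in this file and the files below make the same change: f-string log messages become %-style templates with separate arguments. Formatting is then deferred until a handler actually emits the record, `logger.exception` still attaches the traceback, and the constant template is easier to group in log aggregation. A short sketch of the two styles, with placeholder IDs:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

task_id = "abc-123"  # placeholder value

# Eagerly formatted: the f-string is built even though DEBUG records are
# filtered out at this level.
logger.debug(f"Failed to listen audio message, task_id: {task_id}")

# Lazily formatted: arguments are interpolated only if the record is actually
# emitted, and the constant template groups cleanly in aggregation tools.
logger.debug("Failed to listen audio message, task_id: %s", task_id)

# Inside an except block, logger.exception() takes the same %-style arguments
# and appends the traceback automatically.
try:
    raise RuntimeError("boom")
except RuntimeError:
    logger.exception("Failed to process generate task pipeline, conversation_id: %s", "conv-42")
```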
conversation name failed, conversation_id: %s", conversation_id) pass db.session.merge(conversation) diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index af5c18e267..9aaa1f0b10 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -900,7 +900,7 @@ class ProviderConfiguration(BaseModel): credentials=copy_credentials, ) except Exception as ex: - logger.warning(f"get custom model schema failed, {ex}") + logger.warning("get custom model schema failed, %s", ex) continue if not custom_model_schema: @@ -1009,7 +1009,7 @@ class ProviderConfiguration(BaseModel): credentials=model_configuration.credentials, ) except Exception as ex: - logger.warning(f"get custom model schema failed, {ex}") + logger.warning("get custom model schema failed, %s", ex) continue if not custom_model_schema: diff --git a/api/core/extension/api_based_extension_requestor.py b/api/core/extension/api_based_extension_requestor.py index 3f4e20ec24..accccd8c40 100644 --- a/api/core/extension/api_based_extension_requestor.py +++ b/api/core/extension/api_based_extension_requestor.py @@ -22,7 +22,7 @@ class APIBasedExtensionRequestor: :param params: the request params :return: the response json """ - headers = {"Content-Type": "application/json", "Authorization": "Bearer {}".format(self.api_key)} + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"} url = self.api_endpoint @@ -49,8 +49,6 @@ class APIBasedExtensionRequestor: raise ValueError("request connection error") if response.status_code != 200: - raise ValueError( - "request error, status_code: {}, content: {}".format(response.status_code, response.text[:100]) - ) + raise ValueError(f"request error, status_code: {response.status_code}, content: {response.text[:100]}") return cast(dict, response.json()) diff --git a/api/core/extension/extensible.py b/api/core/extension/extensible.py index 06fdb089d4..557f7eb1ed 100644 --- a/api/core/extension/extensible.py +++ b/api/core/extension/extensible.py @@ -66,7 +66,7 @@ class Extensible: # Check for extension module file if (extension_name + ".py") not in file_names: - logging.warning(f"Missing {extension_name}.py file in {subdir_path}, Skip.") + logging.warning("Missing %s.py file in %s, Skip.", extension_name, subdir_path) continue # Check for builtin flag and position @@ -95,7 +95,7 @@ class Extensible: break if not extension_class: - logging.warning(f"Missing subclass of {cls.__name__} in {module_name}, Skip.") + logging.warning("Missing subclass of %s in %s, Skip.", cls.__name__, module_name) continue # Load schema if not builtin @@ -103,7 +103,7 @@ class Extensible: if not builtin: json_path = os.path.join(subdir_path, "schema.json") if not os.path.exists(json_path): - logging.warning(f"Missing schema.json file in {subdir_path}, Skip.") + logging.warning("Missing schema.json file in %s, Skip.", subdir_path) continue with open(json_path, encoding="utf-8") as f: diff --git a/api/core/external_data_tool/api/api.py b/api/core/external_data_tool/api/api.py index 2099a9e34c..d81f372d40 100644 --- a/api/core/external_data_tool/api/api.py +++ b/api/core/external_data_tool/api/api.py @@ -49,7 +49,7 @@ class ApiExternalDataTool(ExternalDataTool): """ # get params from config if not self.config: - raise ValueError("config is required, config: {}".format(self.config)) + raise ValueError(f"config is required, config: {self.config}") api_based_extension_id = 
self.config.get("api_based_extension_id") assert api_based_extension_id is not None, "api_based_extension_id is required" @@ -74,7 +74,7 @@ class ApiExternalDataTool(ExternalDataTool): # request api requestor = APIBasedExtensionRequestor(api_endpoint=api_based_extension.api_endpoint, api_key=api_key) except Exception as e: - raise ValueError("[External data tool] API query failed, variable: {}, error: {}".format(self.variable, e)) + raise ValueError(f"[External data tool] API query failed, variable: {self.variable}, error: {e}") response_json = requestor.request( point=APIBasedExtensionPoint.APP_EXTERNAL_DATA_TOOL_QUERY, @@ -90,7 +90,7 @@ class ApiExternalDataTool(ExternalDataTool): if not isinstance(response_json["result"], str): raise ValueError( - "[External data tool] API query failed, variable: {}, error: result is not string".format(self.variable) + f"[External data tool] API query failed, variable: {self.variable}, error: result is not string" ) return response_json["result"] diff --git a/api/core/helper/moderation.py b/api/core/helper/moderation.py index a324ac2767..86bac4119a 100644 --- a/api/core/helper/moderation.py +++ b/api/core/helper/moderation.py @@ -55,7 +55,7 @@ def check_moderation(tenant_id: str, model_config: ModelConfigWithCredentialsEnt if moderation_result is True: return True except Exception: - logger.exception(f"Fails to check moderation, provider_name: {provider_name}") + logger.exception("Fails to check moderation, provider_name: %s", provider_name) raise InvokeBadRequestError("Rate limit exceeded, please try again later.") return False diff --git a/api/core/helper/module_import_helper.py b/api/core/helper/module_import_helper.py index 9a041667e4..251309fa2c 100644 --- a/api/core/helper/module_import_helper.py +++ b/api/core/helper/module_import_helper.py @@ -30,7 +30,7 @@ def import_module_from_source(*, module_name: str, py_file_path: AnyStr, use_laz spec.loader.exec_module(module) return module except Exception as e: - logging.exception(f"Failed to load module {module_name} from script file '{py_file_path!r}'") + logging.exception("Failed to load module %s from script file '%s'", module_name, repr(py_file_path)) raise e diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py index 11f245812e..329527633c 100644 --- a/api/core/helper/ssrf_proxy.py +++ b/api/core/helper/ssrf_proxy.py @@ -73,10 +73,12 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): if response.status_code not in STATUS_FORCELIST: return response else: - logging.warning(f"Received status code {response.status_code} for URL {url} which is in the force list") + logging.warning( + "Received status code %s for URL %s which is in the force list", response.status_code, url + ) except httpx.RequestError as e: - logging.warning(f"Request to URL {url} failed on attempt {retries + 1}: {e}") + logging.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e) if max_retries == 0: raise diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index fc5d0547fc..2387658bb6 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -84,14 +84,14 @@ class IndexingRunner: documents=documents, ) except DocumentIsPausedError: - raise DocumentIsPausedError("Document paused, document id: {}".format(dataset_document.id)) + raise DocumentIsPausedError(f"Document paused, document id: {dataset_document.id}") except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = 
str(e.description) dataset_document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) db.session.commit() except ObjectDeletedError: - logging.warning("Document deleted, document id: {}".format(dataset_document.id)) + logging.warning("Document deleted, document id: %s", dataset_document.id) except Exception as e: logging.exception("consume document failed") dataset_document.indexing_status = "error" @@ -147,7 +147,7 @@ class IndexingRunner: index_processor=index_processor, dataset=dataset, dataset_document=dataset_document, documents=documents ) except DocumentIsPausedError: - raise DocumentIsPausedError("Document paused, document id: {}".format(dataset_document.id)) + raise DocumentIsPausedError(f"Document paused, document id: {dataset_document.id}") except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) @@ -222,7 +222,7 @@ class IndexingRunner: index_processor=index_processor, dataset=dataset, dataset_document=dataset_document, documents=documents ) except DocumentIsPausedError: - raise DocumentIsPausedError("Document paused, document id: {}".format(dataset_document.id)) + raise DocumentIsPausedError(f"Document paused, document id: {dataset_document.id}") except ProviderTokenNotInitError as e: dataset_document.indexing_status = "error" dataset_document.error = str(e.description) @@ -324,7 +324,8 @@ class IndexingRunner: except Exception: logging.exception( "Delete image_files failed while indexing_estimate, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) @@ -649,7 +650,7 @@ class IndexingRunner: @staticmethod def _check_document_paused_status(document_id: str): - indexing_cache_key = "document_{}_is_paused".format(document_id) + indexing_cache_key = f"document_{document_id}_is_paused" result = redis_client.get(indexing_cache_key) if result: raise DocumentIsPausedError() diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index 331ac933c8..47e5a79160 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -125,16 +125,13 @@ class LLMGenerator: return questions @classmethod - def generate_rule_config( - cls, tenant_id: str, instruction: str, model_config: dict, no_variable: bool, rule_config_max_tokens: int = 512 - ) -> dict: + def generate_rule_config(cls, tenant_id: str, instruction: str, model_config: dict, no_variable: bool) -> dict: output_parser = RuleConfigGeneratorOutputParser() error = "" error_step = "" rule_config = {"prompt": "", "variables": [], "opening_statement": "", "error": ""} - model_parameters = {"max_tokens": rule_config_max_tokens, "temperature": 0.01} - + model_parameters = model_config.get("completion_params", {}) if no_variable: prompt_template = PromptTemplateParser(WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE) @@ -170,7 +167,7 @@ class LLMGenerator: error = str(e) error_step = "generate rule config" except Exception as e: - logging.exception(f"Failed to generate rule config, model: {model_config.get('name')}") + logging.exception("Failed to generate rule config, model: %s", model_config.get("name")) rule_config["error"] = str(e) rule_config["error"] = f"Failed to {error_step}. 
Error: {error}" if error else "" @@ -267,7 +264,7 @@ class LLMGenerator: error_step = "generate conversation opener" except Exception as e: - logging.exception(f"Failed to generate rule config, model: {model_config.get('name')}") + logging.exception("Failed to generate rule config, model: %s", model_config.get("name")) rule_config["error"] = str(e) rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else "" @@ -276,12 +273,7 @@ class LLMGenerator: @classmethod def generate_code( - cls, - tenant_id: str, - instruction: str, - model_config: dict, - code_language: str = "javascript", - max_tokens: int = 1000, + cls, tenant_id: str, instruction: str, model_config: dict, code_language: str = "javascript" ) -> dict: if code_language == "python": prompt_template = PromptTemplateParser(PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE) @@ -305,8 +297,7 @@ class LLMGenerator: ) prompt_messages = [UserPromptMessage(content=prompt)] - model_parameters = {"max_tokens": max_tokens, "temperature": 0.01} - + model_parameters = model_config.get("completion_params", {}) try: response = cast( LLMResult, @@ -323,7 +314,7 @@ class LLMGenerator: return {"code": "", "language": code_language, "error": f"Failed to generate code. Error: {error}"} except Exception as e: logging.exception( - f"Failed to invoke LLM model, model: {model_config.get('name')}, language: {code_language}" + "Failed to invoke LLM model, model: %s, language: %s", model_config.get("name"), code_language ) return {"code": "", "language": code_language, "error": f"An unexpected error occurred: {str(e)}"} @@ -395,5 +386,5 @@ class LLMGenerator: error = str(e) return {"output": "", "error": f"Failed to generate JSON Schema. Error: {error}"} except Exception as e: - logging.exception(f"Failed to invoke LLM model, model: {model_config.get('name')}") + logging.exception("Failed to invoke LLM model, model: %s", model_config.get("name")) return {"output": "", "error": f"An unexpected error occurred: {str(e)}"} diff --git a/api/core/mcp/client/sse_client.py b/api/core/mcp/client/sse_client.py index 91debcc8f9..4226e77f7e 100644 --- a/api/core/mcp/client/sse_client.py +++ b/api/core/mcp/client/sse_client.py @@ -88,7 +88,7 @@ class SSETransport: status_queue: Queue to put status updates. """ endpoint_url = urljoin(self.url, sse_data) - logger.info(f"Received endpoint URL: {endpoint_url}") + logger.info("Received endpoint URL: %s", endpoint_url) if not self._validate_endpoint_url(endpoint_url): error_msg = f"Endpoint origin does not match connection origin: {endpoint_url}" @@ -107,7 +107,7 @@ class SSETransport: """ try: message = types.JSONRPCMessage.model_validate_json(sse_data) - logger.debug(f"Received server message: {message}") + logger.debug("Received server message: %s", message) session_message = SessionMessage(message) read_queue.put(session_message) except Exception as exc: @@ -128,7 +128,7 @@ class SSETransport: case "message": self._handle_message_event(sse.data, read_queue) case _: - logger.warning(f"Unknown SSE event: {sse.event}") + logger.warning("Unknown SSE event: %s", sse.event) def sse_reader(self, event_source, read_queue: ReadQueue, status_queue: StatusQueue) -> None: """Read and process SSE events. 
@@ -142,7 +142,7 @@ class SSETransport: for sse in event_source.iter_sse(): self._handle_sse_event(sse, read_queue, status_queue) except httpx.ReadError as exc: - logger.debug(f"SSE reader shutting down normally: {exc}") + logger.debug("SSE reader shutting down normally: %s", exc) except Exception as exc: read_queue.put(exc) finally: @@ -165,7 +165,7 @@ class SSETransport: ), ) response.raise_for_status() - logger.debug(f"Client message sent successfully: {response.status_code}") + logger.debug("Client message sent successfully: %s", response.status_code) def post_writer(self, client: httpx.Client, endpoint_url: str, write_queue: WriteQueue) -> None: """Handle writing messages to the server. @@ -190,7 +190,7 @@ class SSETransport: except queue.Empty: continue except httpx.ReadError as exc: - logger.debug(f"Post writer shutting down normally: {exc}") + logger.debug("Post writer shutting down normally: %s", exc) except Exception as exc: logger.exception("Error writing messages") write_queue.put(exc) @@ -326,7 +326,7 @@ def send_message(http_client: httpx.Client, endpoint_url: str, session_message: ), ) response.raise_for_status() - logger.debug(f"Client message sent successfully: {response.status_code}") + logger.debug("Client message sent successfully: %s", response.status_code) except Exception as exc: logger.exception("Error sending message") raise @@ -349,13 +349,13 @@ def read_messages( if sse.event == "message": try: message = types.JSONRPCMessage.model_validate_json(sse.data) - logger.debug(f"Received server message: {message}") + logger.debug("Received server message: %s", message) yield SessionMessage(message) except Exception as exc: logger.exception("Error parsing server message") yield exc else: - logger.warning(f"Unknown SSE event: {sse.event}") + logger.warning("Unknown SSE event: %s", sse.event) except Exception as exc: logger.exception("Error reading SSE messages") yield exc diff --git a/api/core/mcp/client/streamable_client.py b/api/core/mcp/client/streamable_client.py index fbd8d05f9e..ca414ebb93 100644 --- a/api/core/mcp/client/streamable_client.py +++ b/api/core/mcp/client/streamable_client.py @@ -129,7 +129,7 @@ class StreamableHTTPTransport: new_session_id = response.headers.get(MCP_SESSION_ID) if new_session_id: self.session_id = new_session_id - logger.info(f"Received session ID: {self.session_id}") + logger.info("Received session ID: %s", self.session_id) def _handle_sse_event( self, @@ -142,7 +142,7 @@ class StreamableHTTPTransport: if sse.event == "message": try: message = JSONRPCMessage.model_validate_json(sse.data) - logger.debug(f"SSE message: {message}") + logger.debug("SSE message: %s", message) # If this is a response and we have original_request_id, replace it if original_request_id is not None and isinstance(message.root, JSONRPCResponse | JSONRPCError): @@ -168,7 +168,7 @@ class StreamableHTTPTransport: logger.debug("Received ping event") return False else: - logger.warning(f"Unknown SSE event: {sse.event}") + logger.warning("Unknown SSE event: %s", sse.event) return False def handle_get_stream( @@ -197,7 +197,7 @@ class StreamableHTTPTransport: self._handle_sse_event(sse, server_to_client_queue) except Exception as exc: - logger.debug(f"GET stream error (non-fatal): {exc}") + logger.debug("GET stream error (non-fatal): %s", exc) def _handle_resumption_request(self, ctx: RequestContext) -> None: """Handle a resumption request using GET with SSE.""" @@ -352,7 +352,7 @@ class StreamableHTTPTransport: # Check if this is a resumption request is_resumption = 
bool(metadata and metadata.resumption_token) - logger.debug(f"Sending client message: {message}") + logger.debug("Sending client message: %s", message) # Handle initialized notification if self._is_initialized_notification(message): @@ -389,9 +389,9 @@ class StreamableHTTPTransport: if response.status_code == 405: logger.debug("Server does not allow session termination") elif response.status_code != 200: - logger.warning(f"Session termination failed: {response.status_code}") + logger.warning("Session termination failed: %s", response.status_code) except Exception as exc: - logger.warning(f"Session termination failed: {exc}") + logger.warning("Session termination failed: %s", exc) def get_session_id(self) -> str | None: """Get the current session ID.""" diff --git a/api/core/mcp/mcp_client.py b/api/core/mcp/mcp_client.py index 5fe52c008a..875d13de05 100644 --- a/api/core/mcp/mcp_client.py +++ b/api/core/mcp/mcp_client.py @@ -75,7 +75,7 @@ class MCPClient: self.connect_server(client_factory, method_name) else: try: - logger.debug(f"Not supported method {method_name} found in URL path, trying default 'mcp' method.") + logger.debug("Not supported method %s found in URL path, trying default 'mcp' method.", method_name) self.connect_server(sse_client, "sse") except MCPConnectionError: logger.debug("MCP connection failed with 'sse', falling back to 'mcp' method.") diff --git a/api/core/mcp/session/base_session.py b/api/core/mcp/session/base_session.py index 7734b8fdd9..3b6c9a7424 100644 --- a/api/core/mcp/session/base_session.py +++ b/api/core/mcp/session/base_session.py @@ -368,7 +368,7 @@ class BaseSession( self._handle_incoming(notification) except Exception as e: # For other validation errors, log and continue - logging.warning(f"Failed to validate notification: {e}. Message was: {message.message.root}") + logging.warning("Failed to validate notification: %s. 
Message was: %s", e, message.message.root) else: # Response or error response_queue = self._response_streams.get(message.message.root.id) if response_queue is not None: diff --git a/api/core/model_manager.py b/api/core/model_manager.py index 4886ffe244..51af3d1877 100644 --- a/api/core/model_manager.py +++ b/api/core/model_manager.py @@ -535,9 +535,19 @@ class LBModelManager: if dify_config.DEBUG: logger.info( - f"Model LB\nid: {config.id}\nname:{config.name}\n" - f"tenant_id: {self._tenant_id}\nprovider: {self._provider}\n" - f"model_type: {self._model_type.value}\nmodel: {self._model}" + """Model LB +id: %s +name:%s +tenant_id: %s +provider: %s +model_type: %s +model: %s""", + config.id, + config.name, + self._tenant_id, + self._provider, + self._model_type.value, + self._model, ) return config diff --git a/api/core/model_runtime/model_providers/__base/large_language_model.py b/api/core/model_runtime/model_providers/__base/large_language_model.py index e2cc576f83..ce378b443d 100644 --- a/api/core/model_runtime/model_providers/__base/large_language_model.py +++ b/api/core/model_runtime/model_providers/__base/large_language_model.py @@ -440,7 +440,9 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_before_invoke failed with error {e}") + logger.warning( + "Callback %s on_before_invoke failed with error %s", callback.__class__.__name__, e + ) def _trigger_new_chunk_callbacks( self, @@ -487,7 +489,7 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_new_chunk failed with error {e}") + logger.warning("Callback %s on_new_chunk failed with error %s", callback.__class__.__name__, e) def _trigger_after_invoke_callbacks( self, @@ -535,7 +537,9 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_after_invoke failed with error {e}") + logger.warning( + "Callback %s on_after_invoke failed with error %s", callback.__class__.__name__, e + ) def _trigger_invoke_error_callbacks( self, @@ -583,4 +587,6 @@ class LargeLanguageModel(AIModel): if callback.raise_error: raise e else: - logger.warning(f"Callback {callback.__class__.__name__} on_invoke_error failed with error {e}") + logger.warning( + "Callback %s on_invoke_error failed with error %s", callback.__class__.__name__, e + ) diff --git a/api/core/moderation/output_moderation.py b/api/core/moderation/output_moderation.py index 2ec315417f..b39db4b7ff 100644 --- a/api/core/moderation/output_moderation.py +++ b/api/core/moderation/output_moderation.py @@ -136,6 +136,6 @@ class OutputModeration(BaseModel): result: ModerationOutputsResult = moderation_factory.moderation_for_outputs(moderation_buffer) return result except Exception as e: - logger.exception(f"Moderation Output error, app_id: {app_id}") + logger.exception("Moderation Output error, app_id: %s", app_id) return None diff --git a/api/core/ops/aliyun_trace/aliyun_trace.py b/api/core/ops/aliyun_trace/aliyun_trace.py index cf367efdf0..06050619e9 100644 --- a/api/core/ops/aliyun_trace/aliyun_trace.py +++ b/api/core/ops/aliyun_trace/aliyun_trace.py @@ -10,6 +10,7 @@ from sqlalchemy.orm import Session, sessionmaker from core.ops.aliyun_trace.data_exporter.traceclient import ( TraceClient, convert_datetime_to_nanoseconds, + convert_string_to_id, convert_to_span_id, convert_to_trace_id, generate_span_id, @@ -97,12 +98,13 @@ class 
AliyunDataTrace(BaseTraceInstance): try: return self.trace_client.get_project_url() except Exception as e: - logger.info(f"Aliyun get run url failed: {str(e)}", exc_info=True) + logger.info("Aliyun get run url failed: %s", str(e), exc_info=True) raise ValueError(f"Aliyun get run url failed: {str(e)}") def workflow_trace(self, trace_info: WorkflowTraceInfo): - external_trace_id = trace_info.metadata.get("external_trace_id") - trace_id = external_trace_id or convert_to_trace_id(trace_info.workflow_run_id) + trace_id = convert_to_trace_id(trace_info.workflow_run_id) + if trace_info.trace_id: + trace_id = convert_string_to_id(trace_info.trace_id) workflow_span_id = convert_to_span_id(trace_info.workflow_run_id, "workflow") self.add_workflow_span(trace_id, workflow_span_id, trace_info) @@ -130,6 +132,9 @@ class AliyunDataTrace(BaseTraceInstance): status = Status(StatusCode.ERROR, trace_info.error) trace_id = convert_to_trace_id(message_id) + if trace_info.trace_id: + trace_id = convert_string_to_id(trace_info.trace_id) + message_span_id = convert_to_span_id(message_id, "message") message_span = SpanData( trace_id=trace_id, @@ -139,7 +144,7 @@ class AliyunDataTrace(BaseTraceInstance): start_time=convert_datetime_to_nanoseconds(trace_info.start_time), end_time=convert_datetime_to_nanoseconds(trace_info.end_time), attributes={ - GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), + GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "", GEN_AI_USER_ID: str(user_id), GEN_AI_SPAN_KIND: GenAISpanKind.CHAIN.value, GEN_AI_FRAMEWORK: "dify", @@ -161,12 +166,12 @@ class AliyunDataTrace(BaseTraceInstance): start_time=convert_datetime_to_nanoseconds(trace_info.start_time), end_time=convert_datetime_to_nanoseconds(trace_info.end_time), attributes={ - GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), + GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "", GEN_AI_USER_ID: str(user_id), GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value, GEN_AI_FRAMEWORK: "dify", - GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name", ""), - GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider", ""), + GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "", + GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "", GEN_AI_USAGE_INPUT_TOKENS: str(trace_info.message_tokens), GEN_AI_USAGE_OUTPUT_TOKENS: str(trace_info.answer_tokens), GEN_AI_USAGE_TOTAL_TOKENS: str(trace_info.total_tokens), @@ -186,9 +191,13 @@ class AliyunDataTrace(BaseTraceInstance): return message_id = trace_info.message_id + trace_id = convert_to_trace_id(message_id) + if trace_info.trace_id: + trace_id = convert_string_to_id(trace_info.trace_id) + documents_data = extract_retrieval_documents(trace_info.documents) dataset_retrieval_span = SpanData( - trace_id=convert_to_trace_id(message_id), + trace_id=trace_id, parent_span_id=convert_to_span_id(message_id, "message"), span_id=generate_span_id(), name="dataset_retrieval", @@ -214,8 +223,12 @@ class AliyunDataTrace(BaseTraceInstance): if trace_info.error: status = Status(StatusCode.ERROR, trace_info.error) + trace_id = convert_to_trace_id(message_id) + if trace_info.trace_id: + trace_id = convert_string_to_id(trace_info.trace_id) + tool_span = SpanData( - trace_id=convert_to_trace_id(message_id), + trace_id=trace_id, parent_span_id=convert_to_span_id(message_id, "message"), span_id=generate_span_id(), name=trace_info.tool_name, @@ -286,7 +299,7 @@ class AliyunDataTrace(BaseTraceInstance): node_span = self.build_workflow_task_span(trace_id, 
workflow_span_id, trace_info, node_execution) return node_span except Exception as e: - logging.debug(f"Error occurred in build_workflow_node_span: {e}", exc_info=True) + logging.debug("Error occurred in build_workflow_node_span: %s", e, exc_info=True) return None def get_workflow_node_status(self, node_execution: WorkflowNodeExecution) -> Status: @@ -386,14 +399,14 @@ class AliyunDataTrace(BaseTraceInstance): GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "", GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value, GEN_AI_FRAMEWORK: "dify", - GEN_AI_MODEL_NAME: process_data.get("model_name", ""), - GEN_AI_SYSTEM: process_data.get("model_provider", ""), + GEN_AI_MODEL_NAME: process_data.get("model_name") or "", + GEN_AI_SYSTEM: process_data.get("model_provider") or "", GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)), GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)), GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)), GEN_AI_PROMPT: json.dumps(process_data.get("prompts", []), ensure_ascii=False), GEN_AI_COMPLETION: str(outputs.get("text", "")), - GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason", ""), + GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason") or "", INPUT_VALUE: json.dumps(process_data.get("prompts", []), ensure_ascii=False), OUTPUT_VALUE: str(outputs.get("text", "")), }, @@ -421,7 +434,7 @@ class AliyunDataTrace(BaseTraceInstance): GEN_AI_USER_ID: str(user_id), GEN_AI_SPAN_KIND: GenAISpanKind.CHAIN.value, GEN_AI_FRAMEWORK: "dify", - INPUT_VALUE: trace_info.workflow_run_inputs.get("sys.query", ""), + INPUT_VALUE: trace_info.workflow_run_inputs.get("sys.query") or "", OUTPUT_VALUE: json.dumps(trace_info.workflow_run_outputs, ensure_ascii=False), }, status=status, @@ -451,8 +464,13 @@ class AliyunDataTrace(BaseTraceInstance): status: Status = Status(StatusCode.OK) if trace_info.error: status = Status(StatusCode.ERROR, trace_info.error) + + trace_id = convert_to_trace_id(message_id) + if trace_info.trace_id: + trace_id = convert_string_to_id(trace_info.trace_id) + suggested_question_span = SpanData( - trace_id=convert_to_trace_id(message_id), + trace_id=trace_id, parent_span_id=convert_to_span_id(message_id, "message"), span_id=convert_to_span_id(message_id, "suggested_question"), name="suggested_question", @@ -461,8 +479,8 @@ class AliyunDataTrace(BaseTraceInstance): attributes={ GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value, GEN_AI_FRAMEWORK: "dify", - GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name", ""), - GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider", ""), + GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "", + GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "", GEN_AI_PROMPT: json.dumps(trace_info.inputs, ensure_ascii=False), GEN_AI_COMPLETION: json.dumps(trace_info.suggested_question, ensure_ascii=False), INPUT_VALUE: json.dumps(trace_info.inputs, ensure_ascii=False), diff --git a/api/core/ops/aliyun_trace/data_exporter/traceclient.py b/api/core/ops/aliyun_trace/data_exporter/traceclient.py index ba5ac3f420..bd19c8a503 100644 --- a/api/core/ops/aliyun_trace/data_exporter/traceclient.py +++ b/api/core/ops/aliyun_trace/data_exporter/traceclient.py @@ -69,10 +69,10 @@ class TraceClient: if response.status_code == 405: return True else: - logger.debug(f"AliyunTrace API check failed: Unexpected status code: {response.status_code}") + logger.debug("AliyunTrace API check failed: Unexpected status code: %s", response.status_code) return False except 
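The Aliyun trace hunks above also change `trace_info.metadata.get("conversation_id", "")` to `trace_info.metadata.get("conversation_id") or ""` (and likewise for the model name, provider, and finish reason). The difference matters because `dict.get`'s default is only used when the key is missing, not when the stored value is `None`:

```python
metadata = {"conversation_id": None}

print(metadata.get("conversation_id", ""))    # None -> not a string, breaks span attributes
print(metadata.get("conversation_id") or "")  # ""   -> always a string

# Note: `or` also replaces other falsy values (e.g. 0 would become ""), which
# is acceptable here because these attributes are expected to be strings.
```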
requests.exceptions.RequestException as e: - logger.debug(f"AliyunTrace API check failed: {str(e)}") + logger.debug("AliyunTrace API check failed: %s", str(e)) raise ValueError(f"AliyunTrace API check failed: {str(e)}") def get_project_url(self): @@ -109,7 +109,7 @@ class TraceClient: try: self.exporter.export(spans_to_export) except Exception as e: - logger.debug(f"Error exporting spans: {e}") + logger.debug("Error exporting spans: %s", e) def shutdown(self): with self.condition: @@ -181,15 +181,21 @@ def convert_to_trace_id(uuid_v4: Optional[str]) -> int: raise ValueError(f"Invalid UUID input: {e}") +def convert_string_to_id(string: Optional[str]) -> int: + if not string: + return generate_span_id() + hash_bytes = hashlib.sha256(string.encode("utf-8")).digest() + id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False) + return id + + def convert_to_span_id(uuid_v4: Optional[str], span_type: str) -> int: try: uuid_obj = uuid.UUID(uuid_v4) except Exception as e: raise ValueError(f"Invalid UUID input: {e}") combined_key = f"{uuid_obj.hex}-{span_type}" - hash_bytes = hashlib.sha256(combined_key.encode("utf-8")).digest() - span_id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False) - return span_id + return convert_string_to_id(combined_key) def convert_datetime_to_nanoseconds(start_time_a: Optional[datetime]) -> Optional[int]: diff --git a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py index 1b72a4775a..a20f2485c8 100644 --- a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py +++ b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py @@ -77,10 +77,10 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra # Create a named tracer instead of setting the global provider tracer_name = f"arize_phoenix_tracer_{arize_phoenix_config.project}" - logger.info(f"[Arize/Phoenix] Created tracer with name: {tracer_name}") + logger.info("[Arize/Phoenix] Created tracer with name: %s", tracer_name) return cast(trace_sdk.Tracer, provider.get_tracer(tracer_name)), processor except Exception as e: - logger.error(f"[Arize/Phoenix] Failed to setup the tracer: {str(e)}", exc_info=True) + logger.error("[Arize/Phoenix] Failed to setup the tracer: %s", str(e), exc_info=True) raise @@ -91,16 +91,21 @@ def datetime_to_nanos(dt: Optional[datetime]) -> int: return int(dt.timestamp() * 1_000_000_000) -def uuid_to_trace_id(string: Optional[str]) -> int: - """Convert UUID string to a valid trace ID (16-byte integer).""" +def string_to_trace_id128(string: Optional[str]) -> int: + """ + Convert any input string into a stable 128-bit integer trace ID. + + This uses SHA-256 hashing and takes the first 16 bytes (128 bits) of the digest. + It's suitable for generating consistent, unique identifiers from strings. 
+ """ if string is None: string = "" hash_object = hashlib.sha256(string.encode()) - # Take the first 16 bytes (128 bits) of the hash + # Take the first 16 bytes (128 bits) of the hash digest digest = hash_object.digest()[:16] - # Convert to integer (128 bits) + # Convert to a 128-bit integer return int.from_bytes(digest, byteorder="big") @@ -120,7 +125,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") def trace(self, trace_info: BaseTraceInfo): - logger.info(f"[Arize/Phoenix] Trace: {trace_info}") + logger.info("[Arize/Phoenix] Trace: %s", trace_info) try: if isinstance(trace_info, WorkflowTraceInfo): self.workflow_trace(trace_info) @@ -138,7 +143,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) except Exception as e: - logger.error(f"[Arize/Phoenix] Error in the trace: {str(e)}", exc_info=True) + logger.error("[Arize/Phoenix] Error in the trace: %s", str(e), exc_info=True) raise def workflow_trace(self, trace_info: WorkflowTraceInfo): @@ -153,8 +158,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } workflow_metadata.update(trace_info.metadata) - external_trace_id = trace_info.metadata.get("external_trace_id") - trace_id = external_trace_id or uuid_to_trace_id(trace_info.workflow_run_id) + trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.workflow_run_id) span_id = RandomIdGenerator().generate_span_id() context = SpanContext( trace_id=trace_id, @@ -310,7 +314,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): SpanAttributes.SESSION_ID: trace_info.message_data.conversation_id, } - trace_id = uuid_to_trace_id(trace_info.message_id) + trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.message_id) message_span_id = RandomIdGenerator().generate_span_id() span_context = SpanContext( trace_id=trace_id, @@ -406,7 +410,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } metadata.update(trace_info.metadata) - trace_id = uuid_to_trace_id(trace_info.message_id) + trace_id = string_to_trace_id128(trace_info.message_id) span_id = RandomIdGenerator().generate_span_id() context = SpanContext( trace_id=trace_id, @@ -468,7 +472,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } metadata.update(trace_info.metadata) - trace_id = uuid_to_trace_id(trace_info.message_id) + trace_id = string_to_trace_id128(trace_info.message_id) span_id = RandomIdGenerator().generate_span_id() context = SpanContext( trace_id=trace_id, @@ -521,7 +525,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } metadata.update(trace_info.metadata) - trace_id = uuid_to_trace_id(trace_info.message_id) + trace_id = string_to_trace_id128(trace_info.message_id) span_id = RandomIdGenerator().generate_span_id() context = SpanContext( trace_id=trace_id, @@ -568,9 +572,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): "tool_config": json.dumps(trace_info.tool_config, ensure_ascii=False), } - trace_id = uuid_to_trace_id(trace_info.message_id) + trace_id = string_to_trace_id128(trace_info.message_id) tool_span_id = RandomIdGenerator().generate_span_id() - logger.info(f"[Arize/Phoenix] Creating tool trace with trace_id: {trace_id}, span_id: {tool_span_id}") + logger.info("[Arize/Phoenix] Creating tool trace with trace_id: %s, span_id: %s", trace_id, tool_span_id) # Create span context with the same trace_id as the parent # todo: Create with the appropriate parent span context, so that the tool span is @@ -629,7 +633,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } 
metadata.update(trace_info.metadata) - trace_id = uuid_to_trace_id(trace_info.message_id) + trace_id = string_to_trace_id128(trace_info.message_id) span_id = RandomIdGenerator().generate_span_id() context = SpanContext( trace_id=trace_id, @@ -673,7 +677,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): span.set_attribute("test", "true") return True except Exception as e: - logger.info(f"[Arize/Phoenix] API check failed: {str(e)}", exc_info=True) + logger.info("[Arize/Phoenix] API check failed: %s", str(e), exc_info=True) raise ValueError(f"[Arize/Phoenix] API check failed: {str(e)}") def get_project_url(self): @@ -683,7 +687,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): else: return f"{self.arize_phoenix_config.endpoint}/projects/" except Exception as e: - logger.info(f"[Arize/Phoenix] Get run url failed: {str(e)}", exc_info=True) + logger.info("[Arize/Phoenix] Get run url failed: %s", str(e), exc_info=True) raise ValueError(f"[Arize/Phoenix] Get run url failed: {str(e)}") def _get_workflow_nodes(self, workflow_run_id: str): diff --git a/api/core/ops/entities/config_entity.py b/api/core/ops/entities/config_entity.py index 89ff0cfded..626782cee5 100644 --- a/api/core/ops/entities/config_entity.py +++ b/api/core/ops/entities/config_entity.py @@ -102,7 +102,7 @@ class LangfuseConfig(BaseTracingConfig): @field_validator("host") @classmethod def host_validator(cls, v, info: ValidationInfo): - return cls.validate_endpoint_url(v, "https://api.langfuse.com") + return validate_url_with_path(v, "https://api.langfuse.com") class LangSmithConfig(BaseTracingConfig): diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py index 151fa2aaf4..3bad5c92fb 100644 --- a/api/core/ops/entities/trace_entity.py +++ b/api/core/ops/entities/trace_entity.py @@ -14,6 +14,7 @@ class BaseTraceInfo(BaseModel): start_time: Optional[datetime] = None end_time: Optional[datetime] = None metadata: dict[str, Any] + trace_id: Optional[str] = None @field_validator("inputs", "outputs") @classmethod diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index f4a59ef3a7..3a03d9f4fe 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -67,14 +67,13 @@ class LangFuseDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) def workflow_trace(self, trace_info: WorkflowTraceInfo): - external_trace_id = trace_info.metadata.get("external_trace_id") - trace_id = external_trace_id or trace_info.workflow_run_id + trace_id = trace_info.trace_id or trace_info.workflow_run_id user_id = trace_info.metadata.get("user_id") metadata = trace_info.metadata metadata["workflow_app_log_id"] = trace_info.workflow_app_log_id if trace_info.message_id: - trace_id = external_trace_id or trace_info.message_id + trace_id = trace_info.trace_id or trace_info.message_id name = TraceTaskName.MESSAGE_TRACE.value trace_data = LangfuseTrace( id=trace_id, @@ -250,8 +249,10 @@ class LangFuseDataTrace(BaseTraceInstance): user_id = end_user_data.session_id metadata["user_id"] = user_id + trace_id = trace_info.trace_id or message_id + trace_data = LangfuseTrace( - id=message_id, + id=trace_id, user_id=user_id, name=TraceTaskName.MESSAGE_TRACE.value, input={ @@ -285,7 +286,7 @@ class LangFuseDataTrace(BaseTraceInstance): langfuse_generation_data = LangfuseGeneration( name="llm", - trace_id=message_id, + trace_id=trace_id, start_time=trace_info.start_time, end_time=trace_info.end_time, 
model=message_data.model_id, @@ -311,7 +312,7 @@ class LangFuseDataTrace(BaseTraceInstance): "preset_response": trace_info.preset_response, "inputs": trace_info.inputs, }, - trace_id=trace_info.message_id, + trace_id=trace_info.trace_id or trace_info.message_id, start_time=trace_info.start_time or trace_info.message_data.created_at, end_time=trace_info.end_time or trace_info.message_data.created_at, metadata=trace_info.metadata, @@ -334,7 +335,7 @@ class LangFuseDataTrace(BaseTraceInstance): name=TraceTaskName.SUGGESTED_QUESTION_TRACE.value, input=trace_info.inputs, output=str(trace_info.suggested_question), - trace_id=trace_info.message_id, + trace_id=trace_info.trace_id or trace_info.message_id, start_time=trace_info.start_time, end_time=trace_info.end_time, metadata=trace_info.metadata, @@ -352,7 +353,7 @@ class LangFuseDataTrace(BaseTraceInstance): name=TraceTaskName.DATASET_RETRIEVAL_TRACE.value, input=trace_info.inputs, output={"documents": trace_info.documents}, - trace_id=trace_info.message_id, + trace_id=trace_info.trace_id or trace_info.message_id, start_time=trace_info.start_time or trace_info.message_data.created_at, end_time=trace_info.end_time or trace_info.message_data.updated_at, metadata=trace_info.metadata, @@ -365,7 +366,7 @@ class LangFuseDataTrace(BaseTraceInstance): name=trace_info.tool_name, input=trace_info.tool_inputs, output=trace_info.tool_outputs, - trace_id=trace_info.message_id, + trace_id=trace_info.trace_id or trace_info.message_id, start_time=trace_info.start_time, end_time=trace_info.end_time, metadata=trace_info.metadata, @@ -440,7 +441,7 @@ class LangFuseDataTrace(BaseTraceInstance): try: return self.langfuse_client.auth_check() except Exception as e: - logger.debug(f"LangFuse API check failed: {str(e)}") + logger.debug("LangFuse API check failed: %s", str(e)) raise ValueError(f"LangFuse API check failed: {str(e)}") def get_project_key(self): @@ -448,5 +449,5 @@ class LangFuseDataTrace(BaseTraceInstance): projects = self.langfuse_client.client.projects.get() return projects.data[0].id except Exception as e: - logger.debug(f"LangFuse get project key failed: {str(e)}") + logger.debug("LangFuse get project key failed: %s", str(e)) raise ValueError(f"LangFuse get project key failed: {str(e)}") diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index c97846dc9b..f9e5128e89 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -65,8 +65,7 @@ class LangSmithDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) def workflow_trace(self, trace_info: WorkflowTraceInfo): - external_trace_id = trace_info.metadata.get("external_trace_id") - trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id + trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id if trace_info.start_time is None: trace_info.start_time = datetime.now() message_dotted_order = ( @@ -290,7 +289,7 @@ class LangSmithDataTrace(BaseTraceInstance): reference_example_id=None, input_attachments={}, output_attachments={}, - trace_id=None, + trace_id=trace_info.trace_id, dotted_order=None, parent_run_id=None, ) @@ -319,7 +318,7 @@ class LangSmithDataTrace(BaseTraceInstance): reference_example_id=None, input_attachments={}, output_attachments={}, - trace_id=None, + trace_id=trace_info.trace_id, dotted_order=None, id=str(uuid.uuid4()), ) @@ -351,7 +350,7 @@ class LangSmithDataTrace(BaseTraceInstance): 
reference_example_id=None, input_attachments={}, output_attachments={}, - trace_id=None, + trace_id=trace_info.trace_id, dotted_order=None, error="", file_list=[], @@ -381,7 +380,7 @@ class LangSmithDataTrace(BaseTraceInstance): reference_example_id=None, input_attachments={}, output_attachments={}, - trace_id=None, + trace_id=trace_info.trace_id, dotted_order=None, error="", file_list=[], @@ -410,7 +409,7 @@ class LangSmithDataTrace(BaseTraceInstance): reference_example_id=None, input_attachments={}, output_attachments={}, - trace_id=None, + trace_id=trace_info.trace_id, dotted_order=None, error="", file_list=[], @@ -440,7 +439,7 @@ class LangSmithDataTrace(BaseTraceInstance): reference_example_id=None, input_attachments={}, output_attachments={}, - trace_id=None, + trace_id=trace_info.trace_id, dotted_order=None, error=trace_info.error or "", ) @@ -465,7 +464,7 @@ class LangSmithDataTrace(BaseTraceInstance): reference_example_id=None, input_attachments={}, output_attachments={}, - trace_id=None, + trace_id=trace_info.trace_id, dotted_order=None, error="", file_list=[], @@ -504,7 +503,7 @@ class LangSmithDataTrace(BaseTraceInstance): self.langsmith_client.delete_project(project_name=random_project_name) return True except Exception as e: - logger.debug(f"LangSmith API check failed: {str(e)}") + logger.debug("LangSmith API check failed: %s", str(e)) raise ValueError(f"LangSmith API check failed: {str(e)}") def get_project_url(self): @@ -523,5 +522,5 @@ class LangSmithDataTrace(BaseTraceInstance): ) return project_url.split("/r/")[0] except Exception as e: - logger.debug(f"LangSmith get run url failed: {str(e)}") + logger.debug("LangSmith get run url failed: %s", str(e)) raise ValueError(f"LangSmith get run url failed: {str(e)}") diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index 6079b2faef..dd6a424ddb 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -96,8 +96,7 @@ class OpikDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) def workflow_trace(self, trace_info: WorkflowTraceInfo): - external_trace_id = trace_info.metadata.get("external_trace_id") - dify_trace_id = external_trace_id or trace_info.workflow_run_id + dify_trace_id = trace_info.trace_id or trace_info.workflow_run_id opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id) workflow_metadata = wrap_metadata( trace_info.metadata, message_id=trace_info.message_id, workflow_app_log_id=trace_info.workflow_app_log_id @@ -105,7 +104,7 @@ class OpikDataTrace(BaseTraceInstance): root_span_id = None if trace_info.message_id: - dify_trace_id = external_trace_id or trace_info.message_id + dify_trace_id = trace_info.trace_id or trace_info.message_id opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id) trace_data = { @@ -276,7 +275,7 @@ class OpikDataTrace(BaseTraceInstance): return metadata = trace_info.metadata - message_id = trace_info.message_id + dify_trace_id = trace_info.trace_id or trace_info.message_id user_id = message_data.from_account_id metadata["user_id"] = user_id @@ -291,7 +290,7 @@ class OpikDataTrace(BaseTraceInstance): metadata["end_user_id"] = end_user_id trace_data = { - "id": prepare_opik_uuid(trace_info.start_time, message_id), + "id": prepare_opik_uuid(trace_info.start_time, dify_trace_id), "name": TraceTaskName.MESSAGE_TRACE.value, "start_time": trace_info.start_time, "end_time": trace_info.end_time, @@ -330,7 +329,7 @@ class OpikDataTrace(BaseTraceInstance): start_time = 
trace_info.start_time or trace_info.message_data.created_at span_data = { - "trace_id": prepare_opik_uuid(start_time, trace_info.message_id), + "trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id), "name": TraceTaskName.MODERATION_TRACE.value, "type": "tool", "start_time": start_time, @@ -356,7 +355,7 @@ class OpikDataTrace(BaseTraceInstance): start_time = trace_info.start_time or message_data.created_at span_data = { - "trace_id": prepare_opik_uuid(start_time, trace_info.message_id), + "trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id), "name": TraceTaskName.SUGGESTED_QUESTION_TRACE.value, "type": "tool", "start_time": start_time, @@ -376,7 +375,7 @@ class OpikDataTrace(BaseTraceInstance): start_time = trace_info.start_time or trace_info.message_data.created_at span_data = { - "trace_id": prepare_opik_uuid(start_time, trace_info.message_id), + "trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id), "name": TraceTaskName.DATASET_RETRIEVAL_TRACE.value, "type": "tool", "start_time": start_time, @@ -391,7 +390,7 @@ class OpikDataTrace(BaseTraceInstance): def tool_trace(self, trace_info: ToolTraceInfo): span_data = { - "trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id), + "trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id), "name": trace_info.tool_name, "type": "tool", "start_time": trace_info.start_time, @@ -406,7 +405,7 @@ class OpikDataTrace(BaseTraceInstance): def generate_name_trace(self, trace_info: GenerateNameTraceInfo): trace_data = { - "id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id), + "id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id), "name": TraceTaskName.GENERATE_NAME_TRACE.value, "start_time": trace_info.start_time, "end_time": trace_info.end_time, @@ -453,12 +452,12 @@ class OpikDataTrace(BaseTraceInstance): self.opik_client.auth_check() return True except Exception as e: - logger.info(f"Opik API check failed: {str(e)}", exc_info=True) + logger.info("Opik API check failed: %s", str(e), exc_info=True) raise ValueError(f"Opik API check failed: {str(e)}") def get_project_url(self): try: return self.opik_client.get_project_url(project_name=self.project) except Exception as e: - logger.info(f"Opik get run url failed: {str(e)}", exc_info=True) + logger.info("Opik get run url failed: %s", str(e), exc_info=True) raise ValueError(f"Opik get run url failed: {str(e)}") diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 2b546b47cc..a607c76beb 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -287,7 +287,7 @@ class OpsTraceManager: # create new tracing_instance and update the cache if it absent tracing_instance = trace_instance(config_class(**decrypt_trace_config)) cls.ops_trace_instances_cache[decrypt_trace_config_key] = tracing_instance - logging.info(f"new tracing_instance for app_id: {app_id}") + logging.info("new tracing_instance for app_id: %s", app_id) return tracing_instance @classmethod @@ -407,6 +407,7 @@ class TraceTask: def __init__( self, trace_type: Any, + trace_id: Optional[str] = None, message_id: Optional[str] = None, workflow_execution: Optional[WorkflowExecution] = None, conversation_id: Optional[str] = None, @@ -424,6 +425,9 @@ class TraceTask: self.app_id = None self.kwargs = kwargs + external_trace_id = kwargs.get("external_trace_id") + if 
external_trace_id: + self.trace_id = external_trace_id def execute(self): return self.preprocess() @@ -520,11 +524,8 @@ class TraceTask: "app_id": workflow_run.app_id, } - external_trace_id = self.kwargs.get("external_trace_id") - if external_trace_id: - metadata["external_trace_id"] = external_trace_id - workflow_trace_info = WorkflowTraceInfo( + trace_id=self.trace_id, workflow_data=workflow_run.to_dict(), conversation_id=conversation_id, workflow_id=workflow_id, @@ -584,6 +585,7 @@ class TraceTask: message_tokens = message_data.message_tokens message_trace_info = MessageTraceInfo( + trace_id=self.trace_id, message_id=message_id, message_data=message_data.to_dict(), conversation_model=conversation_mode, @@ -627,6 +629,7 @@ class TraceTask: workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None moderation_trace_info = ModerationTraceInfo( + trace_id=self.trace_id, message_id=workflow_app_log_id or message_id, inputs=inputs, message_data=message_data.to_dict(), @@ -667,6 +670,7 @@ class TraceTask: workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None suggested_question_trace_info = SuggestedQuestionTraceInfo( + trace_id=self.trace_id, message_id=workflow_app_log_id or message_id, message_data=message_data.to_dict(), inputs=message_data.message, @@ -708,6 +712,7 @@ class TraceTask: } dataset_retrieval_trace_info = DatasetRetrievalTraceInfo( + trace_id=self.trace_id, message_id=message_id, inputs=message_data.query or message_data.inputs, documents=[doc.model_dump() for doc in documents] if documents else [], @@ -772,6 +777,7 @@ class TraceTask: ) tool_trace_info = ToolTraceInfo( + trace_id=self.trace_id, message_id=message_id, message_data=message_data.to_dict(), tool_name=tool_name, @@ -807,6 +813,7 @@ class TraceTask: } generate_name_trace_info = GenerateNameTraceInfo( + trace_id=self.trace_id, conversation_id=conversation_id, inputs=inputs, outputs=generate_conversation_name, @@ -843,7 +850,7 @@ class TraceQueueManager: trace_task.app_id = self.app_id trace_manager_queue.put(trace_task) except Exception as e: - logging.exception(f"Error adding trace task, trace_type {trace_task.trace_type}") + logging.exception("Error adding trace task, trace_type %s", trace_task.trace_type) finally: self.start_timer() diff --git a/api/core/ops/utils.py b/api/core/ops/utils.py index 573e8cac88..2c0afb1600 100644 --- a/api/core/ops/utils.py +++ b/api/core/ops/utils.py @@ -67,7 +67,13 @@ def generate_dotted_order( def validate_url(url: str, default_url: str, allowed_schemes: tuple = ("https", "http")) -> str: """ - Validate and normalize URL with proper error handling + Validate and normalize URL with proper error handling. + + NOTE: This function does not retain the `path` component of the provided URL. + In most cases, it is recommended to use `validate_url_with_path` instead. + + This function is deprecated and retained only for compatibility purposes. + New implementations should use `validate_url_with_path`. 
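To make the docstring note above concrete, here is a minimal, hypothetical sketch (not Dify's actual `validate_url`) of what "does not retain the `path` component" means, assuming the scheme check and default-URL fallback implied by the signature; anything after the host is dropped, which is why `validate_url_with_path` is recommended for endpoint URLs.

```python
# Illustrative sketch only; not the real validate_url implementation.
# Assumption: invalid scheme or missing host falls back to default_url.
from urllib.parse import urlparse


def normalize_without_path(url: str, default_url: str, allowed_schemes: tuple = ("https", "http")) -> str:
    parsed = urlparse(url.strip()) if url else None
    if not parsed or parsed.scheme not in allowed_schemes or not parsed.netloc:
        return default_url
    # Path, query and fragment are intentionally discarded here.
    return f"{parsed.scheme}://{parsed.netloc}"


# "https://tracing.example.com/api/public" -> "https://tracing.example.com"
print(normalize_without_path("https://tracing.example.com/api/public", "https://tracing.example.com"))
```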
Args: url: The URL to validate diff --git a/api/core/ops/weave_trace/weave_trace.py b/api/core/ops/weave_trace/weave_trace.py index a34b3b780c..8089860481 100644 --- a/api/core/ops/weave_trace/weave_trace.py +++ b/api/core/ops/weave_trace/weave_trace.py @@ -66,11 +66,11 @@ class WeaveDataTrace(BaseTraceInstance): project_url = f"https://wandb.ai/{self.weave_client._project_id()}" return project_url except Exception as e: - logger.debug(f"Weave get run url failed: {str(e)}") + logger.debug("Weave get run url failed: %s", str(e)) raise ValueError(f"Weave get run url failed: {str(e)}") def trace(self, trace_info: BaseTraceInfo): - logger.debug(f"Trace info: {trace_info}") + logger.debug("Trace info: %s", trace_info) if isinstance(trace_info, WorkflowTraceInfo): self.workflow_trace(trace_info) if isinstance(trace_info, MessageTraceInfo): @@ -87,8 +87,7 @@ class WeaveDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) def workflow_trace(self, trace_info: WorkflowTraceInfo): - external_trace_id = trace_info.metadata.get("external_trace_id") - trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id + trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id if trace_info.start_time is None: trace_info.start_time = datetime.now() @@ -245,8 +244,12 @@ class WeaveDataTrace(BaseTraceInstance): attributes["start_time"] = trace_info.start_time attributes["end_time"] = trace_info.end_time attributes["tags"] = ["message", str(trace_info.conversation_mode)] + + trace_id = trace_info.trace_id or message_id + attributes["trace_id"] = trace_id + message_run = WeaveTraceModel( - id=message_id, + id=trace_id, op=str(TraceTaskName.MESSAGE_TRACE.value), input_tokens=trace_info.message_tokens, output_tokens=trace_info.answer_tokens, @@ -274,7 +277,7 @@ class WeaveDataTrace(BaseTraceInstance): ) self.start_call( llm_run, - parent_run_id=message_id, + parent_run_id=trace_id, ) self.finish_call(llm_run) self.finish_call(message_run) @@ -289,6 +292,9 @@ class WeaveDataTrace(BaseTraceInstance): attributes["start_time"] = trace_info.start_time or trace_info.message_data.created_at attributes["end_time"] = trace_info.end_time or trace_info.message_data.updated_at + trace_id = trace_info.trace_id or trace_info.message_id + attributes["trace_id"] = trace_id + moderation_run = WeaveTraceModel( id=str(uuid.uuid4()), op=str(TraceTaskName.MODERATION_TRACE.value), @@ -303,7 +309,7 @@ class WeaveDataTrace(BaseTraceInstance): exception=getattr(trace_info, "error", None), file_list=[], ) - self.start_call(moderation_run, parent_run_id=trace_info.message_id) + self.start_call(moderation_run, parent_run_id=trace_id) self.finish_call(moderation_run) def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo): @@ -316,6 +322,9 @@ class WeaveDataTrace(BaseTraceInstance): attributes["start_time"] = (trace_info.start_time or message_data.created_at,) attributes["end_time"] = (trace_info.end_time or message_data.updated_at,) + trace_id = trace_info.trace_id or trace_info.message_id + attributes["trace_id"] = trace_id + suggested_question_run = WeaveTraceModel( id=str(uuid.uuid4()), op=str(TraceTaskName.SUGGESTED_QUESTION_TRACE.value), @@ -326,7 +335,7 @@ class WeaveDataTrace(BaseTraceInstance): file_list=[], ) - self.start_call(suggested_question_run, parent_run_id=trace_info.message_id) + self.start_call(suggested_question_run, parent_run_id=trace_id) self.finish_call(suggested_question_run) def dataset_retrieval_trace(self, trace_info: 
DatasetRetrievalTraceInfo): @@ -338,6 +347,9 @@ class WeaveDataTrace(BaseTraceInstance): attributes["start_time"] = (trace_info.start_time or trace_info.message_data.created_at,) attributes["end_time"] = (trace_info.end_time or trace_info.message_data.updated_at,) + trace_id = trace_info.trace_id or trace_info.message_id + attributes["trace_id"] = trace_id + dataset_retrieval_run = WeaveTraceModel( id=str(uuid.uuid4()), op=str(TraceTaskName.DATASET_RETRIEVAL_TRACE.value), @@ -348,7 +360,7 @@ class WeaveDataTrace(BaseTraceInstance): file_list=[], ) - self.start_call(dataset_retrieval_run, parent_run_id=trace_info.message_id) + self.start_call(dataset_retrieval_run, parent_run_id=trace_id) self.finish_call(dataset_retrieval_run) def tool_trace(self, trace_info: ToolTraceInfo): @@ -357,6 +369,11 @@ class WeaveDataTrace(BaseTraceInstance): attributes["start_time"] = trace_info.start_time attributes["end_time"] = trace_info.end_time + message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None) + message_id = message_id or None + trace_id = trace_info.trace_id or message_id + attributes["trace_id"] = trace_id + tool_run = WeaveTraceModel( id=str(uuid.uuid4()), op=trace_info.tool_name, @@ -366,9 +383,7 @@ class WeaveDataTrace(BaseTraceInstance): attributes=attributes, exception=trace_info.error, ) - message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None) - message_id = message_id or None - self.start_call(tool_run, parent_run_id=message_id) + self.start_call(tool_run, parent_run_id=trace_id) self.finish_call(tool_run) def generate_name_trace(self, trace_info: GenerateNameTraceInfo): @@ -403,7 +418,7 @@ class WeaveDataTrace(BaseTraceInstance): print("Weave login successful") return True except Exception as e: - logger.debug(f"Weave API check failed: {str(e)}") + logger.debug("Weave API check failed: %s", str(e)) raise ValueError(f"Weave API check failed: {str(e)}") def start_call(self, run_data: WeaveTraceModel, parent_run_id: Optional[str] = None): diff --git a/api/core/plugin/impl/exc.py b/api/core/plugin/impl/exc.py index 54a0b90a8d..8b660c807d 100644 --- a/api/core/plugin/impl/exc.py +++ b/api/core/plugin/impl/exc.py @@ -1,3 +1,8 @@ +from collections.abc import Mapping + +from pydantic import TypeAdapter + + class PluginDaemonError(Exception): """Base class for all plugin daemon errors.""" @@ -36,6 +41,21 @@ class PluginDaemonBadRequestError(PluginDaemonClientSideError): class PluginInvokeError(PluginDaemonClientSideError): description: str = "Invoke Error" + def _get_error_object(self) -> Mapping: + try: + return TypeAdapter(Mapping).validate_json(self.description) + except Exception: + return {} + + def get_error_type(self) -> str: + return self._get_error_object().get("error_type", "unknown") + + def get_error_message(self) -> str: + try: + return self._get_error_object().get("message", "unknown") + except Exception: + return self.description + class PluginUniqueIdentifierError(PluginDaemonClientSideError): description: str = "Unique Identifier Error" diff --git a/api/core/rag/datasource/keyword/jieba/jieba.py b/api/core/rag/datasource/keyword/jieba/jieba.py index ec3a23bd96..7c5f47006f 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba.py +++ b/api/core/rag/datasource/keyword/jieba/jieba.py @@ -24,7 +24,7 @@ class Jieba(BaseKeyword): self._config = KeywordTableConfig() def create(self, texts: list[Document], **kwargs) -> BaseKeyword: - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = 
f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): keyword_table_handler = JiebaKeywordTableHandler() keyword_table = self._get_dataset_keyword_table() @@ -43,7 +43,7 @@ class Jieba(BaseKeyword): return self def add_texts(self, texts: list[Document], **kwargs): - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): keyword_table_handler = JiebaKeywordTableHandler() @@ -76,7 +76,7 @@ class Jieba(BaseKeyword): return id in set.union(*keyword_table.values()) def delete_by_ids(self, ids: list[str]) -> None: - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): keyword_table = self._get_dataset_keyword_table() if keyword_table is not None: @@ -116,7 +116,7 @@ class Jieba(BaseKeyword): return documents def delete(self) -> None: - lock_name = "keyword_indexing_lock_{}".format(self.dataset.id) + lock_name = f"keyword_indexing_lock_{self.dataset.id}" with redis_client.lock(lock_name, timeout=600): dataset_keyword_table = self.dataset.dataset_keyword_table if dataset_keyword_table: diff --git a/api/core/rag/datasource/vdb/baidu/baidu_vector.py b/api/core/rag/datasource/vdb/baidu/baidu_vector.py index db7ffc9c4f..d63ca9f695 100644 --- a/api/core/rag/datasource/vdb/baidu/baidu_vector.py +++ b/api/core/rag/datasource/vdb/baidu/baidu_vector.py @@ -203,9 +203,9 @@ class BaiduVector(BaseVector): def _create_table(self, dimension: int) -> None: # Try to grab distributed lock and create table - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=60): - table_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + table_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(table_exist_cache_key): return diff --git a/api/core/rag/datasource/vdb/chroma/chroma_vector.py b/api/core/rag/datasource/vdb/chroma/chroma_vector.py index b8b265d5e6..699a602365 100644 --- a/api/core/rag/datasource/vdb/chroma/chroma_vector.py +++ b/api/core/rag/datasource/vdb/chroma/chroma_vector.py @@ -57,9 +57,9 @@ class ChromaVector(BaseVector): self.add_texts(texts, embeddings, **kwargs) def create_collection(self, collection_name: str): - lock_name = "vector_indexing_lock_{}".format(collection_name) + lock_name = f"vector_indexing_lock_{collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return self._client.get_or_create_collection(collection_name) diff --git a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py b/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py index 68a9952789..bd986393d1 100644 --- a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py +++ b/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py @@ -74,9 +74,9 @@ class CouchbaseVector(BaseVector): self.add_texts(texts, embeddings) def _create_collection(self, vector_length: int, uuid: str): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = 
"vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return if self._collection_exists(self._collection_name): @@ -242,7 +242,7 @@ class CouchbaseVector(BaseVector): try: self._cluster.query(query, named_parameters={"doc_ids": ids}).execute() except Exception as e: - logger.exception(f"Failed to delete documents, ids: {ids}") + logger.exception("Failed to delete documents, ids: %s", ids) def delete_by_document_id(self, document_id: str): query = f""" diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py index 27575197fa..7118029d40 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py +++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py @@ -29,7 +29,7 @@ class ElasticSearchJaVector(ElasticSearchVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if not self._client.indices.exists(index=self._collection_name): diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py index ad39717183..832485b236 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py +++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py @@ -186,7 +186,7 @@ class ElasticSearchVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if not self._client.indices.exists(index=self._collection_name): diff --git a/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py b/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py index 89423eb160..0a4067e39c 100644 --- a/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py +++ b/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py @@ -164,7 +164,7 @@ class HuaweiCloudVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if not self._client.indices.exists(index=self._collection_name): diff --git a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py index e9ff1ce43d..3c65a41f08 100644 --- a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py +++ b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py @@ -89,7 +89,7 @@ class LindormVectorStore(BaseVector): timeout: int = 60, **kwargs, ): - logger.info(f"Total documents to add: {len(documents)}") + logger.info("Total documents to add: %s", len(documents)) uuids = self._get_uuids(documents) total_docs = len(documents) @@ -147,7 +147,7 @@ class LindormVectorStore(BaseVector): time.sleep(0.5) except Exception: - logger.exception(f"Failed to process batch {batch_num 
+ 1}") + logger.exception("Failed to process batch %s", batch_num + 1) raise def get_ids_by_metadata_field(self, key: str, value: str): @@ -180,7 +180,7 @@ class LindormVectorStore(BaseVector): # 1. First check if collection exists if not self._client.indices.exists(index=self._collection_name): - logger.warning(f"Collection {self._collection_name} does not exist") + logger.warning("Collection %s does not exist", self._collection_name) return # 2. Batch process deletions @@ -196,7 +196,7 @@ class LindormVectorStore(BaseVector): } ) else: - logger.warning(f"DELETE BY ID: ID {id} does not exist in the index.") + logger.warning("DELETE BY ID: ID %s does not exist in the index.", id) # 3. Perform bulk deletion if there are valid documents to delete if actions: @@ -209,9 +209,9 @@ class LindormVectorStore(BaseVector): doc_id = delete_error.get("_id") if status == 404: - logger.warning(f"Document not found for deletion: {doc_id}") + logger.warning("Document not found for deletion: %s", doc_id) else: - logger.exception(f"Error deleting document: {error}") + logger.exception("Error deleting document: %s", error) def delete(self) -> None: if self._using_ugc: @@ -225,7 +225,7 @@ class LindormVectorStore(BaseVector): self._client.indices.delete(index=self._collection_name, params={"timeout": 60}) logger.info("Delete index success") else: - logger.warning(f"Index '{self._collection_name}' does not exist. No deletion performed.") + logger.warning("Index '%s' does not exist. No deletion performed.", self._collection_name) def text_exists(self, id: str) -> bool: try: @@ -257,7 +257,7 @@ class LindormVectorStore(BaseVector): params["routing"] = self._routing # type: ignore response = self._client.search(index=self._collection_name, body=query, params=params) except Exception: - logger.exception(f"Error executing vector search, query: {query}") + logger.exception("Error executing vector search, query: %s", query) raise docs_and_scores = [] @@ -324,10 +324,10 @@ class LindormVectorStore(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name} already exists.") + logger.info("Collection %s already exists.", self._collection_name) return if self._client.indices.exists(index=self._collection_name): - logger.info(f"{self._collection_name.lower()} already exists.") + logger.info("%s already exists.", self._collection_name.lower()) redis_client.set(collection_exist_cache_key, 1, ex=3600) return if len(self.kwargs) == 0 and len(kwargs) != 0: diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index 63de6a0603..d64f366e0e 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -103,7 +103,7 @@ class MilvusVector(BaseVector): # For standard Milvus installations, check version number return version.parse(milvus_version).base_version >= version.parse("2.5.0").base_version except Exception as e: - logger.warning(f"Failed to check Milvus version: {str(e)}. Disabling hybrid search.") + logger.warning("Failed to check Milvus version: %s. Disabling hybrid search.", str(e)) return False def get_type(self) -> str: @@ -289,9 +289,9 @@ class MilvusVector(BaseVector): """ Create a new collection in Milvus with the specified schema and index parameters. 
""" - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return # Grab the existing collection if it exists diff --git a/api/core/rag/datasource/vdb/myscale/myscale_vector.py b/api/core/rag/datasource/vdb/myscale/myscale_vector.py index dbb1a7fe19..d5ec4b4436 100644 --- a/api/core/rag/datasource/vdb/myscale/myscale_vector.py +++ b/api/core/rag/datasource/vdb/myscale/myscale_vector.py @@ -53,7 +53,7 @@ class MyScaleVector(BaseVector): return self.add_texts(documents=texts, embeddings=embeddings, **kwargs) def _create_collection(self, dimension: int): - logging.info(f"create MyScale collection {self._collection_name} with dimension {dimension}") + logging.info("create MyScale collection %s with dimension %s", self._collection_name, dimension) self._client.command(f"CREATE DATABASE IF NOT EXISTS {self._config.database}") fts_params = f"('{self._config.fts_params}')" if self._config.fts_params else "" sql = f""" @@ -151,7 +151,7 @@ class MyScaleVector(BaseVector): for r in self._client.query(sql).named_results() ] except Exception as e: - logging.exception(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m") # noqa:TRY401 + logging.exception("\033[91m\033[1m%s\033[0m \033[95m%s\033[0m", type(e), str(e)) # noqa:TRY401 return [] def delete(self) -> None: diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index dd196e1f09..d6dfe967d7 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -147,7 +147,7 @@ class OceanBaseVector(BaseVector): logger.debug("Current OceanBase version is %s", ob_version) return version.parse(ob_version).base_version >= version.parse("4.3.5.1").base_version except Exception as e: - logger.warning(f"Failed to check OceanBase version: {str(e)}. Disabling hybrid search.") + logger.warning("Failed to check OceanBase version: %s. 
Disabling hybrid search.", str(e)) return False def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): @@ -229,7 +229,7 @@ class OceanBaseVector(BaseVector): return docs except Exception as e: - logger.warning(f"Failed to fulltext search: {str(e)}.") + logger.warning("Failed to fulltext search: %s.", str(e)) return [] def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: diff --git a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py index 0abb3c0077..ed2dcb40ad 100644 --- a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py +++ b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py @@ -131,7 +131,7 @@ class OpenSearchVector(BaseVector): def delete_by_ids(self, ids: list[str]) -> None: index_name = self._collection_name.lower() if not self._client.indices.exists(index=index_name): - logger.warning(f"Index {index_name} does not exist") + logger.warning("Index %s does not exist", index_name) return # Obtaining All Actual Documents_ID @@ -142,7 +142,7 @@ class OpenSearchVector(BaseVector): if es_ids: actual_ids.extend(es_ids) else: - logger.warning(f"Document with metadata doc_id {doc_id} not found for deletion") + logger.warning("Document with metadata doc_id %s not found for deletion", doc_id) if actual_ids: actions = [{"_op_type": "delete", "_index": index_name, "_id": es_id} for es_id in actual_ids] @@ -155,9 +155,9 @@ class OpenSearchVector(BaseVector): doc_id = delete_error.get("_id") if status == 404: - logger.warning(f"Document not found for deletion: {doc_id}") + logger.warning("Document not found for deletion: %s", doc_id) else: - logger.exception(f"Error deleting document: {error}") + logger.exception("Error deleting document: %s", error) def delete(self) -> None: self._client.indices.delete(index=self._collection_name.lower()) @@ -198,7 +198,7 @@ class OpenSearchVector(BaseVector): try: response = self._client.search(index=self._collection_name.lower(), body=query) except Exception as e: - logger.exception(f"Error executing vector search, query: {query}") + logger.exception("Error executing vector search, query: %s", query) raise docs = [] @@ -242,7 +242,7 @@ class OpenSearchVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name.lower()}" if redis_client.get(collection_exist_cache_key): - logger.info(f"Collection {self._collection_name.lower()} already exists.") + logger.info("Collection %s already exists.", self._collection_name.lower()) return if not self._client.indices.exists(index=self._collection_name.lower()): @@ -272,7 +272,7 @@ class OpenSearchVector(BaseVector): }, } - logger.info(f"Creating OpenSearch index {self._collection_name.lower()}") + logger.info("Creating OpenSearch index %s", self._collection_name.lower()) self._client.indices.create(index=self._collection_name.lower(), body=index_body) redis_client.set(collection_exist_cache_key, 1, ex=3600) diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py index b0f0eeca38..e77befcdae 100644 --- a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py +++ b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py @@ -82,9 +82,9 @@ class PGVectoRS(BaseVector): self.add_texts(texts, embeddings) def create_collection(self, dimension: int): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = 
f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return index_name = f"{self._collection_name}_embedding_index" diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 04e9cf801e..746773da63 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -155,7 +155,7 @@ class PGVector(BaseVector): cur.execute(f"DELETE FROM {self.table_name} WHERE id IN %s", (tuple(ids),)) except psycopg2.errors.UndefinedTable: # table not exists - logging.warning(f"Table {self.table_name} not found, skipping delete operation.") + logging.warning("Table %s not found, skipping delete operation.", self.table_name) return except Exception as e: raise e diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py index dfb95a1839..9741dd8b1d 100644 --- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py +++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py @@ -95,9 +95,9 @@ class QdrantVector(BaseVector): self.add_texts(texts, embeddings, **kwargs) def create_collection(self, collection_name: str, vector_size: int): - lock_name = "vector_indexing_lock_{}".format(collection_name) + lock_name = f"vector_indexing_lock_{collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return collection_name = collection_name or uuid.uuid4().hex diff --git a/api/core/rag/datasource/vdb/relyt/relyt_vector.py b/api/core/rag/datasource/vdb/relyt/relyt_vector.py index 0c0d6a463d..7a42dd1a89 100644 --- a/api/core/rag/datasource/vdb/relyt/relyt_vector.py +++ b/api/core/rag/datasource/vdb/relyt/relyt_vector.py @@ -70,9 +70,9 @@ class RelytVector(BaseVector): self.add_texts(texts, embeddings) def create_collection(self, dimension: int): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return index_name = f"{self._collection_name}_embedding_index" diff --git a/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py b/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py index 9ed6e7369b..784e27fc7f 100644 --- a/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py +++ b/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py @@ -142,7 +142,7 @@ class TableStoreVector(BaseVector): with redis_client.lock(lock_name, timeout=20): collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): - logging.info(f"Collection {self._collection_name} already exists.") + logging.info("Collection %s already exists.", self._collection_name) return self._create_table_if_not_exist() diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index 
23ed8a3344..3aa4b67a78 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -92,9 +92,9 @@ class TencentVector(BaseVector): def _create_collection(self, dimension: int) -> None: self._dimension = dimension - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return diff --git a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py index ba6a9654f0..e848b39c4d 100644 --- a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py +++ b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py @@ -104,9 +104,9 @@ class TidbOnQdrantVector(BaseVector): self.add_texts(texts, embeddings, **kwargs) def create_collection(self, collection_name: str, vector_size: int): - lock_name = "vector_indexing_lock_{}".format(collection_name) + lock_name = f"vector_indexing_lock_{collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return collection_name = collection_name or uuid.uuid4().hex diff --git a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py index 61c68b939e..f8a851a246 100644 --- a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py +++ b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py @@ -91,9 +91,9 @@ class TiDBVector(BaseVector): def _create_collection(self, dimension: int): logger.info("_create_collection, collection_name " + self._collection_name) - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return tidb_dist_func = self._get_distance_func() @@ -192,7 +192,7 @@ class TiDBVector(BaseVector): query_vector_str = ", ".join(format(x) for x in query_vector) query_vector_str = "[" + query_vector_str + "]" logger.debug( - f"_collection_name: {self._collection_name}, score_threshold: {score_threshold}, distance: {distance}" + "_collection_name: %s, score_threshold: %s, distance: %s", self._collection_name, score_threshold, distance ) docs = [] diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index e018f7d3d4..43c49ed4b3 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -178,19 +178,19 @@ class Vector: def create(self, texts: Optional[list] = None, **kwargs): if texts: start = time.time() - logger.info(f"start embedding {len(texts)} texts {start}") + logger.info("start embedding %s texts %s", len(texts), start) batch_size = 1000 total_batches = len(texts) + batch_size - 1 for i in range(0, len(texts), batch_size): batch = texts[i : i + batch_size] batch_start = 
time.time() - logger.info(f"Processing batch {i // batch_size + 1}/{total_batches} ({len(batch)} texts)") + logger.info("Processing batch %s/%s (%s texts)", i // batch_size + 1, total_batches, len(batch)) batch_embeddings = self._embeddings.embed_documents([document.page_content for document in batch]) logger.info( - f"Embedding batch {i // batch_size + 1}/{total_batches} took {time.time() - batch_start:.3f}s" + "Embedding batch %s/%s took %s s", i // batch_size + 1, total_batches, time.time() - batch_start ) self._vector_processor.create(texts=batch, embeddings=batch_embeddings, **kwargs) - logger.info(f"Embedding {len(texts)} texts took {time.time() - start:.3f}s") + logger.info("Embedding %s texts took %s s", len(texts), time.time() - start) def add_texts(self, documents: list[Document], **kwargs): if kwargs.get("duplicate_check", False): @@ -219,7 +219,7 @@ class Vector: self._vector_processor.delete() # delete collection redis cache if self._vector_processor.collection_name: - collection_exist_cache_key = "vector_indexing_{}".format(self._vector_processor.collection_name) + collection_exist_cache_key = f"vector_indexing_{self._vector_processor.collection_name}" redis_client.delete(collection_exist_cache_key) def _get_embeddings(self) -> Embeddings: diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index 7a8efb4068..5525ef1685 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -92,9 +92,9 @@ class WeaviateVector(BaseVector): self.add_texts(texts, embeddings) def _create_collection(self): - lock_name = "vector_indexing_lock_{}".format(self._collection_name) + lock_name = f"vector_indexing_lock_{self._collection_name}" with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name) + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" if redis_client.get(collection_exist_cache_key): return schema = self._default_schema(self._collection_name) diff --git a/api/core/rag/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py index f844770a20..f8da3657fc 100644 --- a/api/core/rag/docstore/dataset_docstore.py +++ b/api/core/rag/docstore/dataset_docstore.py @@ -32,7 +32,7 @@ class DatasetDocumentStore: } @property - def dateset_id(self) -> Any: + def dataset_id(self) -> Any: return self._dataset.id @property @@ -123,13 +123,13 @@ class DatasetDocumentStore: db.session.flush() if save_child: if doc.children: - for postion, child in enumerate(doc.children, start=1): + for position, child in enumerate(doc.children, start=1): child_segment = ChildChunk( tenant_id=self._dataset.tenant_id, dataset_id=self._dataset.id, document_id=self._document_id, segment_id=segment_document.id, - position=postion, + position=position, index_node_id=child.metadata.get("doc_id"), index_node_hash=child.metadata.get("doc_hash"), content=child.page_content, diff --git a/api/core/rag/embedding/cached_embedding.py b/api/core/rag/embedding/cached_embedding.py index f50f9f6b60..9848a28384 100644 --- a/api/core/rag/embedding/cached_embedding.py +++ b/api/core/rag/embedding/cached_embedding.py @@ -69,7 +69,7 @@ class CacheEmbedding(Embeddings): # stackoverflow best way: https://stackoverflow.com/questions/20319813/how-to-check-list-containing-nan if np.isnan(normalized_embedding).any(): # for issue #11827 float values are not json compliant - 
logger.warning(f"Normalized embedding is nan: {normalized_embedding}") + logger.warning("Normalized embedding is nan: %s", normalized_embedding) continue embedding_queue_embeddings.append(normalized_embedding) except IntegrityError: @@ -122,7 +122,7 @@ class CacheEmbedding(Embeddings): raise ValueError("Normalized embedding is nan please try again") except Exception as ex: if dify_config.DEBUG: - logging.exception(f"Failed to embed query text '{text[:10]}...({len(text)} chars)'") + logging.exception("Failed to embed query text '%s...(%s chars)'", text[:10], len(text)) raise ex try: @@ -136,7 +136,9 @@ class CacheEmbedding(Embeddings): redis_client.setex(embedding_cache_key, 600, encoded_str) except Exception as ex: if dify_config.DEBUG: - logging.exception(f"Failed to add embedding to redis for the text '{text[:10]}...({len(text)} chars)'") + logging.exception( + "Failed to add embedding to redis for the text '%s...(%s chars)'", text[:10], len(text) + ) raise ex return embedding_results # type: ignore diff --git a/api/core/rag/splitter/text_splitter.py b/api/core/rag/splitter/text_splitter.py index 529d8ccd27..489aa05430 100644 --- a/api/core/rag/splitter/text_splitter.py +++ b/api/core/rag/splitter/text_splitter.py @@ -116,7 +116,7 @@ class TextSplitter(BaseDocumentTransformer, ABC): if total + _len + (separator_len if len(current_doc) > 0 else 0) > self._chunk_size: if total > self._chunk_size: logger.warning( - f"Created a chunk of size {total}, which is longer than the specified {self._chunk_size}" + "Created a chunk of size %s, which is longer than the specified %s", total, self._chunk_size ) if len(current_doc) > 0: doc = self._join_docs(current_doc, separator) diff --git a/api/core/repositories/factory.py b/api/core/repositories/factory.py index 4118aa61c7..6e636883ae 100644 --- a/api/core/repositories/factory.py +++ b/api/core/repositories/factory.py @@ -153,7 +153,7 @@ class DifyCoreRepositoryFactory: RepositoryImportError: If the configured repository cannot be created """ class_path = dify_config.CORE_WORKFLOW_EXECUTION_REPOSITORY - logger.debug(f"Creating WorkflowExecutionRepository from: {class_path}") + logger.debug("Creating WorkflowExecutionRepository from: %s", class_path) try: repository_class = cls._import_class(class_path) @@ -199,7 +199,7 @@ class DifyCoreRepositoryFactory: RepositoryImportError: If the configured repository cannot be created """ class_path = dify_config.CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY - logger.debug(f"Creating WorkflowNodeExecutionRepository from: {class_path}") + logger.debug("Creating WorkflowNodeExecutionRepository from: %s", class_path) try: repository_class = cls._import_class(class_path) diff --git a/api/core/repositories/sqlalchemy_workflow_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_execution_repository.py index c579ff4028..74a49842f3 100644 --- a/api/core/repositories/sqlalchemy_workflow_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_execution_repository.py @@ -203,5 +203,5 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository): session.commit() # Update the in-memory cache for faster subsequent lookups - logger.debug(f"Updating cache for execution_id: {db_model.id}") + logger.debug("Updating cache for execution_id: %s", db_model.id) self._execution_cache[db_model.id] = db_model diff --git a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py index d4a31390f8..f4532d7f29 
100644 --- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py @@ -215,7 +215,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) # Update the in-memory cache for faster subsequent lookups # Only cache if we have a node_execution_id to use as the cache key if db_model.node_execution_id: - logger.debug(f"Updating cache for node_execution_id: {db_model.node_execution_id}") + logger.debug("Updating cache for node_execution_id: %s", db_model.node_execution_id) self._node_execution_cache[db_model.node_execution_id] = db_model def get_db_models_by_workflow_run( diff --git a/api/core/tools/tool_manager.py b/api/core/tools/tool_manager.py index f286466de0..1bb4cfa4cd 100644 --- a/api/core/tools/tool_manager.py +++ b/api/core/tools/tool_manager.py @@ -206,7 +206,7 @@ class ToolManager: ) except Exception as e: builtin_provider = None - logger.info(f"Error getting builtin provider {credential_id}:{e}", exc_info=True) + logger.info("Error getting builtin provider %s:%s", credential_id, e, exc_info=True) # if the provider has been deleted, raise an error if builtin_provider is None: raise ToolProviderNotFoundError(f"provider has been deleted: {credential_id}") @@ -237,7 +237,7 @@ class ToolManager: if builtin_provider is None: raise ToolProviderNotFoundError(f"builtin provider {provider_id} not found") - encrypter, _ = create_provider_encrypter( + encrypter, cache = create_provider_encrypter( tenant_id=tenant_id, config=[ x.to_basic_provider_config() @@ -281,6 +281,7 @@ class ToolManager: builtin_provider.expires_at = refreshed_credentials.expires_at db.session.commit() decrypted_credentials = refreshed_credentials.credentials + cache.delete() return cast( BuiltinTool, @@ -569,7 +570,7 @@ class ToolManager: yield provider except Exception: - logger.exception(f"load builtin provider {provider_path}") + logger.exception("load builtin provider %s", provider_path) continue # set builtin providers loaded cls._builtin_providers_loaded = True @@ -1011,7 +1012,9 @@ class ToolManager: if variable is None: raise ToolParameterError(f"Variable {tool_input.value} does not exist") parameter_value = variable.value - elif tool_input.type in {"mixed", "constant"}: + elif tool_input.type == "constant": + parameter_value = tool_input.value + elif tool_input.type == "mixed": segment_group = variable_pool.convert_template(str(tool_input.value)) parameter_value = segment_group.text else: diff --git a/api/core/tools/utils/parser.py b/api/core/tools/utils/parser.py index a3c84615ca..3857a2a16b 100644 --- a/api/core/tools/utils/parser.py +++ b/api/core/tools/utils/parser.py @@ -105,6 +105,29 @@ class ApiBasedToolSchemaParser: # overwrite the content interface["operation"]["requestBody"]["content"][content_type]["schema"] = root + # handle allOf reference in schema properties + for prop_dict in root.get("properties", {}).values(): + for item in prop_dict.get("allOf", []): + if "$ref" in item: + ref_schema = openapi + reference = item["$ref"].split("/")[1:] + for ref in reference: + ref_schema = ref_schema[ref] + else: + ref_schema = item + for key, value in ref_schema.items(): + if isinstance(value, list): + if key not in prop_dict: + prop_dict[key] = [] + # extends list field + if isinstance(prop_dict[key], list): + prop_dict[key].extend(value) + elif key not in prop_dict: + # add new field + prop_dict[key] = value + if "allOf" in prop_dict: + del prop_dict["allOf"] + # parse body parameters if 
"schema" in interface["operation"]["requestBody"]["content"][content_type]: body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"] diff --git a/api/core/tools/utils/web_reader_tool.py b/api/core/tools/utils/web_reader_tool.py index cbd06fc186..df052c16db 100644 --- a/api/core/tools/utils/web_reader_tool.py +++ b/api/core/tools/utils/web_reader_tool.py @@ -55,7 +55,7 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: main_content_type = mimetypes.guess_type(filename)[0] if main_content_type not in supported_content_types: - return "Unsupported content-type [{}] of URL.".format(main_content_type) + return f"Unsupported content-type [{main_content_type}] of URL." if main_content_type in extract_processor.SUPPORT_URL_CONTENT_TYPES: return cast(str, ExtractProcessor.load_from_url(url, return_text=True)) @@ -67,7 +67,7 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: response = scraper.get(url, headers=headers, follow_redirects=True, timeout=(120, 300)) # type: ignore if response.status_code != 200: - return "URL returned status code {}.".format(response.status_code) + return f"URL returned status code {response.status_code}." # Detect encoding using chardet detected_encoding = chardet.detect(response.content) diff --git a/api/core/tools/workflow_as_tool/tool.py b/api/core/tools/workflow_as_tool/tool.py index 8b89c2a7a9..962b9f7a81 100644 --- a/api/core/tools/workflow_as_tool/tool.py +++ b/api/core/tools/workflow_as_tool/tool.py @@ -194,7 +194,7 @@ class WorkflowTool(Tool): files.append(file_dict) except Exception: - logger.exception(f"Failed to transform file {file}") + logger.exception("Failed to transform file %s", file) else: parameters_result[parameter.name] = tool_parameters.get(parameter.name) diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index b315129763..ef13277e0c 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -238,13 +238,13 @@ class GraphEngine: while True: # max steps reached if self.graph_runtime_state.node_run_steps > self.max_execution_steps: - raise GraphRunFailedError("Max steps {} reached.".format(self.max_execution_steps)) + raise GraphRunFailedError(f"Max steps {self.max_execution_steps} reached.") # or max execution time reached if self._is_timed_out( start_at=self.graph_runtime_state.start_at, max_execution_time=self.max_execution_time ): - raise GraphRunFailedError("Max execution time {}s reached.".format(self.max_execution_time)) + raise GraphRunFailedError(f"Max execution time {self.max_execution_time}s reached.") # init route node state route_node_state = self.graph_runtime_state.node_run_state.create_node_state(node_id=next_node_id) @@ -377,7 +377,7 @@ class GraphEngine: edge = cast(GraphEdge, sub_edge_mappings[0]) if edge.run_condition is None: - logger.warning(f"Edge {edge.target_node_id} run condition is None") + logger.warning("Edge %s run condition is None", edge.target_node_id) continue result = ConditionManager.get_condition_handler( @@ -848,7 +848,7 @@ class GraphEngine: ) return except Exception as e: - logger.exception(f"Node {node.title} run failed") + logger.exception("Node %s run failed", node.title) raise e def _append_variables_recursively(self, node_id: str, variable_key_list: list[str], variable_value: VariableValue): diff --git a/api/core/workflow/nodes/agent/agent_node.py b/api/core/workflow/nodes/agent/agent_node.py index c83303034e..2b6382a8a6 100644 
--- a/api/core/workflow/nodes/agent/agent_node.py +++ b/api/core/workflow/nodes/agent/agent_node.py @@ -50,6 +50,7 @@ from .exc import ( AgentInputTypeError, AgentInvocationError, AgentMessageTransformError, + AgentNodeError, AgentVariableNotFoundError, AgentVariableTypeError, ToolFileNotFoundError, @@ -593,7 +594,14 @@ class AgentNode(BaseNode): variables[variable_name] = variable_value elif message.type == ToolInvokeMessage.MessageType.FILE: assert message.meta is not None - assert isinstance(message.meta, File) + assert isinstance(message.meta, dict) + # Validate that meta contains a 'file' key + if "file" not in message.meta: + raise AgentNodeError("File message is missing 'file' key in meta") + + # Validate that the file is an instance of File + if not isinstance(message.meta["file"], File): + raise AgentNodeError(f"Expected File object but got {type(message.meta['file']).__name__}") files.append(message.meta["file"]) elif message.type == ToolInvokeMessage.MessageType.LOG: assert isinstance(message.message, ToolInvokeMessage.LogMessage) diff --git a/api/core/workflow/nodes/answer/base_stream_processor.py b/api/core/workflow/nodes/answer/base_stream_processor.py index 09d5464d7a..7e84557a2d 100644 --- a/api/core/workflow/nodes/answer/base_stream_processor.py +++ b/api/core/workflow/nodes/answer/base_stream_processor.py @@ -36,7 +36,7 @@ class StreamProcessor(ABC): reachable_node_ids: list[str] = [] unreachable_first_node_ids: list[str] = [] if finished_node_id not in self.graph.edge_mapping: - logger.warning(f"node {finished_node_id} has no edge mapping") + logger.warning("node %s has no edge mapping", finished_node_id) return for edge in self.graph.edge_mapping[finished_node_id]: if ( diff --git a/api/core/workflow/nodes/base/node.py b/api/core/workflow/nodes/base/node.py index fb5ec55453..be4f79af19 100644 --- a/api/core/workflow/nodes/base/node.py +++ b/api/core/workflow/nodes/base/node.py @@ -65,7 +65,7 @@ class BaseNode: try: result = self._run() except Exception as e: - logger.exception(f"Node {self.node_id} failed to run") + logger.exception("Node %s failed to run", self.node_id) result = NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, error=str(e), diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index ab5964ebd4..f3061f7d96 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -305,7 +305,7 @@ def _extract_text_from_doc(file_content: bytes) -> str: raise TextExtractionError(f"Failed to extract text from DOC: {str(e)}") from e -def paser_docx_part(block, doc: Document, content_items, i): +def parser_docx_part(block, doc: Document, content_items, i): if isinstance(block, CT_P): content_items.append((i, "paragraph", Paragraph(block, doc))) elif isinstance(block, CT_Tbl): @@ -329,7 +329,7 @@ def _extract_text_from_docx(file_content: bytes) -> str: part = next(it, None) i = 0 while part is not None: - paser_docx_part(part, doc, content_items, i) + parser_docx_part(part, doc, content_items, i) i = i + 1 part = next(it, None) @@ -363,7 +363,7 @@ def _extract_text_from_docx(file_content: bytes) -> str: text.append(markdown_table) except Exception as e: - logger.warning(f"Failed to extract table from DOC: {e}") + logger.warning("Failed to extract table from DOC: %s", e) continue return "\n".join(text) diff --git a/api/core/workflow/nodes/http_request/node.py b/api/core/workflow/nodes/http_request/node.py index 6799d5c63c..bc1d5c9b87 100644 
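Returning to the `allOf` handling added to `ApiBasedToolSchemaParser` in `api/core/tools/utils/parser.py` earlier in this diff: the new loop inlines schemas referenced via `allOf`/`$ref` into each property and then removes the `allOf` key. A standalone sketch of that merge, using an invented sample spec purely for illustration:

```python
# Self-contained sketch of the allOf merge shown above; the sample spec is invented.
openapi = {"components": {"schemas": {"Color": {"type": "string", "enum": ["red", "green"]}}}}
root = {
    "properties": {
        "color": {"description": "preferred color", "allOf": [{"$ref": "#/components/schemas/Color"}]},
    }
}

for prop_dict in root.get("properties", {}).values():
    for item in prop_dict.get("allOf", []):
        if "$ref" in item:
            ref_schema = openapi
            # "#/components/schemas/Color" -> walk ["components", "schemas", "Color"]
            for ref in item["$ref"].split("/")[1:]:
                ref_schema = ref_schema[ref]
        else:
            ref_schema = item
        for key, value in ref_schema.items():
            if isinstance(value, list):
                # extend list-valued fields instead of overwriting them
                prop_dict.setdefault(key, [])
                if isinstance(prop_dict[key], list):
                    prop_dict[key].extend(value)
            elif key not in prop_dict:
                prop_dict[key] = value
    prop_dict.pop("allOf", None)

print(root["properties"]["color"])
# {'description': 'preferred color', 'type': 'string', 'enum': ['red', 'green']}
```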
--- a/api/core/workflow/nodes/http_request/node.py +++ b/api/core/workflow/nodes/http_request/node.py @@ -129,7 +129,7 @@ class HttpRequestNode(BaseNode): }, ) except HttpRequestNodeError as e: - logger.warning(f"http request node {self.node_id} failed to run: {e}") + logger.warning("http request node %s failed to run: %s", self.node_id, e) return NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, error=str(e), diff --git a/api/core/workflow/nodes/if_else/if_else_node.py b/api/core/workflow/nodes/if_else/if_else_node.py index 86e703dc68..2c83ea3d4f 100644 --- a/api/core/workflow/nodes/if_else/if_else_node.py +++ b/api/core/workflow/nodes/if_else/if_else_node.py @@ -129,7 +129,7 @@ class IfElseNode(BaseNode): var_mapping: dict[str, list[str]] = {} for case in typed_node_data.cases or []: for condition in case.conditions: - key = "{}.#{}#".format(node_id, ".".join(condition.variable_selector)) + key = f"{node_id}.#{'.'.join(condition.variable_selector)}#" var_mapping[key] = condition.variable_selector return var_mapping diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index 5842c8d64b..def1e1cfa3 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -616,7 +616,7 @@ class IterationNode(BaseNode): ) except IterationNodeError as e: - logger.warning(f"Iteration run failed:{str(e)}") + logger.warning("Iteration run failed:%s", str(e)) yield IterationRunFailedEvent( iteration_id=self.id, iteration_node_id=self.node_id, diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 34b0afc75d..e041e217ca 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -453,35 +453,34 @@ class KnowledgeRetrievalNode(BaseNode): elif node_data.metadata_filtering_mode == "manual": if node_data.metadata_filtering_conditions: conditions = [] - if node_data.metadata_filtering_conditions: - for sequence, condition in enumerate(node_data.metadata_filtering_conditions.conditions): # type: ignore - metadata_name = condition.name - expected_value = condition.value - if expected_value is not None and condition.comparison_operator not in ("empty", "not empty"): - if isinstance(expected_value, str): - expected_value = self.graph_runtime_state.variable_pool.convert_template( - expected_value - ).value[0] - if expected_value.value_type in {"number", "integer", "float"}: # type: ignore - expected_value = expected_value.value # type: ignore - elif expected_value.value_type == "string": # type: ignore - expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore - else: - raise ValueError("Invalid expected metadata value type") - conditions.append( - Condition( - name=metadata_name, - comparison_operator=condition.comparison_operator, - value=expected_value, - ) - ) - filters = self._process_metadata_filter_func( - sequence, - condition.comparison_operator, - metadata_name, - expected_value, - filters, + for sequence, condition in enumerate(node_data.metadata_filtering_conditions.conditions): # type: ignore + metadata_name = condition.name + expected_value = condition.value + if expected_value is not None and condition.comparison_operator not in ("empty", "not empty"): + if isinstance(expected_value, str): + expected_value = 
self.graph_runtime_state.variable_pool.convert_template( + expected_value + ).value[0] + if expected_value.value_type in {"number", "integer", "float"}: # type: ignore + expected_value = expected_value.value # type: ignore + elif expected_value.value_type == "string": # type: ignore + expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore + else: + raise ValueError("Invalid expected metadata value type") + conditions.append( + Condition( + name=metadata_name, + comparison_operator=condition.comparison_operator, + value=expected_value, ) + ) + filters = self._process_metadata_filter_func( + sequence, + condition.comparison_operator, + metadata_name, + expected_value, + filters, + ) metadata_condition = MetadataCondition( logical_operator=node_data.metadata_filtering_conditions.logical_operator, conditions=conditions, diff --git a/api/core/workflow/nodes/list_operator/node.py b/api/core/workflow/nodes/list_operator/node.py index b91fc622f6..d2e022dc9d 100644 --- a/api/core/workflow/nodes/list_operator/node.py +++ b/api/core/workflow/nodes/list_operator/node.py @@ -299,7 +299,7 @@ def _endswith(value: str) -> Callable[[str], bool]: def _is(value: str) -> Callable[[str], bool]: - return lambda x: x is value + return lambda x: x == value def _in(value: str | Sequence[str]) -> Callable[[str], bool]: diff --git a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py index a23d284626..45c5e0a62c 100644 --- a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py @@ -670,7 +670,7 @@ class ParameterExtractorNode(BaseNode): return cast(dict, json.loads(json_str)) except Exception: pass - logger.info(f"extra error: {result}") + logger.info("extra error: %s", result) return None def _extract_json_from_tool_call(self, tool_call: AssistantPromptMessage.ToolCall) -> Optional[dict]: @@ -690,7 +690,7 @@ class ParameterExtractorNode(BaseNode): return cast(dict, json.loads(json_str)) except Exception: pass - logger.info(f"extra error: {result}") + logger.info("extra error: %s", result) return None def _generate_default_result(self, data: ParameterExtractorNodeData) -> dict: diff --git a/api/core/workflow/nodes/question_classifier/question_classifier_node.py b/api/core/workflow/nodes/question_classifier/question_classifier_node.py index 15012fa48d..3e4984ecd5 100644 --- a/api/core/workflow/nodes/question_classifier/question_classifier_node.py +++ b/api/core/workflow/nodes/question_classifier/question_classifier_node.py @@ -385,9 +385,8 @@ class QuestionClassifierNode(BaseNode): text=QUESTION_CLASSIFIER_COMPLETION_PROMPT.format( histories=memory_str, input_text=input_text, - categories=json.dumps(categories), + categories=json.dumps(categories, ensure_ascii=False), classification_instructions=instruction, - ensure_ascii=False, ) ) diff --git a/api/core/workflow/nodes/tool/entities.py b/api/core/workflow/nodes/tool/entities.py index f0a44d919b..c1cfbb1edc 100644 --- a/api/core/workflow/nodes/tool/entities.py +++ b/api/core/workflow/nodes/tool/entities.py @@ -54,8 +54,8 @@ class ToolNodeData(BaseNodeData, ToolEntity): for val in value: if not isinstance(val, str): raise ValueError("value must be a list of strings") - elif typ == "constant" and not isinstance(value, str | int | float | bool): - raise ValueError("value must be a string, int, float, or bool") + elif typ == "constant" and not 
isinstance(value, str | int | float | bool | dict): + raise ValueError("value must be a string, int, float, bool or dict") return typ tool_parameters: dict[str, ToolInput] diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py index f437ac841d..4c8e13de70 100644 --- a/api/core/workflow/nodes/tool/tool_node.py +++ b/api/core/workflow/nodes/tool/tool_node.py @@ -6,7 +6,7 @@ from sqlalchemy.orm import Session from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler from core.file import File, FileTransferMethod -from core.plugin.impl.exc import PluginDaemonClientSideError +from core.plugin.impl.exc import PluginDaemonClientSideError, PluginInvokeError from core.plugin.impl.plugin import PluginInstaller from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter from core.tools.errors import ToolInvokeError @@ -141,13 +141,36 @@ class ToolNode(BaseNode): tenant_id=self.tenant_id, node_id=self.node_id, ) - except (PluginDaemonClientSideError, ToolInvokeError) as e: + except ToolInvokeError as e: yield RunCompletedEvent( run_result=NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, inputs=parameters_for_log, metadata={WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info}, - error=f"Failed to transform tool message: {str(e)}", + error=f"Failed to invoke tool {node_data.provider_name}: {str(e)}", + error_type=type(e).__name__, + ) + ) + except PluginInvokeError as e: + yield RunCompletedEvent( + run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + metadata={WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info}, + error="An error occurred in the plugin, " + f"please contact the author of {node_data.provider_name} for help, " + f"error type: {e.get_error_type()}, " + f"error details: {e.get_error_message()}", + error_type=type(e).__name__, + ) + ) + except PluginDaemonClientSideError as e: + yield RunCompletedEvent( + run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + metadata={WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info}, + error=f"Failed to invoke tool, error: {e.description}", error_type=type(e).__name__, ) ) diff --git a/api/core/workflow/workflow_entry.py b/api/core/workflow/workflow_entry.py index c8082ebf50..801e36e272 100644 --- a/api/core/workflow/workflow_entry.py +++ b/api/core/workflow/workflow_entry.py @@ -67,7 +67,7 @@ class WorkflowEntry: # check call depth workflow_call_max_depth = dify_config.WORKFLOW_CALL_MAX_DEPTH if call_depth > workflow_call_max_depth: - raise ValueError("Max workflow call depth {} reached.".format(workflow_call_max_depth)) + raise ValueError(f"Max workflow call depth {workflow_call_max_depth} reached.") # init workflow run state graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter()) @@ -193,7 +193,13 @@ class WorkflowEntry: # run node generator = node.run() except Exception as e: - logger.exception(f"error while running node, {workflow.id=}, {node.id=}, {node.type_=}, {node.version()=}") + logger.exception( + "error while running node, workflow_id=%s, node_id=%s, node_type=%s, node_version=%s", + workflow.id, + node.id, + node.type_, + node.version(), + ) raise WorkflowNodeRunFailedError(node=node, err_msg=str(e)) return node, generator @@ -297,7 +303,12 @@ class WorkflowEntry: return node, generator except Exception as e: - logger.exception(f"error while running node, {node.id=}, {node.type_=}, 
{node.version()=}") + logger.exception( + "error while running node, node_id=%s, node_type=%s, node_version=%s", + node.id, + node.type_, + node.version(), + ) raise WorkflowNodeRunFailedError(node=node, err_msg=str(e)) @staticmethod diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index 4de9a25c2f..a850ea9a50 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -2,6 +2,11 @@ set -e +# Set UTF-8 encoding to address potential encoding issues in containerized environments +export LANG=${LANG:-en_US.UTF-8} +export LC_ALL=${LC_ALL:-en_US.UTF-8} +export PYTHONIOENCODING=${PYTHONIOENCODING:-utf-8} + if [[ "${MIGRATION_ENABLED}" == "true" ]]; then echo "Running migrations" flask upgrade-db diff --git a/api/events/event_handlers/create_document_index.py b/api/events/event_handlers/create_document_index.py index dc50ca8d96..bdb69945f0 100644 --- a/api/events/event_handlers/create_document_index.py +++ b/api/events/event_handlers/create_document_index.py @@ -18,7 +18,7 @@ def handle(sender, **kwargs): documents = [] start_at = time.perf_counter() for document_id in document_ids: - logging.info(click.style("Start process document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start process document: {document_id}", fg="green")) document = ( db.session.query(Document) @@ -42,7 +42,7 @@ def handle(sender, **kwargs): indexing_runner = IndexingRunner() indexing_runner.run(documents) end_at = time.perf_counter() - logging.info(click.style("Processed dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: diff --git a/api/events/event_handlers/update_provider_when_message_created.py b/api/events/event_handlers/update_provider_when_message_created.py index d3943f2eda..2ed42c71ea 100644 --- a/api/events/event_handlers/update_provider_when_message_created.py +++ b/api/events/event_handlers/update_provider_when_message_created.py @@ -131,9 +131,11 @@ def handle(sender: Message, **kwargs): duration = time_module.perf_counter() - start_time logger.info( - f"Provider updates completed successfully. " - f"Updates: {len(updates_to_perform)}, Duration: {duration:.3f}s, " - f"Tenant: {tenant_id}, Provider: {provider_name}" + "Provider updates completed successfully. Updates: %s, Duration: %s s, Tenant: %s, Provider: %s", + len(updates_to_perform), + duration, + tenant_id, + provider_name, ) except Exception as e: @@ -141,9 +143,11 @@ def handle(sender: Message, **kwargs): duration = time_module.perf_counter() - start_time logger.exception( - f"Provider updates failed after {duration:.3f}s. " - f"Updates: {len(updates_to_perform)}, Tenant: {tenant_id}, " - f"Provider: {provider_name}" + "Provider updates failed after %s s. Updates: %s, Tenant: %s, Provider: %s", + duration, + len(updates_to_perform), + tenant_id, + provider_name, ) raise @@ -219,16 +223,20 @@ def _execute_provider_updates(updates_to_perform: list[_ProviderUpdateOperation] rows_affected = result.rowcount logger.debug( - f"Provider update ({description}): {rows_affected} rows affected. " - f"Filters: {filters.model_dump()}, Values: {update_values}" + "Provider update (%s): %s rows affected. 
Filters: %s, Values: %s", + description, + rows_affected, + filters.model_dump(), + update_values, ) # If no rows were affected for quota updates, log a warning if rows_affected == 0 and description == "quota_deduction_update": logger.warning( - f"No Provider rows updated for quota deduction. " - f"This may indicate quota limit exceeded or provider not found. " - f"Filters: {filters.model_dump()}" + "No Provider rows updated for quota deduction. " + "This may indicate quota limit exceeded or provider not found. " + "Filters: %s", + filters.model_dump(), ) - logger.debug(f"Successfully processed {len(updates_to_perform)} Provider updates") + logger.debug("Successfully processed %s Provider updates", len(updates_to_perform)) diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 2c2846ba26..bd72c93404 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -73,13 +73,13 @@ def init_app(app: DifyApp) -> Celery: imports.append("schedule.clean_embedding_cache_task") beat_schedule["clean_embedding_cache_task"] = { "task": "schedule.clean_embedding_cache_task.clean_embedding_cache_task", - "schedule": timedelta(days=day), + "schedule": crontab(minute="0", hour="2", day_of_month=f"*/{day}"), } if dify_config.ENABLE_CLEAN_UNUSED_DATASETS_TASK: imports.append("schedule.clean_unused_datasets_task") beat_schedule["clean_unused_datasets_task"] = { "task": "schedule.clean_unused_datasets_task.clean_unused_datasets_task", - "schedule": timedelta(days=day), + "schedule": crontab(minute="0", hour="3", day_of_month=f"*/{day}"), } if dify_config.ENABLE_CREATE_TIDB_SERVERLESS_TASK: imports.append("schedule.create_tidb_serverless_task") @@ -97,7 +97,7 @@ def init_app(app: DifyApp) -> Celery: imports.append("schedule.clean_messages") beat_schedule["clean_messages"] = { "task": "schedule.clean_messages.clean_messages", - "schedule": timedelta(days=day), + "schedule": crontab(minute="0", hour="4", day_of_month=f"*/{day}"), } if dify_config.ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK: imports.append("schedule.mail_clean_document_notify_task") diff --git a/api/extensions/ext_mail.py b/api/extensions/ext_mail.py index df5d8a9c11..fe05138196 100644 --- a/api/extensions/ext_mail.py +++ b/api/extensions/ext_mail.py @@ -64,7 +64,7 @@ class Mail: sendgrid_api_key=dify_config.SENDGRID_API_KEY, _from=dify_config.MAIL_DEFAULT_SEND_FROM or "" ) case _: - raise ValueError("Unsupported mail type {}".format(mail_type)) + raise ValueError(f"Unsupported mail type {mail_type}") def send(self, to: str, subject: str, html: str, from_: Optional[str] = None): if not self._client: diff --git a/api/extensions/ext_redis.py b/api/extensions/ext_redis.py index be2f6115f7..14b9273e9d 100644 --- a/api/extensions/ext_redis.py +++ b/api/extensions/ext_redis.py @@ -137,7 +137,7 @@ def redis_fallback(default_return: Any = None): try: return func(*args, **kwargs) except RedisError as e: - logger.warning(f"Redis operation failed in {func.__name__}: {str(e)}", exc_info=True) + logger.warning("Redis operation failed in %s: %s", func.__name__, str(e), exc_info=True) return default_return return wrapper diff --git a/api/extensions/storage/azure_blob_storage.py b/api/extensions/storage/azure_blob_storage.py index 81eec94da4..7ec0889776 100644 --- a/api/extensions/storage/azure_blob_storage.py +++ b/api/extensions/storage/azure_blob_storage.py @@ -69,7 +69,7 @@ class AzureBlobStorage(BaseStorage): if self.account_key == "managedidentity": return BlobServiceClient(account_url=self.account_url, 
credential=self.credential) # type: ignore - cache_key = "azure_blob_sas_token_{}_{}".format(self.account_name, self.account_key) + cache_key = f"azure_blob_sas_token_{self.account_name}_{self.account_key}" cache_result = redis_client.get(cache_key) if cache_result is not None: sas_token = cache_result.decode("utf-8") diff --git a/api/extensions/storage/opendal_storage.py b/api/extensions/storage/opendal_storage.py index 12e2738e9d..0ba35506d3 100644 --- a/api/extensions/storage/opendal_storage.py +++ b/api/extensions/storage/opendal_storage.py @@ -35,21 +35,21 @@ class OpenDALStorage(BaseStorage): Path(root).mkdir(parents=True, exist_ok=True) self.op = opendal.Operator(scheme=scheme, **kwargs) # type: ignore - logger.debug(f"opendal operator created with scheme {scheme}") + logger.debug("opendal operator created with scheme %s", scheme) retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True) self.op = self.op.layer(retry_layer) logger.debug("added retry layer to opendal operator") def save(self, filename: str, data: bytes) -> None: self.op.write(path=filename, bs=data) - logger.debug(f"file {filename} saved") + logger.debug("file %s saved", filename) def load_once(self, filename: str) -> bytes: if not self.exists(filename): raise FileNotFoundError("File not found") content: bytes = self.op.read(path=filename) - logger.debug(f"file {filename} loaded") + logger.debug("file %s loaded", filename) return content def load_stream(self, filename: str) -> Generator: @@ -60,7 +60,7 @@ class OpenDALStorage(BaseStorage): file = self.op.open(path=filename, mode="rb") while chunk := file.read(batch_size): yield chunk - logger.debug(f"file {filename} loaded as stream") + logger.debug("file %s loaded as stream", filename) def download(self, filename: str, target_filepath: str): if not self.exists(filename): @@ -68,7 +68,7 @@ class OpenDALStorage(BaseStorage): with Path(target_filepath).open("wb") as f: f.write(self.op.read(path=filename)) - logger.debug(f"file {filename} downloaded to {target_filepath}") + logger.debug("file %s downloaded to %s", filename, target_filepath) def exists(self, filename: str) -> bool: res: bool = self.op.exists(path=filename) @@ -77,9 +77,9 @@ class OpenDALStorage(BaseStorage): def delete(self, filename: str): if self.exists(filename): self.op.delete(path=filename) - logger.debug(f"file {filename} deleted") + logger.debug("file %s deleted", filename) return - logger.debug(f"file {filename} not found, skip delete") + logger.debug("file %s not found, skip delete", filename) def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]: if not self.exists(path): @@ -87,13 +87,13 @@ class OpenDALStorage(BaseStorage): all_files = self.op.scan(path=path) if files and directories: - logger.debug(f"files and directories on {path} scanned") + logger.debug("files and directories on %s scanned", path) return [f.path for f in all_files] if files: - logger.debug(f"files on {path} scanned") + logger.debug("files on %s scanned", path) return [f.path for f in all_files if not f.path.endswith("/")] elif directories: - logger.debug(f"directories on {path} scanned") + logger.debug("directories on %s scanned", path) return [f.path for f in all_files if f.path.endswith("/")] else: raise ValueError("At least one of files or directories must be True") diff --git a/api/extensions/storage/volcengine_tos_storage.py b/api/extensions/storage/volcengine_tos_storage.py index 55fe6545ec..32839d3497 100644 --- a/api/extensions/storage/volcengine_tos_storage.py 
+++ b/api/extensions/storage/volcengine_tos_storage.py
@@ -25,7 +25,7 @@ class VolcengineTosStorage(BaseStorage):
     def load_once(self, filename: str) -> bytes:
         data = self.client.get_object(bucket=self.bucket_name, key=filename).read()
         if not isinstance(data, bytes):
-            raise TypeError("Expected bytes, got {}".format(type(data).__name__))
+            raise TypeError(f"Expected bytes, got {type(data).__name__}")
         return data

     def load_stream(self, filename: str) -> Generator:
diff --git a/api/libs/email_i18n.py b/api/libs/email_i18n.py
index bfbf41a073..b7c9f3ec6c 100644
--- a/api/libs/email_i18n.py
+++ b/api/libs/email_i18n.py
@@ -25,6 +25,7 @@ class EmailType(Enum):
     EMAIL_CODE_LOGIN = "email_code_login"
     CHANGE_EMAIL_OLD = "change_email_old"
     CHANGE_EMAIL_NEW = "change_email_new"
+    CHANGE_EMAIL_COMPLETED = "change_email_completed"
     OWNER_TRANSFER_CONFIRM = "owner_transfer_confirm"
     OWNER_TRANSFER_OLD_NOTIFY = "owner_transfer_old_notify"
     OWNER_TRANSFER_NEW_NOTIFY = "owner_transfer_new_notify"
@@ -344,6 +345,18 @@ def create_default_email_config() -> EmailI18nConfig:
                 branded_template_path="without-brand/change_mail_confirm_new_template_zh-CN.html",
             ),
         },
+        EmailType.CHANGE_EMAIL_COMPLETED: {
+            EmailLanguage.EN_US: EmailTemplate(
+                subject="Your login email has been changed",
+                template_path="change_mail_completed_template_en-US.html",
+                branded_template_path="without-brand/change_mail_completed_template_en-US.html",
+            ),
+            EmailLanguage.ZH_HANS: EmailTemplate(
+                subject="您的登录邮箱已更改",
+                template_path="change_mail_completed_template_zh-CN.html",
+                branded_template_path="without-brand/change_mail_completed_template_zh-CN.html",
+            ),
+        },
         EmailType.OWNER_TRANSFER_CONFIRM: {
             EmailLanguage.EN_US: EmailTemplate(
                 subject="Verify Your Request to Transfer Workspace Ownership",
diff --git a/api/libs/helper.py b/api/libs/helper.py
index 00772d530a..b36f972e19 100644
--- a/api/libs/helper.py
+++ b/api/libs/helper.py
@@ -95,7 +95,7 @@ def email(email):
     if re.match(pattern, email) is not None:
         return email

-    error = "{email} is not a valid email.".format(email=email)
+    error = f"{email} is not a valid email."
     raise ValueError(error)


@@ -107,7 +107,7 @@ def uuid_value(value):
         uuid_obj = uuid.UUID(value)
         return str(uuid_obj)
     except ValueError:
-        error = "{value} is not a valid uuid.".format(value=value)
+        error = f"{value} is not a valid uuid."
         raise ValueError(error)


@@ -126,7 +126,7 @@ def timestamp_value(timestamp):
             raise ValueError
         return int_timestamp
     except ValueError:
-        error = "{timestamp} is not a valid timestamp.".format(timestamp=timestamp)
+        error = f"{timestamp} is not a valid timestamp."
         raise ValueError(error)


@@ -169,14 +169,14 @@ def _get_float(value):
     try:
         return float(value)
     except (TypeError, ValueError):
-        raise ValueError("{} is not a valid float".format(value))
+        raise ValueError(f"{value} is not a valid float")


 def timezone(timezone_string):
     if timezone_string and timezone_string in available_timezones():
         return timezone_string

-    error = "{timezone_string} is not a valid timezone.".format(timezone_string=timezone_string)
+    error = f"{timezone_string} is not a valid timezone."
     raise ValueError(error)


@@ -321,7 +321,7 @@ class TokenManager:
         key = cls._get_token_key(token, token_type)
         token_data_json = redis_client.get(key)
         if token_data_json is None:
-            logging.warning(f"{token_type} token {token} not found with key {key}")
+            logging.warning("%s token %s not found with key %s", token_type, token, key)
             return None
         token_data: Optional[dict[str, Any]] = json.loads(token_data_json)
         return token_data
diff --git a/api/libs/rsa.py b/api/libs/rsa.py
index ed7a0eb116..598e5bc9e3 100644
--- a/api/libs/rsa.py
+++ b/api/libs/rsa.py
@@ -50,13 +50,13 @@ def encrypt(text: str, public_key: Union[str, bytes]) -> bytes:

 def get_decrypt_decoding(tenant_id: str) -> tuple[RSA.RsaKey, object]:
     filepath = os.path.join("privkeys", tenant_id, "private.pem")

-    cache_key = "tenant_privkey:{hash}".format(hash=hashlib.sha3_256(filepath.encode()).hexdigest())
+    cache_key = f"tenant_privkey:{hashlib.sha3_256(filepath.encode()).hexdigest()}"
     private_key = redis_client.get(cache_key)
     if not private_key:
         try:
             private_key = storage.load(filepath)
         except FileNotFoundError:
-            raise PrivkeyNotFoundError("Private key not found, tenant_id: {tenant_id}".format(tenant_id=tenant_id))
+            raise PrivkeyNotFoundError(f"Private key not found, tenant_id: {tenant_id}")

         redis_client.setex(cache_key, 120, private_key)
diff --git a/api/libs/sendgrid.py b/api/libs/sendgrid.py
index 5409e3eeeb..cfc6c7d794 100644
--- a/api/libs/sendgrid.py
+++ b/api/libs/sendgrid.py
@@ -41,5 +41,5 @@ class SendGridClient:
             )
             raise
         except Exception as e:
-            logging.exception(f"SendGridClient Unexpected error occurred while sending email to {_to}")
+            logging.exception("SendGridClient Unexpected error occurred while sending email to %s", _to)
             raise
diff --git a/api/libs/smtp.py b/api/libs/smtp.py
index b94386660e..a01ad6fab8 100644
--- a/api/libs/smtp.py
+++ b/api/libs/smtp.py
@@ -50,7 +50,7 @@ class SMTPClient:
             logging.exception("Timeout occurred while sending email")
             raise
         except Exception as e:
-            logging.exception(f"Unexpected error occurred while sending email to {mail['to']}")
+            logging.exception("Unexpected error occurred while sending email to %s", mail["to"])
             raise
         finally:
             if smtp:
diff --git a/api/models/dataset.py b/api/models/dataset.py
index d877540213..01372f8bf6 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -911,7 +911,7 @@ class DatasetKeywordTable(Base):
                     return json.loads(keyword_table_text.decode("utf-8"), cls=SetDecoder)
                 return None
             except Exception as e:
-                logging.exception(f"Failed to load keyword table from file: {file_key}")
+                logging.exception("Failed to load keyword table from file: %s", file_key)
                 return None


diff --git a/api/models/model.py b/api/models/model.py
index a78a91ebd5..9f6d51b315 100644
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -32,9 +32,6 @@ from .engine import db
 from .enums import CreatorUserRole
 from .types import StringUUID

-if TYPE_CHECKING:
-    from .workflow import Workflow
-

 class DifySetup(Base):
     __tablename__ = "dify_setups"
diff --git a/api/models/workflow.py b/api/models/workflow.py
index 79d96e42dd..d89db6c7da 100644
--- a/api/models/workflow.py
+++ b/api/models/workflow.py
@@ -42,9 +42,6 @@ from .types import EnumText, StringUUID

 _logger = logging.getLogger(__name__)

-if TYPE_CHECKING:
-    from models.model import AppMode
-

 class WorkflowType(Enum):
     """
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 7ec8a91198..be42b509ed 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dify-api"
-version = "1.7.0"
+version = "1.7.1"
 requires-python = ">=3.11,<3.13"

 dependencies = [
diff --git a/api/repositories/factory.py b/api/repositories/factory.py
index 0a0adbf2c2..070cdd46dd 100644
--- a/api/repositories/factory.py
+++ b/api/repositories/factory.py
@@ -48,7 +48,7 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory):
             RepositoryImportError: If the configured repository cannot be imported or instantiated
         """
         class_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY
-        logger.debug(f"Creating DifyAPIWorkflowNodeExecutionRepository from: {class_path}")
+        logger.debug("Creating DifyAPIWorkflowNodeExecutionRepository from: %s", class_path)

         try:
             repository_class = cls._import_class(class_path)
@@ -86,7 +86,7 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory):
             RepositoryImportError: If the configured repository cannot be imported or instantiated
         """
         class_path = dify_config.API_WORKFLOW_RUN_REPOSITORY
-        logger.debug(f"Creating APIWorkflowRunRepository from: {class_path}")
+        logger.debug("Creating APIWorkflowRunRepository from: %s", class_path)

         try:
             repository_class = cls._import_class(class_path)
diff --git a/api/repositories/sqlalchemy_api_workflow_run_repository.py b/api/repositories/sqlalchemy_api_workflow_run_repository.py
index ebd1d74b20..7c3b1f4ce0 100644
--- a/api/repositories/sqlalchemy_api_workflow_run_repository.py
+++ b/api/repositories/sqlalchemy_api_workflow_run_repository.py
@@ -155,7 +155,7 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
             session.commit()

             deleted_count = cast(int, result.rowcount)
-            logger.info(f"Deleted {deleted_count} workflow runs by IDs")
+            logger.info("Deleted %s workflow runs by IDs", deleted_count)
             return deleted_count

     def delete_runs_by_app(
@@ -193,11 +193,11 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):

                 batch_deleted = result.rowcount
                 total_deleted += batch_deleted
-                logger.info(f"Deleted batch of {batch_deleted} workflow runs for app {app_id}")
+                logger.info("Deleted batch of %s workflow runs for app %s", batch_deleted, app_id)

                 # If we deleted fewer records than the batch size, we're done
                 if batch_deleted < batch_size:
                     break

-        logger.info(f"Total deleted {total_deleted} workflow runs for app {app_id}")
+        logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id)
         return total_deleted
diff --git a/api/schedule/check_upgradable_plugin_task.py b/api/schedule/check_upgradable_plugin_task.py
index c1d6018827..e27391b558 100644
--- a/api/schedule/check_upgradable_plugin_task.py
+++ b/api/schedule/check_upgradable_plugin_task.py
@@ -16,7 +16,7 @@ def check_upgradable_plugin_task():
     start_at = time.perf_counter()

     now_seconds_of_day = time.time() % 86400 - 30  # we assume the tz is UTC
-    click.echo(click.style("Now seconds of day: {}".format(now_seconds_of_day), fg="green"))
+    click.echo(click.style(f"Now seconds of day: {now_seconds_of_day}", fg="green"))

     strategies = (
         db.session.query(TenantPluginAutoUpgradeStrategy)
@@ -43,7 +43,7 @@ def check_upgradable_plugin_task():
     end_at = time.perf_counter()
     click.echo(
         click.style(
-            "Checked upgradable plugin success latency: {}".format(end_at - start_at),
+            f"Checked upgradable plugin success latency: {end_at - start_at}",
             fg="green",
         )
     )
diff --git a/api/schedule/clean_embedding_cache_task.py b/api/schedule/clean_embedding_cache_task.py
index 024e3d6f50..2298acf6eb 100644
--- a/api/schedule/clean_embedding_cache_task.py
+++ b/api/schedule/clean_embedding_cache_task.py
@@ -39,4 +39,4 @@ def clean_embedding_cache_task():
         else:
             break
end_at = time.perf_counter() - click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Cleaned embedding cache from db success latency: {end_at - start_at}", fg="green")) diff --git a/api/schedule/clean_messages.py b/api/schedule/clean_messages.py index a6851e36e5..4c35745959 100644 --- a/api/schedule/clean_messages.py +++ b/api/schedule/clean_messages.py @@ -87,4 +87,4 @@ def clean_messages(): db.session.query(Message).where(Message.id == message.id).delete() db.session.commit() end_at = time.perf_counter() - click.echo(click.style("Cleaned messages from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Cleaned messages from db success latency: {end_at - start_at}", fg="green")) diff --git a/api/schedule/clean_unused_datasets_task.py b/api/schedule/clean_unused_datasets_task.py index 72e2e73e65..7887835bc5 100644 --- a/api/schedule/clean_unused_datasets_task.py +++ b/api/schedule/clean_unused_datasets_task.py @@ -101,11 +101,9 @@ def clean_unused_datasets_task(): # update document db.session.query(Document).filter_by(dataset_id=dataset.id).update({Document.enabled: False}) db.session.commit() - click.echo(click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green")) + click.echo(click.style(f"Cleaned unused dataset {dataset.id} from db success!", fg="green")) except Exception as e: - click.echo( - click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red") - ) + click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red")) while True: try: # Subquery for counting new documents @@ -176,12 +174,8 @@ def clean_unused_datasets_task(): # update document db.session.query(Document).filter_by(dataset_id=dataset.id).update({Document.enabled: False}) db.session.commit() - click.echo( - click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green") - ) + click.echo(click.style(f"Cleaned unused dataset {dataset.id} from db success!", fg="green")) except Exception as e: - click.echo( - click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red") - ) + click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red")) end_at = time.perf_counter() - click.echo(click.style("Cleaned unused dataset from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Cleaned unused dataset from db success latency: {end_at - start_at}", fg="green")) diff --git a/api/schedule/create_tidb_serverless_task.py b/api/schedule/create_tidb_serverless_task.py index 91953354e6..c343063fae 100644 --- a/api/schedule/create_tidb_serverless_task.py +++ b/api/schedule/create_tidb_serverless_task.py @@ -33,7 +33,7 @@ def create_tidb_serverless_task(): break end_at = time.perf_counter() - click.echo(click.style("Create tidb serverless task success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style(f"Create tidb serverless task success latency: {end_at - start_at}", fg="green")) def create_clusters(batch_size): diff --git a/api/schedule/mail_clean_document_notify_task.py b/api/schedule/mail_clean_document_notify_task.py index 5911c98b0a..03ef9062bd 100644 --- a/api/schedule/mail_clean_document_notify_task.py +++ b/api/schedule/mail_clean_document_notify_task.py @@ -90,7 +90,7 @@ def mail_clean_document_notify_task(): db.session.commit() end_at = 
time.perf_counter() logging.info( - click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green") + click.style(f"Send document clean notify mail succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: logging.exception("Send document clean notify mail failed") diff --git a/api/schedule/update_tidb_serverless_status_task.py b/api/schedule/update_tidb_serverless_status_task.py index 4d6c1f1877..1bfeb869e2 100644 --- a/api/schedule/update_tidb_serverless_status_task.py +++ b/api/schedule/update_tidb_serverless_status_task.py @@ -29,9 +29,7 @@ def update_tidb_serverless_status_task(): click.echo(click.style(f"Error: {e}", fg="red")) end_at = time.perf_counter() - click.echo( - click.style("Update tidb serverless status task success latency: {}".format(end_at - start_at), fg="green") - ) + click.echo(click.style(f"Update tidb serverless status task success latency: {end_at - start_at}", fg="green")) def update_clusters(tidb_serverless_list: list[TidbAuthBinding]): diff --git a/api/services/account_service.py b/api/services/account_service.py index 59bffa873c..1cce8e67a4 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -54,7 +54,10 @@ from services.errors.workspace import WorkSpaceNotAllowedCreateError, Workspaces from services.feature_service import FeatureService from tasks.delete_account_task import delete_account_task from tasks.mail_account_deletion_task import send_account_deletion_verification_code -from tasks.mail_change_mail_task import send_change_mail_task +from tasks.mail_change_mail_task import ( + send_change_mail_completed_notification_task, + send_change_mail_task, +) from tasks.mail_email_code_login import send_email_code_login_mail_task from tasks.mail_invite_member_task import send_invite_member_mail_task from tasks.mail_owner_transfer_task import ( @@ -329,9 +332,9 @@ class AccountService: db.session.add(account_integrate) db.session.commit() - logging.info(f"Account {account.id} linked {provider} account {open_id}.") + logging.info("Account %s linked %s account %s.", account.id, provider, open_id) except Exception as e: - logging.exception(f"Failed to link {provider} account {open_id} to Account {account.id}") + logging.exception("Failed to link %s account %s to Account %s", provider, open_id, account.id) raise LinkAccountIntegrateError("Failed to link account.") from e @staticmethod @@ -352,6 +355,17 @@ class AccountService: db.session.commit() return account + @staticmethod + def update_account_email(account: Account, email: str) -> Account: + """Update account email""" + account.email = email + account_integrate = db.session.query(AccountIntegrate).filter_by(account_id=account.id).first() + if account_integrate: + db.session.delete(account_integrate) + db.session.add(account) + db.session.commit() + return account + @staticmethod def update_login_info(account: Account, *, ip_address: str) -> None: """Update last login time and ip""" @@ -461,6 +475,22 @@ class AccountService: cls.change_email_rate_limiter.increment_rate_limit(account_email) return token + @classmethod + def send_change_email_completed_notify_email( + cls, + account: Optional[Account] = None, + email: Optional[str] = None, + language: Optional[str] = "en-US", + ): + account_email = account.email if account else email + if account_email is None: + raise ValueError("Email must be provided.") + + send_change_mail_completed_notification_task.delay( + language=language, + to=account_email, + ) + @classmethod def 
send_owner_transfer_email( cls, @@ -652,6 +682,12 @@ class AccountService: return account + @classmethod + def is_account_in_freeze(cls, email: str) -> bool: + if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(email): + return True + return False + @staticmethod @redis_fallback(default_return=None) def add_login_error_rate_limit(email: str) -> None: @@ -881,7 +917,7 @@ class TenantService: """Create tenant member""" if role == TenantAccountRole.OWNER.value: if TenantService.has_roles(tenant, [TenantAccountRole.OWNER]): - logging.error(f"Tenant {tenant.id} has already an owner.") + logging.error("Tenant %s has already an owner.", tenant.id) raise Exception("Tenant already has an owner.") ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first() @@ -1133,7 +1169,7 @@ class RegisterService: db.session.query(Tenant).delete() db.session.commit() - logging.exception(f"Setup account failed, email: {email}, name: {name}") + logging.exception("Setup account failed, email: %s, name: %s", email, name) raise ValueError(f"Setup failed: {e}") @classmethod @@ -1257,7 +1293,7 @@ class RegisterService: def revoke_token(cls, workspace_id: str, email: str, token: str): if workspace_id and email: email_hash = sha256(email.encode()).hexdigest() - cache_key = "member_invite_token:{}, {}:{}".format(workspace_id, email_hash, token) + cache_key = f"member_invite_token:{workspace_id}, {email_hash}:{token}" redis_client.delete(cache_key) else: redis_client.delete(cls._get_invitation_token_key(token)) diff --git a/api/services/annotation_service.py b/api/services/annotation_service.py index 7cb0b46517..cfa917daf6 100644 --- a/api/services/annotation_service.py +++ b/api/services/annotation_service.py @@ -74,14 +74,14 @@ class AppAnnotationService: @classmethod def enable_app_annotation(cls, args: dict, app_id: str) -> dict: - enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id)) + enable_app_annotation_key = f"enable_app_annotation_{str(app_id)}" cache_result = redis_client.get(enable_app_annotation_key) if cache_result is not None: return {"job_id": cache_result, "job_status": "processing"} # async job job_id = str(uuid.uuid4()) - enable_app_annotation_job_key = "enable_app_annotation_job_{}".format(str(job_id)) + enable_app_annotation_job_key = f"enable_app_annotation_job_{str(job_id)}" # send batch add segments task redis_client.setnx(enable_app_annotation_job_key, "waiting") enable_annotation_reply_task.delay( @@ -97,14 +97,14 @@ class AppAnnotationService: @classmethod def disable_app_annotation(cls, app_id: str) -> dict: - disable_app_annotation_key = "disable_app_annotation_{}".format(str(app_id)) + disable_app_annotation_key = f"disable_app_annotation_{str(app_id)}" cache_result = redis_client.get(disable_app_annotation_key) if cache_result is not None: return {"job_id": cache_result, "job_status": "processing"} # async job job_id = str(uuid.uuid4()) - disable_app_annotation_job_key = "disable_app_annotation_job_{}".format(str(job_id)) + disable_app_annotation_job_key = f"disable_app_annotation_job_{str(job_id)}" # send batch add segments task redis_client.setnx(disable_app_annotation_job_key, "waiting") disable_annotation_reply_task.delay(str(job_id), app_id, current_user.current_tenant_id) @@ -127,8 +127,8 @@ class AppAnnotationService: .where(MessageAnnotation.app_id == app_id) .where( or_( - MessageAnnotation.question.ilike("%{}%".format(keyword)), - MessageAnnotation.content.ilike("%{}%".format(keyword)), + 
MessageAnnotation.question.ilike(f"%{keyword}%"), + MessageAnnotation.content.ilike(f"%{keyword}%"), ) ) .order_by(MessageAnnotation.created_at.desc(), MessageAnnotation.id.desc()) @@ -280,7 +280,7 @@ class AppAnnotationService: try: # Skip the first row - df = pd.read_csv(file) + df = pd.read_csv(file, dtype=str) result = [] for index, row in df.iterrows(): content = {"question": row.iloc[0], "answer": row.iloc[1]} @@ -295,7 +295,7 @@ class AppAnnotationService: raise ValueError("The number of annotations exceeds the limit of your subscription.") # async job job_id = str(uuid.uuid4()) - indexing_cache_key = "app_annotation_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}" # send batch add segments task redis_client.setnx(indexing_cache_key, "waiting") batch_import_annotations_task.delay( @@ -440,3 +440,38 @@ class AppAnnotationService: "embedding_model_name": collection_binding_detail.model_name, }, } + + @classmethod + def clear_all_annotations(cls, app_id: str) -> dict: + app = ( + db.session.query(App) + .filter(App.id == app_id, App.tenant_id == current_user.current_tenant_id, App.status == "normal") + .first() + ) + + if not app: + raise NotFound("App not found") + + # if annotation reply is enabled, delete annotation index + app_annotation_setting = ( + db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first() + ) + + annotations_query = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id) + for annotation in annotations_query.yield_per(100): + annotation_hit_histories_query = db.session.query(AppAnnotationHitHistory).filter( + AppAnnotationHitHistory.annotation_id == annotation.id + ) + for annotation_hit_history in annotation_hit_histories_query.yield_per(100): + db.session.delete(annotation_hit_history) + + # if annotation reply is enabled, delete annotation index + if app_annotation_setting: + delete_annotation_index_task.delay( + annotation.id, app_id, current_user.current_tenant_id, app_annotation_setting.collection_binding_id + ) + + db.session.delete(annotation) + + db.session.commit() + return {"result": "success"} diff --git a/api/services/api_based_extension_service.py b/api/services/api_based_extension_service.py index 457c91e5c0..2f28eff165 100644 --- a/api/services/api_based_extension_service.py +++ b/api/services/api_based_extension_service.py @@ -102,4 +102,4 @@ class APIBasedExtensionService: if resp.get("result") != "pong": raise ValueError(resp) except Exception as e: - raise ValueError("connection error: {}".format(e)) + raise ValueError(f"connection error: {e}") diff --git a/api/services/app_service.py b/api/services/app_service.py index 0b6b85bcb2..0f22666d5a 100644 --- a/api/services/app_service.py +++ b/api/services/app_service.py @@ -53,9 +53,10 @@ class AppService: if args.get("name"): name = args["name"][:30] filters.append(App.name.ilike(f"%{name}%")) - if args.get("tag_ids"): + # Check if tag_ids is not empty to avoid WHERE false condition + if args.get("tag_ids") and len(args["tag_ids"]) > 0: target_ids = TagService.get_target_ids_by_tag_ids("app", tenant_id, args["tag_ids"]) - if target_ids: + if target_ids and len(target_ids) > 0: filters.append(App.id.in_(target_ids)) else: return None @@ -94,7 +95,7 @@ class AppService: except (ProviderTokenNotInitError, LLMBadRequestError): model_instance = None except Exception as e: - logging.exception(f"Get default model instance failed, tenant_id: {tenant_id}") + logging.exception("Get default 
model instance failed, tenant_id: %s", tenant_id) model_instance = None if model_instance: diff --git a/api/services/clear_free_plan_tenant_expired_logs.py b/api/services/clear_free_plan_tenant_expired_logs.py index ad9b750d40..d057a14afb 100644 --- a/api/services/clear_free_plan_tenant_expired_logs.py +++ b/api/services/clear_free_plan_tenant_expired_logs.py @@ -228,7 +228,7 @@ class ClearFreePlanTenantExpiredLogs: # only process sandbox tenant cls.process_tenant(flask_app, tenant_id, days, batch) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) finally: nonlocal handled_tenant_count handled_tenant_count += 1 @@ -311,7 +311,7 @@ class ClearFreePlanTenantExpiredLogs: try: tenants.append(tenant_id) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) continue futures.append( diff --git a/api/services/conversation_service.py b/api/services/conversation_service.py index 525c87fe4a..206c832a20 100644 --- a/api/services/conversation_service.py +++ b/api/services/conversation_service.py @@ -46,9 +46,11 @@ class ConversationService: Conversation.from_account_id == (user.id if isinstance(user, Account) else None), or_(Conversation.invoke_from.is_(None), Conversation.invoke_from == invoke_from.value), ) - if include_ids is not None: + # Check if include_ids is not None and not empty to avoid WHERE false condition + if include_ids is not None and len(include_ids) > 0: stmt = stmt.where(Conversation.id.in_(include_ids)) - if exclude_ids is not None: + # Check if exclude_ids is not None and not empty to avoid WHERE false condition + if exclude_ids is not None and len(exclude_ids) > 0: stmt = stmt.where(~Conversation.id.in_(exclude_ids)) # define sort fields and directions diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 4872702a76..1280399990 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -91,14 +91,16 @@ class DatasetService: if user.current_role == TenantAccountRole.DATASET_OPERATOR: # only show datasets that the user has permission to access - if permitted_dataset_ids: + # Check if permitted_dataset_ids is not empty to avoid WHERE false condition + if permitted_dataset_ids and len(permitted_dataset_ids) > 0: query = query.where(Dataset.id.in_(permitted_dataset_ids)) else: return [], 0 else: if user.current_role != TenantAccountRole.OWNER or not include_all: # show all datasets that the user has permission to access - if permitted_dataset_ids: + # Check if permitted_dataset_ids is not empty to avoid WHERE false condition + if permitted_dataset_ids and len(permitted_dataset_ids) > 0: query = query.where( db.or_( Dataset.permission == DatasetPermissionEnum.ALL_TEAM, @@ -127,9 +129,10 @@ class DatasetService: if search: query = query.where(Dataset.name.ilike(f"%{search}%")) - if tag_ids: + # Check if tag_ids is not empty to avoid WHERE false condition + if tag_ids and len(tag_ids) > 0: target_ids = TagService.get_target_ids_by_tag_ids("knowledge", tenant_id, tag_ids) - if target_ids: + if target_ids and len(target_ids) > 0: query = query.where(Dataset.id.in_(target_ids)) else: return [], 0 @@ -158,6 +161,9 @@ class DatasetService: @staticmethod def get_datasets_by_ids(ids, tenant_id): + # Check if ids is not empty to avoid WHERE false condition + if not ids or len(ids) == 0: + return [], 0 stmt = select(Dataset).where(Dataset.id.in_(ids), Dataset.tenant_id 
== tenant_id) datasets = db.paginate(select=stmt, page=1, per_page=len(ids), max_per_page=len(ids), error_out=False) @@ -605,8 +611,9 @@ class DatasetService: except ProviderTokenNotInitError: # If we can't get the embedding model, preserve existing settings logging.warning( - f"Failed to initialize embedding model {data['embedding_model_provider']}/{data['embedding_model']}, " - f"preserving existing settings" + "Failed to initialize embedding model %s/%s, preserving existing settings", + data["embedding_model_provider"], + data["embedding_model"], ) if dataset.embedding_model_provider and dataset.embedding_model: filtered_data["embedding_model_provider"] = dataset.embedding_model_provider @@ -649,11 +656,11 @@ class DatasetService: @staticmethod def check_dataset_permission(dataset, user): if dataset.tenant_id != user.current_tenant_id: - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id) raise NoPermissionError("You do not have permission to access this dataset.") if user.current_role != TenantAccountRole.OWNER: if dataset.permission == DatasetPermissionEnum.ONLY_ME and dataset.created_by != user.id: - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id) raise NoPermissionError("You do not have permission to access this dataset.") if dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM: # For partial team permission, user needs explicit permission or be the creator @@ -662,7 +669,7 @@ class DatasetService: db.session.query(DatasetPermission).filter_by(dataset_id=dataset.id, account_id=user.id).first() ) if not user_permission: - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id) raise NoPermissionError("You do not have permission to access this dataset.") @staticmethod @@ -950,6 +957,9 @@ class DocumentService: @staticmethod def delete_documents(dataset: Dataset, document_ids: list[str]): + # Check if document_ids is not empty to avoid WHERE false condition + if not document_ids or len(document_ids) == 0: + return documents = db.session.query(Document).where(Document.id.in_(document_ids)).all() file_ids = [ document.data_source_info_dict["upload_file_id"] @@ -1000,7 +1010,7 @@ class DocumentService: db.session.add(document) db.session.commit() # set document paused flag - indexing_cache_key = "document_{}_is_paused".format(document.id) + indexing_cache_key = f"document_{document.id}_is_paused" redis_client.setnx(indexing_cache_key, "True") @staticmethod @@ -1015,7 +1025,7 @@ class DocumentService: db.session.add(document) db.session.commit() # delete paused flag - indexing_cache_key = "document_{}_is_paused".format(document.id) + indexing_cache_key = f"document_{document.id}_is_paused" redis_client.delete(indexing_cache_key) # trigger async task recover_document_indexing_task.delay(document.dataset_id, document.id) @@ -1024,7 +1034,7 @@ class DocumentService: def retry_document(dataset_id: str, documents: list[Document]): for document in documents: # add retry flag - retry_indexing_cache_key = "document_{}_is_retried".format(document.id) + retry_indexing_cache_key = f"document_{document.id}_is_retried" cache_result = redis_client.get(retry_indexing_cache_key) if cache_result is not None: raise 
ValueError("Document is being retried, please try again later") @@ -1041,7 +1051,7 @@ class DocumentService: @staticmethod def sync_website_document(dataset_id: str, document: Document): # add sync flag - sync_indexing_cache_key = "document_{}_is_sync".format(document.id) + sync_indexing_cache_key = f"document_{document.id}_is_sync" cache_result = redis_client.get(sync_indexing_cache_key) if cache_result is not None: raise ValueError("Document is being synced, please try again later") @@ -1174,12 +1184,13 @@ class DocumentService: ) else: logging.warning( - f"Invalid process rule mode: {process_rule.mode}, can not find dataset process rule" + "Invalid process rule mode: %s, can not find dataset process rule", + process_rule.mode, ) return db.session.add(dataset_process_rule) db.session.commit() - lock_name = "add_document_lock_dataset_id_{}".format(dataset.id) + lock_name = f"add_document_lock_dataset_id_{dataset.id}" with redis_client.lock(lock_name, timeout=600): position = DocumentService.get_documents_position(dataset.id) document_ids = [] @@ -1862,7 +1873,7 @@ class DocumentService: task_func.delay(*task_args) except Exception as e: # Log the error but do not rollback the transaction - logging.exception(f"Error executing async task for document {update_info['document'].id}") + logging.exception("Error executing async task for document %s", update_info["document"].id) # don't raise the error immediately, but capture it for later propagation_error = e try: @@ -1873,7 +1884,7 @@ class DocumentService: redis_client.setex(indexing_cache_key, 600, 1) except Exception as e: # Log the error but do not rollback the transaction - logging.exception(f"Error setting cache for document {update_info['document'].id}") + logging.exception("Error setting cache for document %s", update_info["document"].id) # Raise any propagation error after all updates if propagation_error: raise propagation_error @@ -2001,7 +2012,7 @@ class SegmentService: ) # calc embedding use tokens tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0] - lock_name = "add_segment_lock_document_id_{}".format(document.id) + lock_name = f"add_segment_lock_document_id_{document.id}" with redis_client.lock(lock_name, timeout=600): max_position = ( db.session.query(func.max(DocumentSegment.position)) @@ -2048,7 +2059,7 @@ class SegmentService: @classmethod def multi_create_segment(cls, segments: list, document: Document, dataset: Dataset): - lock_name = "multi_add_segment_lock_document_id_{}".format(document.id) + lock_name = f"multi_add_segment_lock_document_id_{document.id}" increment_word_count = 0 with redis_client.lock(lock_name, timeout=600): embedding_model = None @@ -2130,7 +2141,7 @@ class SegmentService: @classmethod def update_segment(cls, args: SegmentUpdateArgs, segment: DocumentSegment, document: Document, dataset: Dataset): - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: raise ValueError("Segment is indexing, please try again later") @@ -2300,7 +2311,7 @@ class SegmentService: @classmethod def delete_segment(cls, segment: DocumentSegment, document: Document, dataset: Dataset): - indexing_cache_key = "segment_{}_delete_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_delete_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: raise ValueError("Segment is deleting.") @@ -2318,6 +2329,9 @@ 
class SegmentService: @classmethod def delete_segments(cls, segment_ids: list, document: Document, dataset: Dataset): + # Check if segment_ids is not empty to avoid WHERE false condition + if not segment_ids or len(segment_ids) == 0: + return index_node_ids = ( db.session.query(DocumentSegment) .with_entities(DocumentSegment.index_node_id) @@ -2337,6 +2351,9 @@ class SegmentService: @classmethod def update_segments_status(cls, segment_ids: list, action: str, dataset: Dataset, document: Document): + # Check if segment_ids is not empty to avoid WHERE false condition + if not segment_ids or len(segment_ids) == 0: + return if action == "enable": segments = ( db.session.query(DocumentSegment) @@ -2352,7 +2369,7 @@ class SegmentService: return real_deal_segmment_ids = [] for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: continue @@ -2379,7 +2396,7 @@ class SegmentService: return real_deal_segmment_ids = [] for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: continue @@ -2398,7 +2415,7 @@ class SegmentService: def create_child_chunk( cls, content: str, segment: DocumentSegment, document: Document, dataset: Dataset ) -> ChildChunk: - lock_name = "add_child_lock_{}".format(segment.id) + lock_name = f"add_child_lock_{segment.id}" with redis_client.lock(lock_name, timeout=20): index_node_id = str(uuid.uuid4()) index_node_hash = helper.generate_text_hash(content) @@ -2598,7 +2615,8 @@ class SegmentService: DocumentSegment.document_id == document_id, DocumentSegment.tenant_id == tenant_id ) - if status_list: + # Check if status_list is not empty to avoid WHERE false condition + if status_list and len(status_list) > 0: query = query.where(DocumentSegment.status.in_(status_list)) if keyword: diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index 519d5abca5..5a3f504035 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -77,7 +77,7 @@ class HitTestingService: ) end = time.perf_counter() - logging.debug(f"Hit testing retrieve in {end - start:0.4f} seconds") + logging.debug("Hit testing retrieve in %s seconds", end - start) dataset_query = DatasetQuery( dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id @@ -113,7 +113,7 @@ class HitTestingService: ) end = time.perf_counter() - logging.debug(f"External knowledge hit testing retrieve in {end - start:0.4f} seconds") + logging.debug("External knowledge hit testing retrieve in %s seconds", end - start) dataset_query = DatasetQuery( dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id diff --git a/api/services/message_service.py b/api/services/message_service.py index 283b7b9b4b..a19d6ee157 100644 --- a/api/services/message_service.py +++ b/api/services/message_service.py @@ -111,7 +111,8 @@ class MessageService: base_query = base_query.where(Message.conversation_id == conversation.id) - if include_ids is not None: + # Check if include_ids is not None and not empty to avoid WHERE false condition + if include_ids is not None and len(include_ids) > 0: base_query = base_query.where(Message.id.in_(include_ids)) if last_id: diff --git 
a/api/services/model_load_balancing_service.py b/api/services/model_load_balancing_service.py index a200cfa146..fe28aa006e 100644 --- a/api/services/model_load_balancing_service.py +++ b/api/services/model_load_balancing_service.py @@ -340,7 +340,7 @@ class ModelLoadBalancingService: config_id = str(config_id) if config_id not in current_load_balancing_configs_dict: - raise ValueError("Invalid load balancing config id: {}".format(config_id)) + raise ValueError(f"Invalid load balancing config id: {config_id}") updated_config_ids.add(config_id) @@ -349,7 +349,7 @@ class ModelLoadBalancingService: # check duplicate name for current_load_balancing_config in current_load_balancing_configs: if current_load_balancing_config.id != config_id and current_load_balancing_config.name == name: - raise ValueError("Load balancing config name {} already exists".format(name)) + raise ValueError(f"Load balancing config name {name} already exists") if credentials: if not isinstance(credentials, dict): @@ -383,7 +383,7 @@ class ModelLoadBalancingService: # check duplicate name for current_load_balancing_config in current_load_balancing_configs: if current_load_balancing_config.name == name: - raise ValueError("Load balancing config name {} already exists".format(name)) + raise ValueError(f"Load balancing config name {name} already exists") if not credentials: raise ValueError("Invalid load balancing config credentials") diff --git a/api/services/model_provider_service.py b/api/services/model_provider_service.py index 0a0a5619e1..54197bf949 100644 --- a/api/services/model_provider_service.py +++ b/api/services/model_provider_service.py @@ -380,7 +380,7 @@ class ModelProviderService: else None ) except Exception as e: - logger.debug(f"get_default_model_of_model_type error: {e}") + logger.debug("get_default_model_of_model_type error: %s", e) return None def update_default_model_of_model_type(self, tenant_id: str, model_type: str, provider: str, model: str) -> None: diff --git a/api/services/ops_service.py b/api/services/ops_service.py index 62f37c1588..7a9db7273e 100644 --- a/api/services/ops_service.py +++ b/api/services/ops_service.py @@ -65,9 +65,7 @@ class OpsService: } ) except Exception: - new_decrypt_tracing_config.update( - {"project_url": "{host}/".format(host=decrypt_tracing_config.get("host"))} - ) + new_decrypt_tracing_config.update({"project_url": f"{decrypt_tracing_config.get('host')}/"}) if tracing_provider == "langsmith" and ( "project_url" not in decrypt_tracing_config or not decrypt_tracing_config.get("project_url") @@ -139,7 +137,7 @@ class OpsService: project_url = OpsTraceManager.get_trace_config_project_url(tracing_config, tracing_provider) elif tracing_provider == "langfuse": project_key = OpsTraceManager.get_trace_config_project_key(tracing_config, tracing_provider) - project_url = "{host}/project/{key}".format(host=tracing_config.get("host"), key=project_key) + project_url = f"{tracing_config.get('host')}/project/{project_key}" elif tracing_provider in ("langsmith", "opik"): project_url = OpsTraceManager.get_trace_config_project_url(tracing_config, tracing_provider) else: diff --git a/api/services/plugin/data_migration.py b/api/services/plugin/data_migration.py index 5324036414..7a4f886bf5 100644 --- a/api/services/plugin/data_migration.py +++ b/api/services/plugin/data_migration.py @@ -110,7 +110,7 @@ limit 1000""" ) ) logger.exception( - f"[{processed_count}] Failed to migrate [{table_name}] {record_id} ({provider_name})" + "[%s] Failed to migrate [%s] %s (%s)", processed_count, 
table_name, record_id, provider_name ) continue @@ -183,7 +183,7 @@ limit 1000""" ) ) logger.exception( - f"[{processed_count}] Failed to migrate [{table_name}] {record_id} ({provider_name})" + "[%s] Failed to migrate [%s] %s (%s)", processed_count, table_name, record_id, provider_name ) continue diff --git a/api/services/plugin/plugin_migration.py b/api/services/plugin/plugin_migration.py index 1806fbcfd6..222d70a317 100644 --- a/api/services/plugin/plugin_migration.py +++ b/api/services/plugin/plugin_migration.py @@ -78,7 +78,7 @@ class PluginMigration: ) ) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) futures = [] @@ -136,7 +136,7 @@ class PluginMigration: try: tenants.append(tenant_id) except Exception: - logger.exception(f"Failed to process tenant {tenant_id}") + logger.exception("Failed to process tenant %s", tenant_id) continue futures.append( @@ -273,7 +273,7 @@ class PluginMigration: result.append(ToolProviderID(tool_entity.provider_id).plugin_id) except Exception: - logger.exception(f"Failed to process tool {tool}") + logger.exception("Failed to process tool %s", tool) continue return result @@ -301,7 +301,7 @@ class PluginMigration: plugins: dict[str, str] = {} plugin_ids = [] plugin_not_exist = [] - logger.info(f"Extracting unique plugins from {extracted_plugins}") + logger.info("Extracting unique plugins from %s", extracted_plugins) with open(extracted_plugins) as f: for line in f: data = json.loads(line) @@ -318,7 +318,7 @@ class PluginMigration: else: plugin_not_exist.append(plugin_id) except Exception: - logger.exception(f"Failed to fetch plugin unique identifier for {plugin_id}") + logger.exception("Failed to fetch plugin unique identifier for %s", plugin_id) plugin_not_exist.append(plugin_id) with ThreadPoolExecutor(max_workers=10) as executor: @@ -339,7 +339,7 @@ class PluginMigration: # use a fake tenant id to install all the plugins fake_tenant_id = uuid4().hex - logger.info(f"Installing {len(plugins['plugins'])} plugin instances for fake tenant {fake_tenant_id}") + logger.info("Installing %s plugin instances for fake tenant %s", len(plugins["plugins"]), fake_tenant_id) thread_pool = ThreadPoolExecutor(max_workers=workers) @@ -348,7 +348,7 @@ class PluginMigration: plugin_install_failed.extend(response.get("failed", [])) def install(tenant_id: str, plugin_ids: list[str]) -> None: - logger.info(f"Installing {len(plugin_ids)} plugins for tenant {tenant_id}") + logger.info("Installing %s plugins for tenant %s", len(plugin_ids), tenant_id) # fetch plugin already installed installed_plugins = manager.list_plugins(tenant_id) installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] @@ -408,7 +408,7 @@ class PluginMigration: installation = manager.list_plugins(fake_tenant_id) except Exception: - logger.exception(f"Failed to get installation for tenant {fake_tenant_id}") + logger.exception("Failed to get installation for tenant %s", fake_tenant_id) Path(output_file).write_text( json.dumps( @@ -491,7 +491,9 @@ class PluginMigration: else: failed.append(reverse_map[plugin.plugin_unique_identifier]) logger.error( - f"Failed to install plugin {plugin.plugin_unique_identifier}, error: {plugin.message}" + "Failed to install plugin %s, error: %s", + plugin.plugin_unique_identifier, + plugin.message, ) done = True diff --git a/api/services/recommend_app/remote/remote_retrieval.py b/api/services/recommend_app/remote/remote_retrieval.py index 80e1aefc01..85f3a02825 100644 --- 
a/api/services/recommend_app/remote/remote_retrieval.py +++ b/api/services/recommend_app/remote/remote_retrieval.py @@ -20,7 +20,7 @@ class RemoteRecommendAppRetrieval(RecommendAppRetrievalBase): try: result = self.fetch_recommended_app_detail_from_dify_official(app_id) except Exception as e: - logger.warning(f"fetch recommended app detail from dify official failed: {e}, switch to built-in.") + logger.warning("fetch recommended app detail from dify official failed: %s, switch to built-in.", e) result = BuildInRecommendAppRetrieval.fetch_recommended_app_detail_from_builtin(app_id) return result @@ -28,7 +28,7 @@ class RemoteRecommendAppRetrieval(RecommendAppRetrievalBase): try: result = self.fetch_recommended_apps_from_dify_official(language) except Exception as e: - logger.warning(f"fetch recommended apps from dify official failed: {e}, switch to built-in.") + logger.warning("fetch recommended apps from dify official failed: %s, switch to built-in.", e) result = BuildInRecommendAppRetrieval.fetch_recommended_apps_from_builtin(language) return result diff --git a/api/services/tag_service.py b/api/services/tag_service.py index 75fa52a75c..2e5e96214b 100644 --- a/api/services/tag_service.py +++ b/api/services/tag_service.py @@ -26,6 +26,9 @@ class TagService: @staticmethod def get_target_ids_by_tag_ids(tag_type: str, current_tenant_id: str, tag_ids: list) -> list: + # Check if tag_ids is not empty to avoid WHERE false condition + if not tag_ids or len(tag_ids) == 0: + return [] tags = ( db.session.query(Tag) .where(Tag.id.in_(tag_ids), Tag.tenant_id == current_tenant_id, Tag.type == tag_type) @@ -34,6 +37,9 @@ class TagService: if not tags: return [] tag_ids = [tag.id for tag in tags] + # Check if tag_ids is not empty to avoid WHERE false condition + if not tag_ids or len(tag_ids) == 0: + return [] tag_bindings = ( db.session.query(TagBinding.target_id) .where(TagBinding.tag_id.in_(tag_ids), TagBinding.tenant_id == current_tenant_id) diff --git a/api/services/tools/builtin_tools_manage_service.py b/api/services/tools/builtin_tools_manage_service.py index 65f05d2986..841eeb4333 100644 --- a/api/services/tools/builtin_tools_manage_service.py +++ b/api/services/tools/builtin_tools_manage_service.py @@ -337,7 +337,7 @@ class BuiltinToolManageService: max_number = max(numbers) return f"{default_pattern} {max_number + 1}" except Exception as e: - logger.warning(f"Error generating next provider name for {provider}: {str(e)}") + logger.warning("Error generating next provider name for %s: %s", provider, str(e)) # fallback return f"{credential_type.get_name()} 1" diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py index 2d192e6f7f..52fbc0979c 100644 --- a/api/services/tools/tools_transform_service.py +++ b/api/services/tools/tools_transform_service.py @@ -275,7 +275,7 @@ class ToolTransformService: username = user.name except Exception: - logger.exception(f"failed to get user name for api provider {db_provider.id}") + logger.exception("failed to get user name for api provider %s", db_provider.id) # add provider into providers credentials = db_provider.credentials result = ToolProviderApiEntity( diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index 204c1a4f5b..a2105f8a9d 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -22,19 +22,19 @@ def add_document_to_index_task(dataset_document_id: str): Usage: 
add_document_to_index_task.delay(dataset_document_id) """ - logging.info(click.style("Start add document to index: {}".format(dataset_document_id), fg="green")) + logging.info(click.style(f"Start add document to index: {dataset_document_id}", fg="green")) start_at = time.perf_counter() dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == dataset_document_id).first() if not dataset_document: - logging.info(click.style("Document not found: {}".format(dataset_document_id), fg="red")) + logging.info(click.style(f"Document not found: {dataset_document_id}", fg="red")) db.session.close() return if dataset_document.indexing_status != "completed": return - indexing_cache_key = "document_{}_indexing".format(dataset_document.id) + indexing_cache_key = f"document_{dataset_document.id}_indexing" try: dataset = dataset_document.dataset @@ -101,9 +101,7 @@ def add_document_to_index_task(dataset_document_id: str): end_at = time.perf_counter() logging.info( - click.style( - "Document added to index: {} latency: {}".format(dataset_document.id, end_at - start_at), fg="green" - ) + click.style(f"Document added to index: {dataset_document.id} latency: {end_at - start_at}", fg="green") ) except Exception as e: logging.exception("add document to index failed") diff --git a/api/tasks/annotation/add_annotation_to_index_task.py b/api/tasks/annotation/add_annotation_to_index_task.py index 2a93c21abd..e436f00133 100644 --- a/api/tasks/annotation/add_annotation_to_index_task.py +++ b/api/tasks/annotation/add_annotation_to_index_task.py @@ -25,7 +25,7 @@ def add_annotation_to_index_task( Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct) """ - logging.info(click.style("Start build index for annotation: {}".format(annotation_id), fg="green")) + logging.info(click.style(f"Start build index for annotation: {annotation_id}", fg="green")) start_at = time.perf_counter() try: @@ -50,7 +50,7 @@ def add_annotation_to_index_task( end_at = time.perf_counter() logging.info( click.style( - "Build index successful for annotation: {} latency: {}".format(annotation_id, end_at - start_at), + f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/annotation/batch_import_annotations_task.py b/api/tasks/annotation/batch_import_annotations_task.py index 6d48f5df89..47dc3ee90e 100644 --- a/api/tasks/annotation/batch_import_annotations_task.py +++ b/api/tasks/annotation/batch_import_annotations_task.py @@ -25,9 +25,9 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id: :param user_id: user_id """ - logging.info(click.style("Start batch import annotation: {}".format(job_id), fg="green")) + logging.info(click.style(f"Start batch import annotation: {job_id}", fg="green")) start_at = time.perf_counter() - indexing_cache_key = "app_annotation_batch_import_{}".format(str(job_id)) + indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}" # get app info app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first() @@ -85,7 +85,7 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id: except Exception as e: db.session.rollback() redis_client.setex(indexing_cache_key, 600, "error") - indexing_error_msg_key = "app_annotation_batch_import_error_msg_{}".format(str(job_id)) + indexing_error_msg_key = f"app_annotation_batch_import_error_msg_{str(job_id)}" redis_client.setex(indexing_error_msg_key, 600, 
str(e)) logging.exception("Build index for batch import annotations failed") finally: diff --git a/api/tasks/annotation/delete_annotation_index_task.py b/api/tasks/annotation/delete_annotation_index_task.py index a6657e813a..f016400e16 100644 --- a/api/tasks/annotation/delete_annotation_index_task.py +++ b/api/tasks/annotation/delete_annotation_index_task.py @@ -15,7 +15,7 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str """ Async delete annotation index task """ - logging.info(click.style("Start delete app annotation index: {}".format(app_id), fg="green")) + logging.info(click.style(f"Start delete app annotation index: {app_id}", fg="green")) start_at = time.perf_counter() try: dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type( @@ -35,9 +35,7 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str except Exception: logging.exception("Delete annotation index failed when annotation deleted.") end_at = time.perf_counter() - logging.info( - click.style("App annotations index deleted : {} latency: {}".format(app_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("Annotation deleted index failed") finally: diff --git a/api/tasks/annotation/disable_annotation_reply_task.py b/api/tasks/annotation/disable_annotation_reply_task.py index 5d5d1d3ad8..0076113ce8 100644 --- a/api/tasks/annotation/disable_annotation_reply_task.py +++ b/api/tasks/annotation/disable_annotation_reply_task.py @@ -16,25 +16,25 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str): """ Async enable annotation reply task """ - logging.info(click.style("Start delete app annotations index: {}".format(app_id), fg="green")) + logging.info(click.style(f"Start delete app annotations index: {app_id}", fg="green")) start_at = time.perf_counter() # get app info app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first() annotations_count = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id).count() if not app: - logging.info(click.style("App not found: {}".format(app_id), fg="red")) + logging.info(click.style(f"App not found: {app_id}", fg="red")) db.session.close() return app_annotation_setting = db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first() if not app_annotation_setting: - logging.info(click.style("App annotation setting not found: {}".format(app_id), fg="red")) + logging.info(click.style(f"App annotation setting not found: {app_id}", fg="red")) db.session.close() return - disable_app_annotation_key = "disable_app_annotation_{}".format(str(app_id)) - disable_app_annotation_job_key = "disable_app_annotation_job_{}".format(str(job_id)) + disable_app_annotation_key = f"disable_app_annotation_{str(app_id)}" + disable_app_annotation_job_key = f"disable_app_annotation_job_{str(job_id)}" try: dataset = Dataset( @@ -57,13 +57,11 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str): db.session.commit() end_at = time.perf_counter() - logging.info( - click.style("App annotations index deleted : {} latency: {}".format(app_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green")) except Exception as e: 
logging.exception("Annotation batch deleted index failed") redis_client.setex(disable_app_annotation_job_key, 600, "error") - disable_app_annotation_error_key = "disable_app_annotation_error_{}".format(str(job_id)) + disable_app_annotation_error_key = f"disable_app_annotation_error_{str(job_id)}" redis_client.setex(disable_app_annotation_error_key, 600, str(e)) finally: redis_client.delete(disable_app_annotation_key) diff --git a/api/tasks/annotation/enable_annotation_reply_task.py b/api/tasks/annotation/enable_annotation_reply_task.py index 12d10df442..44c65c0783 100644 --- a/api/tasks/annotation/enable_annotation_reply_task.py +++ b/api/tasks/annotation/enable_annotation_reply_task.py @@ -27,19 +27,19 @@ def enable_annotation_reply_task( """ Async enable annotation reply task """ - logging.info(click.style("Start add app annotation to index: {}".format(app_id), fg="green")) + logging.info(click.style(f"Start add app annotation to index: {app_id}", fg="green")) start_at = time.perf_counter() # get app info app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first() if not app: - logging.info(click.style("App not found: {}".format(app_id), fg="red")) + logging.info(click.style(f"App not found: {app_id}", fg="red")) db.session.close() return annotations = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id).all() - enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id)) - enable_app_annotation_job_key = "enable_app_annotation_job_{}".format(str(job_id)) + enable_app_annotation_key = f"enable_app_annotation_{str(app_id)}" + enable_app_annotation_job_key = f"enable_app_annotation_job_{str(job_id)}" try: documents = [] @@ -68,7 +68,7 @@ def enable_annotation_reply_task( try: old_vector.delete() except Exception as e: - logging.info(click.style("Delete annotation index error: {}".format(str(e)), fg="red")) + logging.info(click.style(f"Delete annotation index error: {str(e)}", fg="red")) annotation_setting.score_threshold = score_threshold annotation_setting.collection_binding_id = dataset_collection_binding.id annotation_setting.updated_user_id = user_id @@ -104,18 +104,16 @@ def enable_annotation_reply_task( try: vector.delete_by_metadata_field("app_id", app_id) except Exception as e: - logging.info(click.style("Delete annotation index error: {}".format(str(e)), fg="red")) + logging.info(click.style(f"Delete annotation index error: {str(e)}", fg="red")) vector.create(documents) db.session.commit() redis_client.setex(enable_app_annotation_job_key, 600, "completed") end_at = time.perf_counter() - logging.info( - click.style("App annotations added to index: {} latency: {}".format(app_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"App annotations added to index: {app_id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("Annotation batch created index failed") redis_client.setex(enable_app_annotation_job_key, 600, "error") - enable_app_annotation_error_key = "enable_app_annotation_error_{}".format(str(job_id)) + enable_app_annotation_error_key = f"enable_app_annotation_error_{str(job_id)}" redis_client.setex(enable_app_annotation_error_key, 600, str(e)) db.session.rollback() finally: diff --git a/api/tasks/annotation/update_annotation_to_index_task.py b/api/tasks/annotation/update_annotation_to_index_task.py index 596ba829ad..5f11d5aa00 100644 --- a/api/tasks/annotation/update_annotation_to_index_task.py +++ 
b/api/tasks/annotation/update_annotation_to_index_task.py @@ -25,7 +25,7 @@ def update_annotation_to_index_task( Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct) """ - logging.info(click.style("Start update index for annotation: {}".format(annotation_id), fg="green")) + logging.info(click.style(f"Start update index for annotation: {annotation_id}", fg="green")) start_at = time.perf_counter() try: @@ -51,7 +51,7 @@ def update_annotation_to_index_task( end_at = time.perf_counter() logging.info( click.style( - "Build index successful for annotation: {} latency: {}".format(annotation_id, end_at - start_at), + f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index 49bff72a96..e64a799146 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -49,7 +49,8 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form except Exception: logging.exception( "Delete image_files failed when storage deleted, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) db.session.delete(segment) @@ -61,14 +62,14 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form try: storage.delete(file.key) except Exception: - logging.exception("Delete file failed when document deleted, file_id: {}".format(file.id)) + logging.exception("Delete file failed when document deleted, file_id: %s", file.id) db.session.delete(file) db.session.commit() end_at = time.perf_counter() logging.info( click.style( - "Cleaned documents when documents deleted latency: {}".format(end_at - start_at), + f"Cleaned documents when documents deleted latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index 64df3175e1..714e30acc3 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -1,9 +1,12 @@ import datetime import logging +import tempfile import time import uuid +from pathlib import Path import click +import pandas as pd from celery import shared_task # type: ignore from sqlalchemy import func from sqlalchemy.orm import Session @@ -12,15 +15,17 @@ from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from extensions.ext_database import db from extensions.ext_redis import redis_client +from extensions.ext_storage import storage from libs import helper from models.dataset import Dataset, Document, DocumentSegment +from models.model import UploadFile from services.vector_service import VectorService @shared_task(queue="dataset") def batch_create_segment_to_index_task( job_id: str, - content: list, + upload_file_id: str, dataset_id: str, document_id: str, tenant_id: str, @@ -29,18 +34,18 @@ def batch_create_segment_to_index_task( """ Async batch create segment to index :param job_id: - :param content: + :param upload_file_id: :param dataset_id: :param document_id: :param tenant_id: :param user_id: - Usage: batch_create_segment_to_index_task.delay(job_id, content, dataset_id, document_id, tenant_id, user_id) + Usage: batch_create_segment_to_index_task.delay(job_id, upload_file_id, dataset_id, document_id, tenant_id, user_id) """ - logging.info(click.style("Start batch create 
segment jobId: {}".format(job_id), fg="green")) + logging.info(click.style(f"Start batch create segment jobId: {job_id}", fg="green")) start_at = time.perf_counter() - indexing_cache_key = "segment_batch_import_{}".format(job_id) + indexing_cache_key = f"segment_batch_import_{job_id}" try: with Session(db.engine) as session: @@ -58,6 +63,29 @@ def batch_create_segment_to_index_task( or dataset_document.indexing_status != "completed" ): raise ValueError("Document is not available.") + + upload_file = session.get(UploadFile, upload_file_id) + if not upload_file: + raise ValueError("UploadFile not found.") + + with tempfile.TemporaryDirectory() as temp_dir: + suffix = Path(upload_file.key).suffix + # FIXME mypy: Cannot determine type of 'tempfile._get_candidate_names' better not use it here + file_path = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}" # type: ignore + storage.download(upload_file.key, file_path) + + # Skip the first row + df = pd.read_csv(file_path) + content = [] + for index, row in df.iterrows(): + if dataset_document.doc_form == "qa_model": + data = {"content": row.iloc[0], "answer": row.iloc[1]} + else: + data = {"content": row.iloc[0]} + content.append(data) + if len(content) == 0: + raise ValueError("The CSV file is empty.") + document_segments = [] embedding_model = None if dataset.indexing_technique == "high_quality": @@ -115,7 +143,7 @@ def batch_create_segment_to_index_task( end_at = time.perf_counter() logging.info( click.style( - "Segment batch created job: {} latency: {}".format(job_id, end_at - start_at), + f"Segment batch created job: {job_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py index fad090141a..fe6d613b1c 100644 --- a/api/tasks/clean_dataset_task.py +++ b/api/tasks/clean_dataset_task.py @@ -42,7 +42,7 @@ def clean_dataset_task( Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct) """ - logging.info(click.style("Start clean dataset when dataset deleted: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"Start clean dataset when dataset deleted: {dataset_id}", fg="green")) start_at = time.perf_counter() try: @@ -57,9 +57,9 @@ def clean_dataset_task( segments = db.session.query(DocumentSegment).where(DocumentSegment.dataset_id == dataset_id).all() if documents is None or len(documents) == 0: - logging.info(click.style("No documents found for dataset: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green")) else: - logging.info(click.style("Cleaning documents for dataset: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"Cleaning documents for dataset: {dataset_id}", fg="green")) # Specify the index type before initializing the index processor if doc_form is None: raise ValueError("Index type must be specified.") @@ -80,7 +80,8 @@ def clean_dataset_task( except Exception: logging.exception( "Delete image_files failed when storage deleted, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) db.session.delete(segment) @@ -115,9 +116,7 @@ def clean_dataset_task( db.session.commit() end_at = time.perf_counter() logging.info( - click.style( - "Cleaned dataset when dataset deleted: {} latency: {}".format(dataset_id, end_at - start_at), fg="green" - ) + click.style(f"Cleaned dataset when dataset deleted: {dataset_id} latency: {end_at - start_at}", 
fg="green") ) except Exception: logging.exception("Cleaned dataset when dataset deleted failed") diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py index dd7a544ff5..d690106d17 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -24,7 +24,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i Usage: clean_document_task.delay(document_id, dataset_id) """ - logging.info(click.style("Start clean document when document deleted: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start clean document when document deleted: {document_id}", fg="green")) start_at = time.perf_counter() try: @@ -51,7 +51,8 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i except Exception: logging.exception( "Delete image_files failed when storage deleted, \ - image_upload_file_is: {}".format(upload_file_id) + image_upload_file_is: %s", + upload_file_id, ) db.session.delete(image_file) db.session.delete(segment) @@ -63,7 +64,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i try: storage.delete(file.key) except Exception: - logging.exception("Delete file failed when document deleted, file_id: {}".format(file_id)) + logging.exception("Delete file failed when document deleted, file_id: %s", file_id) db.session.delete(file) db.session.commit() @@ -77,7 +78,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i end_at = time.perf_counter() logging.info( click.style( - "Cleaned document when document deleted: {} latency: {}".format(document_id, end_at - start_at), + f"Cleaned document when document deleted: {document_id} latency: {end_at - start_at}", fg="green", ) ) diff --git a/api/tasks/clean_notion_document_task.py b/api/tasks/clean_notion_document_task.py index 0f72f87f15..bf1a92f038 100644 --- a/api/tasks/clean_notion_document_task.py +++ b/api/tasks/clean_notion_document_task.py @@ -19,7 +19,7 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str): Usage: clean_notion_document_task.delay(document_ids, dataset_id) """ logging.info( - click.style("Start clean document when import form notion document deleted: {}".format(dataset_id), fg="green") + click.style(f"Start clean document when import form notion document deleted: {dataset_id}", fg="green") ) start_at = time.perf_counter() diff --git a/api/tasks/create_segment_to_index_task.py b/api/tasks/create_segment_to_index_task.py index 5eda24674a..a8839ffc17 100644 --- a/api/tasks/create_segment_to_index_task.py +++ b/api/tasks/create_segment_to_index_task.py @@ -21,19 +21,19 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] :param keywords: Usage: create_segment_to_index_task.delay(segment_id) """ - logging.info(click.style("Start create segment to index: {}".format(segment_id), fg="green")) + logging.info(click.style(f"Start create segment to index: {segment_id}", fg="green")) start_at = time.perf_counter() segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first() if not segment: - logging.info(click.style("Segment not found: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment not found: {segment_id}", fg="red")) db.session.close() return if segment.status != "waiting": return - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" try: # update segment status to indexing @@ -57,17 +57,17 
@@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] dataset = segment.dataset if not dataset: - logging.info(click.style("Segment {} has no dataset, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan")) return dataset_document = segment.document if not dataset_document: - logging.info(click.style("Segment {} has no document, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan")) return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Segment {} document status is invalid, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan")) return index_type = dataset.doc_form @@ -84,9 +84,7 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] db.session.commit() end_at = time.perf_counter() - logging.info( - click.style("Segment created to index: {} latency: {}".format(segment.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Segment created to index: {segment.id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("create segment to index failed") segment.enabled = False diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py index 7478bf5a90..8c4c1876ad 100644 --- a/api/tasks/deal_dataset_vector_index_task.py +++ b/api/tasks/deal_dataset_vector_index_task.py @@ -20,7 +20,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): :param action: action Usage: deal_dataset_vector_index_task.delay(dataset_id, action) """ - logging.info(click.style("Start deal dataset vector index: {}".format(dataset_id), fg="green")) + logging.info(click.style(f"Start deal dataset vector index: {dataset_id}", fg="green")) start_at = time.perf_counter() try: @@ -162,9 +162,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): index_processor.clean(dataset, None, with_keywords=False, delete_child_chunks=False) end_at = time.perf_counter() - logging.info( - click.style("Deal dataset vector index: {} latency: {}".format(dataset_id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Deal dataset vector index: {dataset_id} latency: {end_at - start_at}", fg="green")) except Exception: logging.exception("Deal dataset vector index failed") finally: diff --git a/api/tasks/delete_account_task.py b/api/tasks/delete_account_task.py index d3b33e3052..ef50adf8d5 100644 --- a/api/tasks/delete_account_task.py +++ b/api/tasks/delete_account_task.py @@ -16,11 +16,11 @@ def delete_account_task(account_id): try: BillingService.delete_account(account_id) except Exception as e: - logger.exception(f"Failed to delete account {account_id} from billing service.") + logger.exception("Failed to delete account %s from billing service.", account_id) raise if not account: - logger.error(f"Account {account_id} not found.") + logger.error("Account %s not found.", account_id) return # send success email send_deletion_success_task.delay(account.email) diff --git a/api/tasks/delete_segment_from_index_task.py b/api/tasks/delete_segment_from_index_task.py index 66ff0f9a0a..da12355d23 100644 --- a/api/tasks/delete_segment_from_index_task.py +++ b/api/tasks/delete_segment_from_index_task.py @@ -38,7 +38,7 @@ def 
delete_segment_from_index_task(index_node_ids: list, dataset_id: str, docume index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) end_at = time.perf_counter() - logging.info(click.style("Segment deleted from index latency: {}".format(end_at - start_at), fg="green")) + logging.info(click.style(f"Segment deleted from index latency: {end_at - start_at}", fg="green")) except Exception: logging.exception("delete segment from index failed") finally: diff --git a/api/tasks/disable_segment_from_index_task.py b/api/tasks/disable_segment_from_index_task.py index e67ba5c76e..fa4ec15f8a 100644 --- a/api/tasks/disable_segment_from_index_task.py +++ b/api/tasks/disable_segment_from_index_task.py @@ -18,37 +18,37 @@ def disable_segment_from_index_task(segment_id: str): Usage: disable_segment_from_index_task.delay(segment_id) """ - logging.info(click.style("Start disable segment from index: {}".format(segment_id), fg="green")) + logging.info(click.style(f"Start disable segment from index: {segment_id}", fg="green")) start_at = time.perf_counter() segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first() if not segment: - logging.info(click.style("Segment not found: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment not found: {segment_id}", fg="red")) db.session.close() return if segment.status != "completed": - logging.info(click.style("Segment is not completed, disable is not allowed: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red")) db.session.close() return - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" try: dataset = segment.dataset if not dataset: - logging.info(click.style("Segment {} has no dataset, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan")) return dataset_document = segment.document if not dataset_document: - logging.info(click.style("Segment {} has no document, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan")) return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Segment {} document status is invalid, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan")) return index_type = dataset_document.doc_form @@ -56,9 +56,7 @@ def disable_segment_from_index_task(segment_id: str): index_processor.clean(dataset, [segment.index_node_id]) end_at = time.perf_counter() - logging.info( - click.style("Segment removed from index: {} latency: {}".format(segment.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Segment removed from index: {segment.id} latency: {end_at - start_at}", fg="green")) except Exception: logging.exception("remove segment from index failed") segment.enabled = True diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index 0c8b1aabc7..f033f05084 100644 --- a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -25,18 +25,18 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if not 
dataset: - logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan")) + logging.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan")) db.session.close() return dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == document_id).first() if not dataset_document: - logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} not found, pass.", fg="cyan")) db.session.close() return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan")) db.session.close() return # sync index processor @@ -61,7 +61,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) end_at = time.perf_counter() - logging.info(click.style("Segments removed from index latency: {}".format(end_at - start_at), fg="green")) + logging.info(click.style(f"Segments removed from index latency: {end_at - start_at}", fg="green")) except Exception: # update segment error msg db.session.query(DocumentSegment).where( @@ -78,6 +78,6 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen db.session.commit() finally: for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" redis_client.delete(indexing_cache_key) db.session.close() diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index dcc748ef18..56f330b964 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -22,13 +22,13 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): Usage: document_indexing_sync_task.delay(dataset_id, document_id) """ - logging.info(click.style("Start sync document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start sync document: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return @@ -108,10 +108,8 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): indexing_runner = IndexingRunner() indexing_runner.run([document]) end_at = time.perf_counter() - logging.info( - click.style("update document: {} latency: {}".format(document.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("document_indexing_sync_task failed, document_id: {}".format(document_id)) + logging.exception("document_indexing_sync_task failed, document_id: %s", document_id) diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py index ec6d10d93b..728db2e2dc 100644 --- a/api/tasks/document_indexing_task.py +++ b/api/tasks/document_indexing_task.py @@ -26,7 +26,7 @@ 
def document_indexing_task(dataset_id: str, document_ids: list): dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if not dataset: - logging.info(click.style("Dataset is not found: {}".format(dataset_id), fg="yellow")) + logging.info(click.style(f"Dataset is not found: {dataset_id}", fg="yellow")) db.session.close() return # check document limit @@ -60,7 +60,7 @@ def document_indexing_task(dataset_id: str, document_ids: list): return for document_id in document_ids: - logging.info(click.style("Start process document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start process document: {document_id}", fg="green")) document = ( db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() @@ -77,10 +77,10 @@ def document_indexing_task(dataset_id: str, document_ids: list): indexing_runner = IndexingRunner() indexing_runner.run(documents) end_at = time.perf_counter() - logging.info(click.style("Processed dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("Document indexing task failed, dataset_id: {}".format(dataset_id)) + logging.exception("Document indexing task failed, dataset_id: %s", dataset_id) finally: db.session.close() diff --git a/api/tasks/document_indexing_update_task.py b/api/tasks/document_indexing_update_task.py index e53c38ddc3..053c0c5f41 100644 --- a/api/tasks/document_indexing_update_task.py +++ b/api/tasks/document_indexing_update_task.py @@ -20,13 +20,13 @@ def document_indexing_update_task(dataset_id: str, document_id: str): Usage: document_indexing_update_task.delay(dataset_id, document_id) """ - logging.info(click.style("Start update document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start update document: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return @@ -69,10 +69,10 @@ def document_indexing_update_task(dataset_id: str, document_id: str): indexing_runner = IndexingRunner() indexing_runner.run([document]) end_at = time.perf_counter() - logging.info(click.style("update document: {} latency: {}".format(document.id, end_at - start_at), fg="green")) + logging.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("document_indexing_update_task failed, document_id: {}".format(document_id)) + logging.exception("document_indexing_update_task failed, document_id: %s", document_id) finally: db.session.close() diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index b3ddface59..faa7e2b8d0 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -27,7 +27,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if dataset is 
None: - logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red")) + logging.info(click.style(f"Dataset not found: {dataset_id}", fg="red")) db.session.close() return @@ -63,7 +63,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): db.session.close() for document_id in document_ids: - logging.info(click.style("Start process document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start process document: {document_id}", fg="green")) document = ( db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() @@ -95,10 +95,10 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): indexing_runner = IndexingRunner() indexing_runner.run(documents) end_at = time.perf_counter() - logging.info(click.style("Processed dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("duplicate_document_indexing_task failed, dataset_id: {}".format(dataset_id)) + logging.exception("duplicate_document_indexing_task failed, dataset_id: %s", dataset_id) finally: db.session.close() diff --git a/api/tasks/enable_segment_to_index_task.py b/api/tasks/enable_segment_to_index_task.py index 13822f078e..f801c9d9ee 100644 --- a/api/tasks/enable_segment_to_index_task.py +++ b/api/tasks/enable_segment_to_index_task.py @@ -21,21 +21,21 @@ def enable_segment_to_index_task(segment_id: str): Usage: enable_segment_to_index_task.delay(segment_id) """ - logging.info(click.style("Start enable segment to index: {}".format(segment_id), fg="green")) + logging.info(click.style(f"Start enable segment to index: {segment_id}", fg="green")) start_at = time.perf_counter() segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first() if not segment: - logging.info(click.style("Segment not found: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment not found: {segment_id}", fg="red")) db.session.close() return if segment.status != "completed": - logging.info(click.style("Segment is not completed, enable is not allowed: {}".format(segment_id), fg="red")) + logging.info(click.style(f"Segment is not completed, enable is not allowed: {segment_id}", fg="red")) db.session.close() return - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" try: document = Document( @@ -51,17 +51,17 @@ def enable_segment_to_index_task(segment_id: str): dataset = segment.dataset if not dataset: - logging.info(click.style("Segment {} has no dataset, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan")) return dataset_document = segment.document if not dataset_document: - logging.info(click.style("Segment {} has no document, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan")) return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Segment {} document status is invalid, pass.".format(segment.id), fg="cyan")) + logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan")) return index_processor = 
IndexProcessorFactory(dataset_document.doc_form).init_index_processor() @@ -85,9 +85,7 @@ def enable_segment_to_index_task(segment_id: str): index_processor.load(dataset, [document]) end_at = time.perf_counter() - logging.info( - click.style("Segment enabled to index: {} latency: {}".format(segment.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Segment enabled to index: {segment.id} latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("enable segment to index failed") segment.enabled = False diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py index e3fdf04d8c..777380631f 100644 --- a/api/tasks/enable_segments_to_index_task.py +++ b/api/tasks/enable_segments_to_index_task.py @@ -27,17 +27,17 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i start_at = time.perf_counter() dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() if not dataset: - logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan")) + logging.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan")) return dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == document_id).first() if not dataset_document: - logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} not found, pass.", fg="cyan")) db.session.close() return if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": - logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan")) + logging.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan")) db.session.close() return # sync index processor @@ -53,7 +53,7 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i .all() ) if not segments: - logging.info(click.style("Segments not found: {}".format(segment_ids), fg="cyan")) + logging.info(click.style(f"Segments not found: {segment_ids}", fg="cyan")) db.session.close() return @@ -91,7 +91,7 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i index_processor.load(dataset, documents) end_at = time.perf_counter() - logging.info(click.style("Segments enabled to index latency: {}".format(end_at - start_at), fg="green")) + logging.info(click.style(f"Segments enabled to index latency: {end_at - start_at}", fg="green")) except Exception as e: logging.exception("enable segments to index failed") # update segment error msg @@ -110,6 +110,6 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i db.session.commit() finally: for segment in segments: - indexing_cache_key = "segment_{}_indexing".format(segment.id) + indexing_cache_key = f"segment_{segment.id}_indexing" redis_client.delete(indexing_cache_key) db.session.close() diff --git a/api/tasks/mail_account_deletion_task.py b/api/tasks/mail_account_deletion_task.py index a6f8ce2f0b..38b5ca1800 100644 --- a/api/tasks/mail_account_deletion_task.py +++ b/api/tasks/mail_account_deletion_task.py @@ -37,12 +37,10 @@ def send_deletion_success_task(to: str, language: str = "en-US") -> None: end_at = time.perf_counter() logging.info( - click.style( - "Send account deletion success email to {}: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send account deletion success email to {to}: latency: {end_at - 
start_at}", fg="green") ) except Exception: - logging.exception("Send account deletion success email to {} failed".format(to)) + logging.exception("Send account deletion success email to %s failed", to) @shared_task(queue="mail") @@ -83,4 +81,4 @@ def send_account_deletion_verification_code(to: str, code: str, language: str = ) ) except Exception: - logging.exception("Send account deletion verification code email to {} failed".format(to)) + logging.exception("Send account deletion verification code email to %s failed", to) diff --git a/api/tasks/mail_change_mail_task.py b/api/tasks/mail_change_mail_task.py index ea1875901c..054053558d 100644 --- a/api/tasks/mail_change_mail_task.py +++ b/api/tasks/mail_change_mail_task.py @@ -5,7 +5,7 @@ import click from celery import shared_task # type: ignore from extensions.ext_mail import mail -from libs.email_i18n import get_email_i18n_service +from libs.email_i18n import EmailType, get_email_i18n_service @shared_task(queue="mail") @@ -22,7 +22,7 @@ def send_change_mail_task(language: str, to: str, code: str, phase: str) -> None if not mail.is_inited(): return - logging.info(click.style("Start change email mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start change email mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -34,9 +34,45 @@ def send_change_mail_task(language: str, to: str, code: str, phase: str) -> None phase=phase, ) + end_at = time.perf_counter() + logging.info(click.style(f"Send change email mail to {to} succeeded: latency: {end_at - start_at}", fg="green")) + except Exception: + logging.exception("Send change email mail to %s failed", to) + + +@shared_task(queue="mail") +def send_change_mail_completed_notification_task(language: str, to: str) -> None: + """ + Send change email completed notification with internationalization support. 
+ + Args: + language: Language code for email localization + to: Recipient email address + """ + if not mail.is_inited(): + return + + logging.info(click.style(f"Start change email completed notify mail to {to}", fg="green")) + start_at = time.perf_counter() + + try: + email_service = get_email_i18n_service() + email_service.send_email( + email_type=EmailType.CHANGE_EMAIL_COMPLETED, + language_code=language, + to=to, + template_context={ + "to": to, + "email": to, + }, + ) + end_at = time.perf_counter() logging.info( - click.style("Send change email mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green") + click.style( + f"Send change email completed mail to {to} succeeded: latency: {end_at - start_at}", + fg="green", + ) ) except Exception: - logging.exception("Send change email mail to {} failed".format(to)) + logging.exception("Send change email completed mail to %s failed", to) diff --git a/api/tasks/mail_email_code_login.py b/api/tasks/mail_email_code_login.py index 34220784e9..a82ab55384 100644 --- a/api/tasks/mail_email_code_login.py +++ b/api/tasks/mail_email_code_login.py @@ -21,7 +21,7 @@ def send_email_code_login_mail_task(language: str, to: str, code: str) -> None: if not mail.is_inited(): return - logging.info(click.style("Start email code login mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start email code login mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -38,9 +38,7 @@ def send_email_code_login_mail_task(language: str, to: str, code: str) -> None: end_at = time.perf_counter() logging.info( - click.style( - "Send email code login mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send email code login mail to {to} succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: - logging.exception("Send email code login mail to {} failed".format(to)) + logging.exception("Send email code login mail to %s failed", to) diff --git a/api/tasks/mail_enterprise_task.py b/api/tasks/mail_enterprise_task.py index a1c2908624..9c80da06e5 100644 --- a/api/tasks/mail_enterprise_task.py +++ b/api/tasks/mail_enterprise_task.py @@ -15,7 +15,7 @@ def send_enterprise_email_task(to: list[str], subject: str, body: str, substitut if not mail.is_inited(): return - logging.info(click.style("Start enterprise mail to {} with subject {}".format(to, subject), fg="green")) + logging.info(click.style(f"Start enterprise mail to {to} with subject {subject}", fg="green")) start_at = time.perf_counter() try: @@ -25,8 +25,6 @@ def send_enterprise_email_task(to: list[str], subject: str, body: str, substitut email_service.send_raw_email(to=to, subject=subject, html_content=html_content) end_at = time.perf_counter() - logging.info( - click.style("Send enterprise mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Send enterprise mail to {to} succeeded: latency: {end_at - start_at}", fg="green")) except Exception: - logging.exception("Send enterprise mail to {} failed".format(to)) + logging.exception("Send enterprise mail to %s failed", to) diff --git a/api/tasks/mail_invite_member_task.py b/api/tasks/mail_invite_member_task.py index 8c73de0111..ff351f08af 100644 --- a/api/tasks/mail_invite_member_task.py +++ b/api/tasks/mail_invite_member_task.py @@ -24,9 +24,7 @@ def send_invite_member_mail_task(language: str, to: str, token: str, inviter_nam if not mail.is_inited(): return - logging.info( - click.style("Start send invite member mail to 
{} in workspace {}".format(to, workspace_name), fg="green") - ) + logging.info(click.style(f"Start send invite member mail to {to} in workspace {workspace_name}", fg="green")) start_at = time.perf_counter() try: @@ -46,9 +44,7 @@ def send_invite_member_mail_task(language: str, to: str, token: str, inviter_nam end_at = time.perf_counter() logging.info( - click.style( - "Send invite member mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send invite member mail to {to} succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: - logging.exception("Send invite member mail to {} failed".format(to)) + logging.exception("Send invite member mail to %s failed", to) diff --git a/api/tasks/mail_owner_transfer_task.py b/api/tasks/mail_owner_transfer_task.py index e566a6bc56..3856bf294a 100644 --- a/api/tasks/mail_owner_transfer_task.py +++ b/api/tasks/mail_owner_transfer_task.py @@ -22,7 +22,7 @@ def send_owner_transfer_confirm_task(language: str, to: str, code: str, workspac if not mail.is_inited(): return - logging.info(click.style("Start owner transfer confirm mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start owner transfer confirm mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -41,12 +41,12 @@ def send_owner_transfer_confirm_task(language: str, to: str, code: str, workspac end_at = time.perf_counter() logging.info( click.style( - "Send owner transfer confirm mail to {} succeeded: latency: {}".format(to, end_at - start_at), + f"Send owner transfer confirm mail to {to} succeeded: latency: {end_at - start_at}", fg="green", ) ) except Exception: - logging.exception("owner transfer confirm email mail to {} failed".format(to)) + logging.exception("owner transfer confirm email mail to %s failed", to) @shared_task(queue="mail") @@ -63,7 +63,7 @@ def send_old_owner_transfer_notify_email_task(language: str, to: str, workspace: if not mail.is_inited(): return - logging.info(click.style("Start old owner transfer notify mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start old owner transfer notify mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -82,12 +82,12 @@ def send_old_owner_transfer_notify_email_task(language: str, to: str, workspace: end_at = time.perf_counter() logging.info( click.style( - "Send old owner transfer notify mail to {} succeeded: latency: {}".format(to, end_at - start_at), + f"Send old owner transfer notify mail to {to} succeeded: latency: {end_at - start_at}", fg="green", ) ) except Exception: - logging.exception("old owner transfer notify email mail to {} failed".format(to)) + logging.exception("old owner transfer notify email mail to %s failed", to) @shared_task(queue="mail") @@ -103,7 +103,7 @@ def send_new_owner_transfer_notify_email_task(language: str, to: str, workspace: if not mail.is_inited(): return - logging.info(click.style("Start new owner transfer notify mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start new owner transfer notify mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -121,9 +121,9 @@ def send_new_owner_transfer_notify_email_task(language: str, to: str, workspace: end_at = time.perf_counter() logging.info( click.style( - "Send new owner transfer notify mail to {} succeeded: latency: {}".format(to, end_at - start_at), + f"Send new owner transfer notify mail to {to} succeeded: latency: {end_at - start_at}", fg="green", ) ) except Exception: - logging.exception("new owner transfer notify 
email mail to {} failed".format(to)) + logging.exception("new owner transfer notify email mail to %s failed", to) diff --git a/api/tasks/mail_reset_password_task.py b/api/tasks/mail_reset_password_task.py index e2482f2101..b01af7827b 100644 --- a/api/tasks/mail_reset_password_task.py +++ b/api/tasks/mail_reset_password_task.py @@ -21,7 +21,7 @@ def send_reset_password_mail_task(language: str, to: str, code: str) -> None: if not mail.is_inited(): return - logging.info(click.style("Start password reset mail to {}".format(to), fg="green")) + logging.info(click.style(f"Start password reset mail to {to}", fg="green")) start_at = time.perf_counter() try: @@ -38,9 +38,7 @@ def send_reset_password_mail_task(language: str, to: str, code: str) -> None: end_at = time.perf_counter() logging.info( - click.style( - "Send password reset mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green" - ) + click.style(f"Send password reset mail to {to} succeeded: latency: {end_at - start_at}", fg="green") ) except Exception: - logging.exception("Send password reset mail to {} failed".format(to)) + logging.exception("Send password reset mail to %s failed", to) diff --git a/api/tasks/ops_trace_task.py b/api/tasks/ops_trace_task.py index 2e77332ffe..c7e0047664 100644 --- a/api/tasks/ops_trace_task.py +++ b/api/tasks/ops_trace_task.py @@ -43,13 +43,11 @@ def process_trace_tasks(file_info): if trace_type: trace_info = trace_type(**trace_info) trace_instance.trace(trace_info) - logging.info(f"Processing trace tasks success, app_id: {app_id}") + logging.info("Processing trace tasks success, app_id: %s", app_id) except Exception as e: - logging.info( - f"error:\n\n\n{e}\n\n\n\n", - ) + logging.info("error:\n\n\n%s\n\n\n\n", e) failed_key = f"{OPS_TRACE_FAILED_KEY}_{app_id}" redis_client.incr(failed_key) - logging.info(f"Processing trace tasks failed, app_id: {app_id}") + logging.info("Processing trace tasks failed, app_id: %s", app_id) finally: storage.delete(file_path) diff --git a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py index 6fcdad0525..9ea6aa6214 100644 --- a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py +++ b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py @@ -58,7 +58,7 @@ def process_tenant_plugin_autoupgrade_check_task( click.echo( click.style( - "Checking upgradable plugin for tenant: {}".format(tenant_id), + f"Checking upgradable plugin for tenant: {tenant_id}", fg="green", ) ) @@ -68,7 +68,7 @@ def process_tenant_plugin_autoupgrade_check_task( # get plugin_ids to check plugin_ids: list[tuple[str, str, str]] = [] # plugin_id, version, unique_identifier - click.echo(click.style("Upgrade mode: {}".format(upgrade_mode), fg="green")) + click.echo(click.style(f"Upgrade mode: {upgrade_mode}", fg="green")) if upgrade_mode == TenantPluginAutoUpgradeStrategy.UpgradeMode.PARTIAL and include_plugins: all_plugins = manager.list_plugins(tenant_id) @@ -142,7 +142,7 @@ def process_tenant_plugin_autoupgrade_check_task( marketplace.record_install_plugin_event(new_unique_identifier) click.echo( click.style( - "Upgrade plugin: {} -> {}".format(original_unique_identifier, new_unique_identifier), + f"Upgrade plugin: {original_unique_identifier} -> {new_unique_identifier}", fg="green", ) ) @@ -156,11 +156,11 @@ def process_tenant_plugin_autoupgrade_check_task( }, ) except Exception as e: - click.echo(click.style("Error when upgrading plugin: {}".format(e), fg="red")) + click.echo(click.style(f"Error when upgrading 
plugin: {e}", fg="red")) traceback.print_exc() break except Exception as e: - click.echo(click.style("Error when checking upgradable plugin: {}".format(e), fg="red")) + click.echo(click.style(f"Error when checking upgradable plugin: {e}", fg="red")) traceback.print_exc() return diff --git a/api/tasks/recover_document_indexing_task.py b/api/tasks/recover_document_indexing_task.py index dfb2389579..ff489340cd 100644 --- a/api/tasks/recover_document_indexing_task.py +++ b/api/tasks/recover_document_indexing_task.py @@ -18,13 +18,13 @@ def recover_document_indexing_task(dataset_id: str, document_id: str): Usage: recover_document_indexing_task.delay(dataset_id, document_id) """ - logging.info(click.style("Recover document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Recover document: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return @@ -37,12 +37,10 @@ def recover_document_indexing_task(dataset_id: str, document_id: str): elif document.indexing_status == "indexing": indexing_runner.run_in_indexing_status(document) end_at = time.perf_counter() - logging.info( - click.style("Processed document: {} latency: {}".format(document.id, end_at - start_at), fg="green") - ) + logging.info(click.style(f"Processed document: {document.id} latency: {end_at - start_at}", fg="green")) except DocumentIsPausedError as ex: logging.info(click.style(str(ex), fg="yellow")) except Exception: - logging.exception("recover_document_indexing_task failed, document_id: {}".format(document_id)) + logging.exception("recover_document_indexing_task failed, document_id: %s", document_id) finally: db.session.close() diff --git a/api/tasks/remove_app_and_related_data_task.py b/api/tasks/remove_app_and_related_data_task.py index 1619f8c546..b6f772dd60 100644 --- a/api/tasks/remove_app_and_related_data_task.py +++ b/api/tasks/remove_app_and_related_data_task.py @@ -201,7 +201,7 @@ def _delete_app_workflow_runs(tenant_id: str, app_id: str): batch_size=1000, ) - logging.info(f"Deleted {deleted_count} workflow runs for app {app_id}") + logging.info("Deleted %s workflow runs for app %s", deleted_count, app_id) def _delete_app_workflow_node_executions(tenant_id: str, app_id: str): @@ -215,7 +215,7 @@ def _delete_app_workflow_node_executions(tenant_id: str, app_id: str): batch_size=1000, ) - logging.info(f"Deleted {deleted_count} workflow node executions for app {app_id}") + logging.info("Deleted %s workflow node executions for app %s", deleted_count, app_id) def _delete_app_workflow_app_logs(tenant_id: str, app_id: str): @@ -342,6 +342,6 @@ def _delete_records(query_sql: str, params: dict, delete_func: Callable, name: s db.session.commit() logging.info(click.style(f"Deleted {name} {record_id}", fg="green")) except Exception: - logging.exception(f"Error occurred while deleting {name} {record_id}") + logging.exception("Error occurred while deleting %s %s", name, record_id) continue rs.close() diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py index 3f73cc7b40..524130a297 100644 --- a/api/tasks/remove_document_from_index_task.py +++ b/api/tasks/remove_document_from_index_task.py @@ -19,21 +19,21 @@ def 
remove_document_from_index_task(document_id: str): Usage: remove_document_from_index.delay(document_id) """ - logging.info(click.style("Start remove document segments from index: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start remove document segments from index: {document_id}", fg="green")) start_at = time.perf_counter() document = db.session.query(Document).where(Document.id == document_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document not found: {document_id}", fg="red")) db.session.close() return if document.indexing_status != "completed": - logging.info(click.style("Document is not completed, remove is not allowed: {}".format(document_id), fg="red")) + logging.info(click.style(f"Document is not completed, remove is not allowed: {document_id}", fg="red")) db.session.close() return - indexing_cache_key = "document_{}_indexing".format(document.id) + indexing_cache_key = f"document_{document.id}_indexing" try: dataset = document.dataset @@ -49,7 +49,7 @@ def remove_document_from_index_task(document_id: str): try: index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False) except Exception: - logging.exception(f"clean dataset {dataset.id} from index failed") + logging.exception("clean dataset %s from index failed", dataset.id) # update segment to disable db.session.query(DocumentSegment).where(DocumentSegment.document_id == document.id).update( { @@ -63,9 +63,7 @@ def remove_document_from_index_task(document_id: str): end_at = time.perf_counter() logging.info( - click.style( - "Document removed from index: {} latency: {}".format(document.id, end_at - start_at), fg="green" - ) + click.style(f"Document removed from index: {document.id} latency: {end_at - start_at}", fg="green") ) except Exception: logging.exception("remove document from index failed") diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py index 58f0156afb..26b41aff2e 100644 --- a/api/tasks/retry_document_indexing_task.py +++ b/api/tasks/retry_document_indexing_task.py @@ -24,79 +24,83 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]): """ documents: list[Document] = [] start_at = time.perf_counter() + try: + dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() + if not dataset: + logging.info(click.style(f"Dataset not found: {dataset_id}", fg="red")) + return + tenant_id = dataset.tenant_id + for document_id in document_ids: + retry_indexing_cache_key = f"document_{document_id}_is_retried" + # check document limit + features = FeatureService.get_features(tenant_id) + try: + if features.billing.enabled: + vector_space = features.vector_space + if 0 < vector_space.limit <= vector_space.size: + raise ValueError( + "Your total number of documents plus the number of uploads have over the limit of " + "your subscription." 
+ ) + except Exception as e: + document = ( + db.session.query(Document) + .where(Document.id == document_id, Document.dataset_id == dataset_id) + .first() + ) + if document: + document.indexing_status = "error" + document.error = str(e) + document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + db.session.add(document) + db.session.commit() + redis_client.delete(retry_indexing_cache_key) + return - dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first() - if not dataset: - logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red")) - db.session.close() - return - tenant_id = dataset.tenant_id - for document_id in document_ids: - retry_indexing_cache_key = "document_{}_is_retried".format(document_id) - # check document limit - features = FeatureService.get_features(tenant_id) - try: - if features.billing.enabled: - vector_space = features.vector_space - if 0 < vector_space.limit <= vector_space.size: - raise ValueError( - "Your total number of documents plus the number of uploads have over the limit of " - "your subscription." - ) - except Exception as e: + logging.info(click.style(f"Start retry document: {document_id}", fg="green")) document = ( db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() ) - if document: + if not document: + logging.info(click.style(f"Document not found: {document_id}", fg="yellow")) + return + try: + # clean old data + index_processor = IndexProcessorFactory(document.doc_form).init_index_processor() + + segments = db.session.query(DocumentSegment).where(DocumentSegment.document_id == document_id).all() + if segments: + index_node_ids = [segment.index_node_id for segment in segments] + # delete from vector index + index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) + + for segment in segments: + db.session.delete(segment) + db.session.commit() + + document.indexing_status = "parsing" + document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + db.session.add(document) + db.session.commit() + + indexing_runner = IndexingRunner() + indexing_runner.run([document]) + redis_client.delete(retry_indexing_cache_key) + except Exception as ex: document.indexing_status = "error" - document.error = str(e) + document.error = str(ex) document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) db.session.add(document) db.session.commit() - redis_client.delete(retry_indexing_cache_key) - db.session.close() - return - - logging.info(click.style("Start retry document: {}".format(document_id), fg="green")) - document = ( - db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() + logging.info(click.style(str(ex), fg="yellow")) + redis_client.delete(retry_indexing_cache_key) + logging.exception("retry_document_indexing_task failed, document_id: %s", document_id) + end_at = time.perf_counter() + logging.info(click.style(f"Retry dataset: {dataset_id} latency: {end_at - start_at}", fg="green")) + except Exception as e: + logging.exception( + "retry_document_indexing_task failed, dataset_id: %s, document_ids: %s", dataset_id, document_ids ) - if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="yellow")) - db.session.close() - return - try: - # clean old data - index_processor = IndexProcessorFactory(document.doc_form).init_index_processor() - - segments = 
db.session.query(DocumentSegment).where(DocumentSegment.document_id == document_id).all() - if segments: - index_node_ids = [segment.index_node_id for segment in segments] - # delete from vector index - index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True) - - for segment in segments: - db.session.delete(segment) - db.session.commit() - - document.indexing_status = "parsing" - document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - db.session.add(document) - db.session.commit() - - indexing_runner = IndexingRunner() - indexing_runner.run([document]) - redis_client.delete(retry_indexing_cache_key) - except Exception as ex: - document.indexing_status = "error" - document.error = str(ex) - document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - db.session.add(document) - db.session.commit() - logging.info(click.style(str(ex), fg="yellow")) - redis_client.delete(retry_indexing_cache_key) - logging.exception("retry_document_indexing_task failed, document_id: {}".format(document_id)) - finally: - db.session.close() - end_at = time.perf_counter() - logging.info(click.style("Retry dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) + raise e + finally: + db.session.close() diff --git a/api/tasks/sync_website_document_indexing_task.py b/api/tasks/sync_website_document_indexing_task.py index 539c2db80f..f112a97d2f 100644 --- a/api/tasks/sync_website_document_indexing_task.py +++ b/api/tasks/sync_website_document_indexing_task.py @@ -28,7 +28,7 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): if dataset is None: raise ValueError("Dataset not found") - sync_indexing_cache_key = "document_{}_is_sync".format(document_id) + sync_indexing_cache_key = f"document_{document_id}_is_sync" # check document limit features = FeatureService.get_features(dataset.tenant_id) try: @@ -52,10 +52,10 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): redis_client.delete(sync_indexing_cache_key) return - logging.info(click.style("Start sync website document: {}".format(document_id), fg="green")) + logging.info(click.style(f"Start sync website document: {document_id}", fg="green")) document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first() if not document: - logging.info(click.style("Document not found: {}".format(document_id), fg="yellow")) + logging.info(click.style(f"Document not found: {document_id}", fg="yellow")) return try: # clean old data @@ -87,6 +87,6 @@ def sync_website_document_indexing_task(dataset_id: str, document_id: str): db.session.commit() logging.info(click.style(str(ex), fg="yellow")) redis_client.delete(sync_indexing_cache_key) - logging.exception("sync_website_document_indexing_task failed, document_id: {}".format(document_id)) + logging.exception("sync_website_document_indexing_task failed, document_id: %s", document_id) end_at = time.perf_counter() - logging.info(click.style("Sync document: {} latency: {}".format(document_id, end_at - start_at), fg="green")) + logging.info(click.style(f"Sync document: {document_id} latency: {end_at - start_at}", fg="green")) diff --git a/api/templates/change_mail_completed_template_en-US.html b/api/templates/change_mail_completed_template_en-US.html new file mode 100644 index 0000000000..ecaf35868d --- /dev/null +++ b/api/templates/change_mail_completed_template_en-US.html @@ -0,0 +1,135 @@ + + + +
+ [New HTML email templates: "login email changed" notification, in English and Simplified Chinese, Dify-branded and {{application_title}} variants. Recoverable body copy:]
+
+ en-US (Dify):
+   Your login email has been changed
+   You can now log into Dify with your new email address:
+   If you did not make this change, email support@dify.ai.
+
+ zh-Hans (Dify):
+   您的登录邮箱已更改
+   您现在可以使用新的电子邮件地址登录 Dify:
+   如果您没有进行此更改,请发送电子邮件至 support@dify.ai。
+
+ en-US (no branding):
+   Your login email has been changed
+   You can now log into {{application_title}} with your new email address:
+   If you did not make this change, please ignore this email or contact support immediately.
+
+ zh-Hans (no branding):
+   您的登录邮箱已更改
+   您现在可以使用新的电子邮件地址登录 {{application_title}}:
+   如果您没有进行此更改,请忽略此电子邮件或立即联系支持。
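
The recurring change across these task modules is replacing eagerly formatted log messages (`.format()` or f-strings inside the logging call) with lazy `%s` placeholders, and letting `logging.exception` attach the traceback. A minimal sketch of that pattern, under illustrative names (`notify` and `doc-123` are placeholders, not code from this patch):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def notify(document_id: str) -> None:
    # Eager formatting builds the string even when the record is filtered out:
    #   logger.info(f"Processed document: {document_id}")
    # Lazy %-style placeholders defer interpolation to the logging framework:
    logger.info("Processed document: %s", document_id)
    try:
        raise RuntimeError("vector store unavailable")  # stand-in failure
    except Exception:
        # logging.exception logs at ERROR level and attaches the traceback;
        # the arguments stay lazy here as well.
        logger.exception("indexing failed, document_id: %s", document_id)

notify("doc-123")

Messages that are wrapped in click.style() still need to be rendered before the call, which is why those sites move to f-strings instead of %s placeholders.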