diff --git a/api/controllers/console/app/audio.py b/api/controllers/console/app/audio.py index 5f2def8d8e..665cf1aede 100644 --- a/api/controllers/console/app/audio.py +++ b/api/controllers/console/app/audio.py @@ -90,23 +90,11 @@ class ChatMessageTextApi(Resource): message_id = args.get("message_id", None) text = args.get("text", None) - if ( - app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value} - and app_model.workflow - and app_model.workflow.features_dict - ): - text_to_speech = app_model.workflow.features_dict.get("text_to_speech") - if text_to_speech is None: - raise ValueError("TTS is not enabled") - voice = args.get("voice") or text_to_speech.get("voice") - else: - try: - if app_model.app_model_config is None: - raise ValueError("AppModelConfig not found") - voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice") - except Exception: - voice = None - response = AudioService.transcript_tts(app_model=app_model, text=text, message_id=message_id, voice=voice) + voice = args.get("voice", None) + + response = AudioService.transcript_tts( + app_model=app_model, text=text, voice=voice, message_id=message_id, is_draft=True + ) return response except services.errors.app_model_config.AppModelConfigBrokenError: logging.exception("App model config broken.") diff --git a/api/controllers/console/explore/audio.py b/api/controllers/console/explore/audio.py index 54bc590677..d564a00a76 100644 --- a/api/controllers/console/explore/audio.py +++ b/api/controllers/console/explore/audio.py @@ -18,7 +18,6 @@ from controllers.console.app.error import ( from controllers.console.explore.wraps import InstalledAppResource from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError from core.model_runtime.errors.invoke import InvokeError -from models.model import AppMode from services.audio_service import AudioService from services.errors.audio import ( AudioTooLargeServiceError, @@ -79,19 +78,9 @@ class ChatTextApi(InstalledAppResource): message_id = args.get("message_id", None) text = args.get("text", None) - if ( - app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value} - and app_model.workflow - and app_model.workflow.features_dict - ): - text_to_speech = app_model.workflow.features_dict.get("text_to_speech") - voice = args.get("voice") or text_to_speech.get("voice") - else: - try: - voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice") - except Exception: - voice = None - response = AudioService.transcript_tts(app_model=app_model, message_id=message_id, voice=voice, text=text) + voice = args.get("voice", None) + + response = AudioService.transcript_tts(app_model=app_model, text=text, voice=voice, message_id=message_id) return response except services.errors.app_model_config.AppModelConfigBrokenError: logging.exception("App model config broken.") diff --git a/api/controllers/service_api/app/audio.py b/api/controllers/service_api/app/audio.py index 2682c2e7f1..848863cf1b 100644 --- a/api/controllers/service_api/app/audio.py +++ b/api/controllers/service_api/app/audio.py @@ -20,7 +20,7 @@ from controllers.service_api.app.error import ( from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError from core.model_runtime.errors.invoke import InvokeError -from models.model import App, AppMode, EndUser +from models.model import App, EndUser from services.audio_service import AudioService from services.errors.audio import ( AudioTooLargeServiceError, @@ -78,20 +78,9 @@ class TextApi(Resource): message_id = args.get("message_id", None) text = args.get("text", None) - if ( - app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value} - and app_model.workflow - and app_model.workflow.features_dict - ): - text_to_speech = app_model.workflow.features_dict.get("text_to_speech", {}) - voice = args.get("voice") or text_to_speech.get("voice") - else: - try: - voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice") - except Exception: - voice = None + voice = args.get("voice", None) response = AudioService.transcript_tts( - app_model=app_model, message_id=message_id, end_user=end_user.external_user_id, voice=voice, text=text + app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id ) return response diff --git a/api/controllers/web/audio.py b/api/controllers/web/audio.py index 06d9ad7564..2919ca9af4 100644 --- a/api/controllers/web/audio.py +++ b/api/controllers/web/audio.py @@ -19,7 +19,7 @@ from controllers.web.error import ( from controllers.web.wraps import WebApiResource from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError from core.model_runtime.errors.invoke import InvokeError -from models.model import App, AppMode +from models.model import App from services.audio_service import AudioService from services.errors.audio import ( AudioTooLargeServiceError, @@ -77,21 +77,9 @@ class TextApi(WebApiResource): message_id = args.get("message_id", None) text = args.get("text", None) - if ( - app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value} - and app_model.workflow - and app_model.workflow.features_dict - ): - text_to_speech = app_model.workflow.features_dict.get("text_to_speech", {}) - voice = args.get("voice") or text_to_speech.get("voice") - else: - try: - voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice") - except Exception: - voice = None - + voice = args.get("voice", None) response = AudioService.transcript_tts( - app_model=app_model, message_id=message_id, end_user=end_user.external_user_id, voice=voice, text=text + app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id ) return response diff --git a/api/services/audio_service.py b/api/services/audio_service.py index ee3297e31f..bc94c5f0e5 100644 --- a/api/services/audio_service.py +++ b/api/services/audio_service.py @@ -1,13 +1,16 @@ import io import logging import uuid +from collections.abc import Generator from typing import Optional +from flask import Response, stream_with_context from werkzeug.datastructures import FileStorage from constants import AUDIO_EXTENSIONS from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType +from extensions.ext_database import db from models.model import App, AppMode, AppModelConfig, Message, MessageStatus from services.errors.audio import ( AudioTooLargeServiceError, @@ -16,6 +19,7 @@ from services.errors.audio import ( ProviderNotSupportTextToSpeechServiceError, UnsupportedAudioTypeServiceError, ) +from services.workflow_service import WorkflowService FILE_SIZE = 30 FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024 @@ -74,35 +78,36 @@ class AudioService: voice: Optional[str] = None, end_user: Optional[str] = None, message_id: Optional[str] = None, + is_draft: bool = False, ): - from collections.abc import Generator - - from flask import Response, stream_with_context - from app import app - from extensions.ext_database import db - def invoke_tts(text_content: str, app_model: App, voice: Optional[str] = None): + def invoke_tts(text_content: str, app_model: App, voice: Optional[str] = None, is_draft: bool = False): with app.app_context(): - if app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}: - workflow = app_model.workflow - if workflow is None: - raise ValueError("TTS is not enabled") + if voice is None: + if app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}: + if is_draft: + workflow = WorkflowService().get_draft_workflow(app_model=app_model) + else: + workflow = app_model.workflow + if ( + workflow is None + or "text_to_speech" not in workflow.features_dict + or not workflow.features_dict["text_to_speech"].get("enabled") + ): + raise ValueError("TTS is not enabled") - features_dict = workflow.features_dict - if "text_to_speech" not in features_dict or not features_dict["text_to_speech"].get("enabled"): - raise ValueError("TTS is not enabled") + voice = workflow.features_dict["text_to_speech"].get("voice") + else: + if not is_draft: + if app_model.app_model_config is None: + raise ValueError("AppModelConfig not found") + text_to_speech_dict = app_model.app_model_config.text_to_speech_dict - voice = features_dict["text_to_speech"].get("voice") if voice is None else voice - else: - if app_model.app_model_config is None: - raise ValueError("AppModelConfig not found") - text_to_speech_dict = app_model.app_model_config.text_to_speech_dict + if not text_to_speech_dict.get("enabled"): + raise ValueError("TTS is not enabled") - if not text_to_speech_dict.get("enabled"): - raise ValueError("TTS is not enabled") - - voice = text_to_speech_dict.get("voice") if voice is None else voice + voice = text_to_speech_dict.get("voice") model_manager = ModelManager() model_instance = model_manager.get_default_model_instance( @@ -136,14 +141,14 @@ class AudioService: return None else: - response = invoke_tts(message.answer, app_model=app_model, voice=voice) + response = invoke_tts(text_content=message.answer, app_model=app_model, voice=voice, is_draft=is_draft) if isinstance(response, Generator): return Response(stream_with_context(response), content_type="audio/mpeg") return response else: if text is None: raise ValueError("Text is required") - response = invoke_tts(text, app_model, voice) + response = invoke_tts(text_content=text, app_model=app_model, voice=voice, is_draft=is_draft) if isinstance(response, Generator): return Response(stream_with_context(response), content_type="audio/mpeg") return response